Created
May 23, 2020 02:46
-
-
Save roshan-adusumilli/2858a503cb3efe6ec98b99caf3f31b21 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import re | |
# Twitter @mentions. The range must use an ASCII hyphen ("0-9"); a
# typographic en dash would turn the class into just "0", "–" and "9".
_MENTION_RE = re.compile(r'@[A-Za-z0-9]+')
# Retweet marker followed by whitespace.
_RETWEET_RE = re.compile(r'RT\s+')
# http:// or https:// links up to the next whitespace character.
_URL_RE = re.compile(r'https?://\S+')

# Residue of escaped bytes (e.g. "\x99") once the backslash has been removed.
# Tokens are applied strictly in this order, one full pass per token, so a
# longer token only has an effect when it precedes its shorter prefix.
# NOTE(review): several tokens carry a trailing word ("x98People",
# "x9ccoronavirus", ...); that word is deleted together with the escape
# residue. Presumably these were copied from a specific dataset — confirm
# before reusing on other text.
_ARTIFACT_TOKENS = (
    'x97', 'xa3', 'x98People', 'x98', 'xa0', 'x94and', 'x96', 'x99s',
    'x91', 'x8a', 'xba', 'x9b', 'xbc', 'x92', 'xbf', 'x89https',
    'x94By', 'x8f', 'xb8', 'xa4', 'xa5', 'x87', 'xa5WOW', 'x94',
    'x95', 'xb3', 'x89', 'x9f', 'x9ccoronavirus', 'xbd', 'x9cnatural',
    'x9cmusic', 'xa9', 'x82', 'xc2', 'x83', 'x99all', 'xb1al',
    'x9cessential', 'x9cEveryone', 'x8e', 'x98Reopen', 'xe3', 'xa2',
    'x80', 'x99m', 'x90', 'x9e', 'x99', 'xb9', 'xbb', 'x99re',
    'xa3https', 'x98Burden', 'x9cprogressives', 'xb1d19', 'xaa', 'x86',
    'x8c', 'x93', 'x9d', 'x88', 'x99t', 'xef', 'xf0', 'xa7', 'xb7',
    'x9cThe', 'x9c', 'x99mon', 'x99d', 'xb5', 'xc3', 'xe2', 'x8d',
    'xb0', 'xa6it', 'x98CA', 'xc4', 'xa8', 'x9cthe', 'x99ve', 'x81',
    'x8fTake', 'x85', 'x99S', 'xb8OPEN', 'xa6', 'x8fUplifting',
    'xb8TYRANT', 'xac', 'x99ll', 'x9cfix', 'x98declared', 'xa1',
    'x98fix',
)


def cleanTxt(text: str) -> str:
    """Strip Twitter markup and escaped-byte residue from a tweet.

    The steps run in a fixed order:

    1. remove ``@mentions`` (letters and digits after the ``@``),
    2. remove ``#`` characters (the hashtag word itself is kept),
    3. remove the ``RT`` retweet marker and the whitespace after it,
    4. remove ``http://`` / ``https://`` links,
    5. remove every remaining ``/`` and backslash,
    6. remove each token in ``_ARTIFACT_TOKENS``, in order.

    Args:
        text: The raw tweet text.

    Returns:
        The cleaned text. Surrounding whitespace is not collapsed, so
        removed items may leave consecutive spaces behind.
    """
    text = _MENTION_RE.sub('', text)
    text = text.replace('#', '')
    text = _RETWEET_RE.sub('', text)
    text = _URL_RE.sub('', text)
    text = text.replace('/', '')
    # Dropping backslashes turns a literal "\x99" into "x99", which the
    # token pass below then removes.
    text = text.replace('\\', '')
    # Every token is plain alphanumeric text, so str.replace gives the same
    # result as a regex substitution without the pattern overhead.
    for token in _ARTIFACT_TOKENS:
        text = text.replace(token, '')
    return text
def _clean_tweet_column(frame):
    """Run cleanTxt over every tweet stored in ``frame['tweet_text']``.

    Each cell of the column is updated in place, element by element.
    Cells are iterated directly rather than looked up via
    ``frame['tweet_text'][i]``, so the frame does not need a default
    0..n-1 index.

    NOTE(review): assumes each cell is a mutable sequence of strings
    (e.g. a list of tweets) — confirm against the code that builds the
    data frames.
    """
    for tweets in frame['tweet_text']:
        for pos, tweet in enumerate(tweets):
            tweets[pos] = cleanTxt(tweet)


# Clean the tweets of each data frame in turn.
_clean_tweet_column(ca_df)
_clean_tweet_column(ny_df)
_clean_tweet_column(tx_df)

# Notebook preview of the cleaned ca_df frame.
ca_df.head()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment