This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build a flat, lowercased vocabulary from the tokenized-tweet column
# (positional column 1) of the CA and NY data frames.
all_words = []
for tokens in ca_df.iloc[:, 1]:
    # each cell holds the token list produced by the tokenizer step
    all_words.extend(word.lower() for word in tokens)
for tokens in ny_df.iloc[:, 1]:
    all_words.extend(word.lower() for word in tokens)
| for i in range(len(tx_df.index)): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Predict a sentiment label for every unseen CA tweet. Iterating the
# Series directly (instead of positional range(len) indexing) also works
# when the frame's index is not a clean 0..n-1 RangeIndex.
results_new_ca = [
    classifier.classify(find_features(text))
    for text in ca_new_df['tweet_text']
]
| results_new_ny = [] | |
| for i in range(len(ny_new_df['tweet_text'])): | |
| feature = find_features(ny_new_df['tweet_text'][i]) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Hold out everything after the first 1200 feature sets for testing.
training_set = feature_sets[:1200]
testing_set = feature_sets[1200:]

classifier = nltk.NaiveBayesClassifier.train(training_set)

# Classify each held-out feature dict (element 0 of each (features, label)
# pair). `feats` avoids shadowing the module-level features() helper.
results = [classifier.classify(feats) for feats, _label in testing_set]

# running count of misclassified test examples (accumulated below)
error = 0
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random

dataset = []
# Pair each tweet's text with its sentiment label as (text, label) tuples.
# zip over the two columns replaces the per-row itertuples/getattr loop
# and produces the same tuple sequence.
ca_dataset = list(zip(ca_df['tweet_text'], ca_df['values']))
ny_dataset = list(zip(ny_df['tweet_text'], ny_df['values']))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
nltk.download('wordnet')
from nltk.stem import WordNetLemmatizer

lemmatizer = WordNetLemmatizer()


def lemmatize_tweets(tweet):
    """Return the WordNet lemma of *tweet* as produced by the shared lemmatizer."""
    return lemmatizer.lemmatize(tweet)
| for i in range(len(ca_df['tweet_text'])): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re


def cleanTxt(text):
    """Strip Twitter artifacts from a tweet string.

    Removes @mentions, '#' markers (keeping the hashtag word), 'RT '
    retweet prefixes, URLs, slashes/backslashes and leftover 'x97' text.
    """
    # NOTE(review): the original pattern was '@[A-Za-z0–9]+' with an
    # EN DASH (U+2013) — the very hidden-Unicode hazard flagged on this
    # file — which made '0–9' a huge character range matching most
    # punctuation and letters. Fixed to an ASCII hyphen (digits only).
    text = re.sub(r'@[A-Za-z0-9]+', '', text)    # drop @mentions
    text = re.sub(r'#', '', text)                # keep hashtag word, drop '#'
    text = re.sub(r'RT[\s]+', '', text)          # drop retweet marker
    text = re.sub(r'https?:\/\/\S+', '', text)   # drop URLs
    text = re.sub(r'/', '', text)
    text = text.replace('\\', '')
    # after backslash removal, an escaped control char like '\x97' leaves
    # the literal text 'x97' behind — presumably why it is scrubbed here
    text = re.sub('x97', '', text)
    return text
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
nltk.download('stopwords')
from nltk.corpus import stopwords

stop_words = set(stopwords.words('english'))


def remove_stopwords(tweet):
    """Drop English stopwords from a list of tweet tokens."""
    return [token for token in tweet if token not in stop_words]


ca_df['tweet_text'] = ca_df['tweet_text'].apply(remove_stopwords)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import nltk
from nltk import TweetTokenizer

tweet_tokenizer = TweetTokenizer()


def tokenize_tweets(tweet):
    """Split one raw tweet string into tokens with NLTK's tweet-aware tokenizer."""
    return tweet_tokenizer.tokenize(tweet)


ca_df['tweet_text'] = ca_df['tweet_text'].apply(tokenize_tweets)
ny_df['tweet_text'] = ny_df['tweet_text'].apply(tokenize_tweets)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd

# Load the hand-labelled tweet CSVs for California, New York and Texas,
# using each file's first column as the frame index.
ca_df = pd.read_csv('ca_labeled.csv', index_col=[0])
ny_df = pd.read_csv('ny_labeled.csv', index_col=[0])
tx_df = pd.read_csv('tx_labeled.csv', index_col=[0])

# Map a numeric sentiment label to its string tag.
# NOTE(review): only the -1 -> 'neg' branch is visible in this excerpt;
# presumably other label values (e.g. a positive tag) are handled in the
# truncated portion — confirm against the full file.
def features(value):
    if(value == -1):
        value = 'neg'
    return value
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
import csv
import tweepy
import re

def search_for_hashtags(consumer_key, consumer_secret, access_token, access_token_secret, hashtag_phrase):
    """Collect tweets matching *hashtag_phrase* via the Twitter API.

    Authenticates with OAuth 1.0a using the supplied consumer/access
    credentials. NOTE(review): the function body continues beyond this
    excerpt; only the authentication step is visible here.
    """
    #create authentication for accessing Twitter
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
NewerOlder