Created
February 20, 2012 11:11
-
-
Save marians/1868815 to your computer and use it in GitHub Desktop.
Revisions
-
marians revised this gist
Feb 20, 2012 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -6,7 +6,7 @@ words = ['keyword1', 'keyword2'] TWITTER_USER = "YourUserName" TWITTER_PASS = "YourPassword" DB_HOST = 'localhost' DB_USER = 'root' -
marians created this gist
Feb 20, 2012 .There are no files selected for viewing
-- (GitHub viewer notice) This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below.
-- To review, open the file in an editor that reveals hidden Unicode characters.
-- Learn more about bidirectional Unicode characters
-- Original file line number | Diff line number | Diff line change
-- @@ -0,0 +1,16 @@

-- Storage table for captured tweets: one row per tweet, keyed by tweet id.
-- The columns match, in order, the column list of the INSERT statement in
-- the accompanying Python loader script.
CREATE TABLE `tweets` (
  -- Tweet id, stored as a string; primary key of the table.
  `id` varchar(24) NOT NULL DEFAULT '',
  -- Creation time; the loader writes it as 'YYYY-MM-DD HH:MM:SS'.
  `created_at` datetime NOT NULL,
  -- Author fields, copied by the loader from the tweet's `user` object.
  `user_id` bigint(20) unsigned NOT NULL,
  `user_name` varchar(128) NOT NULL DEFAULT '',
  `user_followers` int(11) unsigned NOT NULL,
  `user_friends` int(10) unsigned DEFAULT NULL,
  `user_listed` int(10) unsigned DEFAULT NULL,
  `user_statuses` int(10) unsigned DEFAULT NULL,
  -- NULL when the loader finds no (or an empty) location on the user.
  `user_location` varchar(100) DEFAULT NULL,
  `user_utc_offset` int(11) DEFAULT NULL,
  -- 0/1 flags computed by the loader.
  `is_retweet` tinyint(3) unsigned NOT NULL,
  `is_reply` tinyint(3) unsigned NOT NULL,
  -- Tweet body.
  `text` varchar(200) NOT NULL DEFAULT '',
  PRIMARY KEY (`id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;

-- (GitHub viewer notice) This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below.
-- To review, open the file in an editor that reveals hidden Unicode characters.
# (GitHub viewer notice, continued) Learn more about bidirectional Unicode characters
# Original file line number | Diff line number | Diff line change
# @@ -0,0 +1,61 @@
"""Track keywords on the Twitter streaming API and store matches in MySQL.

Connects to the database configured by the DB_* constants, opens a
``tweetstream.FilterStream`` for the keywords in ``words`` and inserts
every received tweet into the ``tweets`` table.

The syntax used here (``except ... as``, single-argument ``print(...)``)
parses on both Python 2.6+ and Python 3 and prints the same text as the
original Python 2 statements.
"""

import tweetstream
import MySQLdb
import sys
import datetime

# Keywords passed to the stream filter's ``track`` argument.
words = ['keyword1', 'keyword2']

TWITTER_USER = "YourUserName"
# The driver below reads TWITTER_PASS; the constant used to be spelled
# TWITTER_PASSWORD, which made the script fail with a NameError.
TWITTER_PASS = "YourPassword"

DB_HOST = 'localhost'
DB_USER = 'root'
DB_PASS = ''
DB_NAME = 'tweets'


def twitter_to_iso_time(dt):
    """Convert a Twitter ``created_at`` string to 'YYYY-MM-DD HH:MM:SS'.

    The input is sliced at fixed offsets: month abbreviation at [4:7],
    day at [8:10], time at [11:19] and year at [26:30], which fits
    strings shaped like 'Mon Feb 20 11:11:00 +0000 2012'. The characters
    between the time and the year (the UTC offset field) are ignored, so
    the result carries the wall-clock time exactly as given.

    Raises:
        ValueError: if the sliced pieces do not parse as a date.
    """
    datestring = " ".join((dt[4:7], dt[8:10], dt[11:19], dt[26:30]))
    date = datetime.datetime.strptime(datestring, '%b %d %H:%M:%S %Y')
    return date.strftime('%Y-%m-%d %H:%M:%S')


def save_tweet(tweet):
    """Insert one decoded tweet (a dict) into the ``tweets`` table.

    Uses the module-level ``cursor`` created by the ``__main__`` driver.
    The statement is ``INSERT ... IGNORE``, and all values are passed as
    query parameters rather than formatted into the SQL text.
    """
    user = tweet['user']

    # NOTE(review): 'retweeted' may describe whether the authenticating
    # user retweeted this tweet rather than whether the tweet itself is a
    # retweet -- confirm against the Twitter API docs before relying on it.
    is_retweet = 1 if tweet['retweeted'] else 0
    is_reply = 1 if tweet['in_reply_to_user_id_str'] is not None else 0

    # Missing or empty locations are stored as NULL.
    location = None
    if user['location']:
        location = user['location'].encode('utf-8')

    sql = """
        INSERT LOW_PRIORITY IGNORE INTO tweets
            (id, created_at, user_id, user_name, user_followers,
             user_friends, user_listed, user_statuses, user_location,
             user_utc_offset, is_retweet, is_reply, text)
        VALUES (
            %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    """
    cursor.execute(sql, [
        tweet['id'],
        twitter_to_iso_time(tweet['created_at']),
        user['id'],
        user['screen_name'].encode('utf-8'),
        user['followers_count'],
        user['friends_count'],
        user['listed_count'],
        user['statuses_count'],
        location,
        user['utc_offset'],
        is_retweet,
        is_reply,
        tweet['text'].encode('utf-8'),
    ])


if __name__ == '__main__':
    try:
        conn = MySQLdb.connect(host=DB_HOST, user=DB_USER,
                               passwd=DB_PASS, db=DB_NAME)
        cursor = conn.cursor(MySQLdb.cursors.DictCursor)
        cursor.execute('SET CHARACTER SET utf8')
    except MySQLdb.Error as e:
        print("Error %d: %s" % (e.args[0], e.args[1]))
        sys.exit(1)

    try:
        with tweetstream.FilterStream(TWITTER_USER, TWITTER_PASS,
                                      track=words) as stream:
            for tweet in stream:
                print("From: %s (%d)\n%s" % (
                    tweet["user"]["screen_name"], stream.count,
                    tweet['text']))
                print("")
                save_tweet(tweet)
    except tweetstream.ConnectionError as e:
        print("Disconnected from twitter. Reason: %s" % (e.reason,))
    finally:
        # Release the database handles however the stream loop ends
        # (disconnect, Ctrl-C, or an unexpected error).
        cursor.close()
        conn.close()