Created
January 7, 2012 20:29
-
-
Save kellabyte/1575948 to your computer and use it in GitHub Desktop.
Data mining from the Twitter public stream and storing the JSON into Cassandra
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // | |
| // Commands executed in Cassandra | |
| // | |
| /* | |
| connect 127.0.0.1/9160; | |
| create keyspace twitter; | |
| use twitter; | |
| # Create a column family holding a counter of how many tweets have been stored. | |
| CREATE COLUMN FAMILY rowcounts | |
| WITH default_validation_class = CounterColumnType | |
| AND replicate_on_write=true; | |
| # Create column family to store the JSON values of the tweets. | |
| CREATE COLUMN FAMILY tweets | |
| WITH comparator = UTF8Type | |
| AND key_validation_class=UTF8Type | |
| AND column_metadata = [ | |
| {column_name: status, validation_class: UTF8Type} | |
| ]; | |
| */ | |
| using System; | |
| using System.Text; | |
| using System.Net; | |
| using System.IO; | |
| using Apache.Cassandra; | |
| using Cassandraemon; | |
| namespace TweetStreamTest | |
| { | |
| /// <summary> | |
| /// Console program that reads the Twitter sample stream line by line and stores | |
| /// each non-empty line in the "tweets" column family of the "twitter" keyspace, | |
| /// submitting a counter column to "rowcounts" alongside every stored line. | |
| /// </summary> | |
| class Program | |
| { | |
|     /// <summary> | |
|     /// Connects to the stream with HTTP credentials and writes every received line to Cassandra. | |
|     /// </summary> | |
|     /// <param name="args">args[0] is the Twitter username, args[1] is the password.</param> | |
|     static void Main(string[] args) | |
|     { | |
|         // Credentials come from the command line so that no secret lives in the source. | |
|         if (args.Length < 2) | |
|         { | |
|             Console.Error.WriteLine("Usage: TweetStreamTest <username> <password>"); | |
|             return; | |
|         } | |
|         string username = args[0]; | |
|         string password = args[1]; | |
|  | |
|         string uri = "https://stream.twitter.com/1/statuses/sample.json"; | |
|         var request = WebRequest.Create(uri); | |
|         request.Credentials = new NetworkCredential(username, password); | |
|  | |
|         // Dispose the response, stream and reader so the connection is released on exit or failure. | |
|         using (var response = request.GetResponse()) | |
|         { | |
|             if (response == null) | |
|             { | |
|                 return; | |
|             } | |
|  | |
|             using (var stream = response.GetResponseStream()) | |
|             { | |
|                 if (stream == null) | |
|                 { | |
|                     return; | |
|                 } | |
|  | |
|                 using (var reader = new StreamReader(stream, Encoding.UTF8)) | |
|                 { | |
|                     // Number of lines stored so far; printed after each successful submit. | |
|                     int count = 0; | |
|                     string json; | |
|  | |
|                     // ReadLine returns null once the stream ends. | |
|                     while ((json = reader.ReadLine()) != null) | |
|                     { | |
|                         // Skip blank lines so they are neither stored as a status nor counted. | |
|                         // NOTE(review): presumably these are the stream's keep-alive newlines - confirm. | |
|                         if (string.IsNullOrWhiteSpace(json)) | |
|                         { | |
|                             continue; | |
|                         } | |
|  | |
|                         string key = Guid.NewGuid().ToString(); | |
|                         using (var context = new CassandraContext("127.0.0.1", 9160, "twitter")) | |
|                         { | |
|                             // NOTE(review): the original author was unsure this counter usage is | |
|                             // correct; the fixed row key "QVEF6C7562" is kept as-is - verify against | |
|                             // the Cassandraemon CounterColumn documentation. | |
|                             var columnCounter = new CounterColumn().SetNameValue("tweets", 1); | |
|                             context.InsertOnSubmit("rowcounts", "QVEF6C7562".ToCassandraByte(), columnCounter); | |
|  | |
|                             // One row per line, keyed by a fresh GUID, with the raw JSON in "status". | |
|                             var columnStatus = new Column().SetNameValue("status", json); | |
|                             var entityStatus = new CassandraEntity<Column>() | |
|                                 .SetColumnFamily("tweets") | |
|                                 .SetKey(key) | |
|                                 .SetData(columnStatus); | |
|                             context.Column.InsertOnSubmit(entityStatus); | |
|  | |
|                             context.SubmitChanges(); | |
|                             count++; | |
|                             Console.WriteLine(count); | |
|                         } | |
|                     } | |
|                 } | |
|             } | |
|         } | |
|     } | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment