Skip to content

Instantly share code, notes, and snippets.

@kellabyte
Created January 7, 2012 20:29
Show Gist options
  • Select an option

  • Save kellabyte/1575948 to your computer and use it in GitHub Desktop.

Select an option

Save kellabyte/1575948 to your computer and use it in GitHub Desktop.
Data mining from the Twitter public stream and storing the JSON into Cassandra
//
// Commands executed in Cassandra (via cassandra-cli) before running this program
//
/*
connect 127.0.0.1/9160;
create keyspace twitter;
use twitter;
# Create a column family to contain the rowcount of how many tweets I have.
CREATE COLUMN FAMILY rowcounts
WITH default_validation_class = CounterColumnType
AND replicate_on_write=true;
# Create column family to store the JSON values of the tweets.
CREATE COLUMN FAMILY tweets
WITH comparator = UTF8Type
AND key_validation_class=UTF8Type
AND column_metadata = [
{column_name: status, validation_class: UTF8Type}
];
*/
using System;
using System.Text;
using System.Net;
using System.IO;
using Apache.Cassandra;
using Cassandraemon;
namespace TweetStreamTest
{
    /// <summary>
    /// Console program that reads the Twitter public sample stream line by line
    /// and stores each line (one JSON document per line) in the "tweets" column
    /// family of the local Cassandra "twitter" keyspace, while bumping a counter
    /// column in the "rowcounts" column family.
    /// </summary>
    class Program
    {
        private const string StreamUri = "https://stream.twitter.com/1/statuses/sample.json";

        private const string CassandraHost = "127.0.0.1";
        private const int CassandraPort = 9160;
        private const string Keyspace = "twitter";

        private const string TweetsColumnFamily = "tweets";
        private const string RowCountsColumnFamily = "rowcounts";

        // Row key under which the tweet counter column is stored.
        private const string CounterRowKey = "QVEF6C7562";

        // Environment variables consulted when credentials are not given as arguments.
        private const string UsernameVariable = "TWITTER_USERNAME";
        private const string PasswordVariable = "TWITTER_PASSWORD";

        /// <summary>
        /// Entry point. Connects to the stream with HTTP credentials and stores
        /// every non-blank line until the stream ends.
        /// </summary>
        /// <param name="args">
        /// Optional: <c>args[0]</c> is the Twitter user name and <c>args[1]</c> the
        /// password. When fewer than two arguments are supplied, the
        /// TWITTER_USERNAME / TWITTER_PASSWORD environment variables are used.
        /// </param>
        static void Main(string[] args)
        {
            string username;
            string password;
            if (args.Length >= 2)
            {
                username = args[0];
                password = args[1];
            }
            else
            {
                username = Environment.GetEnvironmentVariable(UsernameVariable);
                password = Environment.GetEnvironmentVariable(PasswordVariable);
            }

            if (string.IsNullOrEmpty(username) || string.IsNullOrEmpty(password))
            {
                Console.Error.WriteLine(
                    "Usage: TweetStreamTest <username> <password> (or set "
                    + UsernameVariable + " and " + PasswordVariable + ")");
                return;
            }

            var request = WebRequest.Create(StreamUri);
            request.Credentials = new NetworkCredential(username, password);

            // Dispose the response, stream and reader so the HTTP connection is
            // released even if storing a tweet throws.
            using (var response = request.GetResponse())
            {
                if (response == null)
                {
                    return;
                }

                using (var stream = response.GetResponseStream())
                {
                    if (stream == null)
                    {
                        return;
                    }

                    using (var reader = new StreamReader(stream, Encoding.UTF8))
                    {
                        // Number of tweets stored so far; printed after each insert.
                        int count = 0;
                        string json;
                        while ((json = reader.ReadLine()) != null)
                        {
                            // Blank lines carry no tweet (the stream is understood to
                            // send them as keep-alives), so do not store them.
                            if (string.IsNullOrWhiteSpace(json))
                            {
                                continue;
                            }

                            StoreTweet(json);
                            count++;
                            Console.WriteLine(count);
                        }
                    }
                }
            }
        }

        /// <summary>
        /// Writes one tweet to Cassandra under a freshly generated GUID row key and
        /// queues an increment of the "tweets" counter column, then submits both.
        /// </summary>
        /// <param name="json">The raw JSON line read from the stream.</param>
        private static void StoreTweet(string json)
        {
            string key = Guid.NewGuid().ToString();

            using (var context = new CassandraContext(CassandraHost, CassandraPort, Keyspace))
            {
                // NOTE(review): the original author was unsure this counter usage is
                // correct (it was adapted from a sample) -- confirm against the
                // Cassandraemon documentation before relying on the count.
                var columnCounter = new CounterColumn().SetNameValue("tweets", 1);
                context.InsertOnSubmit(RowCountsColumnFamily, CounterRowKey.ToCassandraByte(), columnCounter);

                var columnStatus = new Column().SetNameValue("status", json);
                var entityStatus = new CassandraEntity<Column>()
                    .SetColumnFamily(TweetsColumnFamily)
                    .SetKey(key)
                    .SetData(columnStatus);
                context.Column.InsertOnSubmit(entityStatus);

                context.SubmitChanges();
            }
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment