import datetime
import os
import re
import urllib
from xml.sax.saxutils import quoteattr

import simplejson
import tweepy

# ----------------------------------------------------------------
# NOTE(review): this module targets Python 2 (urllib.quote / urllib.urlopen,
# xrange).  The copy it was restored from had lost its line structure and
# every "<...>"-shaped span of text (XML tags inside string literals, and
# everything between a "<" comparison and a later ">").  Reconstructed
# places are marked NOTE(review) below and must be checked against the
# original file.
#
# The following helpers are called in this module but their definitions are
# not present in the text that survived; restore them from the original:
#   report(), gephiOutputNodeDef(), gephiOutputNodeDefExtended(),
#   gephiOutputEdgeDefInnerFriends()
# ----------------------------------------------------------------

# Pattern used to pull URLs out of a user's free-text profile URL field.
_URL_RE = re.compile(
    r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
)

# Leading URL scheme, removed before a URL is used as a CSE pattern.
_SCHEME_RE = re.compile(r'^https?://')


# ----------------------------------------------------------------
# Credentials (placeholder values; replace with real keys)
# ----------------------------------------------------------------
def getBitlyKey():
    """Return the (login, api_key) pair used for bit.ly API calls."""
    bu = 'USER'
    bkey = 'KEY'
    return bu, bkey


def getTwapperkeeperKey():
    """Return the Twapperkeeper API key."""
    key = 'KEY'
    return key


def getTwitterKeys():
    """Return (consumer_key, consumer_secret, skey, ssecret) for Twitter."""
    consumer_key = 'C_KEY'
    consumer_secret = 'C_SECRET'
    skey = 'S_KEY'
    ssecret = 'S_SECRET'
    return consumer_key, consumer_secret, skey, ssecret


# ----------------------------------------------------------------
# HTTP helper
# ----------------------------------------------------------------
def _fetchJSON(url):
    """Fetch *url* and return the decoded JSON body, closing the connection."""
    response = urllib.urlopen(url)
    try:
        return simplejson.load(response)
    finally:
        response.close()


def expandBitlyURL(burl):
    """Expand a bit.ly short URL via the bit.ly v3 ``expand`` endpoint.

    Returns the ``data.expand`` member of the JSON response.  The original
    commented-out debug loop read ``long_url`` from each entry, so the value
    is presumably a list of dicts carrying that key.
    """
    bu, bkey = getBitlyKey()
    url = ('http://api.bit.ly/v3/expand?shortUrl=' + urllib.quote(burl)
           + '&login=' + bu + '&apiKey=' + bkey + '&format=json')
    # Debug trace kept from the original; note that it echoes the API key.
    print('url: ' + url)
    return _fetchJSON(url)['data']['expand']


# ----------------------------------------------------------------
# Google CSE definition files
# ----------------------------------------------------------------
def _cseUrlPattern(url):
    """Turn a matched URL into a Google CSE ``about`` pattern.

    The scheme is dropped.  A URL without a trailing slash and without a
    query string gets "/*" appended, a URL ending in "/" gets "*" appended,
    and anything else (i.e. it contains "?") is used as is.
    """
    pattern = _SCHEME_RE.sub('', url)
    if not pattern.endswith('/') and '?' not in pattern:
        return pattern + "/*"
    if pattern.endswith('/'):
        return pattern + "*"
    return pattern


def generateGoogleCSEDefinitionFile(cse, tag, tw, typ='flat'):
    """Write a Google CSE annotations file for the home pages of users in *tw*.

    cse -- label name written into each annotation
    tag -- report sub-directory name (passed to openTimestampedFile)
    tw  -- mapping of key -> tweepy User; non-User values are skipped
    typ -- 'flat' scores every URL 1.0; 'weighted' uses ``user.status``,
           which gephiOutputEdgeDefInner overwrites with a numeric weight

    NOTE(review): the XML tags in the write() strings were stripped from
    the surviving text (only the tab/newline skeleton remained, leaving
    cse, the pattern and the weight unused).  They are restored here
    following the Google CSE annotations format; confirm against the
    original.
    """
    report("Generating Google CSE definition file")
    fname = 'listhomepages_' + typ + '.xml'
    f = openTimestampedFile(tag, fname)
    try:
        f.write("<GoogleCustomizations>\n\t<Annotations>\n")
        for u in tw:
            un = tw[u]
            if not isinstance(un, tweepy.models.User) or not un.url:
                continue
            for lo in _URL_RE.findall(un.url):
                l = _cseUrlPattern(lo)
                report("- using " + lo + " as " + l)
                # '==' rather than 'is': string identity is not guaranteed.
                weight = un.status if typ == 'weighted' else 1.0
                # quoteattr() adds the quotes and escapes &, <, > and quotes.
                f.write("\t\t<Annotation about=" + quoteattr(l)
                        + " score=" + quoteattr(str(weight)) + ">\n")
                f.write("\t\t\t<Label name=" + quoteattr(cse)
                        + "/></Annotation>\n")
        f.write("\t</Annotations>\n</GoogleCustomizations>")
        report("...Google CSE definition file DONE")
    finally:
        f.close()


def googleCSEDefinitionFileWeighted(cse, tag, tw):
    """Write a CSE definition file scored by each user's weight."""
    generateGoogleCSEDefinitionFile(cse, tag, tw, 'weighted')


def googleCSEDefinitionFile(cse, tag, tw):
    """Write a CSE definition file with a flat score of 1.0."""
    generateGoogleCSEDefinitionFile(cse, tag, tw, 'flat')


# ----------------------------------------------------------------
# Twapperkeeper archive access
# ----------------------------------------------------------------
def getTwapperkeeperURL(tag, start, end, page=1):
    """Build the Twapperkeeper hashtag-archive URL for one result page.

    tag, start and end are concatenated into the query string unescaped,
    so callers must pass URL-safe strings.
    """
    key = getTwapperkeeperKey()
    url = ('http://api.twapperkeeper.com/2/notebook/tweets/?apikey=' + key
           + '&name=' + tag + '&type=hashtag&since=' + start
           + '&until=' + end + '&rpp=1000&page=' + str(page))
    return url


def getTwapperkeeperPage(tag, start, end, page=1):
    """Fetch one archive page and return the ``response`` member of the JSON."""
    report("Getting page " + str(page))
    url = getTwapperkeeperURL(tag, start, end, page)
    return _fetchJSON(url)['response']


def parseTwapperkeeperResponse(tweeters, response, c):
    """Tally tweets per user from one archive page.

    tweeters -- dict of screen name -> {'count': n}; updated in place
    response -- page dict holding a 'tweets_returned' list
    c        -- running tweet count so far

    Returns (tweeters, updated running count).
    """
    report("..parsing page")
    for tweet in response['tweets_returned']:
        c += 1
        user = tweet['from_user'].strip()
        if user in tweeters:
            tweeters[user]['count'] += 1
        else:
            report("New user: " + user)
            tweeters[user] = {'count': 1}
    return tweeters, c


def getTwapperkeeperArchiveTweeters(tweeters, tag, start, end):
    """Collect per-user tweet counts across all pages of a hashtag archive.

    NOTE(review): everything after "while count" was lost from the
    surviving text.  The paging loop and the return value below are a
    reconstruction (keep fetching pages until the reported total has been
    seen); confirm against the original.
    """
    report("Getting Twapperkeeper archive tweeters")
    count = 0
    num = 0
    r = getTwapperkeeperPage(tag, start, end)
    tweeters, count = parseTwapperkeeperResponse(tweeters, r, count)
    # Original open question: if there is only one page, does Twapperkeeper
    # report the tweets_found_count?  .get() tolerates its absence.
    found = r.get('tweets_found_count')
    if found:
        num = int(found)
    page = 2
    while count < num:
        r = getTwapperkeeperPage(tag, start, end, page)
        before = count
        tweeters, count = parseTwapperkeeperResponse(tweeters, r, count)
        if count == before:
            # An empty page would otherwise loop forever.
            break
        page += 1
    return tweeters


# ----------------------------------------------------------------
# Gephi (GDF) output
# ----------------------------------------------------------------
def gephiOutputEdgeDefInner(api, f, members, typ='friends'):
    """Write the GDF edge section for links that stay inside *members*.

    api     -- tweepy API object (friends_ids / followers_ids are called)
    f       -- open, writable file
    members -- mapping of key -> tweepy User
    typ     -- 'friends' uses friends_ids; anything else uses followers_ids

    Side effect kept from the original ("being naughty"): each member's
    ``status`` attribute is overwritten with a weight,
    0.7 + 0.3 * (in-community links / community size).

    NOTE(review): the ``def`` line and the start of the header string were
    lost from the surviving text.  The signature is reconstructed from the
    calls in gephiOutputFile (the default for typ is an addition), and
    "edgedef>" from the GDF format; confirm against the original.
    """
    f.write('edgedef> user VARCHAR,friend VARCHAR\n')
    membersid = [members[key].id for key in members]
    M = len(members)
    for key in members:
        friend = members[key]
        report("- finding " + typ + " of whatever (friends? followers?) was passed in of " + friend.screen_name)
        try:
            # '==' rather than 'is': string identity is not guaranteed.
            if typ == 'friends':
                foafs = api.friends_ids(friend.id)
            else:
                foafs = api.followers_ids(friend.id)
            cofriends = intersect(membersid, foafs)
            # being naughty - changing .status to record no. of foafs/no. in community
            members[key].status = 0.7 + 0.3 * len(cofriends) / M
            report("...weight: " + str(members[key].status))
            for foaf in cofriends:
                f.write(str(friend.id) + ',' + str(foaf) + '\n')
        except tweepy.error.TweepError as e:
            # Best effort: report the failure and carry on with the next member.
            report(e)


def gephiOutputFile(api, dirname, members, typ="innerfriends", fname='Net.gdf'):
    """Write a timestamped GDF file (nodes plus inner edges) for *members*.

    typ selects the edge source: 'innerfriends' or 'innerfollowers'; any
    other value produces a node-only file.  The file name is typ + fname.
    """
    report("Generating Gephi file using: " + typ)
    f = openTimestampedFile(dirname, typ + fname)
    try:
        gephiOutputNodeDef(f, members)
        if typ == 'innerfriends':
            gephiOutputEdgeDefInner(api, f, members, 'friends')
        elif typ == 'innerfollowers':
            gephiOutputEdgeDefInner(api, f, members, 'followers')
    finally:
        f.close()
    report("...Gephi " + typ + " file generated")


def extendUserList(tw, extensions):
    """Wrap each user in a dict that also carries extra class values.

    extensions -- iterable of "name value" strings; each is split on a
                  single space, the first part becoming the key and the
                  second the value.

    Returns {key: {'user': user, 'classVals': {name: value}}}.
    """
    ttx = {}
    for t in tw:
        class_vals = {}
        for x in extensions:
            y = x.split(' ')
            class_vals[y[0]] = y[1]
        ttx[t] = {'user': tw[t], 'classVals': class_vals}
    return ttx


def deExtendUserList(membersX):
    """Undo extendUserList: return {key: user} from the extended mapping."""
    members = {}
    for m in membersX:
        members[m] = membersX[m]['user']
    return members


def gephiOutputFileExtended(api, dirname, membersX, extensions, typ="innerfriends", fname='friendsNet.gdf'):
    """Write a timestamped GDF file with extended node attributes.

    The output name is 'X' + fname.  Edges are written only when typ is
    'innerfriends'.

    NOTE(review): gephiOutputNodeDefExtended and
    gephiOutputEdgeDefInnerFriends are not defined in the surviving text.
    """
    report("Generating extended Gephi file using: " + typ)
    fname = 'X' + fname
    f = openTimestampedFile(dirname, fname)
    try:
        gephiOutputNodeDefExtended(f, membersX, extensions)
        members = deExtendUserList(membersX)
        if typ == 'innerfriends':
            gephiOutputEdgeDefInnerFriends(api, f, members)
    finally:
        f.close()
    report("...extended Gephi " + typ + " file generated")


# ----------------------------------------------------------------
# File helpers
# ----------------------------------------------------------------
def openTimestampedFile(fpath, fname):
    """Open reports/<fpath>/<name>_<timestamp><ext> for writing.

    The directory is created if needed.  The timestamp goes between the
    base name and the extension; splitext keeps names with several dots
    intact and accepts names with no extension.  The caller closes the
    returned file.
    """
    fpath = 'reports/' + fpath
    now = datetime.datetime.now()
    ts = now.strftime("_%Y-%m-%d-%H-%M-%S")
    checkDir(fpath)
    root, ext = os.path.splitext(fname)
    return open(fpath + '/' + root + ts + ext, 'w')


def checkDir(dirpath):
    """Create *dirpath* (including parents) if it does not exist yet."""
    if not os.path.exists(dirpath):
        os.makedirs(dirpath)


# ----------------------------------------------------------------
# return common members of two lists
def intersect(a, b):
    """Return the items common to *a* and *b* as a list (order undefined)."""
    return list(set(a) & set(b))
# ----------------------------------------------------------------


# ----------------------------------------------------------------
# Yield successive n-sized chunks from l
def chunks(l, n):
    """Yield consecutive slices of *l*, each at most *n* items long."""
    for i in xrange(0, len(l), n):
        yield l[i:i + n]
# ----------------------------------------------------------------


# ----------------------------------------------------------------
# Twitter list membership
# ----------------------------------------------------------------
def listDetailsByID(tw, l, o, t):
    """Add the members of a Twitter list to *tw*, keyed by integer user id.

    l -- callable handed to tweepy.Cursor (presumably api.list_members)
    o -- list owner
    t -- list slug

    Returns *tw*, which is also updated in place.
    """
    report("Fetching list details for " + t + "...")
    for u in tweepy.Cursor(l, owner=o, slug=t).items():
        if isinstance(u, tweepy.models.User):
            tw[int(u.id)] = u
    return tw


def listDetailsByScreenName(tw, l, o, t):
    """Add the members of a Twitter list to *tw*, keyed by screen name.

    Takes the same arguments as listDetailsByID.
    """
    report("Fetching list details for " + t + "...")
    for u in tweepy.Cursor(l, owner=o, slug=t).items():
        if isinstance(u, tweepy.models.User):
            tw[u.screen_name] = u
    return tw
# ----------------------------------------------------------------