Skip to content

Instantly share code, notes, and snippets.

@92bondstreet
Last active December 28, 2015 04:59
Show Gist options
  • Select an option

  • Save 92bondstreet/7446511 to your computer and use it in GitHub Desktop.

Select an option

Save 92bondstreet/7446511 to your computer and use it in GitHub Desktop.
RealTweet is an algorithm for distinguishing human Twitter users from bots. The Python script is based on Professor Marco Camisani Calzolari's algorithm (www.camisanicalzolari.com/MCC-Twitter-ENG.pdf)
The MIT License (MIT)
Copyright (c) 2013 Yassine AZZOUT
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
from twitter import *
# Minimalist Twitter API for Python
# https://github.com/sixohsix/twitter/
import string
import re
class RealTweet:
    """Classify a Twitter account as "human", "bot" or "uncertain".

    Based on Professor Marco Camisani Calzolari's algorithm
    (www.camisanicalzolari.com/MCC-Twitter-ENG.pdf).

    Two counters are accumulated from the account profile and from the
    timeline returned by the API; `behaviour` compares them to pick a label.
    """

    # Twitter API client, created in __init__.
    __twitter = None

    # Credentials kept on the instance when authentication is used.
    __OAUTH_TOKEN = None
    __OAUTH_SECRET = None
    __CONSUMER_KEY = None
    __CONSUMER_SECRET = None

    # Running scores, reset at the start of every behaviour() call.
    __human = 0
    __bot = 0

    # Labels returned by behaviour().
    __HUMAN = "human"
    __BOT = "bot"
    __UNCERTAIN = "uncertain"

    def __init__(self, oauth_token=None, oauth_secret=None, consumer_key=None, consumer_secret=None):
        """Create the Twitter client.

        OAuth is used only when all four credentials are supplied; if any
        of them is None the client is created with NoAuth instead.
        """
        credentials = (oauth_token, oauth_secret, consumer_key, consumer_secret)
        if any(value is None for value in credentials):
            self.__twitter = Twitter(auth=NoAuth())
            print("No authentication")
        else:
            self.__OAUTH_TOKEN = oauth_token
            self.__OAUTH_SECRET = oauth_secret
            self.__CONSUMER_KEY = consumer_key
            self.__CONSUMER_SECRET = consumer_secret
            self.__twitter = Twitter(
                auth=OAuth(oauth_token, oauth_secret, consumer_key, consumer_secret)
            )
            print("Authentication with secret keys")

    def behaviour(self, username):
        """Return the behaviour type of a Twitter account.

        Returns "human" when the human score exceeds the bot score,
        "uncertain" when the human score is at most the bot score but
        greater than the bot score minus 4, and "bot" otherwise.
        """
        self.__human = 0
        self.__bot = 0

        # Profile-based points, then timeline-based points.
        self.__user(username)
        self.__timeline(username)

        if self.__human > self.__bot:
            return self.__HUMAN
        if self.__human > self.__bot - 4:
            return self.__UNCERTAIN
        # Unconditional final return so no path can yield an implicit None.
        return self.__BOT

    def __award(self, passed):
        """Give one point to the human score if `passed` is truthy, else to the bot score."""
        if passed:
            self.__human += 1
        else:
            self.__bot += 1

    def __user(self, username):
        """Score the account from its profile (users.show)."""
        user = self.__twitter.users.show(screen_name=username)

        # A verified account is treated as human outright.
        if user['verified']:
            self.__human = 1000
            self.__bot = 0
            return

        followers_count = user['followers_count']

        # One-point characteristics: human if satisfied, bot otherwise.
        checks = (
            # The profile contains a name
            user['name'],
            # The profile has altered the theme or background
            not user['default_profile'],
            # The profile contains a physical address
            user['location'],
            # The profile contains a biography
            user['description'],
            # The user has at least 30 followers
            followers_count >= 30,
            # The user has been added to a list by other users
            user['listed_count'] >= 1,
            # The user has written at least 50 posts
            user['statuses_count'] >= 50,
            # The user has been geolocalised
            user['geo_enabled'],
            # The profile contains a URL
            user['url'],
            # Intended as "the user has been included in another user's
            # favourites".
            # NOTE(review): in the Twitter user object this field appears to
            # count tweets the user has favourited themselves - confirm.
            user['favourites_count'] >= 1,
            # Twice the number of followers exceeds the number of accounts
            # the user is following.
            followers_count * 2 > user['friends_count'],
        )
        for passed in checks:
            self.__award(passed)

    def __timeline(self, username):
        """Score the account from its most recent tweets and retweets."""
        timeline = self.__twitter.statuses.user_timeline(screen_name=username)

        # An empty timeline counts as four bot points and nothing else.
        # Compared with == rather than `is`: identity on ints is not reliable.
        if len(timeline) == 0:
            self.__bot += 4
            return

        # ONE POINT (human if satisfied, bot otherwise)
        # The user uses punctuation in posts
        self.__award(self.__punctuation(timeline))
        # The user has used a hashtag in their posts at least once
        self.__award(self.__hashtag(timeline))
        # The user has written the userID of another user inside at least one post
        self.__award(self.__mention(timeline))
        # The user publishes content which does not just contain URLs
        self.__award(self.__urls(timeline))

        # One human point per recognised client (no bot points here)
        self.__source(timeline)

        # TWO POINTS
        # At least one post has been retweeted by other users
        if self.__retweeted(timeline):
            self.__human += 2

        # THREE POINTS
        # The user has logged in through different clients
        if self.__clients(timeline):
            self.__human += 3

    def __punctuation(self, timeline):
        """Return True if any tweet text contains a punctuation character."""
        # Membership test instead of a regex character class built from
        # string.punctuation: unescaped, the "\]" in that string is read as
        # an escaped "]" and the backslash itself is never matched.
        punctuation = frozenset(string.punctuation)
        return any(not punctuation.isdisjoint(tweet['text']) for tweet in timeline)

    def __hashtag(self, timeline):
        """Return True if any tweet text contains a '#'."""
        return any('#' in tweet['text'] for tweet in timeline)

    def __mention(self, timeline):
        """Return True if any tweet has at least one user mention entity."""
        return any(len(tweet['entities']['user_mentions']) >= 1 for tweet in timeline)

    def __urls(self, timeline):
        """Return True if at least one tweet has no URL entity."""
        return any(len(tweet['entities']['urls']) == 0 for tweet in timeline)

    def __source(self, timeline):
        """Add one human point for each recognised client seen in the timeline.

        Recognised clients are identified by a substring of the tweet's
        'source' field: iPhone, Android, the Twitter.com website,
        Foursquare and Instagram.
        """
        # Order matters: the first marker found in a source wins.
        # NOTE(review): matching is case-sensitive, so a label such as
        # "Twitter Web Client" only matches "web" if the lowercase form
        # appears elsewhere in the source string - confirm against real data.
        markers = ("iphone", "android", "web", "foursquare", "instagram")
        seen = set()
        for tweet in timeline:
            source = tweet['source']
            for marker in markers:
                if marker in source:
                    seen.add(marker)
                    break
        self.__human += len(seen)

    def __clients(self, timeline):
        """Return True if the timeline shows more than two distinct sources."""
        return len(set(tweet['source'] for tweet in timeline)) > 2

    def __retweeted(self, timeline):
        """Return True if at least one tweet has a positive retweet count."""
        return any(tweet['retweet_count'] > 0 for tweet in timeline)
# Example
# Create a token and keys on https://dev.twitter.com/apps/new, then export
# them as TWITTER_OAUTH_TOKEN, TWITTER_OAUTH_SECRET, TWITTER_CONSUMER_KEY and
# TWITTER_CONSUMER_SECRET. If any of them is missing, RealTweet receives None
# for it and falls back to its unauthenticated mode.
# Guarded so that importing this module never triggers network calls.
if __name__ == "__main__":
    import os

    fakedetector_auth = RealTweet(
        os.environ.get("TWITTER_OAUTH_TOKEN"),
        os.environ.get("TWITTER_OAUTH_SECRET"),
        os.environ.get("TWITTER_CONSUMER_KEY"),
        os.environ.get("TWITTER_CONSUMER_SECRET"),
    )
    for username in ("gvanrossum", "valeria311998"):
        print(username + ' is probably ' + fakedetector_auth.behaviour(username))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment