Skip to content

Instantly share code, notes, and snippets.

@hinchley
Created March 17, 2017 23:21
Show Gist options
  • Select an option

  • Save hinchley/05c2872620b5618389d130107e084d6b to your computer and use it in GitHub Desktop.

Select an option

Save hinchley/05c2872620b5618389d130107e084d6b to your computer and use it in GitHub Desktop.

Revisions

  1. Peter Hinchley created this gist Mar 17, 2017.
    158 changes: 158 additions & 0 deletions tweets.php
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,158 @@
    <?php

    # define api keys.
    define('CONSUMER_KEY', 'XXX');
    define('CONSUMER_SECRET', 'XXX');
    define('OAUTH_TOKEN', 'XXX');
    define('OAUTH_SECRET', 'XXX');
    define('USER_TIMELINE', 'https://api.twitter.com/1.1/statuses/user_timeline.json');

    # tweets to retrieve per request.
    # max value is 200.
    define('PAGESIZE', 200);

    # establish db connection.
    $db = new PDO('sqlite:./db.sqlite3');
    $db->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);

    # create the db table if it doesn't exist.
    $sql = <<<sql
    CREATE TABLE IF NOT EXISTS `tweets` (
    `id` INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
    `guid` TEXT NOT NULL,
    `json` TEXT NOT NULL,
    `text` TEXT NOT NULL,
    `isreply` BOOLEAN NOT NULL,
    `replyguid` TEXT,
    `replyuser` TEXT,
    `isretweet` BOOLEAN NOT NULL,
    `retweetguid` TEXT,
    `retweetuser` TEXT,
    `timestamp` TEXT NOT NULL
    );
    sql;

    $db->exec($sql);

    # recursively sort an array.
    function rksort(&$a) {
    foreach ($a as &$v) if (is_array($v)) rksort($v);
    return ksort($a);
    }

    # get the highest tweet id.
    function since($db) {
    $q = 'SELECT guid FROM tweets ORDER BY guid + 0 DESC LIMIT 1';
    return $db->query($q)->fetchcolumn() ?: null;
    }

    # prepare query string.
    function query($max, $since = null) {
    $query = [
    'count' => PAGESIZE,
    'tweet_mode' => 'extended',
    'trim_user' => 'true',
    'include_rts' => 'true',
    'exclude_replies' => 'false'
    ];

    if ($max !== null) { $query['max_id'] = $max; }
    if ($since !== null) { $query['since_id'] = $since; }

    return array_map('rawurlencode', $query);
    }

    # prepare oauth request.
    function auth($query) {
    $oauth = [
    'oauth_consumer_key' => CONSUMER_KEY,
    'oauth_nonce' => md5(mt_rand()),
    'oauth_signature_method' => 'HMAC-SHA1',
    'oauth_timestamp' => time(),
    'oauth_token' => OAUTH_TOKEN,
    'oauth_version' => '1.0'];

    $oauth = array_map('rawurlencode', $oauth);
    $array = array_merge($oauth, $query);

    asort($array);
    ksort($array);

    $q = urldecode(http_build_query($array, '', '&'));

    $req = 'GET&' . rawurlencode(USER_TIMELINE) . '&' . rawurlencode($q);
    $key = rawurlencode(CONSUMER_SECRET) . '&' . rawurlencode(OAUTH_SECRET);
    $sig = rawurlencode(base64_encode(hash_hmac('sha1', $req, $key, true)));

    $oauth['oauth_signature'] = $sig;
    ksort($oauth);

    $oauth = array_map(function($s) {
    return '"' . $s . '"';
    }, $oauth);

    return 'OAuth ' . urldecode(http_build_query($oauth, '', ', '));
    }

    # request block of tweets.
    function tweets($max, $since) {
    $query = query($max, $since);
    $token = auth($query);

    $url = USER_TIMELINE . '?' . http_build_query($query);

    $options = [
    CURLOPT_HTTPHEADER => ["Authorization: $token"],
    CURLOPT_HEADER => false,
    CURLOPT_URL => $url,
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_SSL_VERIFYPEER => false];

    $feed = curl_init();
    curl_setopt_array($feed, $options);
    $json = curl_exec($feed);
    if (curl_error($feed)) echo 'Error: ' . curl_error($feed);
    curl_close($feed);

    return json_decode($json, true);
    }

    # request all available tweets.
    function archive($since, $max = null) {
    $archive = []; do {
    $tweets = tweets($max, $since);
    if (count($tweets) == 0) break;
    $archive = array_merge($archive, $tweets);
    $max = end($tweets)['id_str'] - 1;
    } while (true);

    return $archive;
    }

    # sort tweets from oldest to newest.
    $tweets = array_reverse(archive(since($db)));

    # sort properties of each tweet.
    rksort($tweets);

    # insert tweets into database.
    foreach ($tweets as $tweet) {
    $q = 'INSERT INTO tweets ' .
    '(guid, json, text, isreply, replyguid, replyuser, ' .
    'isretweet, retweetguid, retweetuser, timestamp) ' .
    'VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)';
    $s = $db->prepare($q);
    $s->execute([
    $tweet['id_str'],
    serialize($tweet),
    $tweet['full_text'],
    $tweet['in_reply_to_status_id_str'] !== null,
    $tweet['in_reply_to_status_id_str'],
    $tweet['in_reply_to_user_id_str'],
    $retweeted = $tweet['retweeted'],
    $retweeted ? $tweet['retweeted_status']['id_str'] : null,
    $retweeted ? $tweet['retweeted_status']['user']['id_str'] : null,
    $tweet['created_at']]);
    }

    echo $db->query('select count(*) from tweets')->fetchColumn();