Skip to content

Instantly share code, notes, and snippets.

@jdecool
Created June 2, 2016 16:00
Show Gist options
  • Select an option

  • Save jdecool/c4a0cccadcc2b686bb547ddc08db49c9 to your computer and use it in GitHub Desktop.

Select an option

Save jdecool/c4a0cccadcc2b686bb547ddc08db49c9 to your computer and use it in GitHub Desktop.

Revisions

  1. jdecool created this gist Jun 2, 2016.
    67 changes: 67 additions & 0 deletions github-archive.php
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,67 @@
    #!/usr/bin/env php
    <?php

    // URL => https://www.githubarchive.org/
    // wget http://data.githubarchive.org/2015-01-{01..30}-{0..23}.json.gz

    // Upper bound on the number of days in each month (1 = January ... 12 = December).
    // February is listed as 29 so that leap-day archives are not skipped; in a
    // non-leap year the extra day simply results in failed downloads.
    $dayMonth = [
        1  => 31,
        2  => 29,
        3  => 31,
        4  => 30,
        5  => 31,
        6  => 30,
        7  => 31,
        8  => 31,
        9  => 30,
        10 => 31,
        11 => 30,
        12 => 31,
    ];

    // 2011: fetch every hourly archive for 2-31 December 2011 into ./2011/.
    // NOTE(review): confirm that 2011-12-02 is the intended start date; the
    // archive may hold earlier data.

    // wget -O does not create missing parent directories, so make sure the
    // target folder exists before the first download.
    $folder = '2011';
    if (!file_exists($folder)) {
        mkdir($folder, 0777, true);
    }

    for ($day = 2; $day <= 31; $day++) {
        for ($hour = 0; $hour <= 23; $hour++) {
            // The remote file name uses an unpadded hour (0..23), while the
            // local copy is saved with a zero-padded hour so files sort properly.
            $command = sprintf('wget http://data.githubarchive.org/2011-12-%1$02d-%2$d.json.gz -O 2011/2011-12-%1$02d-%2$02d.json.gz', $day, $hour);

            shell_exec($command);
            echo $command, PHP_EOL;
        }
    }

    // 2012 -> 2015: fetch every hourly archive, stored as <year>/<month>/<file>.
    for ($year = 2012; $year <= 2015; $year++) {
        for ($month = 1; $month <= 12; $month++) {
            $folder = sprintf('%d/%02d', $year, $month);
            if (!file_exists($folder)) {
                mkdir($folder, 0777, true);
            }

            // Real length of this month in this year (handles leap years), so
            // that no nonexistent calendar day is requested.
            $daysInMonth = (int) date('t', mktime(0, 0, 0, $month, 1, $year));

            for ($day = 1; $day <= $daysInMonth; $day++) {
                for ($hour = 0; $hour <= 23; $hour++) {
                    // The remote file name uses an unpadded hour; the local
                    // copy is saved with a zero-padded hour.
                    $command = sprintf('wget http://data.githubarchive.org/%1$04d-%2$02d-%3$02d-%4$d.json.gz -O %1$04d/%2$02d/%1$04d-%2$02d-%3$02d-%4$02d.json.gz', $year, $month, $day, $hour);

                    shell_exec($command);
                    echo $command, PHP_EOL;
                }
            }
        }
    }

    // 2016: fetch every hourly archive, stored as 2016/<month>/<file>.
    $year = 2016;

    // While 2016 is the current year, stop at the current month; once the
    // year is over, all twelve months are available.
    $lastMonth = ((int) date('Y') > $year) ? 12 : (int) date('n');

    for ($month = 1; $month <= $lastMonth; $month++) {
        $folder = sprintf('%d/%02d', $year, $month);
        if (!file_exists($folder)) {
            mkdir($folder, 0777, true);
        }

        // Real length of this month (2016 is a leap year), instead of always
        // iterating up to day 31 and requesting dates that do not exist.
        $daysInMonth = (int) date('t', mktime(0, 0, 0, $month, 1, $year));

        for ($day = 1; $day <= $daysInMonth; $day++) {
            for ($hour = 0; $hour <= 23; $hour++) {
                // The remote file name uses an unpadded hour; the local copy
                // is saved with a zero-padded hour.
                $command = sprintf('wget http://data.githubarchive.org/%1$04d-%2$02d-%3$02d-%4$d.json.gz -O %1$04d/%2$02d/%1$04d-%2$02d-%3$02d-%4$02d.json.gz', $year, $month, $day, $hour);

                shell_exec($command);
                echo $command, PHP_EOL;
            }
        }
    }