Skip to content

Instantly share code, notes, and snippets.

@jsutterfield
Last active August 29, 2015 14:01
Show Gist options
  • Select an option

  • Save jsutterfield/fde2004ffa6d55e8e320 to your computer and use it in GitHub Desktop.

Select an option

Save jsutterfield/fde2004ffa6d55e8e320 to your computer and use it in GitHub Desktop.

Revisions

  1. jsutterfield renamed this gist May 14, 2014. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  2. jsutterfield revised this gist May 14, 2014. 1 changed file with 0 additions and 15 deletions.
    15 changes: 0 additions & 15 deletions wikia_video_stats
    Original file line number Diff line number Diff line change
    @@ -51,20 +51,5 @@ def main():
    print "======================="
    print len(sorted_vids_wiki)


    # Get number of videos contained in top 5% of wikis with videos
    i = 1
    top_count = 0
    threshold = int(len(sorted_vids_wiki) * .05)
    for vid in sorted_vids_wiki:
    if i == threshold:
    break
    top_count += vid[1]
    i += 1

    print "\nTotal videos in top 5% of wikis with videos"
    print "==========================================="
    print top_count

    if __name__ == "__main__":
    main()
  3. jsutterfield revised this gist May 14, 2014. 1 changed file with 1 addition and 2 deletions.
    3 changes: 1 addition & 2 deletions wikia_video_stats
    Original file line number Diff line number Diff line change
    @@ -40,8 +40,7 @@ def main():
    print "=========="
    print sum_total

    # Top 10 wikis with most videos (skipping the first 2 which are
    # video and community)
    # Top 10 wikis with most videos (skipping the first 2 which are video and community)
    print "\nTop 10 wikis"
    print "============"
    for i in range(2, 12):
  4. jsutterfield revised this gist May 14, 2014. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion wikia_video_stats
    Original file line number Diff line number Diff line change
    @@ -22,7 +22,7 @@ def main():
    fp.close()

    # Sort both dictionaries based on number of videos creating a list of tuples
    # eg [('minecraftinfinity\n', 1), ('lyinggame\n', 1), ('dope101\n', 1)...]
    # eg [('minecraftinfinity', 1), ('lyinggame', 1), ('dope101', 1)...]
    sorted_vids_wiki = sorted(vids_by_wiki.iteritems(), key=operator.itemgetter(1), reverse=True)
    # eg [('youtube', 748344), ('ign', 100159), ('screenplay', 60724)...]
    sorted_vids_provider = sorted(vids_by_provider.iteritems(), key=operator.itemgetter(1), reverse=True)
  5. jsutterfield revised this gist May 14, 2014. 1 changed file with 3 additions and 1 deletion.
    4 changes: 3 additions & 1 deletion wikia_video_stats
    Original file line number Diff line number Diff line change
    @@ -21,8 +21,10 @@ def main():

    fp.close()

    # Sort both dictionaries based on number of videos
    # Sort both dictionaries based on number of videos creating a list of tuples
    # eg [('minecraftinfinity\n', 1), ('lyinggame\n', 1), ('dope101\n', 1)...]
    sorted_vids_wiki = sorted(vids_by_wiki.iteritems(), key=operator.itemgetter(1), reverse=True)
    # eg [('youtube', 748344), ('ign', 100159), ('screenplay', 60724)...]
    sorted_vids_provider = sorted(vids_by_provider.iteritems(), key=operator.itemgetter(1), reverse=True)

    # Total number of videos, grouped by provider
  6. jsutterfield revised this gist May 14, 2014. 1 changed file with 2 additions and 0 deletions.
    2 changes: 2 additions & 0 deletions wikia_video_stats
    Original file line number Diff line number Diff line change
    @@ -18,6 +18,8 @@ def main():
    # Keep a tally of total videos by wiki, and by provider
    vids_by_provider[vals[0]] += int(vals[1])
    vids_by_wiki[vals[2]] += int(vals[1])

    fp.close()

    # Sort both dictionaries based on number of videos
    sorted_vids_wiki = sorted(vids_by_wiki.iteritems(), key=operator.itemgetter(1), reverse=True)
  7. jsutterfield revised this gist May 14, 2014. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion wikia_video_stats
    Original file line number Diff line number Diff line change
    @@ -2,7 +2,7 @@
    import operator

    def main():
    fp = open("reportTotalVideosOnWikiaNonPrem.csv", "r")
    fp = open("reportTotalVideosOnWikiaAll.csv", "r")
    vids_by_provider = {}
    vids_by_wiki = {}

  8. jsutterfield revised this gist May 14, 2014. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion wikia_video_stats
    Original file line number Diff line number Diff line change
    @@ -19,7 +19,7 @@ def main():
    vids_by_provider[vals[0]] += int(vals[1])
    vids_by_wiki[vals[2]] += int(vals[1])

    # Sort both dictionary based on number of videos
    # Sort both dictionaries based on number of videos
    sorted_vids_wiki = sorted(vids_by_wiki.iteritems(), key=operator.itemgetter(1), reverse=True)
    sorted_vids_provider = sorted(vids_by_provider.iteritems(), key=operator.itemgetter(1), reverse=True)

  9. jsutterfield created this gist May 6, 2014.
    67 changes: 67 additions & 0 deletions wikia_video_stats
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,67 @@
    #!/bin/python
    import operator

    def main(path="reportTotalVideosOnWikiaNonPrem.csv"):
        """Print video statistics computed from a video report CSV.

        Each non-blank line of the report is expected to have the form
        ``provider,count,wiki``.  The following sections are written to
        stdout, in order:

        * total videos per provider, most videos first
        * total videos overall
        * the ten wikis with the most videos, skipping the first two
          entries of the ranking (video and community)
        * the number of distinct wikis that have videos
        * the number of videos held by the top 5% of those wikis

        Args:
            path: Location of the CSV report.  Defaults to the file name the
                script has always read, so calling ``main()`` is unchanged.

        Raises:
            IOError/OSError: if the report cannot be opened.
            IndexError: if a non-blank line has fewer than three fields.
            ValueError: if a count field is not an integer.
        """
        vids_by_provider = {}
        vids_by_wiki = {}

        # ``with`` guarantees the file is closed even if a line fails to parse.
        with open(path, "r") as fp:
            for line in fp:
                # Blank lines (e.g. a trailing one) carry no data.
                if not line.strip():
                    continue

                vals = line.split(",")
                provider = vals[0]
                count = int(vals[1])
                # Drop the line terminator so "foo\n" and a final,
                # unterminated "foo" are tallied as the same wiki.
                wiki = vals[2].strip()

                # Keep a tally of total videos by wiki, and by provider
                vids_by_provider[provider] = vids_by_provider.get(provider, 0) + count
                vids_by_wiki[wiki] = vids_by_wiki.get(wiki, 0) + count

        # Sort both dictionaries by number of videos, producing lists of
        # (name, count) tuples with the largest count first.  ``items()`` and
        # single-argument ``print(...)`` behave the same on Python 2 and 3.
        by_count = operator.itemgetter(1)
        sorted_vids_wiki = sorted(vids_by_wiki.items(), key=by_count, reverse=True)
        sorted_vids_provider = sorted(vids_by_provider.items(), key=by_count, reverse=True)

        # Total number of videos, grouped by provider
        print("\nVideos By Provider")
        print("=================")
        sum_total = 0
        for name, count in sorted_vids_provider:
            sum_total += count
            print("%s: %s" % (name, count))

        # Total number of videos overall
        print("\nTotal vids")
        print("==========")
        print(sum_total)

        # Top 10 wikis with most videos (skipping the first 2 which are
        # video and community).  Slicing, unlike indexing, tolerates a report
        # with fewer than 12 wikis.
        print("\nTop 10 wikis")
        print("============")
        for name, count in sorted_vids_wiki[2:12]:
            print("%s: %s" % (name, count))

        print("\nTotal wikis with videos")
        print("=======================")
        print(len(sorted_vids_wiki))

        # Number of videos contained in the top 5% of wikis with videos.  The
        # slice takes exactly ``threshold`` wikis, and none when it is 0.
        threshold = int(len(sorted_vids_wiki) * .05)
        top_count = sum(count for _, count in sorted_vids_wiki[:threshold])

        print("\nTotal videos in top 5% of wikis with videos")
        print("===========================================")
        print(top_count)


    if __name__ == "__main__":
        main()