Skip to content

Instantly share code, notes, and snippets.

@zhoufeng1989
Created July 16, 2016 04:54
Show Gist options
  • Select an option

  • Save zhoufeng1989/f084ca41ba6189505f5ab4b4d7accf4c to your computer and use it in GitHub Desktop.

Select an option

Save zhoufeng1989/f084ca41ba6189505f5ab4b4d7accf4c to your computer and use it in GitHub Desktop.
import heapq

import requests
def get_content(url):
    """Stream the body at ``url`` line by line, skipping the first line.

    The first line is dropped unconditionally (presumably the CSV header
    row -- confirm against the data source).

    Args:
        url: Address fetched with ``requests.get(url, stream=True)``.

    Yields:
        Each remaining line exactly as produced by ``response.iter_lines()``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    response = requests.get(url, stream=True)
    try:
        # Fail loudly on 4xx/5xx instead of feeding an error page to the parser.
        response.raise_for_status()
        lines = response.iter_lines()
        next(lines, None)  # discard the first line; tolerate an empty body
        for line in lines:
            yield line
    finally:
        # With stream=True the connection is only released once the body is
        # fully consumed or the response is closed; closing here also covers
        # a consumer that abandons the generator early.
        response.close()
def find_smaller(items, rating):
    """Return the index of the first entry rated strictly below ``rating``.

    The rating of each entry is read from position 2. ``None`` is returned
    when no entry has a lower rating (including when ``items`` is empty).
    """
    positions = (pos for pos, entry in enumerate(items) if entry[2] < rating)
    return next(positions, None)
def filter_stream(stream):
    """Parse comma-separated lines into ``(id, running_time, rating)`` tuples.

    Each line must split on "," into exactly three fields that convert to
    ``int``, ``float`` and ``float`` respectively. Lines that do not fit
    that shape are skipped silently.

    Args:
        stream: Iterable of text or byte-string lines.

    Yields:
        ``(int, float, float)`` tuples, one per well-formed line.
    """
    for line in stream:
        try:
            if isinstance(line, bytes):
                # Byte lines (what requests' iter_lines() yields by default)
                # cannot be split on a text separator on Python 3.
                # NOTE(review): assumes UTF-8 encoded data -- confirm.
                line = line.decode("utf-8")
            _id, running_time, rating = line.split(",")
            record = (int(_id), float(running_time), float(rating))
        except ValueError:
            # Wrong field count, non-numeric field, or undecodable bytes.
            continue
        # Yield outside the try block so closing the generator is never
        # intercepted by the error handler.
        yield record
def topN(stream, top=10):
    """Return the ``top`` highest-rated records from ``stream``.

    Lines are parsed with ``filter_stream``; the rating is the third field
    of each parsed record.

    Args:
        stream: Iterable of raw lines accepted by ``filter_stream``.
        top: Maximum number of records to return.

    Returns:
        A list of ``(id, rating)`` tuples ordered by rating, highest first.
        It holds fewer than ``top`` entries when the stream has fewer valid
        records; no placeholder entries are added. Among equal ratings the
        record seen first comes first.
    """
    # nlargest always evicts the lowest-rated candidate, so a record can
    # never be displaced by a newcomer while a weaker record is retained.
    best = heapq.nlargest(top, filter_stream(stream), key=lambda item: item[2])
    # Build a real list so the result can be printed and re-iterated on
    # Python 3, where map() is lazy.
    return [(item[0], item[2]) for item in best]
if __name__ == "__main__":
    # Script entry point: download the movie list and print the top-rated entries.
    url = 'https://gist.githubusercontent.com/tyrchen/32c50aadca48aee3da10a77a18479517/raw/7da00efd07b31ba8263611c42ec34fefdf2be2fd/movies.csv'
    data = get_content(url)
    # Parenthesized print is valid on both Python 2 and 3; list() makes sure
    # the entries themselves are shown even if topN returns a lazy iterable.
    print(list(topN(data)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment