Last active
June 16, 2019 12:58
-
-
Save hannes/a5e5388c412598b4c13dcf482761acfe to your computer and use it in GitHub Desktop.
Revisions
-
hannes revised this gist
Jan 8, 2018 . 1 changed file with 7 additions and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters Original file line number Diff line number Diff line change @@ -20,19 +20,25 @@ def jsonify(url): return json.loads(urllib.request.urlopen(url).read().decode()) # get all wiki pages from category 'Space Shuttle missions' cat = jsonify("https://en.wikipedia.org/w/api.php?action=query&list=categorymembers&cmtitle=Category%3ASpace_Shuttle_missions&format=json&cmlimit=200") for page in cat['query']['categorymembers']: # make sure they are named 'STS-XXX' if sts.match(page["title"]): # get all sections from those articles sections = jsonify("https://en.wikipedia.org/w/api.php?action=parse&page=%s&prop=sections&format=json" % (page['title'])) for sec in sections['parse']['sections']: # find the sections named 'Wake-up calls' if (wuc.match(sec['line'])): print(page["title"]) # get the content of that section and parse table table_s = jsonify("https://en.wikipedia.org/w/api.php?action=parse&page=%s&section=%s&prop=text&format=json" % (page['title'], sec['index'])) tables = pd.read_html(table_s['parse']['text']['*'])[0].iloc[1:] tracks = [] for index, row in tables.iterrows(): print (row[1], row[2]) # search Spotify API for the title and artist of the listed music and collect their IDs results = sp.search(q="%s artist:%s" % (row[1], row[2]), limit=1) if (len(results['tracks']['items']) > 0): tracks.append(results['tracks']['items'][0]['id']) -
hannes revised this gist
Jan 7, 2018 . 1 changed file with 0 additions and 2 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -24,8 +24,6 @@ def jsonify(url): for page in cat['query']['categorymembers']: if sts.match(page["title"]): sections = jsonify("https://en.wikipedia.org/w/api.php?action=parse&page=%s&prop=sections&format=json" % (page['title'])) for sec in sections['parse']['sections']: if (wuc.match(sec['line'])): -
hannes created this gist
Jan 7, 2018 .There are no files selected for viewing
"""Collect Space Shuttle wake-up call songs from Wikipedia into a Spotify playlist.

For every article in Wikipedia's "Space Shuttle missions" category whose
title matches ``STS-<digits>``, the script looks for sections whose heading
starts with "Wake-up", reads the first HTML table of that section, searches
Spotify for each listed row and adds the first hit of every search to the
configured playlist.
"""
import io
import itertools  # retained from the original import list
import json
import re
import urllib.parse
import urllib.request

import pandas as pd
import spotipy

# you will need to create a spotify app and add the credentials below
# also create a public spotify playlist and get its ID (the last part of its URI)

# import spotipy.util as util
# token = util.prompt_for_user_token('hfmuehleisen',"playlist-modify-public",client_id='XXX',client_secret='XXX',redirect_uri='http://example.com/callback')

token = 'XXX'
user = 'XXX'
playlist = 'XXX'

sp = spotipy.Spotify(auth=token)

sts = re.compile(r"^STS-\d+$")
wuc = re.compile(r"^Wake-up.*$")

WIKI_API = "https://en.wikipedia.org/w/api.php"
# Spotify accepts at most 100 items per "add items to playlist" request.
MAX_TRACKS_PER_REQUEST = 100


def jsonify(url):
    """Fetch *url* and return its response body parsed as JSON.

    The response is closed as soon as the body has been read.
    """
    with urllib.request.urlopen(url) as response:
        return json.loads(response.read().decode())


def _wiki_api(params):
    """Call the MediaWiki API with *params* (plus ``format=json``)."""
    # urlencode escapes every value, so titles and section indexes cannot
    # corrupt the query string.
    query = urllib.parse.urlencode({**params, "format": "json"})
    return jsonify("%s?%s" % (WIKI_API, query))


# get all wiki pages from category 'Space Shuttle missions'
cat = _wiki_api({
    "action": "query",
    "list": "categorymembers",
    "cmtitle": "Category:Space_Shuttle_missions",
    "cmlimit": 200,
})

for page in cat['query']['categorymembers']:
    title = page["title"]
    # make sure they are named 'STS-XXX'
    if not sts.match(title):
        continue

    # get all sections from those articles
    sections = _wiki_api({"action": "parse", "page": title, "prop": "sections"})
    for sec in sections['parse']['sections']:
        # find the sections named 'Wake-up calls'
        if not wuc.match(sec['line']):
            continue
        print(title)

        # get the content of that section and parse table
        table_s = _wiki_api({
            "action": "parse",
            "page": title,
            "section": sec['index'],
            "prop": "text",
        })
        html = table_s['parse']['text']['*']
        try:
            # StringIO: passing literal HTML to read_html is deprecated.
            # The first row is dropped, as in the original script
            # (presumably a header row parsed as data -- confirm).
            tables = pd.read_html(io.StringIO(html))[0].iloc[1:]
        except ValueError:
            # read_html raises ValueError when the section has no table.
            continue

        tracks = []
        for _, row in tables.iterrows():
            # Positional access works whether or not pandas detected a header.
            song, artist = row.iloc[1], row.iloc[2]
            print(song, artist)
            # search Spotify API for the title and artist of the listed
            # music and collect their IDs
            results = sp.search(q="%s artist:%s" % (song, artist), limit=1)
            items = results['tracks']['items']
            if items:
                tracks.append(items[0]['id'])

        # Add in batches; an empty list results in no request at all.
        for start in range(0, len(tracks), MAX_TRACKS_PER_REQUEST):
            batch = tracks[start:start + MAX_TRACKS_PER_REQUEST]
            sp.user_playlist_add_tracks(user, playlist, batch)