hannes · June 16, 2019 12:58 · Jan 8, 2018 · Jan 7, 2018 · Jan 7, 2018
diff --git a/ststospotify.py b/ststospotify.py
@@ -20,19 +20,25 @@
 
 def jsonify(url):
 	return json.loads(urllib.request.urlopen(url).read().decode())
-cat = jsonify("https://en.wikipedia.org/w/api.php?action=query&list=categorymembers&cmtitle=Category%3ASpace_Shuttle_missions&format=json&cmlimit=200")
 
+# get all wiki pages from category 'Space Shuttle missions'
+cat = jsonify("https://en.wikipedia.org/w/api.php?action=query&list=categorymembers&cmtitle=Category%3ASpace_Shuttle_missions&format=json&cmlimit=200")
 for page in cat['query']['categorymembers']:
+	# make sure they are named 'STS-XXX'
 	if sts.match(page["title"]):
+		# get all sections from those articles
 		sections = jsonify("https://en.wikipedia.org/w/api.php?action=parse&page=%s&prop=sections&format=json" % (page['title']))
 		for sec in sections['parse']['sections']:
+			# find the sections whose title starts with 'Wake-up' (e.g. 'Wake-up calls')
 			if (wuc.match(sec['line'])):
 				print(page["title"])
+				# get the content of that section and parse table
 				table_s = jsonify("https://en.wikipedia.org/w/api.php?action=parse&page=%s&section=%s&prop=text&format=json" % (page['title'], sec['index']))
 				tables = pd.read_html(table_s['parse']['text']['*'])[0].iloc[1:]
 				tracks = []
 				for index, row in tables.iterrows():
 					print (row[1], row[2])
+					# search Spotify API for the title and artist of the listed music and collect their IDs
 					results = sp.search(q="%s artist:%s" % (row[1], row[2]), limit=1)
 					if (len(results['tracks']['items']) > 0):
 						tracks.append(results['tracks']['items'][0]['id'])

diff --git a/ststospotify.py b/ststospotify.py
@@ -24,8 +24,6 @@ def jsonify(url):
 
 for page in cat['query']['categorymembers']:
 	if sts.match(page["title"]):
-		n = [int(''.join(i)) for is_digit, i in itertools.groupby(page["title"], str.isdigit) if is_digit][0]
-
 		sections = jsonify("https://en.wikipedia.org/w/api.php?action=parse&page=%s&prop=sections&format=json" % (page['title']))
 		for sec in sections['parse']['sections']:
 			if (wuc.match(sec['line'])):

diff --git a/ststospotify.py b/ststospotify.py
@@ -0,0 +1,43 @@
+import urllib.request, json 
+import re
+import pandas as pd
+import spotipy 
+import itertools
+
+# you will need to create a Spotify app and add the credentials below
+# also create a public Spotify playlist and get its ID (the last part of its URI)
+# import spotipy.util as util
+# token = util.prompt_for_user_token('hfmuehleisen',"playlist-modify-public",client_id='XXX',client_secret='XXX',redirect_uri='http://example.com/callback')
+
+token = 'XXX'
+user = 'XXX'
+playlist = 'XXX'
+
+sp = spotipy.Spotify(auth=token)
+
+sts = re.compile("^STS-\\d+$")
+wuc = re.compile("^Wake-up.*$")
+
+def jsonify(url):
+	return json.loads(urllib.request.urlopen(url).read().decode())
+cat = jsonify("https://en.wikipedia.org/w/api.php?action=query&list=categorymembers&cmtitle=Category%3ASpace_Shuttle_missions&format=json&cmlimit=200")
+
+for page in cat['query']['categorymembers']:
+	if sts.match(page["title"]):
+		n = [int(''.join(i)) for is_digit, i in itertools.groupby(page["title"], str.isdigit) if is_digit][0]
+
+		sections = jsonify("https://en.wikipedia.org/w/api.php?action=parse&page=%s&prop=sections&format=json" % (page['title']))
+		for sec in sections['parse']['sections']:
+			if (wuc.match(sec['line'])):
+				print(page["title"])
+				table_s = jsonify("https://en.wikipedia.org/w/api.php?action=parse&page=%s&section=%s&prop=text&format=json" % (page['title'], sec['index']))
+				tables = pd.read_html(table_s['parse']['text']['*'])[0].iloc[1:]
+				tracks = []
+				for index, row in tables.iterrows():
+					print (row[1], row[2])
+					results = sp.search(q="%s artist:%s" % (row[1], row[2]), limit=1)
+					if (len(results['tracks']['items']) > 0):
+						tracks.append(results['tracks']['items'][0]['id'])
+
+				if len(tracks) > 0:
+					sp.user_playlist_add_tracks(user, playlist, tracks)
No results found