# (c) Ben Richeson 2022
# github.com/benricheson101

# USAGE:
#   python3 spotify_stats.py [year]
#
# SETUP:
#   1. place this file in the same directory as `endsong_N.json` files in your spotify data package
#   2. run the file, optionally appending a year to the end of the command

from collections import Counter
from dataclasses import dataclass, field
from typing import Iterable, Optional, TypedDict
from glob import glob
from sys import argv
import json

# Number of entries shown in each "top" ranking.
top_n = 5


class StreamedTrack(TypedDict):
    """One playback record as read from an `endsong_N.json` file."""

    ts: str
    username: str
    platform: str
    ms_played: int
    conn_country: str
    ip_addr_decrypted: str
    user_agent_decrypted: Optional[str]
    master_metadata_track_name: Optional[str]
    master_metadata_album_artist_name: Optional[str]
    master_metadata_album_album_name: Optional[str]
    spotify_track_uri: Optional[str]
    episode_name: Optional[str]
    episode_show_name: Optional[str]
    spotify_episode_uri: Optional[str]
    reason_start: str
    reason_end: str
    shuffle: bool
    skipped: Optional[bool]
    offline: bool
    offline_timestamp: str
    incognito_mode: bool


@dataclass
class PlaybackStats:
    """Running totals accumulated over playback records."""

    # Total playback time in milliseconds.
    ms: int = 0
    # Number of records counted.
    count: int = 0
    # Play counts keyed by (track or episode name, artist).
    top_tracks: Counter = field(default_factory=Counter)
    # Play counts keyed by artist name.
    top_artists: Counter = field(default_factory=Counter)


def read_streams(path: str) -> list[StreamedTrack]:
    """Load the playback records stored in the JSON file at *path*."""
    # JSON interchange is UTF-8 (RFC 8259); be explicit so the platform's
    # locale encoding cannot mangle or reject non-ASCII names.
    with open(path, "r", encoding="utf-8") as file:
        return json.load(file)


def tally_streams(
    streams: Iterable[StreamedTrack],
    filter_year: Optional[str] = None,
    stats: Optional[PlaybackStats] = None,
) -> PlaybackStats:
    """Add *streams* to *stats* and return it.

    Args:
        streams: playback records to count.
        filter_year: when given, only records whose ``ts`` starts with this
            prefix are counted.
        stats: totals to update in place; a fresh ``PlaybackStats`` is created
            when omitted.

    Returns:
        The updated totals.
    """
    stats = PlaybackStats() if stats is None else stats

    for stream in streams:
        if filter_year and not stream["ts"].startswith(filter_year):
            continue

        name = (
            stream.get("master_metadata_track_name")
            or stream.get("episode_name")
            or ""
        )
        artist = stream.get("master_metadata_album_artist_name") or ""

        # Records without a title or artist still count toward play time,
        # but must not show up as blank rows in the rankings.
        if name:
            stats.top_tracks[(name, artist)] += 1
        if artist:
            stats.top_artists[artist] += 1

        stats.ms += int(stream["ms_played"])
        stats.count += 1

    return stats


def format_report(
    stats: PlaybackStats,
    filter_year: Optional[str] = None,
    limit: int = top_n,
) -> list[str]:
    """Return the report for *stats* as a list of output lines.

    Args:
        stats: totals to report.
        filter_year: year shown in the heading, if any.
        limit: number of entries in each ranking.
    """
    seconds = stats.ms / 1000
    minutes = seconds / 60
    hours = minutes / 60
    days = hours / 24
    # Approximation: a month is 30 days, so a "year" here is 360 days.
    months = days / 30
    years = months / 12

    if filter_year:
        heading = f"-- spotify playback stats {filter_year} --"
    else:
        heading = "-- spotify playback stats --"

    lines = [
        heading,
        "",
        f" ms : {stats.ms:,.2f}",
        f" seconds: {seconds:,.2f}",
        f" minutes: {minutes:,.2f}",
        f" hours : {hours:,.2f}",
        f" days : {days:,.2f}",
        f" months : {months:,.2f}",
        f" years : {years:,.2f}",
        f" plays : {stats.count:,}",
        "",
        " top tracks:",
    ]
    lines.extend(
        f" {plays}: {artist} - {name}"
        for (name, artist), plays in stats.top_tracks.most_common(limit)
    )
    lines.append("")
    lines.append(" top artists:")
    lines.extend(
        f" {plays}: {artist}"
        for artist, plays in stats.top_artists.most_common(limit)
    )
    return lines


def main(args: Optional[list[str]] = None) -> int:
    """Run the script and return the process exit status.

    Args:
        args: command-line arguments including the program name; defaults to
            ``sys.argv``. An optional second element selects the year.
    """
    args = argv if args is None else args
    filter_year = args[1] if len(args) > 1 else None

    # Sorted so that ties in the rankings are broken the same way on every
    # run; glob() itself returns paths in arbitrary order.
    paths = sorted(glob("endsong_*.json"))
    if not paths:
        print(
            "could not find streaming history data files. "
            "make sure this file is in the same folder as `endsong_N.json` files"
        )
        return 1

    stats = PlaybackStats()
    for path in paths:
        tally_streams(read_streams(path), filter_year, stats)

    if stats.count == 0:
        print("no stats available")
        return 0

    print("\n".join(format_report(stats, filter_year)))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())