Skip to content

Instantly share code, notes, and snippets.

@ercas
Last active July 21, 2021 15:58
Show Gist options
  • Select an option

  • Save ercas/f969bf8854ec8721c3283d64f228f040 to your computer and use it in GitHub Desktop.

Select an option

Save ercas/f969bf8854ec8721c3283d64f228f040 to your computer and use it in GitHub Desktop.
Create graphs of ListenBrainz data
library(dplyr)
library(ggplot2)
library(jsonlite)
library(lubridate)
library(stringr)
# load data ---------------------------------------------------------------

# Time zone in which listen times are displayed. Sys.timezone() is the
# portable way to ask for the system zone (no shelling out to readlink/grep,
# which only works on Linux with a "Region/City" style zone). Fall back to
# UTC when the zone cannot be determined.
local_tz <- Sys.timezone()
if (is.na(local_tz) || !nzchar(local_tz)) {
  local_tz <- "UTC"
}

# Read the ListenBrainz export, flatten nested JSON objects into
# `parent.child` columns (e.g. `track_metadata.artist_name`), convert
# `listened_at` to a date-time in the local zone, and keep listens from 2020
# onwards.
# NOTE(review): `listened_at` is presumably Unix epoch seconds -- confirm
# against the export format.
data <- fromJSON("ercas_lb-2020-12-23.json") %>%
  flatten() %>%
  mutate(listened_at = with_tz(as_datetime(listened_at), local_tz)) %>%
  # as_datetime() on a bare date string yields midnight UTC, so the cutoff is
  # the start of 2020 in UTC rather than in `local_tz`.
  filter(listened_at >= as_datetime("2020-01-01"))
# top artists -------------------------------------------------------------

# Number of artists that get their own colour; everything else is lumped
# into "All others". The "Spectral" brewer palette has at most 11 colours,
# so values above 10 will not get a full set of colours.
n_artists <- 10

# Artist names ordered by number of listens, most listened-to first.
# table() does not count missing names.
top_artists <- data$track_metadata.artist_name %>%
  table() %>%
  as.data.frame() %>%
  arrange(desc(Freq)) %>%
  .$. %>%
  head(n_artists) %>%
  as.character()

# Regex alternation matching any top artist anywhere in the artist string, so
# that collaborations ("A feat. B") are attributed to the top artist. Regex
# metacharacters in the names are escaped so that names such as "M.I.A." or
# "Sunn O)))" are matched literally instead of corrupting the pattern.
artist_pattern <- top_artists %>%
  str_replace_all("([\\\\.|()\\[\\]{}^$*+?])", "\\\\\\1") %>%
  paste(collapse = "|")

data %>%
  mutate(
    # str_extract() returns NA when no top artist occurs in the name (or the
    # name itself is missing); those listens are labelled "All others".
    Artist = coalesce(
      str_extract(track_metadata.artist_name, artist_pattern),
      "All others"
    )
  ) %>%
  ggplot() +
  aes(x = listened_at, fill = Artist) +
  # Stacked densities with no outline: one band per artist over time.
  geom_density(position = "stack", color = NA) +
  labs(
    x = "Date",
    y = "Density",
    title = sprintf("Top %d most listened-to artists over time", n_artists)
  ) +
  scale_fill_brewer(palette = "Spectral") +
  theme_minimal() +
  # Hide the y-axis breaks.
  scale_y_continuous(breaks = NULL)
# top albums --------------------------------------------------------------

# How many albums are shown individually; the rest become "All others".
n_albums <- 10

# Tally listens per release name, rank by frequency (most listened-to first)
# and keep the names of the leading `n_albums` releases.
album_counts <- as.data.frame(table(data$track_metadata.release_name))
album_ranking <- arrange(album_counts, desc(Freq))
top_albums <- head(album_ranking[[1]], n_albums)

# Label every listen with its album when that album is a top album,
# otherwise with "All others".
album_listens <- mutate(
  data,
  Album = ifelse(
    !(track_metadata.release_name %in% top_albums),
    "All others",
    ifelse(
      # Presumably a very long album title, abbreviated for the legend.
      str_starts(track_metadata.release_name, "The Idler Wheel"),
      "The Idler Wheel (...)",
      track_metadata.release_name
    )
  )
)

# Stacked density of listens over time, one band per album.
ggplot(album_listens, aes(x = listened_at, fill = Album)) +
  geom_density(position = "stack", color = NA) +
  scale_fill_brewer(palette = "Spectral") +
  scale_y_continuous(breaks = NULL) +
  labs(
    x = "Date",
    y = "Density",
    title = sprintf("Top %d most listened-to albums over time", n_albums)
  ) +
  theme_minimal()
# top songs ---------------------------------------------------------------

# How many tracks are shown individually; the rest become "All others".
n_songs <- 10

# Tally listens per track name, rank by frequency (most listened-to first)
# and keep the names of the leading `n_songs` tracks.
song_counts <- as.data.frame(table(data$track_metadata.track_name))
song_ranking <- arrange(song_counts, desc(Freq))
top_songs <- head(song_ranking[[1]], n_songs)

# Label every listen with its track name when that track is a top track,
# otherwise with "All others".
song_listens <- mutate(
  data,
  Track = ifelse(
    !(track_metadata.track_name %in% top_songs),
    "All others",
    track_metadata.track_name
  )
)

# Stacked density of listens over time, one band per track.
ggplot(song_listens, aes(x = listened_at, fill = Track)) +
  geom_density(position = "stack", color = NA) +
  scale_fill_brewer(palette = "Spectral") +
  scale_y_continuous(breaks = NULL) +
  labs(
    x = "Date",
    y = "Density",
    title = sprintf("Top %d most listened-to tracks over time", n_songs)
  ) +
  theme_minimal()
# top listening times -----------------------------------------------------

# Cross-tabulate listens by week of year and hour of day. Going through
# table() keeps week/hour combinations with no listens as rows with Freq 0.
listen_counts <- as.data.frame(table(
  week = week(data$listened_at),
  hour = hour(data$listened_at)
))

# Heat map of listen counts on a log scale. log(0) is -Inf for the empty
# cells; `na.value = "black"` is presumably there so those render as black.
ggplot(listen_counts, aes(x = week, y = hour, fill = log(Freq))) +
  geom_tile() +
  scale_fill_viridis_c(na.value = "black") +
  theme_minimal() +
  # Rotate the week labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 90)) +
  labs(
    x = "Week",
    y = "Hour",
    title = "Most active listening times (note log scale)"
  )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment