# Data source: https://sites.google.com/eng.ucsd.edu/ucsdbookgraph/home

```text
@inproceedings{DBLP:conf/recsys/WanM18,
  author    = {Mengting Wan and
               Julian J. McAuley},
  editor    = {Sole Pera and
               Michael D. Ekstrand and
               Xavier Amatriain and
               John O'Donovan},
  title     = {Item recommendation on monotonic behavior chains},
  booktitle = {Proceedings of the 12th {ACM} Conference on Recommender Systems,
               RecSys 2018, Vancouver, BC, Canada, October 2-7, 2018},
  pages     = {86--94},
  publisher = {{ACM}},
  year      = {2018},
  url       = {https://doi.org/10.1145/3240323.3240369},
  doi       = {10.1145/3240323.3240369},
  timestamp = {Mon, 22 Jul 2019 19:11:02 +0200},
  biburl    = {https://dblp.org/rec/conf/recsys/WanM18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
```

# Run in DuckDB:

```sql
-- Step 1: load the three newline-delimited JSON dumps into tables.
-- ignore_errors is enabled for the reviews file only.
CREATE TABLE goodreads_reviews AS
SELECT *
FROM read_json_auto(
    '/Users/vicki/viberary/data/goodreads_reviews_dedup.json',
    ignore_errors = 'true',
    lines = 'true'
);

CREATE TABLE goodreads AS
SELECT *
FROM read_json_auto(
    '/Users/vicki/viberary/data/goodreads_books.json',
    lines = 'true'
);

CREATE TABLE goodreads_authors AS
SELECT *
FROM read_json_auto(
    '/Users/vicki/viberary/data/goodreads_book_authors.json',
    lines = 'true'
);

-- Step 2: keep only the book columns needed for the export, and derive
-- author_id as the first run of digits found in the `authors` column
-- (presumably the id of the first listed author -- confirm against the data).
CREATE TABLE authorid AS
SELECT
    REGEXP_EXTRACT(authors, '[0-9]+') AS author_id,
    title,
    description,
    average_rating,
    book_id
FROM goodreads;

-- Step 3: join books to their reviews and to the author's name, then write
-- one row per (book, review) pair to a Parquet file in the working directory.
COPY (
    SELECT
        -- review_text is not a column of authorid, so it resolves to one of
        -- the joined tables (presumably goodreads_reviews).
        review_text,
        b.title,
        b.description,
        -- Qualified: this is the rating carried over from the books table.
        b.average_rating,
        a.name AS author
    FROM authorid AS b
    INNER JOIN goodreads_reviews AS r
        ON b.book_id = r.book_id
    INNER JOIN goodreads_authors AS a
        ON b.author_id = a.author_id
    -- The pattern has no wildcards, so this keeps every row whose author_id
    -- is anything other than the empty string (i.e. an id was extracted).
    WHERE b.author_id NOT ILIKE ''
) TO '20230630_training.parquet' (FORMAT PARQUET);
```