""" Converts a large CSV into SQL, can process some of the smaller chunks Based on https://plot.ly/python/big-data-analytics-with-pandas-and-sqlite/ Original code probably from https://github.com/chriddyp """ import pandas from sqlalchemy import create_engine import tqdm CSV1 = 'movies.csv' CSV2 = 'countries.csv' SQL = 'db.sqlite3' engine = create_engine('sqlite:///{}'.format(SQL)):w def to_sqlite(csvfile, disk_engine, tablename): chunksize = 50000 for df in tqdm(pandas.read_csv(csvfile, chunksize=chunksize, iterator=True, encoding='utf-8')): df.to_sql(tablename, disk_engine, if_exists='append') # some sample SQL statements # it's important to filter the dataset with a 'where' clause, or use the 'limit' statement # otherwise we might end up loading the entire dataset into memory, which we want to avoid first5 = "select * from movies limit 5" # Everything we know about the first 5 entries names_of_top10 = "select title, score from movies order by score DESC limit 10" # the first 10 entries, by DESCending scoref