"""
Converts a large CSV into SQL, can process some of the smaller chunks

Based on https://plot.ly/python/big-data-analytics-with-pandas-and-sqlite/
Original code probably from https://github.com/chriddyp
"""
import pandas
from sqlalchemy import create_engine
import tqdm


CSV1 = 'movies.csv'
CSV2 = 'countries.csv'
SQL = 'db.sqlite3'


engine = create_engine('sqlite:///{}'.format(SQL)):w


def to_sqlite(csvfile, disk_engine, tablename):
  chunksize = 50000
  for df in tqdm(pandas.read_csv(csvfile, chunksize=chunksize, iterator=True, encoding='utf-8')):
    df.to_sql(tablename, disk_engine, if_exists='append')


# some sample SQL statements
# it's important to filter the dataset with a 'where' clause, or use the 'limit' statement
# otherwise we might end up loading the entire dataset into memory, which we want to avoid
first5 = "select * from movies limit 5" # Everything we know about the first 5 entries
names_of_top10 = "select title, score from movies order by score DESC limit 10" # the first 10 entries, by DESCending scoref