Skip to content

Instantly share code, notes, and snippets.

@bakugo
Last active May 27, 2021 08:09
Show Gist options
  • Select an option

  • Save bakugo/fc7082e2d90b0cf9c46e57ab749496da to your computer and use it in GitHub Desktop.

Select an option

Save bakugo/fc7082e2d90b0cf9c46e57ab749496da to your computer and use it in GitHub Desktop.
Basic MySQL data dumper with support for excluding columns
#!/usr/bin/python3
import sys
import os
import time
import argparse
import threading
import queue
import pymysql
def main(argv):
    """Dump a MySQL table to stdout as multi-row INSERT statements.

    A reader thread pages through the table in ascending order of an
    auto-increment column, builds one INSERT (IGNORE) statement per batch,
    and hands it to a writer thread over a bounded queue so slow output
    cannot stall the database reads indefinitely.

    Args:
        argv: full process argv (argv[0] is the program name).

    Returns:
        0 on success, 1 on bad arguments or connection failure.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--host", default="localhost", help="database server host")
    parser.add_argument("--port", default=3306, type=int, help="database server port")
    parser.add_argument("--user", default="root", help="database server username")
    parser.add_argument("--pass", default="", help="database server password")
    parser.add_argument("--idcol", default="id", help="auto-increment column")
    parser.add_argument("--idmin", default=None, type=int, help="min auto increment id to include")
    parser.add_argument("--idmax", default=None, type=int, help="max auto increment id to include")
    parser.add_argument("--colskip", default="", help="columns to skip, comma-separated")
    parser.add_argument("--batch", default=1000, type=int, help="number of rows per batch/line")
    parser.add_argument("--sleep", default=0.0, type=float, help="seconds to sleep between each batch")
    parser.add_argument("--noignore", action="store_true", help="use normal INSERT instead of INSERT IGNORE")
    parser.add_argument("database", default=None, help="database name")
    parser.add_argument("table", default=None, help="table name")
    args = parser.parse_args(argv[1:])

    if not args.database:
        return 1
    if not args.table:
        return 1

    # Bounded queue applies backpressure: the reader blocks in put() once
    # the writer falls 10 batches behind.
    queue1 = queue.Queue(maxsize=10)

    try:
        database = pymysql.connect(
            host=args.host,
            port=args.port,
            user=args.user,
            # "pass" is a Python keyword, so the attribute cannot be
            # accessed as args.pass; getattr is required.
            password=getattr(args, "pass"),
            db=args.database,
            charset="utf8mb4",
            cursorclass=pymysql.cursors.DictCursor,
        )
    except Exception as err:
        print("Error on conn:", err, file=sys.stderr)
        return 1

    def thr_read():
        """Reader thread: fetch id-ordered batches and enqueue INSERT text."""
        cur = 0  # highest id emitted so far; next batch starts above it
        cols = []
        if args.idmin is not None:
            cur = args.idmin - 1
        try:
            cursor = database.cursor()
            while True:
                # Identifiers (table/column names) cannot be bound
                # parameters, so they are interpolated here; row VALUES
                # are escaped below via database.escape().
                query = "SELECT * FROM `{}` WHERE `{}` > {} AND `{}` <= {} ORDER BY `{}` ASC LIMIT {}"
                query = query.format(
                    args.table,
                    args.idcol,
                    cur,
                    args.idcol,
                    (args.idmax if args.idmax else 1000000000),
                    args.idcol,
                    args.batch,
                )
                cursor.execute(query)
                rows = cursor.fetchall()
                if len(rows) == 0:
                    break
                ins_c = []
                ins_v = []
                for row in rows:
                    row_id = int(row[args.idcol])
                    if row_id > cur:
                        cur = row_id
                    if len(cols) == 0:
                        # Column list is decided once, from the first row,
                        # minus any --colskip columns.
                        skip = args.colskip.split(",")
                        cols = [col for col in row if col not in skip]
                    if len(ins_c) == 0:
                        ins_c = ["`" + col + "`" for col in cols]
                    values = [database.escape(row[col]) for col in cols]
                    ins_v.append("(" + ",".join(values) + ")")
                queue1.put(
                    "INSERT{} INTO `{}` ({}) VALUES {};\n".format(
                        ("" if args.noignore else " IGNORE"),
                        args.table,
                        ",".join(ins_c),
                        ",".join(ins_v),
                    )
                )
                if args.sleep:
                    # BUG FIX: original called time.sleep(args) — passing
                    # the Namespace raised TypeError inside this try block,
                    # silently truncating the dump after the first batch
                    # whenever --sleep was nonzero.
                    time.sleep(args.sleep)
        except Exception as err:
            print("Error on read:", err, file=sys.stderr)
        # Sentinel tells the writer to stop, on success or failure alike.
        queue1.put(None)

    def thr_write():
        """Writer thread: drain the queue to stdout until the None sentinel."""
        try:
            while True:
                item = queue1.get()
                if item is None:
                    break
                sys.stdout.buffer.write(item.encode("utf8"))
                sys.stdout.flush()
        except Exception as err:
            print("Error on write:", err, file=sys.stderr)

    t1 = threading.Thread(target=thr_read, daemon=True)
    t2 = threading.Thread(target=thr_write, daemon=True)
    t1.start()
    t2.start()
    # Poll with a short sleep instead of join() so the main thread stays
    # responsive to signals; both threads are daemonic, so an interrupt
    # tears the whole process down.
    while t1.is_alive() or t2.is_alive():
        time.sleep(0.1)
    return 0
# Script entry point: propagate main()'s return value as the process exit code.
if __name__ == "__main__":
    sys.exit(main(sys.argv))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment