-
-
Save h121h/8e4433f34d100ad8ba5dc66ff2b93bc9 to your computer and use it in GitHub Desktop.
* dumb split on first : * dumb convert any unknown utf-8 to surrogates and replace * can re-run on db if db has unique constraint on user&pass tuple
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| CREATE DATABASE pw | |
| WITH | |
| OWNER = XXXXXXXXXXXXXX | |
| ENCODING = 'UTF8' | |
| LC_COLLATE = 'en_US.UTF-8' | |
| LC_CTYPE = 'en_US.UTF-8' | |
| TABLESPACE = pg_default | |
| CONNECTION LIMIT = -1; | |
| CREATE TABLE public.passwords | |
| ( | |
| password_id bigint NOT NULL DEFAULT nextval('passwords_password_id_seq'::regclass), | |
| password_username text COLLATE pg_catalog."C.UTF-8", | |
| password_password text COLLATE pg_catalog."default", | |
| CONSTRAINT passwords_pkey PRIMARY KEY (password_id) | |
| ) | |
| WITH ( | |
| OIDS = FALSE | |
| ) | |
| TABLESPACE pg_default; | |
| ALTER TABLE public.passwords | |
| OWNER to XXXXXXXXXXXXXX; | |
| -- Index: i_password | |
| -- DROP INDEX public.i_password; | |
| CREATE INDEX i_password | |
| ON public.passwords USING btree | |
| (password_password COLLATE pg_catalog."default" varchar_ops) | |
| TABLESPACE pg_default; | |
| -- Index: i_username | |
| -- DROP INDEX public.i_username; | |
| CREATE INDEX i_username | |
| ON public.passwords USING btree | |
| (password_username COLLATE pg_catalog."C.UTF-8" bpchar_pattern_ops) | |
| TABLESPACE pg_default; | |
| -- Index: u_up | |
| -- DROP INDEX public.u_up; | |
| CREATE UNIQUE INDEX u_up | |
| ON public.passwords USING btree | |
| (password_username COLLATE pg_catalog."C.UTF-8", password_password COLLATE pg_catalog."default") | |
| TABLESPACE pg_default; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| # by Gunni @ github.com 2017-12-13 | |
| import glob | |
| import sys | |
| import os.path | |
| import pathlib | |
| import psycopg2 | |
| from random import shuffle | |
| from psycopg2 import sql | |
| files = glob.glob('data/*/*', recursive=True) | |
| connect_str = "dbname='pw' user='xxxxxxxxxx' host='xxxxxxxxxx' password='xxxxxxxxxx'" | |
| conn = psycopg2.connect(connect_str) | |
| cursor = conn.cursor() | |
| def mark(c): | |
| sys.stdout.write(c) | |
| sys.stdout.flush() | |
| shuffle(files) | |
| for pwfile in files: | |
| print('{} > '.format(pwfile), end = '') | |
| if os.path.isdir(os.path.join('done', pwfile)): | |
| print('file is in done') | |
| continue | |
| n = 0 | |
| b = 0 | |
| with open(pwfile, 'r', encoding='utf-8', errors='surrogateescape') as f: | |
| try: | |
| for line in f: | |
| up = line.encode('utf-8', errors='surrogateescape').decode('utf8', 'replace').split(':') | |
| user = up[0] | |
| if len(up) != 2: | |
| passw = '' | |
| else: | |
| passw = ':'.join(up[1:]) | |
| try: | |
| cursor.execute('insert into passwords (password_username, password_password) values (%s, %s)', [user, passw]) | |
| n += 1 | |
| if n % 1000 == 0: | |
| conn.commit() | |
| mark('.') | |
| except psycopg2.IntegrityError as e: | |
| conn.rollback() | |
| b += 1 | |
| if b % 1000 == 0: | |
| mark('!') | |
| except ValueError as e: | |
| conn.rollback() | |
| passw = '\\0' | |
| mark('0') | |
| except UnicodeDecodeError as e: | |
| print(e) | |
| raise e | |
| print() | |
| pathlib.Path(os.path.join('done', pwfile)).mkdir(parents=True, exist_ok=True) | |
| print('all done...') | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment