Skip to content

Instantly share code, notes, and snippets.

@jasonwoodland
Last active January 22, 2018 09:44
Show Gist options
  • Select an option

  • Save jasonwoodland/428aa357d2e754e5bd2b10f29f2af99f to your computer and use it in GitHub Desktop.

Select an option

Save jasonwoodland/428aa357d2e754e5bd2b10f29f2af99f to your computer and use it in GitHub Desktop.
Fix doubly UTF-8 encoded text in MySQL databases
#!/usr/bin/env python
import MySQLdb
import getpass
host = raw_input('Host? (default is "localhost") ') or 'localhost'
user = raw_input('User? (default is "root") ') or 'root'
passwd = getpass.getpass()
running = True
while running:
dbname = raw_input('Database? ')
db = MySQLdb.connect(host=host, user=user, passwd=passwd, db=dbname, charset='utf8mb4')
cursor = db.cursor()
cursor.execute("alter database %s character set 'utf8mb4' collate 'utf8mb4_unicode_ci'" % dbname)
sql = "select table_name, column_name, column_type from information_schema.columns where table_schema = '%s'" % dbname
cursor.execute(sql)
results = cursor.fetchall()
for row in results:
if row[2].find('text') != -1 or row[2].find('char') != -1:
sql = "update %s set %s = @txt where char_length(%s) = length(@txt := convert(binary convert(%s using latin1) using utf8mb4));" % (row[0], row[1], row[1], row[1])
print sql
cursor.execute(sql)
db.commit()
db.close()
running = raw_input('Done. Fix another database? ').lower()[0] == 'y'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment