-
-
Save Ceesaxp/30fac9e0d044b1cdf33e74919b35b2b2 to your computer and use it in GitHub Desktop.
Quick import-export script to move WhatsApp discussion threads to Slack.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| # -*- coding: utf-8 -*- | |
| import os | |
| import sys | |
| import argparse | |
| import datetime | |
| import re | |
| import traceback | |
# Typographic double-quote variants folded to a plain ASCII '"':
# U+201C, U+201D, U+201E, U+201F, U+2033 (double prime), U+02DD (double acute).
_QUOTE_TABLE = str.maketrans("\u201c\u201d\u201e\u201f\u2033\u02dd", '"' * 6)

# A timestamp wrapped in square brackets at the very start of a line.
_BRACKETED_STAMP = re.compile(r"\[([^\]]*)\]")


def _split_record(line, bracketed):
    """Return ``(timestamp, remainder)`` if *line* starts a new message, else ``None``."""
    if bracketed:
        found = _BRACKETED_STAMP.match(line)
        if found is None:
            return None
        # Take everything between the brackets instead of a fixed-width slice,
        # so the last digit of the seconds is never cut off.
        stamp, rest = found.group(1), line[found.end():]
        pattern = "%d.%m.%Y, %H:%M:%S"
    else:
        stamp, rest = line[:19], line[19:]
        pattern = "%d/%m/%Y %H:%M:%S"
    try:
        return datetime.datetime.strptime(stamp, pattern), rest
    except ValueError:
        # No parsable timestamp: the caller treats the line as a continuation.
        return None


def _format_record(record, channel, delim):
    """Render one collected message as a line of the import file."""
    # Double embedded quotes so the quoted text field stays parseable.
    text = record["content"].replace('"', '""')
    return '"{0}"{4} "{1}"{4} "@{2}"{4} "{3}"'.format(
        record["date"].timestamp(), channel, record["username"], text, delim)


def main(argv=None, prompt=input):
    """Convert an exported WhatsApp chat into a Slack import file.

    Two line layouts are recognised, chosen from the first line of the input:
    ``[dd.mm.yyyy, HH:MM:SS] name: text`` when it starts with ``[``, otherwise
    ``dd/mm/yyyy HH:MM:SS`` followed by ``name: text``.  Lines without a
    parsable timestamp are appended to the preceding message.  Dated lines
    without a ``name:`` part are skipped.

    Timestamps are parsed as naive datetimes, so ``timestamp()`` interprets
    them in the local timezone of the machine running the script.

    Args:
        argv: Command-line arguments without the program name; ``None`` makes
            argparse read ``sys.argv``.
        prompt: Callable used to ask for the Slack username of an unknown
            WhatsApp user; receives the question and returns the answer.
            An empty answer keeps the WhatsApp name.

    Raises:
        SystemExit: On invalid arguments or an unreadable input file.
    """
    console_prefix = "$ "
    default_channel = "#whatsapp"
    default_delim = ","
    description = "Transform exported whatsapp discussions into ready-for-import slack.com threads."

    parser = argparse.ArgumentParser(description=description)
    parser.add_argument("input", help="Input filename")
    parser.add_argument("-c", "--channel", default=default_channel,
                        help="Slack.com channel name, default: " + default_channel)
    parser.add_argument("-o", "--output", help="Output filename")
    parser.add_argument("-d", "--delimiter", default=default_delim,
                        help="Slack workspace import file field delimiter, default: " + default_delim)
    args = parser.parse_args(argv)

    # Print description in case of parse success
    print("\n {0}: {1}\n".format(os.path.basename(sys.argv[0]), description))

    if args.output is None:
        # Prefix only the file name so an input path with a directory still
        # yields a valid path next to the input file.
        folder, base = os.path.split(args.input)
        output_name = os.path.join(folder, "Slack Import " + base)
    else:
        output_name = args.output

    print("{0}input filename: '{1}'".format(console_prefix, args.input))
    print("{0}output filename: '{1}'".format(console_prefix, output_name))
    print("{0}slack channel name: '{1}'".format(console_prefix, args.channel))
    print("{0}import delimiter: '{1}'".format(console_prefix, args.delimiter))

    print("{0}Reading input file...".format(console_prefix))
    try:
        # utf-8-sig also accepts a leading byte-order mark.
        with open(args.input, "r", encoding="utf-8-sig") as infile:
            # Drop U+200E LEFT-TO-RIGHT MARK everywhere, before the format
            # detection below looks at the first character.
            input_lines = [raw.replace("\u200e", "") for raw in infile.read().splitlines()]
    except OSError as err:
        parser.error("cannot read '{0}': {1}".format(args.input, err))

    # A leading '[' on the first line selects the bracketed layout.
    bracketed = bool(input_lines) and input_lines[0].startswith("[")
    usernames_mapping = {}
    current = None  # message being assembled, or None when there is none

    with open(output_name, "w", encoding="utf-8") as outfile:

        def flush(record):
            new_line = _format_record(record, args.channel, args.delimiter)
            print(new_line)
            outfile.write(new_line + "\n")

        for line in input_lines:
            parsed = _split_record(line, bracketed)
            if parsed is None:
                # Continuation of the previous message; ignored when there is
                # no message to attach it to.
                if current is not None:
                    current["content"] += "\n" + line.strip().translate(_QUOTE_TABLE)
                continue

            if current is not None:
                flush(current)
            current = None

            dt, rest = parsed
            # A username followed by a colon is the only flag we can use.
            # Leading separators are dropped first so the name is never empty.
            name, colon, text = rest.lstrip(" :").partition(":")
            if not colon:
                continue  # no "name:" part on this line
            name = name.strip()
            if name not in usernames_mapping:
                answer = prompt(
                    "\n{0}Unknown username '{1}'. Enter corresponding Slack.com username (<Enter>=identical): ".format(
                        console_prefix, name)).strip()
                # Empty answer means "identical", as the prompt promises.
                usernames_mapping[name] = answer or name
            current = {
                "date": dt,
                "username": usernames_mapping[name],
                "content": text.strip().translate(_QUOTE_TABLE),
            }

        # We need this to get the last message.
        if current is not None:
            flush(current)

    print("\n {0}Done. Enjoy!\n".format(console_prefix))
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Added simple validation of the WhatsApp export format (it seems to differ between exports) and stripped a non-printable character (a wrapper around emojis) that was causing havoc.