#!/usr/bin/env python3
#encoding: utf-8

# script to extract the script from the chocobo no fushigi na dungeon .bin file
# and to inject the script back after changes have been made

import struct
import os
import sys

# The image is addressed as 0x930-byte sectors that each carry 0x800 bytes of
# payload (see do_encode / do_decode); the sizes below are whole sector counts.

# exact size in bytes the game image must have: 301587 sectors of 0x930 bytes
BIN_SIZE = 301587 * 0x930
# size of the message data: 0x80 sectors worth of 0x800-byte payloads
MSG_BIN_SIZE = 0x80 * 0x800
# position of the message data, counted in payload bytes (payload sector 0x2e299)
MSG_BIN_OFFSET = 0x2e299 * 0x800

# separators used in the text script: text items are separated by a blank
# line, chunks by a lone backslash line with a blank line on either side
ITEM_DELIMITER = '\n' * 2
CHUNK_DELIMITER = ITEM_DELIMITER + '\\' + ITEM_DELIMITER

# decode translation table
# The character at index n is stored in the game data as the code n + 16
# (encode_text adds 16 to the index, decode_text subtracts it again).
# Every character must appear exactly once: encoding uses the first match,
# so a duplicated entry would not survive a decode/encode round trip.
decode_table = (
    'あいうえおかきくけこさしすせそたヴ！ァィゥ'
    'ェォッ（）。＋，－．／０１２３４５６７８９'
    '：ー〜・⋯？「ＡＢＣＤＥＦＧＨＩＪＫＬＭＮ'
    'ＯＰＱＲＳＴＵＶＷＸＹＺャュョベボ」ａｂｃ'
    'ｄｅｆｇｈｉｊｋｌｍｎｏｐｑｒｓｔｕｖｗｘ'
    'ｙｚパピプペポちつてとなにぬねのはひふへほ'
    'まみむめもやゆよらりるれろわをんがぎぐげご'
    'ざじずぜぞだぢづでどばびぶべぼぱぴぷぺぽぁ'
    'ぃぅぇぉっゃゅょアイウエオカキクケコサシス'
    'セソタチツテトナニヌネノハヒフヘホマミムメ'
    'モヤユヨラリルレロワヲンガギグゲゴザジズゼ'
    'ゾダヂヅデドバビブ'
)

# encode translation table
# Extra characters accepted when encoding, mapped straight to a byte code:
# '!' is 33, the characters of encode_table_a take consecutive codes from 40
# and those of encode_table_b take consecutive codes from 96.
encode_table_a = '()*+,-./0123456789:—~•…?“ABCDEFGHIJKLMNOPQRSTUVWXYZ'
encode_table_b = '”abcdefghijklmnopqrstuvwxyz'
encode_table = {'!': 33}
encode_table.update(
    (char, code) for code, char in enumerate(encode_table_a, start=40)
)
encode_table.update(
    (char, code) for code, char in enumerate(encode_table_b, start=96)
)

# special sequences
# Words that may appear inside a <...> token in the text script.
END_TK, COLOR_TK, SPEED_TK, DELAY_TK = 'end', 'color', 'speed', 'delay'
NAME_TK, EVENT_TK, WAIT_TK, CLOSE_TK = 'name', 'event', 'wait', 'close'

# byte value written for each token word
tk_val = dict([
    (END_TK, 0x0),
    (COLOR_TK, 0x4),
    (SPEED_TK, 0x5),
    (DELAY_TK, 0x6),
    (NAME_TK, 0xb),
    (EVENT_TK, 0xc),
    (WAIT_TK, 0xd),
    (CLOSE_TK, 0xe),
])

# colors
# A color name encodes to its position in this list.
colors = ['white', 'red', 'green', 'blue', 'yellow', 'gray']

tk_val.update((name, position) for position, name in enumerate(colors))

def token(*args):
    """Return the arguments formatted as a script token: ``<a b c>``.

    Each argument is converted with its default string formatting and the
    results are joined with single spaces between angle brackets.
    """
    words = (format(arg) for arg in args)
    return '<' + ' '.join(words) + '>'

def encode_text(text: str) -> bytes:
    """Encode one script text item into the game's byte representation.

    * A character found in ``decode_table`` is stored as its index plus 16.
    * A newline is stored as 0x0a and a space as 0x0f.
    * ``<...>`` is a token: its content is split on whitespace and every word
      becomes one byte. Words listed in ``tk_val`` use the mapped value,
      anything else is parsed as an integer.
    * A character found in ``encode_table`` is stored as the code given there.

    Raises:
        Exception: if a character has no encoding, or a ``<`` is never
            closed by a ``>``.
        ValueError: if a token word is neither a known name nor an integer,
            or a value does not fit into a single byte.
    """
    out = bytearray()
    i = 0
    while i < len(text):
        c = text[i]
        # one scan of the table answers both "is it there" and "where"
        table_index = decode_table.find(c)
        if table_index >= 0:
            page, byte = divmod(table_index + 16, 0x100)
            # codes past 0xff are written as a page byte followed by the low byte
            if page > 0:
                out.append(page)
            out.append(byte)
        elif c == '\n':
            out.append(0xa)
        elif c == ' ':
            out.append(0xf)
        elif c == '<':
            close = text.find('>', i + 1)
            if close < 0:
                # report the problem instead of running off the end of the text
                snippet = text[i:i + 20]
                raise Exception(
                    f'unterminated token at position {i}: "{snippet}"')
            words = text[i + 1:close].split()
            out += bytes(tk_val[w] if w in tk_val else int(w) for w in words)
            # continue after the closing '>' (the increment below skips it)
            i = close
        elif c in encode_table:
            out.append(encode_table[c])
        else:
            raise Exception(f'cannot encode char "{c}"')
        i += 1
    return bytes(out)

def decode_text(text: bytes) -> str:
    """Decode one text item from the game's byte representation.

    Bytes of 16 and above are looked up in ``decode_table``. Bytes below 16
    are control codes:

    * 0x0, 0xb, 0xc, 0xd, 0xe become the ``end``, ``name``, ``event``,
      ``wait`` and ``close`` tokens; 0xa is a newline and 0xf a space.
    * 0x1-0x3 introduce a two-byte code (page byte, low byte).
    * 0x4, 0x5, 0x6 and 0x7 take one operand byte and become
      ``<color NAME>``, ``<speed N>``, ``<delay N>`` and ``<7 N>``.
    * Any other value is emitted as a bare numeric token.

    Anything that cannot be named is written as a numeric token, so that
    ``encode_text`` turns it back into the same bytes. That includes a
    control code whose operand byte is missing because the data ends right
    after it, and a color index that is not in ``colors``.
    """
    def dec_char(code):
        # codes past the end of the table are kept as numeric tokens
        index = code - 16
        if index < len(decode_table):
            return decode_table[index]
        page, byte = divmod(code, 0x100)
        return token(byte) if page == 0 else token(page, byte)

    # control codes that stand on their own
    plain = {
        0x0: token(END_TK),
        0xa: '\n',
        0xb: token(NAME_TK),
        0xc: token(EVENT_TK),
        0xd: token(WAIT_TK),
        0xe: token(CLOSE_TK),
        0xf: ' ',
    }

    out = []
    i = 0
    while i < len(text):
        c = int(text[i])
        if c in plain:
            out.append(plain[c])
        elif 0x1 <= c <= 0x7:
            if i + 1 >= len(text):
                # operand byte is missing: keep the lone code as a number
                out.append(token(c))
            else:
                arg = int(text[i + 1])
                i += 1
                if c <= 0x3:
                    # page byte followed by the low byte of the code
                    out.append(dec_char(arg + c * 0x100))
                elif c == 0x4:
                    # unknown color indexes stay numeric: <color 9>
                    name = colors[arg] if arg < len(colors) else arg
                    out.append(token(COLOR_TK, name))
                elif c == 0x5:
                    out.append(token(SPEED_TK, arg))
                elif c == 0x6:
                    out.append(token(DELAY_TK, arg))
                else:
                    out.append(token(c, arg))
        elif c < 16:
            # remaining control codes have no name
            out.append(token(c))
        else:
            out.append(dec_char(c))
        i += 1
    return ''.join(out)

def do_encode(bin_path: str, script_path: str):
    """Encode the text script and write it into the game image in place.

    The script is split into chunks on ``CHUNK_DELIMITER`` and each chunk
    into text items on ``ITEM_DELIMITER``. A chunk is laid out as a table of
    little-endian 16-bit item offsets followed by the encoded items, and is
    zero-padded to 0x8000 bytes. The chunks together must fill exactly
    ``MSG_BIN_SIZE`` bytes.

    The result is written 0x800 bytes at a time into consecutive 0x930-byte
    sectors of the image, starting 0x18 bytes into each sector. The other
    bytes of each sector are not modified.

    Args:
        bin_path: path of the game image; it is modified in place.
        script_path: path of the UTF-8 text script. A leading byte order
            mark and Windows line endings are accepted.

    Raises:
        ValueError: if the image does not have the expected size, a chunk
            does not fit into 0x8000 bytes, or the chunks do not add up to
            ``MSG_BIN_SIZE``.
    """
    chunk_size = 0x8000
    payload_size = 0x800
    sector_size = 0x930
    payload_start = 0x18

    # explicit checks rather than assert, which is skipped under "python -O"
    actual_size = os.path.getsize(bin_path)
    if actual_size != BIN_SIZE:
        raise ValueError(
            f'{bin_path} is {actual_size} bytes, expected {BIN_SIZE}')

    # first read in the script file
    with open(script_path, 'rb') as f:
        script = f.read().decode('utf-8-sig')
    # editors may save CRLF line endings, which would break the delimiters
    script = script.replace('\r\n', '\n').strip()

    # encode and lay out each chunk
    chunk_datas = []
    for index, chunk_str in enumerate(script.split(CHUNK_DELIMITER)):
        items = [encode_text(item) for item in chunk_str.split(ITEM_DELIMITER)]

        # the offset table holds one 16-bit entry per item
        table_size = len(items) * 2
        total = table_size + sum(len(item) for item in items)
        if total > chunk_size:
            raise ValueError(
                f'chunk {index} needs {total:#x} bytes, '
                f'but only {chunk_size:#x} are available')

        # each offset is where the item starts, counted from the chunk start
        offsets = []
        offset = table_size
        for item in items:
            offsets.append(offset)
            offset += len(item)

        chunk_data = struct.pack(f'<{len(items)}H', *offsets) + b''.join(items)
        chunk_datas.append(chunk_data.ljust(chunk_size, b'\0'))
    data = b''.join(chunk_datas)

    if len(data) != MSG_BIN_SIZE:
        raise ValueError(
            f'script has {len(chunk_datas)} chunks, '
            f'expected {MSG_BIN_SIZE // chunk_size}')

    # write the data into the payload area of consecutive sectors
    with open(bin_path, 'r+b') as f:
        offset = (MSG_BIN_OFFSET // payload_size) * sector_size + payload_start
        for start in range(0, MSG_BIN_SIZE, payload_size):
            f.seek(offset)
            f.write(data[start:start + payload_size])
            offset += sector_size

def do_decode(bin_path: str, script_path: str):
    """Extract the message data from the game image and write it as text.

    The message data is gathered from consecutive 0x930-byte sectors of the
    image, taking the 0x800 bytes that start 0x18 bytes into each sector.
    The raw data is also written to ``msg.bin`` in the current directory.

    The data is split into 0x8000-byte chunks. Each chunk starts with a
    table of little-endian 16-bit item offsets; the first offset also gives
    the table size and therefore the number of items. Trailing zero bytes of
    a chunk are treated as padding and dropped before decoding.

    Args:
        bin_path: path of the game image (only read).
        script_path: path the UTF-8 text script is written to.

    Raises:
        ValueError: if the image does not have the expected size, the
            message data could not be read completely, or a chunk contains
            nothing but zero bytes.
    """
    chunk_size = 0x8000
    payload_size = 0x800
    sector_size = 0x930
    payload_start = 0x18

    # explicit checks rather than assert, which is skipped under "python -O"
    actual_size = os.path.getsize(bin_path)
    if actual_size != BIN_SIZE:
        raise ValueError(
            f'{bin_path} is {actual_size} bytes, expected {BIN_SIZE}')

    # read the sectors that hold the message data
    num_sectors = MSG_BIN_SIZE // payload_size
    with open(bin_path, 'rb') as f:
        f.seek((MSG_BIN_OFFSET // payload_size) * sector_size)
        sector_data = f.read(num_sectors * sector_size)

    # keep only the payload of each sector
    data = b''.join(
        sector_data[start + payload_start:start + payload_start + payload_size]
        for start in range(0, num_sectors * sector_size, sector_size)
    )
    if len(data) != MSG_BIN_SIZE:
        raise ValueError(
            f'read {len(data)} bytes of message data, expected {MSG_BIN_SIZE}')
    with open('msg.bin', 'wb') as g:
        g.write(data)

    # decode each 0x8000 chunk
    chunks_str = []
    for index, start in enumerate(range(0, len(data), chunk_size)):
        # strip off the trailing null bytes
        chunk = data[start:start + chunk_size].rstrip(b'\0')
        if not chunk:
            raise ValueError(f'chunk {index} contains no data')

        # the first offset points just past the offset table, so it also
        # tells how many 16-bit entries the table has
        first_offset = struct.unpack('<H', chunk[:2])[0]
        num_items = first_offset // 2
        offsets = list(struct.unpack(f'<{num_items}H', chunk[:first_offset]))

        # an item ends where the next one starts; the last ends with the chunk
        end_offsets = offsets[1:] + [len(chunk)]
        chunks_str.append([
            decode_text(chunk[begin:end])
            for begin, end in zip(offsets, end_offsets)
        ])

    # write to output file
    out = CHUNK_DELIMITER.join(
        ITEM_DELIMITER.join(items) for items in chunks_str)
    with open(script_path, 'wb') as f:
        f.write(out.encode('utf8'))

if __name__ == '__main__':
    # command line: <encode|decode> <game.bin> <script.txt>
    if len(sys.argv) != 4:
        print(f'usage: {sys.argv[0]} [encode|decode] [game.bin] [script.txt]')
        # sys.exit rather than the interactive-only exit() helper
        sys.exit(1)
    mode, bin_path, script_path = sys.argv[1:]
    if mode == 'encode':
        do_encode(bin_path, script_path)
    elif mode == 'decode':
        do_decode(bin_path, script_path)
    else:
        print('first argument must be "encode" or "decode"')
        # an unknown mode is an error, so report it through the exit status
        sys.exit(1)