#!/usr/bin/env python

# Extract playlists from a non-XML iTunes Library file (.itl)
# Copyright (c) 2018 Benno Rice, released under the BSD (2 Clause) Licence.

# Important information on the encryption used in the .itl file found here:
# https://mrexodia.cf/reversing/2014/12/16/iTunes-Library-Format-1
# Highly useful information on the .itl format itself found here:
# https://github.com/josephw/titl/blob/master/titl-core/src/main/java/org/kafsemo/titl/ParseLibrary.java

import argparse
import collections
import csv
import enum
import io
import struct
import zlib

from Crypto.Cipher import AES


HEADER_LENGTH = 0x90
CRYPTO_KEY = b'BHUILuilfghuila3'


Hdfm = collections.namedtuple('Hdfm', field_names=[
    'file_length',
    'version',
])

Hdsm = collections.namedtuple('Hdsm', field_names=[
    'block_type',
    'block_length',
])

Hghm = collections.namedtuple('Hghm', field_names=[])

Hohm = collections.namedtuple('Hohm', field_names=[
    'record_length',
    'type',
    'data',
])

Halm = collections.namedtuple('Hghm', field_names=[])

Haim = collections.namedtuple('Haim', field_names=[])

Hilm = collections.namedtuple('Hilm', field_names=[])

Hiim = collections.namedtuple('Hiim', field_names=[])

Htlm = collections.namedtuple('Htlm', field_names=[])

Htim = collections.namedtuple('Htim', field_names=[
    'record_length',
    'sub_blocks',
    'song_id',
    'block_type',
    # 'file_type',
    # 'playtime',
    # 'track_number',
    # 'track_total',
    # 'year',
    # 'bit_rate',
    # 'sample_rate',
    # 'volume_adjustment',
    # 'start_time',
    # 'end_time',
    # 'play_count',
    # 'compilation',
    # 'last_played',
    # 'disk_number',
    # 'disk_total',
    # 'rating',
    # 'added',
])

Hqlm = collections.namedtuple('Hqlm', field_names=[])

Hqim = collections.namedtuple('Hqlm', field_names=[])

Hsts = collections.namedtuple('Hsts', field_names=[])

Hplm = collections.namedtuple('Hplm', field_names=[])

Hpim = collections.namedtuple('Hpim', field_names=[
    'item_count',
])

Hptm = collections.namedtuple('Hptm', field_names=[
    'key',
])

Hslm = collections.namedtuple('Hslm', field_names=[])

Hpsm = collections.namedtuple('Hpsm', field_names=[])

Hrlm = collections.namedtuple('Hrlm', field_names=[])

Hrpm = collections.namedtuple('Hrpm', field_names=[])


class HohmType(enum.IntEnum):
    TITLE = 0x02
    ALBUM_TITLE = 0x03
    ARTIST = 0x04
    PLAYLIST_TITLE = 0x64


HOHM_ODD_TYPES = (0x42, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x192, 0x1f7, 0x1f4, 0x202, 0x320)


class ItlIO(io.BytesIO):
    def __init__(self, *args, **kwargs):
        self.flipped = False
        super().__init__(*args, **kwargs)
    
    def skip(self, nbytes):
        self.read(nbytes)

    def read_ascii(self, nbytes):
        return self.read(nbytes).decode('ascii')

    def read_byte(self):
        return self.read(1)[0]

    def read_uint(self):
        if self.flipped:
            return struct.unpack('<I', self.read(4))[0]
        else:
            return struct.unpack('>I', self.read(4))[0]


class RecordParser:
    def __init__(self, data):
        self.data = ItlIO(data)
    
    def parse(self):
        while True:
            record_type = self.data.read_ascii(4)
            if not record_type:
                return

            if self.data.flipped:
                record_type = record_type[-1::-1]

            method = f'parse_{record_type}'

            if not hasattr(self, method):
                method = f'parse_{record_type[-1::-1]}'
                if not hasattr(self, method):
                    print(self.data.getvalue()[self.data.tell():])
                    raise ValueError(f"unknown record type: {record_type}")
                self.data.flipped = True

            length = self.data.read_uint()
            data = ItlIO(self.data.read(length - 8))
            if self.data.flipped:
                data.flipped = True
            yield getattr(self, method)(data)

    def parse_hdfm(self, data):
        file_length = data.read_uint()
        data.skip(4)
        version_length = data.read_byte()
        version = data.read_ascii(version_length)
        return Hdfm(file_length=file_length,
                    version=version)

    def parse_hdsm(self, data):
        record_length = data.read_uint()
        block_type = data.read_uint()

        if block_type in (4, 22):
            self.data.skip(record_length - len(data.getvalue()) - 8)

        return Hdsm(block_type=block_type, block_length=record_length)

    def parse_hghm(self, data):
        return Hghm()
    
    def parse_hohm(self, data):
        record_length = data.read_uint()
        hohm_type = data.read_uint()
        hohm_data = self.data.read(record_length - len(data.getvalue()) - 8)

        # print(hex(hohm_type), repr(hohm_data))

        if hohm_type not in HOHM_ODD_TYPES:
            hohm_data = hohm_data[16:]
            # What even is character encoding?
            # There might be something telling us what the encoding is but this
            # is sufficient for current purposes.
            if len(hohm_data) > 1 and len(hohm_data) % 2 == 0 and hohm_data[0] == 0:
                hohm_data = hohm_data.decode('utf-16be')
            elif len(hohm_data) > 1 and len(hohm_data) % 2 == 0 and hohm_data[-1] == 0:
                hohm_data = hohm_data.decode('utf-16le')
            else:
                hohm_data = hohm_data.decode('iso-8859-1')

        return Hohm(record_length=record_length, type=hohm_type, data=hohm_data)

    def parse_halm(self, data):
        return Halm()

    def parse_haim(self, data):
        return Haim()

    def parse_hilm(self, data):
        return Hilm()

    def parse_hiim(self, data):
        return Hiim()

    def parse_htlm(self, data):
        return Htlm()
    
    def parse_htim(self, data):
        record_length = data.read_uint()
        sub_blocks = data.read_uint()
        song_id = data.read_uint()
        block_type = data.read_uint()

        # data = self.data.read(record_length - len(data.getvalue()) - 8)
        # print(repr(data))

        return Htim(record_length, sub_blocks, song_id, block_type)

    def parse_hqlm(self, data):
        return Hqlm()

    def parse_hqim(self, data):
        return Hqim()

    def parse_hsts(self, data):
        return Hsts()

    def parse_hplm(self, data):
        return Hplm()

    def parse_hpim(self, data):
        data.skip(4 + 4)
        item_count = data.read_uint()
        return Hpim(item_count)
    
    def parse_hptm(self, data):
        data.skip(16)
        key = data.read_uint()
        return Hptm(key)

    def parse_hslm(self, data):
        return Hslm()

    def parse_hpsm(self, data):
        return Hpsm()

    def parse_hrlm(self, data):
        return Hrlm()

    def parse_hrpm(self, data):
        return Hrpm()


parser = argparse.ArgumentParser()
parser.add_argument('filename', nargs='?', default='iTunes Library.itl',
                    help='iTunes Library Filename')
args = parser.parse_args()

# So it appears that the .itl format, in modern versions of iTunes, has a header
# block containing some information, one part of which tells us how much of the
# following data is AES/ECB encrypted with a key that's made it around the
# Internet a bit. To get at the actual data you need to decrypt that bit in place
# then decompress (zlib) the bit after the initial header. After that it's a similar
# format to older iTunes library files.

itl = open(args.filename, 'rb').read()
header = itl[:HEADER_LENGTH]

crypt_length = (len(itl) - HEADER_LENGTH) & ~0xf
max_crypt_length = struct.unpack('>I', header[0x5C:0x60])[0]
crypt_length = min(crypt_length, max_crypt_length)

cipher = AES.new(CRYPTO_KEY, AES.MODE_ECB)
decrypted = cipher.decrypt(itl[HEADER_LENGTH:max_crypt_length + HEADER_LENGTH])

itl = decrypted + itl[max_crypt_length + HEADER_LENGTH:]
itl = header + zlib.decompress(itl)

track = {}
tracks = {}
playlist = {}
playlists = {}

for record in RecordParser(itl).parse():
    if type(record) is Htim:
        if track:
            tracks[track['song_id']] = track
        track = {'song_id': record.song_id}
    elif type(record) is Hohm:
        if record.type == HohmType.TITLE:
            track['title'] = record.data
        elif record.type == HohmType.ALBUM_TITLE:
            track['album'] = record.data
        elif record.type == HohmType.ARTIST:
            track['artist'] = record.data
        elif record.type == HohmType.PLAYLIST_TITLE:
            playlist['title'] = record.data
    elif type(record) is Hpim:
        if playlist:
            playlists[playlist['title']] = playlist
        playlist = {'items': []}
    elif type(record) is Hptm:
        playlist['items'].append(record.key)

if track:
    tracks[track['song_id']] = track

if playlist:
    playlists[playlist['title']] = playlist

output = csv.writer(open('playlists.csv', 'w'))

for title, playlist in playlists.items():
    # The playlists I was after had titles of the form 'YYYY-M' or 'YYYY-MM'...
    if len(title) < 5 or title[0] != '2' or title[4] != '-':
        continue
    year, month = title.split('-')
    # ... and I wanted to make them consistently 'YYYY-MM'.
    title = f'{year}-{int(month):02d}'
    for item in (tracks[x] for x in playlist['items']):
        print(repr(item))
        output.writerow([title, item['title'], item['artist'], item.get('album', '')])