Skip to content

Instantly share code, notes, and snippets.

@jeamland
Last active December 31, 2025 00:12
Show Gist options
  • Select an option

  • Save jeamland/c856e9993008c9611a9910a3b22f9479 to your computer and use it in GitHub Desktop.

Select an option

Save jeamland/c856e9993008c9611a9910a3b22f9479 to your computer and use it in GitHub Desktop.

Revisions

  1. jeamland revised this gist Oct 4, 2018. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion playlist_extractor.py
    Original file line number Diff line number Diff line change
    @@ -3,7 +3,7 @@
    # Extract playlists from a non-XML iTunes Library file (.itl)
    # Copyright (c) 2018 Benno Rice, released under the BSD (2 Clause) Licence.

    # Important information on the encyrption used in the .itl file found here:
    # Important information on the encryption used in the .itl file found here:
    # https://mrexodia.cf/reversing/2014/12/16/iTunes-Library-Format-1
    # Highly useful information on the .itl format itself found here:
    # https://github.com/josephw/titl/blob/master/titl-core/src/main/java/org/kafsemo/titl/ParseLibrary.java
  2. jeamland revised this gist Oct 4, 2018. 1 changed file with 8 additions and 0 deletions.
    8 changes: 8 additions & 0 deletions playlist_extractor.py
    Original file line number Diff line number Diff line change
    @@ -1,5 +1,13 @@
    #!/usr/bin/env python

    # Extract playlists from a non-XML iTunes Library file (.itl)
    # Copyright (c) 2018 Benno Rice, released under the BSD (2 Clause) Licence.

    # Important information on the encyrption used in the .itl file found here:
    # https://mrexodia.cf/reversing/2014/12/16/iTunes-Library-Format-1
    # Highly useful information on the .itl format itself found here:
    # https://github.com/josephw/titl/blob/master/titl-core/src/main/java/org/kafsemo/titl/ParseLibrary.java

    import argparse
    import collections
    import csv
  3. jeamland created this gist Oct 4, 2018.
    324 changes: 324 additions & 0 deletions playlist_extractor.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,324 @@
    #!/usr/bin/env python

    import argparse
    import collections
    import csv
    import enum
    import io
    import struct
    import zlib

    from Crypto.Cipher import AES


    HEADER_LENGTH = 0x90
    CRYPTO_KEY = b'BHUILuilfghuila3'


    Hdfm = collections.namedtuple('Hdfm', field_names=[
    'file_length',
    'version',
    ])

    Hdsm = collections.namedtuple('Hdsm', field_names=[
    'block_type',
    'block_length',
    ])

    Hghm = collections.namedtuple('Hghm', field_names=[])

    Hohm = collections.namedtuple('Hohm', field_names=[
    'record_length',
    'type',
    'data',
    ])

    Halm = collections.namedtuple('Hghm', field_names=[])

    Haim = collections.namedtuple('Haim', field_names=[])

    Hilm = collections.namedtuple('Hilm', field_names=[])

    Hiim = collections.namedtuple('Hiim', field_names=[])

    Htlm = collections.namedtuple('Htlm', field_names=[])

    Htim = collections.namedtuple('Htim', field_names=[
    'record_length',
    'sub_blocks',
    'song_id',
    'block_type',
    # 'file_type',
    # 'playtime',
    # 'track_number',
    # 'track_total',
    # 'year',
    # 'bit_rate',
    # 'sample_rate',
    # 'volume_adjustment',
    # 'start_time',
    # 'end_time',
    # 'play_count',
    # 'compilation',
    # 'last_played',
    # 'disk_number',
    # 'disk_total',
    # 'rating',
    # 'added',
    ])

    Hqlm = collections.namedtuple('Hqlm', field_names=[])

    Hqim = collections.namedtuple('Hqlm', field_names=[])

    Hsts = collections.namedtuple('Hsts', field_names=[])

    Hplm = collections.namedtuple('Hplm', field_names=[])

    Hpim = collections.namedtuple('Hpim', field_names=[
    'item_count',
    ])

    Hptm = collections.namedtuple('Hptm', field_names=[
    'key',
    ])

    Hslm = collections.namedtuple('Hslm', field_names=[])

    Hpsm = collections.namedtuple('Hpsm', field_names=[])

    Hrlm = collections.namedtuple('Hrlm', field_names=[])

    Hrpm = collections.namedtuple('Hrpm', field_names=[])


    class HohmType(enum.IntEnum):
    TITLE = 0x02
    ALBUM_TITLE = 0x03
    ARTIST = 0x04
    PLAYLIST_TITLE = 0x64


    HOHM_ODD_TYPES = (0x42, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x192, 0x1f7, 0x1f4, 0x202, 0x320)


    class ItlIO(io.BytesIO):
    def __init__(self, *args, **kwargs):
    self.flipped = False
    super().__init__(*args, **kwargs)

    def skip(self, nbytes):
    self.read(nbytes)

    def read_ascii(self, nbytes):
    return self.read(nbytes).decode('ascii')

    def read_byte(self):
    return self.read(1)[0]

    def read_uint(self):
    if self.flipped:
    return struct.unpack('<I', self.read(4))[0]
    else:
    return struct.unpack('>I', self.read(4))[0]


    class RecordParser:
    def __init__(self, data):
    self.data = ItlIO(data)

    def parse(self):
    while True:
    record_type = self.data.read_ascii(4)
    if not record_type:
    return

    if self.data.flipped:
    record_type = record_type[-1::-1]

    method = f'parse_{record_type}'

    if not hasattr(self, method):
    method = f'parse_{record_type[-1::-1]}'
    if not hasattr(self, method):
    print(self.data.getvalue()[self.data.tell():])
    raise ValueError(f"unknown record type: {record_type}")
    self.data.flipped = True

    length = self.data.read_uint()
    data = ItlIO(self.data.read(length - 8))
    if self.data.flipped:
    data.flipped = True
    yield getattr(self, method)(data)

    def parse_hdfm(self, data):
    file_length = data.read_uint()
    data.skip(4)
    version_length = data.read_byte()
    version = data.read_ascii(version_length)
    return Hdfm(file_length=file_length,
    version=version)

    def parse_hdsm(self, data):
    record_length = data.read_uint()
    block_type = data.read_uint()

    if block_type in (4, 22):
    self.data.skip(record_length - len(data.getvalue()) - 8)

    return Hdsm(block_type=block_type, block_length=record_length)

    def parse_hghm(self, data):
    return Hghm()

    def parse_hohm(self, data):
    record_length = data.read_uint()
    hohm_type = data.read_uint()
    hohm_data = self.data.read(record_length - len(data.getvalue()) - 8)

    # print(hex(hohm_type), repr(hohm_data))

    if hohm_type not in HOHM_ODD_TYPES:
    hohm_data = hohm_data[16:]
    # What even is character encoding?
    # There might be something telling us what the encoding is but this
    # is sufficient for current purposes.
    if len(hohm_data) > 1 and len(hohm_data) % 2 == 0 and hohm_data[0] == 0:
    hohm_data = hohm_data.decode('utf-16be')
    elif len(hohm_data) > 1 and len(hohm_data) % 2 == 0 and hohm_data[-1] == 0:
    hohm_data = hohm_data.decode('utf-16le')
    else:
    hohm_data = hohm_data.decode('iso-8859-1')

    return Hohm(record_length=record_length, type=hohm_type, data=hohm_data)

    def parse_halm(self, data):
    return Halm()

    def parse_haim(self, data):
    return Haim()

    def parse_hilm(self, data):
    return Hilm()

    def parse_hiim(self, data):
    return Hiim()

    def parse_htlm(self, data):
    return Htlm()

    def parse_htim(self, data):
    record_length = data.read_uint()
    sub_blocks = data.read_uint()
    song_id = data.read_uint()
    block_type = data.read_uint()

    # data = self.data.read(record_length - len(data.getvalue()) - 8)
    # print(repr(data))

    return Htim(record_length, sub_blocks, song_id, block_type)

    def parse_hqlm(self, data):
    return Hqlm()

    def parse_hqim(self, data):
    return Hqim()

    def parse_hsts(self, data):
    return Hsts()

    def parse_hplm(self, data):
    return Hplm()

    def parse_hpim(self, data):
    data.skip(4 + 4)
    item_count = data.read_uint()
    return Hpim(item_count)

    def parse_hptm(self, data):
    data.skip(16)
    key = data.read_uint()
    return Hptm(key)

    def parse_hslm(self, data):
    return Hslm()

    def parse_hpsm(self, data):
    return Hpsm()

    def parse_hrlm(self, data):
    return Hrlm()

    def parse_hrpm(self, data):
    return Hrpm()


    parser = argparse.ArgumentParser()
    parser.add_argument('filename', nargs='?', default='iTunes Library.itl',
    help='iTunes Library Filename')
    args = parser.parse_args()

    # So it appears that the .itl format, in modern versions of iTunes, has a header
    # block containing some information, one part of which tells us how much of the
    # following data is AES/ECB encrypted with a key that's made it around the
    # Internet a bit. To get at the actual data you need to decrypt that bit in place
    # then decompress (zlib) the bit after the initial header. After that it's a similar
    # format to older iTunes library files.

    itl = open(args.filename, 'rb').read()
    header = itl[:HEADER_LENGTH]

    crypt_length = (len(itl) - HEADER_LENGTH) & ~0xf
    max_crypt_length = struct.unpack('>I', header[0x5C:0x60])[0]
    crypt_length = min(crypt_length, max_crypt_length)

    cipher = AES.new(CRYPTO_KEY, AES.MODE_ECB)
    decrypted = cipher.decrypt(itl[HEADER_LENGTH:max_crypt_length + HEADER_LENGTH])

    itl = decrypted + itl[max_crypt_length + HEADER_LENGTH:]
    itl = header + zlib.decompress(itl)

    track = {}
    tracks = {}
    playlist = {}
    playlists = {}

    for record in RecordParser(itl).parse():
    if type(record) is Htim:
    if track:
    tracks[track['song_id']] = track
    track = {'song_id': record.song_id}
    elif type(record) is Hohm:
    if record.type == HohmType.TITLE:
    track['title'] = record.data
    elif record.type == HohmType.ALBUM_TITLE:
    track['album'] = record.data
    elif record.type == HohmType.ARTIST:
    track['artist'] = record.data
    elif record.type == HohmType.PLAYLIST_TITLE:
    playlist['title'] = record.data
    elif type(record) is Hpim:
    if playlist:
    playlists[playlist['title']] = playlist
    playlist = {'items': []}
    elif type(record) is Hptm:
    playlist['items'].append(record.key)

    if track:
    tracks[track['song_id']] = track

    if playlist:
    playlists[playlist['title']] = playlist

    output = csv.writer(open('playlists.csv', 'w'))

    for title, playlist in playlists.items():
    # The playlists I was after had titles of the form 'YYYY-M' or 'YYYY-MM'...
    if len(title) < 5 or title[0] != '2' or title[4] != '-':
    continue
    year, month = title.split('-')
    # ... and I wanted to make them consistently 'YYYY-MM'.
    title = f'{year}-{int(month):02d}'
    for item in (tracks[x] for x in playlist['items']):
    print(repr(item))
    output.writerow([title, item['title'], item['artist'], item.get('album', '')])