jeamland · December 31, 2025 00:12 · Oct 4, 2018 · Oct 4, 2018 · Oct 4, 2018
diff --git a/playlist_extractor.py b/playlist_extractor.py
@@ -3,7 +3,7 @@
 # Extract playlists from a non-XML iTunes Library file (.itl)
 # Copyright (c) 2018 Benno Rice, released under the BSD (2 Clause) Licence.
 
-# Important information on the encyrption used in the .itl file found here:
+# Important information on the encryption used in the .itl file found here:
 # https://mrexodia.cf/reversing/2014/12/16/iTunes-Library-Format-1
 # Highly useful information on the .itl format itself found here:
 # https://github.com/josephw/titl/blob/master/titl-core/src/main/java/org/kafsemo/titl/ParseLibrary.java

diff --git a/playlist_extractor.py b/playlist_extractor.py
@@ -1,5 +1,13 @@
 #!/usr/bin/env python
 
+# Extract playlists from a non-XML iTunes Library file (.itl)
+# Copyright (c) 2018 Benno Rice, released under the BSD (2 Clause) Licence.
+
+# Important information on the encyrption used in the .itl file found here:
+# https://mrexodia.cf/reversing/2014/12/16/iTunes-Library-Format-1
+# Highly useful information on the .itl format itself found here:
+# https://github.com/josephw/titl/blob/master/titl-core/src/main/java/org/kafsemo/titl/ParseLibrary.java
+
 import argparse
 import collections
 import csv

diff --git a/playlist_extractor.py b/playlist_extractor.py
@@ -0,0 +1,324 @@
+#!/usr/bin/env python
+
+import argparse
+import collections
+import csv
+import enum
+import io
+import struct
+import zlib
+
+from Crypto.Cipher import AES
+
+
+HEADER_LENGTH = 0x90
+CRYPTO_KEY = b'BHUILuilfghuila3'
+
+
+# Parsed 'hdfm' record, as returned by RecordParser.parse_hdfm.
+Hdfm = collections.namedtuple('Hdfm', field_names=[
+    'file_length',
+    'version',
+])
+
+# Parsed 'hdsm' record, as returned by RecordParser.parse_hdsm.
+Hdsm = collections.namedtuple('Hdsm', field_names=[
+    'block_type',
+    'block_length',
+])
+
+# 'hghm' record; RecordParser.parse_hghm extracts no fields from it.
+Hghm = collections.namedtuple('Hghm', field_names=[])
+
+# Parsed 'hohm' record. ``data`` is decoded text for most types and raw bytes
+# for the types listed in HOHM_ODD_TYPES (see RecordParser.parse_hohm).
+Hohm = collections.namedtuple('Hohm', field_names=[
+    'record_length',
+    'type',
+    'data',
+])
+
+Halm = collections.namedtuple('Halm', field_names=[])  # 'halm' record; no fields extracted
+
+# 'haim', 'hilm', 'hiim' and 'htlm' records are recognised by RecordParser
+# but no fields are extracted from them.
+Haim = collections.namedtuple('Haim', field_names=[])
+
+Hilm = collections.namedtuple('Hilm', field_names=[])
+
+Hiim = collections.namedtuple('Hiim', field_names=[])
+
+Htlm = collections.namedtuple('Htlm', field_names=[])
+
+# Parsed 'htim' record, as returned by RecordParser.parse_htim. Only the
+# first four header values are read; the commented-out names below are
+# fields that are not currently parsed.
+Htim = collections.namedtuple('Htim', field_names=[
+    'record_length',
+    'sub_blocks',
+    'song_id',
+    'block_type',
+    # 'file_type',
+    # 'playtime',
+    # 'track_number',
+    # 'track_total',
+    # 'year',
+    # 'bit_rate',
+    # 'sample_rate',
+    # 'volume_adjustment',
+    # 'start_time',
+    # 'end_time',
+    # 'play_count',
+    # 'compilation',
+    # 'last_played',
+    # 'disk_number',
+    # 'disk_total',
+    # 'rating',
+    # 'added',
+])
+
+# 'hqlm' record; no fields are extracted.
+Hqlm = collections.namedtuple('Hqlm', field_names=[])
+
+Hqim = collections.namedtuple('Hqim', field_names=[])  # 'hqim' record; no fields extracted
+
+# 'hsts' and 'hplm' records; no fields are extracted.
+Hsts = collections.namedtuple('Hsts', field_names=[])
+
+Hplm = collections.namedtuple('Hplm', field_names=[])
+
+# Parsed 'hpim' record, as returned by RecordParser.parse_hpim. The main
+# script starts a new playlist each time one of these is seen.
+Hpim = collections.namedtuple('Hpim', field_names=[
+    'item_count',
+])
+
+# Parsed 'hptm' record, as returned by RecordParser.parse_hptm. The main
+# script uses ``key`` to look a track up by its song_id.
+Hptm = collections.namedtuple('Hptm', field_names=[
+    'key',
+])
+
+# 'hslm', 'hpsm', 'hrlm' and 'hrpm' records; no fields are extracted.
+Hslm = collections.namedtuple('Hslm', field_names=[])
+
+Hpsm = collections.namedtuple('Hpsm', field_names=[])
+
+Hrlm = collections.namedtuple('Hrlm', field_names=[])
+
+Hrpm = collections.namedtuple('Hrpm', field_names=[])
+
+
+@enum.unique
+class HohmType(enum.IntEnum):
+    """Type codes of the 'hohm' records this script acts on.
+
+    ``enum.unique`` makes an accidentally duplicated code an import-time
+    error rather than a silent alias of an existing member.
+    """
+
+    TITLE = 0x02
+    ALBUM_TITLE = 0x03
+    ARTIST = 0x04
+    PLAYLIST_TITLE = 0x64
+
+
+# 'hohm' type codes whose payload RecordParser.parse_hohm leaves as raw bytes
+# instead of stripping the 16-byte prefix and decoding it as text.
+HOHM_ODD_TYPES = (0x42, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x192, 0x1f7, 0x1f4, 0x202, 0x320)
+
+
+class ItlIO(io.BytesIO):
+    """In-memory byte stream with helpers for reading .itl fields.
+
+    The ``flipped`` attribute selects the byte order used by ``read_uint``:
+    little-endian when true, big-endian otherwise. It starts out false.
+    """
+
+    def __init__(self, *args, **kwargs):
+        self.flipped = False
+        super().__init__(*args, **kwargs)
+
+    def skip(self, nbytes):
+        """Advance the stream by reading and discarding ``nbytes`` bytes."""
+        self.read(nbytes)
+
+    def read_ascii(self, nbytes):
+        """Read ``nbytes`` bytes and return them decoded as ASCII text."""
+        raw = self.read(nbytes)
+        return raw.decode('ascii')
+
+    def read_byte(self):
+        """Read a single byte and return its integer value."""
+        chunk = self.read(1)
+        return chunk[0]
+
+    def read_uint(self):
+        """Read four bytes as an unsigned 32-bit integer in the current byte order."""
+        byte_order = '<I' if self.flipped else '>I'
+        (value,) = struct.unpack(byte_order, self.read(4))
+        return value
+
+
+class RecordParser:
+    """Iterate over the records in a decrypted, decompressed .itl byte string.
+
+    Each record starts with a four-character ASCII tag followed by a 32-bit
+    length; ``parse`` dispatches on the tag to the matching ``parse_<tag>``
+    method and yields the namedtuple that method returns.
+    """
+    def __init__(self, data):
+        # Wrap the raw bytes so the ItlIO read helpers are available.
+        self.data = ItlIO(data)
+
+    def parse(self):
+        """Yield one parsed record per header until the input is exhausted.
+
+        Raises ValueError when a tag has no ``parse_<tag>`` method in either
+        character order.
+        """
+        while True:
+            record_type = self.data.read_ascii(4)
+            if not record_type:
+                # Empty read: end of input.
+                return
+
+            if self.data.flipped:
+                # Once the stream is in flipped mode, tags are read reversed.
+                record_type = record_type[-1::-1]
+
+            method = f'parse_{record_type}'
+
+            if not hasattr(self, method):
+                # Unknown as read: try the reversed tag, and if that matches
+                # switch the stream to flipped (little-endian) mode. The flag
+                # is never cleared again.
+                method = f'parse_{record_type[-1::-1]}'
+                if not hasattr(self, method):
+                    # Dump the unparsed remainder to stdout to aid debugging.
+                    print(self.data.getvalue()[self.data.tell():])
+                    raise ValueError(f"unknown record type: {record_type}")
+                self.data.flipped = True
+
+            # This length includes the 8 bytes (tag + length) already consumed,
+            # so only ``length - 8`` more bytes are handed to the record parser.
+            length = self.data.read_uint()
+            data = ItlIO(self.data.read(length - 8))
+            if self.data.flipped:
+                data.flipped = True
+            yield getattr(self, method)(data)
+
+    def parse_hdfm(self, data):
+        """Parse an 'hdfm' record: file length, then a length-prefixed version string."""
+        file_length = data.read_uint()
+        data.skip(4)  # four bytes that are not used here
+        version_length = data.read_byte()
+        version = data.read_ascii(version_length)
+        return Hdfm(file_length=file_length,
+                    version=version)
+
+    def parse_hdsm(self, data):
+        """Parse an 'hdsm' record and return its block type and length."""
+        record_length = data.read_uint()
+        block_type = data.read_uint()
+
+        if block_type in (4, 22):
+            # For these block types, skip the remainder of the block in the
+            # main stream (everything beyond the header bytes already copied
+            # into ``data``) so its contents are not parsed as records.
+            self.data.skip(record_length - len(data.getvalue()) - 8)
+
+        return Hdsm(block_type=block_type, block_length=record_length)
+
+    def parse_hghm(self, data):
+        """Return an empty Hghm; nothing is read from the record."""
+        return Hghm()
+
+    def parse_hohm(self, data):
+        """Parse an 'hohm' record, decoding its payload as text where applicable.
+
+        The payload is read from the main stream, not from ``data``: it is
+        whatever follows the header up to ``record_length``.
+        """
+        record_length = data.read_uint()
+        hohm_type = data.read_uint()
+        hohm_data = self.data.read(record_length - len(data.getvalue()) - 8)
+
+        # print(hex(hohm_type), repr(hohm_data))
+
+        if hohm_type not in HOHM_ODD_TYPES:
+            # Drop the first 16 bytes of the payload before decoding.
+            hohm_data = hohm_data[16:]
+            # What even is character encoding?
+            # There might be something telling us what the encoding is but this
+            # is sufficient for current purposes.
+            # Heuristic: an even-length payload with a leading zero byte is
+            # treated as UTF-16BE, with a trailing zero byte as UTF-16LE, and
+            # anything else as ISO-8859-1.
+            if len(hohm_data) > 1 and len(hohm_data) % 2 == 0 and hohm_data[0] == 0:
+                hohm_data = hohm_data.decode('utf-16be')
+            elif len(hohm_data) > 1 and len(hohm_data) % 2 == 0 and hohm_data[-1] == 0:
+                hohm_data = hohm_data.decode('utf-16le')
+            else:
+                hohm_data = hohm_data.decode('iso-8859-1')
+
+        return Hohm(record_length=record_length, type=hohm_type, data=hohm_data)
+
+    def parse_halm(self, data):
+        """Return an empty Halm; nothing is read from the record."""
+        return Halm()
+
+    def parse_haim(self, data):
+        """Return an empty Haim; nothing is read from the record."""
+        return Haim()
+
+    def parse_hilm(self, data):
+        """Return an empty Hilm; nothing is read from the record."""
+        return Hilm()
+
+    def parse_hiim(self, data):
+        """Return an empty Hiim; nothing is read from the record."""
+        return Hiim()
+
+    def parse_htlm(self, data):
+        """Return an empty Htlm; nothing is read from the record."""
+        return Htlm()
+
+    def parse_htim(self, data):
+        """Parse the first four 32-bit values of an 'htim' record header."""
+        record_length = data.read_uint()
+        sub_blocks = data.read_uint()
+        song_id = data.read_uint()
+        block_type = data.read_uint()
+
+        # data = self.data.read(record_length - len(data.getvalue()) - 8)
+        # print(repr(data))
+
+        return Htim(record_length, sub_blocks, song_id, block_type)
+
+    def parse_hqlm(self, data):
+        """Return an empty Hqlm; nothing is read from the record."""
+        return Hqlm()
+
+    def parse_hqim(self, data):
+        """Return an empty Hqim; nothing is read from the record."""
+        return Hqim()
+
+    def parse_hsts(self, data):
+        """Return an empty Hsts; nothing is read from the record."""
+        return Hsts()
+
+    def parse_hplm(self, data):
+        """Return an empty Hplm; nothing is read from the record."""
+        return Hplm()
+
+    def parse_hpim(self, data):
+        """Parse an 'hpim' record: skip 8 bytes, then read the item count."""
+        data.skip(4 + 4)
+        item_count = data.read_uint()
+        return Hpim(item_count)
+
+    def parse_hptm(self, data):
+        """Parse an 'hptm' record: skip 16 bytes, then read the key."""
+        data.skip(16)
+        key = data.read_uint()
+        return Hptm(key)
+
+    def parse_hslm(self, data):
+        """Return an empty Hslm; nothing is read from the record."""
+        return Hslm()
+
+    def parse_hpsm(self, data):
+        """Return an empty Hpsm; nothing is read from the record."""
+        return Hpsm()
+
+    def parse_hrlm(self, data):
+        """Return an empty Hrlm; nothing is read from the record."""
+        return Hrlm()
+
+    def parse_hrpm(self, data):
+        """Return an empty Hrpm; nothing is read from the record."""
+        return Hrpm()
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument('filename', nargs='?', default='iTunes Library.itl',
+                    help='iTunes Library Filename')
+args = parser.parse_args()
+
+# So it appears that the .itl format, in modern versions of iTunes, has a header
+# block containing some information, one part of which tells us how much of the
+# following data is AES/ECB encrypted with a key that's made it around the
+# Internet a bit. To get at the actual data you need to decrypt that bit in place
+# then decompress (zlib) the bit after the initial header. After that it's a similar
+# format to older iTunes library files.
+
+with open(args.filename, 'rb') as itl_file:
+    itl = itl_file.read()
+header = itl[:HEADER_LENGTH]
+
+# ECB can only decrypt whole 16-byte blocks, so round the payload size down to
+# a block boundary and then cap it at the limit stored in the header.
+crypt_length = (len(itl) - HEADER_LENGTH) & ~0xf
+max_crypt_length = struct.unpack('>I', header[0x5C:0x60])[0]
+crypt_length = min(crypt_length, max_crypt_length)
+
+# Slice by the clamped length (not the raw header value) so a short or
+# unaligned payload cannot be handed to the cipher.
+crypt_end = HEADER_LENGTH + crypt_length
+cipher = AES.new(CRYPTO_KEY, AES.MODE_ECB)
+decrypted = cipher.decrypt(itl[HEADER_LENGTH:crypt_end])
+
+itl = decrypted + itl[crypt_end:]
+itl = header + zlib.decompress(itl)
+
+# ``track`` / ``playlist`` accumulate the record currently being built; each
+# is filed into ``tracks`` / ``playlists`` when the next one starts.
+track = {}
+tracks = {}
+playlist = {}
+playlists = {}
+
+for record in RecordParser(itl).parse():
+    if isinstance(record, Htim):
+        if track:
+            tracks[track['song_id']] = track
+        track = {'song_id': record.song_id}
+    elif isinstance(record, Hohm):
+        if record.type == HohmType.TITLE:
+            track['title'] = record.data
+        elif record.type == HohmType.ALBUM_TITLE:
+            track['album'] = record.data
+        elif record.type == HohmType.ARTIST:
+            track['artist'] = record.data
+        elif record.type == HohmType.PLAYLIST_TITLE:
+            playlist['title'] = record.data
+    elif isinstance(record, Hpim):
+        if playlist:
+            playlists[playlist['title']] = playlist
+        playlist = {'items': []}
+    elif isinstance(record, Hptm):
+        playlist['items'].append(record.key)
+
+# File whatever was still being built when the input ran out.
+if track:
+    tracks[track['song_id']] = track
+
+if playlist:
+    playlists[playlist['title']] = playlist
+
+# newline='' is what the csv module expects of files it writes to; the
+# context manager makes sure the output is flushed and closed.
+with open('playlists.csv', 'w', newline='') as csv_file:
+    output = csv.writer(csv_file)
+
+    for title, playlist in playlists.items():
+        # The playlists I was after had titles of the form 'YYYY-M' or 'YYYY-MM'...
+        if len(title) < 5 or title[0] != '2' or title[4] != '-':
+            continue
+        year, month = title.split('-')
+        # ... and I wanted to make them consistently 'YYYY-MM'.
+        title = f'{year}-{int(month):02d}'
+        for item in (tracks[x] for x in playlist['items']):
+            print(repr(item))
+            # Tracks may lack any of these text fields; write an empty cell
+            # rather than failing, as was already done for the album.
+            output.writerow([title, item.get('title', ''),
+                             item.get('artist', ''), item.get('album', '')])
No results found