Last active
October 3, 2025 07:53
-
-
Save mystix/8c8733650f67c66eb55600af91f59125 to your computer and use it in GitHub Desktop.
OSCP PEN-200: Add Table of Contents page links + bookmarks to PDF
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env -S uv run --script
#
# /// script
# requires-python = ">=3.13"
# dependencies = [
#     "pymupdf",
# ]
# ///
| import pymupdf | |
| import re | |
def _find_heading_block(blocks, heading_re, section_number, section_header):
    """Return the text block that best matches a TOC entry's heading, or None.

    A block qualifies when it starts with a section number followed by a
    title line that is contained in ``section_header`` (a containment test,
    so a heading whose remaining text sits on later lines still matches).
    A qualifying block whose section number equals ``section_number`` wins
    immediately; otherwise the first qualifying block is returned.
    """
    first_title_match = None
    for block in blocks:
        heading = heading_re.match(block[4])
        if not heading or heading.group(2) not in section_header:
            continue
        if heading.group(1) == section_number:
            return block
        if first_title_match is None:
            first_title_match = block
    return first_title_match


def add_toc(doc, toc_start, toc_end):
    """Turn the printed table of contents into clickable links and bookmarks.

    Scans the text blocks of pages ``doc[toc_start:toc_end]`` for entries of
    the form ``<section number> / <title> ..... <page number>``. For every
    entry whose heading is found on the referenced page, a GOTO link is
    placed over the TOC line and a bookmark is recorded. Finally the
    document outline is replaced via ``doc.set_toc``.

    Args:
        doc: An open PyMuPDF document; it is modified in place.
        toc_start: 0-based index of the first page of the printed TOC.
        toc_end: 0-based index one past the last page of the printed TOC.

    Notes:
        - Printed page numbers are assumed to equal the 1-based position of
          the page in the PDF (page ``n`` is ``doc[n - 1]``).
        - Entries whose page number is outside the document, or whose
          heading cannot be found on the target page, are skipped.
        - Each TOC entry yields at most one link and one bookmark.
    """
    # Compile once; both patterns are applied to every block of every page.
    toc_entry_re = re.compile(r"(\d+(?:\.\d+)*) \n(.+) \.{2,} (\d+)")
    heading_re = re.compile(r"(\d+(?:\.\d+)*) \n?(.+) \n")
    page_count = doc.page_count

    # Bookmark for the printed TOC itself always comes first (level 1).
    toc = [[1, "Table of Contents", toc_start + 1]]

    for page in doc[toc_start:toc_end]:
        # Each block is a tuple: bbox in [0:4], text in [4].
        for section in page.get_text("blocks"):
            match = toc_entry_re.match(section[4])
            if not match:
                continue

            section_number = match.group(1)
            section_header = match.group(2).strip()
            pg_number = int(match.group(3))

            # A bogus page number must not crash the run or silently wrap
            # around to a page counted from the end of the document.
            if not 1 <= pg_number <= page_count:
                continue

            pg_block = _find_heading_block(
                doc[pg_number - 1].get_text("blocks"),
                heading_re,
                section_number,
                section_header,
            )
            if pg_block is None:
                continue

            destination = {
                "kind": pymupdf.LINK_GOTO,
                "page": pg_number - 1,
                "from": pymupdf.Rect(*section[0:4]),
                "to": pymupdf.Point(*pg_block[0:2]),  # top-left corner of header bbox/rect
            }
            # add TOC link
            page.insert_link(destination)

            # add bookmark entry; set_toc rejects a level that is more than
            # one deeper than the previous entry, so clamp it in case a
            # parent section was skipped above.
            level = min(len(section_number.split(".")), toc[-1][0] + 1)
            toc.append([level, section_header, pg_number, destination])

    doc.set_toc(toc)
# Script driver: open the course PDF, add links/bookmarks, write a new file.
INPUT_PDF = "pen-200.pdf"
OUTPUT_PDF = "pen-200-with-toc.pdf"
# 0-based page-index window (end-exclusive) handed to add_toc as the
# location of the printed table of contents.
TOC_FIRST_PAGE = 2
TOC_STOP_PAGE = 16

with pymupdf.open(INPUT_PDF) as pdf:
    add_toc(pdf, TOC_FIRST_PAGE, TOC_STOP_PAGE)
    # Saved under a different name, so the input file is left untouched.
    pdf.save(OUTPUT_PDF)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment