Skip to content

Instantly share code, notes, and snippets.

@mystix
Last active October 3, 2025 07:53
Show Gist options
  • Select an option

  • Save mystix/8c8733650f67c66eb55600af91f59125 to your computer and use it in GitHub Desktop.

Select an option

Save mystix/8c8733650f67c66eb55600af91f59125 to your computer and use it in GitHub Desktop.

Revisions

  1. mystix revised this gist Oct 3, 2025. 1 changed file with 3 additions and 3 deletions.
    6 changes: 3 additions & 3 deletions pen-200-toc.py
    Original file line number Diff line number Diff line change
    @@ -13,10 +13,10 @@


    def add_toc(pdfin, toc_start, toc_end, pdfout):
    with pymupdf.open(pdfin) as pdf:
    toc_section_regex = r"(\d+(?:\.\d+)*) \n(.+) \.{2,} (\d+)"
    pg_section_regex = r"(\d+(?:\.\d+)*) \n?(.+) \n"
    toc_section_regex = r"(\d+(?:\.\d+)*) \n(.+) \.{2,} (\d+)"
    pg_section_regex = r"(\d+(?:\.\d+)*) \n?(.+) \n"

    with pymupdf.open(pdfin) as pdf:
    toc = []

    for page in pdf[toc_start:toc_end]:
  2. mystix revised this gist Oct 3, 2025. 1 changed file with 0 additions and 4 deletions.
    4 changes: 0 additions & 4 deletions pen-200-toc.py
    Original file line number Diff line number Diff line change
    @@ -12,8 +12,6 @@
    import re


    # ============== [ METHODS ] ================

    def add_toc(pdfin, toc_start, toc_end, pdfout):
    with pymupdf.open(pdfin) as pdf:
    toc_section_regex = r"(\d+(?:\.\d+)*) \n(.+) \.{2,} (\d+)"
    @@ -55,8 +53,6 @@ def add_toc(pdfin, toc_start, toc_end, pdfout):
    pdf.set_toc(toc)
    pdf.save(pdfout)

    # ===========================================


    def main():
    parser = argparse.ArgumentParser(
  3. mystix revised this gist Oct 3, 2025. 1 changed file with 2 additions and 1 deletion.
    3 changes: 2 additions & 1 deletion pen-200-toc.py
    Original file line number Diff line number Diff line change
    @@ -47,7 +47,7 @@ def add_toc(pdfin, toc_start, toc_end, pdfout):
    page.insert_link(destination)

    # add bookmark entry
    toc.append([len(section_number.split(".")), section_header, pg_number, destination])
    toc.append([len(section_number.split(".")), f"{section_number} - {section_header}", pg_number, destination])

    # add bookmark entry for TOC
    toc.insert(0, [1, "Table of Contents", toc_start + 1])
    @@ -75,3 +75,4 @@ def main():

    if __name__ == "__main__":
    main()

  4. mystix revised this gist Oct 3, 2025. 1 changed file with 22 additions and 1 deletion.
    23 changes: 22 additions & 1 deletion pen-200-toc.py
    Original file line number Diff line number Diff line change
    @@ -7,10 +7,13 @@
    # ]
    # ///

    import argparse
    import pymupdf
    import re


    # ============== [ METHODS ] ================

    def add_toc(pdfin, toc_start, toc_end, pdfout):
    with pymupdf.open(pdfin) as pdf:
    toc_section_regex = r"(\d+(?:\.\d+)*) \n(.+) \.{2,} (\d+)"
    @@ -52,5 +55,23 @@ def add_toc(pdfin, toc_start, toc_end, pdfout):
    pdf.set_toc(toc)
    pdf.save(pdfout)

    # ===========================================


    def main():
        """Parse the command line and run ``add_toc`` with the parsed values.

        Positional arguments, in order: input PDF path, ``toc_start``,
        ``toc_end``, output PDF path.  ``add_toc`` iterates the page slice
        ``[toc_start:toc_end]`` and bookmarks the table of contents at page
        ``toc_start + 1``, so ``toc_start`` is a zero-based page index and
        ``toc_end`` is an exclusive stop; the help strings say so explicitly.
        """
        parser = argparse.ArgumentParser(
            description="Add section links and PDF bookmarks to the OSCP PEN-200 PDF table of contents.",
            formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        )
        parser.add_argument("pdf_in", help="Input PDF")
        parser.add_argument(
            "toc_start",
            type=int,
            help="Zero-based index of the first Table of Contents page",
        )
        parser.add_argument(
            "toc_end",
            type=int,
            help=(
                "Zero-based index one past the last Table of Contents page "
                "(equals that page's 1-based page number)"
            ),
        )
        parser.add_argument("pdf_out", help="Output PDF")

        args = parser.parse_args()

        add_toc(args.pdf_in, args.toc_start, args.toc_end, args.pdf_out)


    add_toc("pen-200.pdf", 2, 16, "pen-200-with-toc.pdf")
    if __name__ == "__main__":
    main()
  5. mystix revised this gist Oct 3, 2025. 1 changed file with 2 additions and 2 deletions.
    4 changes: 2 additions & 2 deletions pen-200-toc.py
    Original file line number Diff line number Diff line change
    @@ -11,8 +11,8 @@
    import re


    def add_toc(pdf, toc_start, toc_end, pdfout):
    with pymupdf.open(infile) as pdf:
    def add_toc(pdfin, toc_start, toc_end, pdfout):
    with pymupdf.open(pdfin) as pdf:
    toc_section_regex = r"(\d+(?:\.\d+)*) \n(.+) \.{2,} (\d+)"
    pg_section_regex = r"(\d+(?:\.\d+)*) \n?(.+) \n"

  6. mystix revised this gist Oct 3, 2025. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion pen-200-toc.py
    Original file line number Diff line number Diff line change
    @@ -31,7 +31,7 @@ def add_toc(pdf, toc_start, toc_end, pdfout):
    pg_match = re.match(pg_section_regex, pg_block[4])

    # check if TOC section header contains page section header
    # (NOTE: page section header might be truncated i.e. spread out over 2 lines)
    # (NOTE: page section header might be truncated i.e. spread over 2 lines)
    if pg_match and pg_match.group(2) in section_header:
    destination = {
    "kind": pymupdf.LINK_GOTO,
  7. mystix revised this gist Oct 3, 2025. 1 changed file with 32 additions and 33 deletions.
    65 changes: 32 additions & 33 deletions pen-200-toc.py
    Original file line number Diff line number Diff line change
    @@ -11,47 +11,46 @@
    import re


    def add_toc(doc, toc_start, toc_end):
    toc_section_regex = r"(\d+(?:\.\d+)*) \n(.+) \.{2,} (\d+)"
    pg_section_regex = r"(\d+(?:\.\d+)*) \n?(.+) \n"
    def add_toc(pdf, toc_start, toc_end, pdfout):
    with pymupdf.open(infile) as pdf:
    toc_section_regex = r"(\d+(?:\.\d+)*) \n(.+) \.{2,} (\d+)"
    pg_section_regex = r"(\d+(?:\.\d+)*) \n?(.+) \n"

    toc = []
    toc = []

    for page in doc[toc_start:toc_end]:
    for section in page.get_text("blocks"):
    toc_match = re.match(toc_section_regex, section[4])
    for page in pdf[toc_start:toc_end]:
    for section in page.get_text("blocks"):
    toc_match = re.match(toc_section_regex, section[4])

    if toc_match:
    section_number = toc_match.group(1)
    section_header = toc_match.group(2).strip()
    pg_number = int(toc_match.group(3))
    if toc_match:
    section_number = toc_match.group(1)
    section_header = toc_match.group(2).strip()
    pg_number = int(toc_match.group(3))

    for pg_block in doc[pg_number - 1].get_text("blocks"):
    pg_match = re.match(pg_section_regex, pg_block[4])
    for pg_block in pdf[pg_number - 1].get_text("blocks"):
    pg_match = re.match(pg_section_regex, pg_block[4])

    # check if TOC section header contains page section header
    # (NOTE: page section header might be truncated i.e. spread out over 2 lines)
    if pg_match and pg_match.group(2) in section_header:
    destination = {
    "kind": pymupdf.LINK_GOTO,
    "page": pg_number - 1,
    "from": pymupdf.Rect(*section[0:4]),
    "to" : pymupdf.Point(*pg_block[0:2]), # top-left corner of header bbox/rect
    }
    # check if TOC section header contains page section header
    # (NOTE: page section header might be truncated i.e. spread out over 2 lines)
    if pg_match and pg_match.group(2) in section_header:
    destination = {
    "kind": pymupdf.LINK_GOTO,
    "page": pg_number - 1,
    "from": pymupdf.Rect(*section[0:4]),
    "to" : pymupdf.Point(*pg_block[0:2]), # top-left corner of header bbox/rect
    }

    # add TOC link
    page.insert_link(destination)
    # add TOC link
    page.insert_link(destination)

    # add bookmark entry
    toc.append([len(section_number.split(".")), section_header, pg_number, destination])
    # add bookmark entry
    toc.append([len(section_number.split(".")), section_header, pg_number, destination])

    # add bookmark entry for TOC
    toc.insert(0, [1, "Table of Contents", toc_start + 1])
    # add bookmark entry for TOC
    toc.insert(0, [1, "Table of Contents", toc_start + 1])

    doc.set_toc(toc)
    pdf.set_toc(toc)
    pdf.save(pdfout)


    with pymupdf.open("pen-200.pdf") as doc:
    add_toc(doc, 2, 16)
    doc.save("pen-200-with-toc.pdf")

    add_toc("pen-200.pdf", 2, 16, "pen-200-with-toc.pdf")
  8. mystix revised this gist Oct 3, 2025. 1 changed file with 5 additions and 5 deletions.
    10 changes: 5 additions & 5 deletions pen-200-toc.py
    Original file line number Diff line number Diff line change
    @@ -19,12 +19,12 @@ def add_toc(doc, toc_start, toc_end):

    for page in doc[toc_start:toc_end]:
    for section in page.get_text("blocks"):
    match = re.match(toc_section_regex, section[4])
    toc_match = re.match(toc_section_regex, section[4])

    if match:
    section_number = match.group(1)
    section_header = match.group(2).strip()
    pg_number = int(match.group(3))
    if toc_match:
    section_number = toc_match.group(1)
    section_header = toc_match.group(2).strip()
    pg_number = int(toc_match.group(3))

    for pg_block in doc[pg_number - 1].get_text("blocks"):
    pg_match = re.match(pg_section_regex, pg_block[4])
  9. mystix revised this gist Oct 2, 2025. No changes.
  10. mystix revised this gist Sep 23, 2025. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion pen-200-toc.py
    Original file line number Diff line number Diff line change
    @@ -13,7 +13,7 @@

    def add_toc(doc, toc_start, toc_end):
    toc_section_regex = r"(\d+(?:\.\d+)*) \n(.+) \.{2,} (\d+)"
    pg_section_regex = r"(\d+(?:\.\d+)*) \n?(.+) \n"
    pg_section_regex = r"(\d+(?:\.\d+)*) \n?(.+) \n"

    toc = []

  11. mystix revised this gist Sep 23, 2025. 1 changed file with 2 additions and 0 deletions.
    2 changes: 2 additions & 0 deletions pen-200-toc.py
    Original file line number Diff line number Diff line change
    @@ -29,6 +29,8 @@ def add_toc(doc, toc_start, toc_end):
    for pg_block in doc[pg_number - 1].get_text("blocks"):
    pg_match = re.match(pg_section_regex, pg_block[4])

    # check if TOC section header contains page section header
    # (NOTE: page section header might be truncated i.e. spread out over 2 lines)
    if pg_match and pg_match.group(2) in section_header:
    destination = {
    "kind": pymupdf.LINK_GOTO,
  12. mystix revised this gist Sep 23, 2025. 1 changed file with 0 additions and 1 deletion.
    1 change: 0 additions & 1 deletion pen-200-toc.py
    Original file line number Diff line number Diff line change
    @@ -46,7 +46,6 @@ def add_toc(doc, toc_start, toc_end):
    # add bookmark entry for TOC
    toc.insert(0, [1, "Table of Contents", toc_start + 1])

    # set TOC bookmarks
    doc.set_toc(toc)


  13. mystix revised this gist Sep 23, 2025. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion pen-200-toc.py
    Original file line number Diff line number Diff line change
    @@ -43,7 +43,7 @@ def add_toc(doc, toc_start, toc_end):
    # add bookmark entry
    toc.append([len(section_number.split(".")), section_header, pg_number, destination])

    # add TOC entry at the beginning
    # add bookmark entry for TOC
    toc.insert(0, [1, "Table of Contents", toc_start + 1])

    # set TOC bookmarks
  14. mystix revised this gist Sep 23, 2025. 1 changed file with 4 additions and 1 deletion.
    5 changes: 4 additions & 1 deletion pen-200-toc.py
    Original file line number Diff line number Diff line change
    @@ -43,7 +43,10 @@ def add_toc(doc, toc_start, toc_end):
    # add bookmark entry
    toc.append([len(section_number.split(".")), section_header, pg_number, destination])

    toc.insert(0, [1, "Table of Contents", toc_start + 1]) # add TOC entry at the beginning
    # add TOC entry at the beginning
    toc.insert(0, [1, "Table of Contents", toc_start + 1])

    # set TOC bookmarks
    doc.set_toc(toc)


  15. mystix revised this gist Sep 23, 2025. 1 changed file with 7 additions and 6 deletions.
    13 changes: 7 additions & 6 deletions pen-200-toc.py
    Original file line number Diff line number Diff line change
    @@ -12,24 +12,24 @@


    def add_toc(doc, toc_start, toc_end):
    section_regex = r"(\d+(?:\.\d+)*) \n(.+) \.{2,} (\d+)"
    toc_section_regex = r"(\d+(?:\.\d+)*) \n(.+) \.{2,} (\d+)"
    pg_section_regex = r"(\d+(?:\.\d+)*) \n?(.+) \n"

    toc = []

    for page in doc[toc_start:toc_end]:
    for section in page.get_text("blocks"):
    match = re.match(section_regex, section[4])
    match = re.match(toc_section_regex, section[4])

    if match:
    section_number = match.group(1)
    section_header = match.group(2).strip()
    pg_number = int(match.group(3))

    pg_section_regex = fr"{section_number} \n?{section_header} \n"

    for pg_block in doc[pg_number - 1].get_text("blocks"):
    header = pg_block[4]
    pg_match = re.match(pg_section_regex, pg_block[4])

    if re.match(pg_section_regex, header):
    if pg_match and pg_match.group(2) in section_header:
    destination = {
    "kind": pymupdf.LINK_GOTO,
    "page": pg_number - 1,
    @@ -50,3 +50,4 @@ def add_toc(doc, toc_start, toc_end):
    with pymupdf.open("pen-200.pdf") as doc:
    add_toc(doc, 2, 16)
    doc.save("pen-200-with-toc.pdf")

  16. mystix revised this gist Sep 23, 2025. No changes.
  17. mystix revised this gist Sep 23, 2025. No changes.
  18. mystix revised this gist Sep 23, 2025. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion pen-200-toc.py
    Original file line number Diff line number Diff line change
    @@ -47,6 +47,6 @@ def add_toc(doc, toc_start, toc_end):
    doc.set_toc(toc)


    with pymupdf.open("pen-200-2025.pdf") as doc:
    with pymupdf.open("pen-200.pdf") as doc:
    add_toc(doc, 2, 16)
    doc.save("pen-200-with-toc.pdf")
  19. mystix revised this gist Sep 23, 2025. 1 changed file with 5 additions and 3 deletions.
    8 changes: 5 additions & 3 deletions pen-200-toc.py
    Original file line number Diff line number Diff line change
    @@ -24,10 +24,12 @@ def add_toc(doc, toc_start, toc_end):
    section_header = match.group(2).strip()
    pg_number = int(match.group(3))

    pg_section_regex = fr"{section_number} \n?{section_header} \n"

    for pg_block in doc[pg_number - 1].get_text("blocks"):
    header = pg_block[4].strip()
    header = pg_block[4]

    if header and header in f"{section_number} {section_header}":
    if re.match(pg_section_regex, header):
    destination = {
    "kind": pymupdf.LINK_GOTO,
    "page": pg_number - 1,
    @@ -45,6 +47,6 @@ def add_toc(doc, toc_start, toc_end):
    doc.set_toc(toc)


    with pymupdf.open("pen-200.pdf") as doc:
    with pymupdf.open("pen-200-2025.pdf") as doc:
    add_toc(doc, 2, 16)
    doc.save("pen-200-with-toc.pdf")
  20. mystix created this gist Sep 23, 2025.
    50 changes: 50 additions & 0 deletions pen-200-toc.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,50 @@
    #!/usr/bin/env -S uv run --script
    #
    # /// script
    # requires-python = ">=3.13"
    # dependencies = [
    # "pymupdf",
    # ]
    # ///

    import pymupdf
    import re


    def add_toc(doc, toc_start, toc_end):
        """Link table-of-contents lines to their sections and set PDF bookmarks.

        Every text block on pages ``doc[toc_start:toc_end]`` is matched against
        a ``<section number> <title> .... <page number>`` pattern.  For each
        match, the blocks of the referenced page are searched for one whose
        stripped text is a non-empty substring of ``"<number> <title>"``.  The
        first such block becomes the target of a GOTO link placed over the TOC
        line and of a bookmark entry.  The collected bookmarks are installed
        with ``doc.set_toc``.  ``doc`` is modified in place and is not saved
        here.

        Args:
            doc: Open PyMuPDF document.
            toc_start: Zero-based index of the first TOC page.
            toc_end: Zero-based index one past the last TOC page (slice stop).
        """
        # Compiled once; it is applied to every text block of every TOC page.
        section_pattern = re.compile(r"(\d+(?:\.\d+)*) \n(.+) \.{2,} (\d+)")

        # Bookmark for the table of contents itself comes first; bookmark page
        # numbers are 1-based, hence the + 1 on the zero-based start index.
        toc = [[1, "Table of Contents", toc_start + 1]]

        for page in doc[toc_start:toc_end]:
            for section in page.get_text("blocks"):
                # Items 0-3 of a block are used as its rectangle, item 4 as its text.
                match = section_pattern.match(section[4])
                if not match:
                    continue

                section_number = match.group(1)
                section_header = match.group(2).strip()
                pg_number = int(match.group(3))
                expected = f"{section_number} {section_header}"

                for pg_block in doc[pg_number - 1].get_text("blocks"):
                    header = pg_block[4].strip()
                    if not header or header not in expected:
                        continue

                    destination = {
                        "kind": pymupdf.LINK_GOTO,
                        "page": pg_number - 1,
                        "from": pymupdf.Rect(*section[0:4]),
                        "to": pymupdf.Point(*pg_block[0:2]),  # top-left corner of header bbox/rect
                    }

                    # add TOC link
                    page.insert_link(destination)

                    # add bookmark entry; nesting level = number of dotted components
                    toc.append([len(section_number.split(".")), section_header, pg_number, destination])

                    # One TOC line gets exactly one link and one bookmark.  Without
                    # stopping here, any further block on the page that also passes
                    # the substring test (e.g. a lone "1") would add duplicates.
                    break

        doc.set_toc(toc)


    # Process the PEN-200 PDF: the TOC pages are the slice [2:16] of the document.
    # The annotated copy is written to a new file while the source is still open.
    with pymupdf.open("pen-200.pdf") as pdf:
        add_toc(pdf, toc_start=2, toc_end=16)
        pdf.save("pen-200-with-toc.pdf")