Last active
October 3, 2025 07:53
-
-
Save mystix/8c8733650f67c66eb55600af91f59125 to your computer and use it in GitHub Desktop.
Revisions
-
mystix revised this gist
Oct 3, 2025 . 1 changed file with 3 additions and 3 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -13,10 +13,10 @@ def add_toc(pdfin, toc_start, toc_end, pdfout): toc_section_regex = r"(\d+(?:\.\d+)*) \n(.+) \.{2,} (\d+)" pg_section_regex = r"(\d+(?:\.\d+)*) \n?(.+) \n" with pymupdf.open(pdfin) as pdf: toc = [] for page in pdf[toc_start:toc_end]: -
mystix revised this gist
Oct 3, 2025 . 1 changed file with 0 additions and 4 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -12,8 +12,6 @@ import re def add_toc(pdfin, toc_start, toc_end, pdfout): with pymupdf.open(pdfin) as pdf: toc_section_regex = r"(\d+(?:\.\d+)*) \n(.+) \.{2,} (\d+)" @@ -55,8 +53,6 @@ def add_toc(pdfin, toc_start, toc_end, pdfout): pdf.set_toc(toc) pdf.save(pdfout) def main(): parser = argparse.ArgumentParser( -
mystix revised this gist
Oct 3, 2025 . 1 changed file with 2 additions and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -47,7 +47,7 @@ def add_toc(pdfin, toc_start, toc_end, pdfout): page.insert_link(destination) # add bookmark entry toc.append([len(section_number.split(".")), f"{section_number} - {section_header}", pg_number, destination]) # add bookmark entry for TOC toc.insert(0, [1, "Table of Contents", toc_start + 1]) @@ -75,3 +75,4 @@ def main(): if __name__ == "__main__": main() -
mystix revised this gist
Oct 3, 2025 . 1 changed file with 22 additions and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -7,10 +7,13 @@ # ] # /// import argparse import pymupdf import re # ============== [ METHODS ] ================ def add_toc(pdfin, toc_start, toc_end, pdfout): with pymupdf.open(pdfin) as pdf: toc_section_regex = r"(\d+(?:\.\d+)*) \n(.+) \.{2,} (\d+)" @@ -52,5 +55,23 @@ def add_toc(pdfin, toc_start, toc_end, pdfout): pdf.set_toc(toc) pdf.save(pdfout) # =========================================== def main(): parser = argparse.ArgumentParser( description="Add section links and PDF bookmarks to the OSCP PEN-200 PDF table of contents.", formatter_class=argparse.ArgumentDefaultsHelpFormatter ) parser.add_argument("pdf_in", help="Input PDF") parser.add_argument("toc_start", help="First page of the Table of Contents", type=int) parser.add_argument("toc_end", help="Last page of the Table of Contents", type=int) parser.add_argument("pdf_out", help="Output PDF") args = parser.parse_args() add_toc(args.pdf_in, args.toc_start, args.toc_end, args.pdf_out) if __name__ == "__main__": main() -
mystix revised this gist
Oct 3, 2025 . 1 changed file with 2 additions and 2 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -11,8 +11,8 @@ import re def add_toc(pdfin, toc_start, toc_end, pdfout): with pymupdf.open(pdfin) as pdf: toc_section_regex = r"(\d+(?:\.\d+)*) \n(.+) \.{2,} (\d+)" pg_section_regex = r"(\d+(?:\.\d+)*) \n?(.+) \n" -
mystix revised this gist
Oct 3, 2025 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -31,7 +31,7 @@ def add_toc(pdf, toc_start, toc_end, pdfout): pg_match = re.match(pg_section_regex, pg_block[4]) # check if TOC section header contains page section header # (NOTE: page section header might be truncated i.e. spread over 2 lines) if pg_match and pg_match.group(2) in section_header: destination = { "kind": pymupdf.LINK_GOTO, -
mystix revised this gist
Oct 3, 2025 . 1 changed file with 32 additions and 33 deletions.There are no files selected for viewing
import re


def add_toc(pdf, toc_start, toc_end, pdfout):
    """Link table-of-contents entries to their sections and add PDF bookmarks.

    Every text block on the table-of-contents pages that looks like
    ``<section number> <title> .... <page number>`` is looked up on the page
    it points to.  For each block on that page whose heading text is contained
    in the TOC title, a GOTO link is placed over the TOC entry and a bookmark
    is recorded.  The result is written to ``pdfout``.

    Args:
        pdf: Path of the input PDF (passed to ``pymupdf.open``).
        toc_start: Start of the page slice holding the table of contents;
            used as ``doc[toc_start:toc_end]``.
        toc_end: End of that page slice.
        pdfout: Path the modified PDF is saved to.
    """
    toc_section_regex = re.compile(r"(\d+(?:\.\d+)*) \n(.+) \.{2,} (\d+)")
    pg_section_regex = re.compile(r"(\d+(?:\.\d+)*) \n?(.+) \n")

    # The original opened the undefined name `infile` (NameError on every
    # call) and rebound the `pdf` parameter; open the path that was actually
    # passed in and keep the document under its own name.
    with pymupdf.open(pdf) as doc:
        toc = []

        for page in doc[toc_start:toc_end]:
            for section in page.get_text("blocks"):
                # section[4] is the block's text, section[0:4] its bbox.
                toc_match = toc_section_regex.match(section[4])
                if not toc_match:
                    continue

                section_number = toc_match.group(1)
                section_header = toc_match.group(2).strip()
                # Page number as printed in the TOC; page indexes are one less.
                pg_number = int(toc_match.group(3))

                for pg_block in doc[pg_number - 1].get_text("blocks"):
                    pg_match = pg_section_regex.match(pg_block[4])

                    # check if TOC section header contains page section header
                    # (NOTE: page section header might be truncated i.e. spread out over 2 lines)
                    if pg_match and pg_match.group(2) in section_header:
                        destination = {
                            "kind": pymupdf.LINK_GOTO,
                            "page": pg_number - 1,
                            "from": pymupdf.Rect(*section[0:4]),
                            "to": pymupdf.Point(*pg_block[0:2]),  # top-left corner of header bbox/rect
                        }

                        # add TOC link
                        page.insert_link(destination)

                        # add bookmark entry; nesting level = number of
                        # dot-separated parts in the section number
                        toc.append([len(section_number.split(".")), section_header, pg_number, destination])

        # add bookmark entry for TOC
        toc.insert(0, [1, "Table of Contents", toc_start + 1])

        doc.set_toc(toc)
        doc.save(pdfout)


add_toc("pen-200.pdf", 2, 16, "pen-200-with-toc.pdf")
mystix revised this gist
Oct 3, 2025 . 1 changed file with 5 additions and 5 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -19,12 +19,12 @@ def add_toc(doc, toc_start, toc_end): for page in doc[toc_start:toc_end]: for section in page.get_text("blocks"): toc_match = re.match(toc_section_regex, section[4]) if toc_match: section_number = toc_match.group(1) section_header = toc_match.group(2).strip() pg_number = int(toc_match.group(3)) for pg_block in doc[pg_number - 1].get_text("blocks"): pg_match = re.match(pg_section_regex, pg_block[4]) -
mystix revised this gist
Oct 2, 2025 . No changes.There are no files selected for viewing
-
mystix revised this gist
Sep 23, 2025 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -13,7 +13,7 @@ def add_toc(doc, toc_start, toc_end): toc_section_regex = r"(\d+(?:\.\d+)*) \n(.+) \.{2,} (\d+)" pg_section_regex = r"(\d+(?:\.\d+)*) \n?(.+) \n" toc = [] -
mystix revised this gist
Sep 23, 2025 . 1 changed file with 2 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -29,6 +29,8 @@ def add_toc(doc, toc_start, toc_end): for pg_block in doc[pg_number - 1].get_text("blocks"): pg_match = re.match(pg_section_regex, pg_block[4]) # check if TOC section header contains page section header # (NOTE: page section header might be truncated i.e. spread out over 2 lines) if pg_match and pg_match.group(2) in section_header: destination = { "kind": pymupdf.LINK_GOTO, -
mystix revised this gist
Sep 23, 2025 . 1 changed file with 0 additions and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -46,7 +46,6 @@ def add_toc(doc, toc_start, toc_end): # add bookmark entry for TOC toc.insert(0, [1, "Table of Contents", toc_start + 1]) doc.set_toc(toc) -
mystix revised this gist
Sep 23, 2025 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -43,7 +43,7 @@ def add_toc(doc, toc_start, toc_end): # add bookmark entry toc.append([len(section_number.split(".")), section_header, pg_number, destination]) # add bookmark entry for TOC toc.insert(0, [1, "Table of Contents", toc_start + 1]) # set TOC bookmarks -
mystix revised this gist
Sep 23, 2025 . 1 changed file with 4 additions and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -43,7 +43,10 @@ def add_toc(doc, toc_start, toc_end): # add bookmark entry toc.append([len(section_number.split(".")), section_header, pg_number, destination]) # add TOC entry at the beginning toc.insert(0, [1, "Table of Contents", toc_start + 1]) # set TOC bookmarks doc.set_toc(toc) -
mystix revised this gist
Sep 23, 2025 . 1 changed file with 7 additions and 6 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -12,24 +12,24 @@ def add_toc(doc, toc_start, toc_end): toc_section_regex = r"(\d+(?:\.\d+)*) \n(.+) \.{2,} (\d+)" pg_section_regex = r"(\d+(?:\.\d+)*) \n?(.+) \n" toc = [] for page in doc[toc_start:toc_end]: for section in page.get_text("blocks"): match = re.match(toc_section_regex, section[4]) if match: section_number = match.group(1) section_header = match.group(2).strip() pg_number = int(match.group(3)) for pg_block in doc[pg_number - 1].get_text("blocks"): pg_match = re.match(pg_section_regex, pg_block[4]) if pg_match and pg_match.group(2) in section_header: destination = { "kind": pymupdf.LINK_GOTO, "page": pg_number - 1, @@ -50,3 +50,4 @@ def add_toc(doc, toc_start, toc_end): with pymupdf.open("pen-200.pdf") as doc: add_toc(doc, 2, 16) doc.save("pen-200-with-toc.pdf") -
mystix revised this gist
Sep 23, 2025 . No changes.There are no files selected for viewing
-
mystix revised this gist
Sep 23, 2025 . No changes.There are no files selected for viewing
-
mystix revised this gist
Sep 23, 2025 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -47,6 +47,6 @@ def add_toc(doc, toc_start, toc_end): doc.set_toc(toc) with pymupdf.open("pen-200.pdf") as doc: add_toc(doc, 2, 16) doc.save("pen-200-with-toc.pdf") -
mystix revised this gist
Sep 23, 2025 . 1 changed file with 5 additions and 3 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -24,10 +24,12 @@ def add_toc(doc, toc_start, toc_end): section_header = match.group(2).strip() pg_number = int(match.group(3)) pg_section_regex = fr"{section_number} \n?{section_header} \n" for pg_block in doc[pg_number - 1].get_text("blocks"): header = pg_block[4] if re.match(pg_section_regex, header): destination = { "kind": pymupdf.LINK_GOTO, "page": pg_number - 1, @@ -45,6 +47,6 @@ def add_toc(doc, toc_start, toc_end): doc.set_toc(toc) with pymupdf.open("pen-200-2025.pdf") as doc: add_toc(doc, 2, 16) doc.save("pen-200-with-toc.pdf") -
mystix created this gist
Sep 23, 2025 .There are no files selected for viewing
#!/usr/bin/env -S uv run --script
#
# /// script
# requires-python = ">=3.13"
# dependencies = [
#   "pymupdf",
# ]
# ///

import pymupdf
import re


def add_toc(doc, toc_start, toc_end):
    """Add clickable TOC links and bookmarks to an already opened document.

    Text blocks on the pages ``doc[toc_start:toc_end]`` that match
    ``<section number> <title> .... <page number>`` are treated as TOC
    entries.  Each non-empty block on the referenced page whose stripped text
    is a substring of ``"<section number> <title>"`` produces one link over
    the TOC entry and one bookmark.  The bookmark list, prefixed with a
    "Table of Contents" entry, is then installed with ``doc.set_toc``.

    Args:
        doc: Open document object; modified in place, not saved here.
        toc_start: Start of the page slice that holds the table of contents.
        toc_end: End of that page slice.
    """
    entry_pattern = re.compile(r"(\d+(?:\.\d+)*) \n(.+) \.{2,} (\d+)")
    bookmarks = []

    for toc_page in doc[toc_start:toc_end]:
        for entry in toc_page.get_text("blocks"):
            # entry[4] is the block text, entry[:4] its bounding box.
            found = entry_pattern.match(entry[4])
            if found is None:
                continue

            number = found.group(1)
            title = found.group(2).strip()
            target_page = int(found.group(3))  # page number as printed in the TOC
            wanted = f"{number} {title}"
            depth = number.count(".") + 1  # "1.2.3" -> level 3

            for candidate in doc[target_page - 1].get_text("blocks"):
                text = candidate[4].strip()
                # NOTE(review): plain substring test, so any short block whose
                # text occurs inside `wanted` also matches -- confirm against
                # the real PDF layout.
                if not text or text not in wanted:
                    continue

                link = {
                    "kind": pymupdf.LINK_GOTO,
                    "page": target_page - 1,
                    "from": pymupdf.Rect(*entry[:4]),
                    "to": pymupdf.Point(*candidate[:2]),  # top-left corner of the matched block
                }
                toc_page.insert_link(link)  # clickable area over the TOC entry
                bookmarks.append([depth, title, target_page, link])  # outline entry

    # The table of contents itself becomes the first bookmark.
    bookmarks.insert(0, [1, "Table of Contents", toc_start + 1])
    doc.set_toc(bookmarks)


with pymupdf.open("pen-200.pdf") as doc:
    add_toc(doc, 2, 16)
    doc.save("pen-200-with-toc.pdf")