|
|
@@ -1,24 +1,21 @@ |
|
|
#!/usr/bin/env python3 |
|
|
|
|
|
import sys |
|
|
import re |
|
|
import shutil |
|
|
import argparse |
|
|
import binascii |
|
|
from os import path |
|
|
from sys import stderr |
|
|
|
|
|
# |
|
|
# Author: Daxda |
|
|
# Date: 02.04.2014 |
|
|
# WTF: This is a quick tool I've hacked together to easily remove the meta
#      information as well as the annoying link on each page of eBooks
#      downloaded from it-ebooks.info. The modified file will hold the
#      original file name, and the original file will be renamed to
#      'original.pdf.old'. 'pattern' is the regex pattern which is used to
#      remove the annotation elements; the rough structure of it looks
#      like this:
|
|
# |
|
|
# obj |
|
|
# << |
|
|
@@ -35,19 +32,22 @@ |
|
|
# endobj |
|
|
# |
|
|
|
|
|
# Regular expression, written against the *hex dump* of a PDF (two lowercase
# hex digits per byte), that matches one it-ebooks.info link annotation.
# Each fragment below is the hex encoding of the PDF text shown in its
# trailing comment ("0a" is a line feed); the two '.*?' wildcards skip the
# variable /Rect coordinates and /Border values.
pattern = (
    b'0a2f54797065202f416e6e6f74'          # LF "/Type /Annot"
    b'0a2f53756274797065202f4c696e6b'      # LF "/Subtype /Link"
    b'0a2f52656374205b20'                  # LF "/Rect [ "
    b'.*?'                                 # rectangle coordinates
    b'205d'                                # " ]"
    b'0a2f426f7264657220'                  # LF "/Border "
    b'.*?'                                 # border definition
    b'0a2f41203c3c'                        # LF "/A <<"
    b'0a2f54797065202f416374696f6e'        # LF "/Type /Action"
    b'0a2f53202f555249'                    # LF "/S /URI"
    b'0a2f5552492028'                      # LF "/URI ("
    b'687474703a2f2f7777772e69742d65626f6f6b732e696e666f2f'
    #                                        "http://www.it-ebooks.info/"
    b'29'                                  # ")"
    b'0a3e3e'                              # LF ">>"
)
|
|
|
|
|
# Byte-level pattern for one it-ebooks.info link annotation. It matches on
# the raw PDF bytes, so a match can never start or end in the middle of a
# byte. re.DOTALL lets the two '.*?' wildcards (the /Rect coordinates and
# the /Border values) span line feeds.
_LINK_ANNOTATION_RE = re.compile(
    rb'\n/Type /Annot\n/Subtype /Link\n/Rect \[ .*? \]\n/Border .*?'
    rb'\n/A <<\n/Type /Action\n/S /URI\n'
    rb'/URI \(http://www\.it-ebooks\.info/\)\n>>',
    re.DOTALL,
)


def remove_evil_links(pdf_data):
    """Return *pdf_data* with the it-ebooks.info links stripped out.

    Two passes are made over the raw bytes:

    1. every link annotation whose URI is ``http://www.it-ebooks.info/``
       is deleted (this removes the "clickable" links);
    2. every remaining occurrence of ``www.it-ebooks.info`` is deleted
       (the link text assigned to those annotations).

    The data is processed as bytes rather than as a hex dump, so a match
    cannot straddle a half-byte boundary and corrupt unrelated content.

    Args:
        pdf_data: The complete contents of a PDF file, as ``bytes``.

    Returns:
        A new ``bytes`` object with the matches removed. Nothing else in
        the data is rewritten or re-indexed.
    """
    # Remove each annotation element (the "clickable" it-ebooks.info links).
    without_annotations = _LINK_ANNOTATION_RE.sub(b'', pdf_data)

    # Remove the actual link text that was assigned to those annotations.
    return without_annotations.replace(b'www.it-ebooks.info', b'')
|
|
|
|
|
def main(args): |
|
|
@@ -57,41 +57,48 @@ def main(args): |
|
|
if not file_path: |
|
|
continue |
|
|
if args.verbose: |
|
|
print("Processing: {0}".format(file_path)) |
|
|
print('Processing: {0}'.format(file_path)) |
|
|
try: |
|
|
with open(file_path, "rb") as input_file: |
|
|
with open(file_path, 'rb') as input_file: |
|
|
pdf_data = input_file.read() |
|
|
except IOError as e: |
|
|
stderr.write("{0}: {1}\n".format(file_path, e.strerror)) |
|
|
stderr.flush() |
|
|
sys.stderr.write('{0}: {1}\n'.format(file_path, e.strerror)) |
|
|
sys.stderr.flush() |
|
|
continue |
|
|
|
|
|
# Backup the file with a different name |
|
|
if not args.no_backup: |
|
|
if args.verbose: |
|
|
print("Creating backup: {0}.OLD".format(file_path)) |
|
|
shutil.move(file_path, "{0}.OLD".format(file_path)) |
|
|
print('Creating backup: {0}.old'.format(file_path)) |
|
|
shutil.move(file_path, '{0}.old'.format(file_path)) |
|
|
|
|
|
# Modify the PDF file |
|
|
new_pdf_data = remove_evil_links(pdf_data) |
|
|
# Save the new file |
|
|
with open(file_path, "wb") as out_file: |
|
|
with open(file_path, 'wb') as out_file: |
|
|
out_file.write(new_pdf_data) |
|
|
if args.verbose: |
|
|
print("Saving modified file: {0}".format(file_path)) |
|
|
print('Saving modified file: {0}'.format(file_path)) |
|
|
except KeyboardInterrupt: |
|
|
pass |
|
|
|
|
|
if __name__ == '__main__':
    # Command-line entry point: build the option parser, parse sys.argv and
    # pass the resulting namespace to main(). Each option is registered
    # exactly once; registering the same option string twice makes argparse
    # raise a "conflicting option string" error.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-f', '--files',
        help='One or more PDF files to remove it-ebook watermarks.',
        nargs='*', required=True
    )
    parser.add_argument(
        '-n', '--no-backup',
        help='Disables the creating of backups for the files '
             'which are being processed.',
        action='store_true'
    )
    parser.add_argument(
        '-v', '--verbose',
        action='store_true'
    )

    args = parser.parse_args()
    main(args)