michalsieron · May 29, 2021 21:35
diff --git a/xfdf.py b/xfdf.py
 """Transform Adobe Acrobat Reader comment xfdf file to txt.

 Usage: python3 xfdf.py <file_path>

 Run in the same directory as xfdf file, because there is no path 
 validation. There should be a file of the same name, but txt 
 extension created next to the original xfdf file.
 """

 import os
 import sys
 from xml.dom.minidom import parse


 def format_date(raw):
     """Return ``DD.MM.YYYY`` sliced out of an annotation ``date`` attribute.

     The slices skip a two-character prefix, then read four year digits, two
     month digits and two day digits (presumably the PDF ``D:YYYYMMDD...``
     form -- confirm against real exports). Short or empty input yields empty
     fields rather than an error.
     """
     year = raw[2:6]
     month = raw[6:8]
     day = raw[8:10]
     return f"{day}.{month}.{year}"


 def render_comments(dom):
     """Return the text report for all annotations in a parsed xfdf document.

     Each annotation contributes one ``page/author/date`` header line followed
     by the stripped text of each of its ``span`` elements, every one followed
     by a blank line. A document without an ``annots`` element gives ``""``.
     """
     containers = dom.getElementsByTagName("annots")
     if not containers:
         return ""

     chunks = []
     for annot in containers[0].childNodes:
         # Whitespace and comments between annotations are non-element nodes
         # without attributes; only element nodes are annotations.
         if annot.nodeType != annot.ELEMENT_NODE:
             continue
         page = annot.getAttribute("page")
         author = annot.getAttribute("title")
         date = format_date(annot.getAttribute("date"))
         chunks.append(f"page: {page}, author: {author}, date: {date}\n")
         for span in annot.getElementsByTagName("span"):
             # An empty span has no first child and a nested element has no
             # wholeText; skip both instead of raising AttributeError.
             text = getattr(span.firstChild, "wholeText", None)
             if text is None:
                 continue
             # Collapse doubled carriage returns, then turn the remaining
             # ones into newlines.
             text = text.replace("\r\r", "\r").replace("\r", "\n")
             chunks.append(text.strip() + "\n\n")
     return "".join(chunks)


 def convert(path):
     """Convert the xfdf file named by *path* and return the txt file's path.

     *path* may be given with or without an extension. Only the final
     extension is dropped, so directories and dots elsewhere in the name are
     preserved. Errors from parsing or file access propagate unchanged.
     """
     base = os.path.splitext(path)[0]
     report = render_comments(parse(base + ".xfdf"))
     out_path = base + ".txt"
     # Comment text can hold any Unicode character, so do not depend on the
     # platform's locale encoding.
     with open(out_path, "w", encoding="utf-8") as fp:
         fp.write(report)
     return out_path


 def main(argv=None):
     """Run the converter; return the process exit status.

     *argv* is the argument list without the program name and defaults to
     ``sys.argv[1:]``.
     """
     args = sys.argv[1:] if argv is None else argv
     if not args:
         print("You must provide file name!", file=sys.stderr)
         return 1
     convert(args[0])
     return 0


 if __name__ == "__main__":
     sys.exit(main())
	"""Transform Adobe Acrobat Reader comment xfdf file to txt.

	Usage: python3 xfdf.py <file_path>

	Run in the same directory as xfdf file, because there is no path
	validation. There should be a file of the same name, but txt
	extension created next to the original xfdf file.
	"""

	import os
	import sys
	from xml.dom.minidom import parse


	def format_date(raw):
	    """Return ``DD.MM.YYYY`` sliced out of an annotation ``date`` attribute.

	    The slices skip a two-character prefix, then read four year digits, two
	    month digits and two day digits (presumably the PDF ``D:YYYYMMDD...``
	    form -- confirm against real exports). Short or empty input yields empty
	    fields rather than an error.
	    """
	    year = raw[2:6]
	    month = raw[6:8]
	    day = raw[8:10]
	    return f"{day}.{month}.{year}"


	def render_comments(dom):
	    """Return the text report for all annotations in a parsed xfdf document.

	    Each annotation contributes one ``page/author/date`` header line followed
	    by the stripped text of each of its ``span`` elements, every one followed
	    by a blank line. A document without an ``annots`` element gives ``""``.
	    """
	    containers = dom.getElementsByTagName("annots")
	    if not containers:
	        return ""

	    chunks = []
	    for annot in containers[0].childNodes:
	        # Whitespace and comments between annotations are non-element nodes
	        # without attributes; only element nodes are annotations.
	        if annot.nodeType != annot.ELEMENT_NODE:
	            continue
	        page = annot.getAttribute("page")
	        author = annot.getAttribute("title")
	        date = format_date(annot.getAttribute("date"))
	        chunks.append(f"page: {page}, author: {author}, date: {date}\n")
	        for span in annot.getElementsByTagName("span"):
	            # An empty span has no first child and a nested element has no
	            # wholeText; skip both instead of raising AttributeError.
	            text = getattr(span.firstChild, "wholeText", None)
	            if text is None:
	                continue
	            # Collapse doubled carriage returns, then turn the remaining
	            # ones into newlines.
	            text = text.replace("\r\r", "\r").replace("\r", "\n")
	            chunks.append(text.strip() + "\n\n")
	    return "".join(chunks)


	def convert(path):
	    """Convert the xfdf file named by *path* and return the txt file's path.

	    *path* may be given with or without an extension. Only the final
	    extension is dropped, so directories and dots elsewhere in the name are
	    preserved. Errors from parsing or file access propagate unchanged.
	    """
	    base = os.path.splitext(path)[0]
	    report = render_comments(parse(base + ".xfdf"))
	    out_path = base + ".txt"
	    # Comment text can hold any Unicode character, so do not depend on the
	    # platform's locale encoding.
	    with open(out_path, "w", encoding="utf-8") as fp:
	        fp.write(report)
	    return out_path


	def main(argv=None):
	    """Run the converter; return the process exit status.

	    *argv* is the argument list without the program name and defaults to
	    ``sys.argv[1:]``.
	    """
	    args = sys.argv[1:] if argv is None else argv
	    if not args:
	        print("You must provide file name!", file=sys.stderr)
	        return 1
	    convert(args[0])
	    return 0


	if __name__ == "__main__":
	    sys.exit(main())
No results found