#!/usr/bin/env python3
# ReadingListCatcher
# - A script for exporting Safari Reading List items to Markdown and Pinboard
#   Brett Terpstra 2015
#   https://gist.github.com/ttscoff/f27f161f37bbc7f5b418
#
# Uses code from
# Requires Python pinboard lib for Pinboard.in import:
#   `easy_install pinboard` or `pip install pinboard`
#
# NOTE: the code in this file only implements the Markdown export; it
# contains no Pinboard calls. It targets Python 3.
import plistlib
from shutil import copy
import subprocess
import os
from tempfile import gettempdir, mkstemp
import sys
import re
import time
from datetime import date, datetime, timedelta, timezone
from os import path

try:
    import pytz
except ImportError:
    # pytz is only used to tag Reading List dates as UTC; the standard
    # library provides an equivalent tzinfo, so the package is optional.
    pytz = None

BOOKMARKS_MARKDOWN_FILE = '~/Dropbox/Safari-ReadingList.md'  # Markdown file if using md export
BOOKMARKS_PLIST = '~/Library/Safari/Bookmarks.plist'  # Shouldn't need to modify
# call `plutil -convert xml1 <file>` to pre-process bookmark file in local directory
USE_PLUTIL = True  # default

# tzinfo used to mark the (naive) plist dates as UTC before formatting.
_UTC = pytz.utc if pytz is not None else timezone.utc
# Format of the timestamp captured from the "Updated: ..." header line.
_STAMP_FORMAT = '%Y-%m-%d %H:%M:%S UTC'
# Matches the "Updated: <timestamp> UTC" header at the start of any line.
_UPDATED_RX = re.compile(r'(?m)^Updated: (\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} UTC)')
# Characters replaced by a space in titles and descriptions.
_CLEAN_RX = re.compile(r'[|`:_*\n]')
_SPACES_RX = re.compile(r' +')
# Cut-off used when the markdown file carries no "Updated:" header.
_DEFAULT_SINCE = datetime(2013, 1, 1, 0, 0, 0)


def copyTempFile(srcFile):
    """Copy *srcFile* into the temp directory and return the copy's path.

    The copy gets a unique name (original stem as prefix, original
    extension as suffix) so that concurrent runs, or an input file that
    already lives in the temp directory, cannot clash with it.
    """
    stem, ext = os.path.splitext(os.path.basename(srcFile))
    handle, tmpFile = mkstemp(prefix=stem + '-', suffix=ext, dir=gettempdir())
    os.close(handle)
    try:
        copy(srcFile, tmpFile)
    except Exception:
        # Do not leave an empty placeholder behind when the copy fails.
        os.remove(tmpFile)
        raise
    return tmpFile


def removeTempFile(tmpFile):
    """Delete the temporary copy created by copyTempFile()."""
    os.remove(tmpFile)


def _cleanText(text):
    """Replace markdown-significant characters with spaces and collapse runs of spaces."""
    return _SPACES_RX.sub(' ', _CLEAN_RX.sub(' ', text))


class _readingList():
    """Export new Safari Reading List entries to a Markdown file.

    All work happens in the constructor. Afterwards:
      postedCount -- number of entries newer than the last recorded update
      newcontent  -- markdown generated for those entries
      content     -- previous markdown file content without its header
    """

    def __init__(self, args):
        """Run the export.

        *args* is the argparse namespace built in main(); it must provide
        ``input_file``, ``output_file``, ``write`` and ``use_plutil``.
        Exits the process with plutil's status if the conversion fails.
        """
        print(args)
        bookmarksFile = os.path.expanduser(args.input_file)
        markdownFile = os.path.expanduser(args.output_file)

        self.postedCount = 0
        self.content = ''
        self.newcontent = ''

        bookmarksFileCopy = copyTempFile(bookmarksFile)
        try:
            sys.stdout.write('tmpfile bookmarksFileCopy: ')
            print(bookmarksFileCopy)

            # Both switches must be on; either the module constant or
            # --no-plutil disables the external call.
            if USE_PLUTIL and args.use_plutil:
                converted = subprocess.call(
                    ['plutil', '-convert', 'xml1', bookmarksFileCopy])
                if converted != 0:
                    print('Couldn\'t convert bookmarks plist to xml format')
                    sys.exit(converted)

            with open(bookmarksFileCopy, 'rb') as plistHandle:
                plist = plistlib.load(plistHandle)
        finally:
            # Always clean up, including on sys.exit() and parse errors.
            removeTempFile(bookmarksFileCopy)

        last = self._loadExisting(markdownFile) if args.write else _DEFAULT_SINCE
        self._collectNew(plist, last)
        self._finish(args, markdownFile)

    def _loadExisting(self, markdownFile):
        """Read the current markdown file and return the time of its last update.

        Stores the file content, minus "Updated:" header lines, in
        ``self.content``. Creates an empty file when none exists.
        """
        if not os.path.exists(markdownFile):
            with open(markdownFile, 'a'):
                pass
            return _DEFAULT_SINCE

        with open(markdownFile, 'r', encoding='utf-8') as mdInput:
            self.content = mdInput.read()

        last = _DEFAULT_SINCE
        matchLast = _UPDATED_RX.search(self.content)
        if matchLast is not None:
            last = datetime.strptime(matchLast.group(1), _STAMP_FORMAT)
        self.content = _UPDATED_RX.sub('', self.content).strip()
        return last

    def _collectNew(self, plist, last):
        """Append every Reading List entry added after *last* to ``self.newcontent``."""
        # There should only be one Reading List item, so take the first one
        readingList = next(
            (item for item in plist.get('Children', [])
             if item.get('Title') == 'com.apple.ReadingList'),
            None)
        if readingList is None:
            return

        for item in readingList.get('Children', []):
            added = item['ReadingList']['DateAdded']
            if added <= last:
                # Stop at the first entry that is not newer than the last
                # update (this relies on entries being stored newest first).
                break
            addtime = added.replace(tzinfo=_UTC).strftime('%c')
            title = _cleanText(item['URIDictionary']['title'])
            url = item['URLString']
            description = ''
            if 'PreviewText' in item['ReadingList']:
                description = _cleanText(item['ReadingList']['PreviewText'])
            self.itemToMarkdown(addtime, title, url, description)

    def _finish(self, args, markdownFile):
        """Write the markdown file (unless --no-write) and print a summary."""
        stamp = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
        pluralized = 'bookmarks' if self.postedCount > 1 else 'bookmark'

        if args.write:
            with open(markdownFile, 'w', encoding='utf-8') as mdHandle:
                mdHandle.write('Updated: ' + stamp + " UTC\n\n")
                mdHandle.write(self.newcontent + self.content)

        if self.postedCount > 0:
            sys.stdout.write('\n' + stamp + ' UTC\n')
            sys.stdout.write('Found ' + str(self.postedCount) + ' new ' + pluralized + "\n")
            sys.stdout.write(('Saved' if args.write else 'WARN --no-write; so not writing') + ' to ' + markdownFile)
        else:
            sys.stdout.write('No new bookmarks found in Reading List')
        sys.stdout.write("\n")

    def itemToMarkdown(self, addtime, title, url, description):
        """Append one bookmark as a markdown list item and count it.

        The entry is a link whose title attribute records *addtime*; a
        non-empty *description* follows as a block quote.
        """
        self.newcontent += '- [' + title + '](' + url + ' "Added on ' + addtime + '")'
        if not description == '':
            self.newcontent += "\n\n > " + description
        self.newcontent += "\n\n"
        self.postedCount += 1


def main(argv=None):
    """Parse command-line options and run the export.

    *argv* is the list of arguments to parse; ``None`` means
    ``sys.argv[1:]``. Returns the ``_readingList`` instance.
    """
    from argparse import ArgumentParser
    parser = ArgumentParser()
    parser.add_argument("-f", "--out-file", dest="output_file", default=BOOKMARKS_MARKDOWN_FILE,
                        help="output markdown file", metavar="outfile")
    parser.add_argument("-b", "--bookmarks-file", dest="input_file", default=BOOKMARKS_PLIST,
                        help="input Bookmarks.plist file", metavar="infile")
    parser.add_argument("--no-write", dest="write", action='store_false',
                        help="do not write to output file")
    parser.add_argument("--no-plutil", dest="use_plutil", action='store_false',
                        help="disable plutil system call - useful for running in jupyter or on linux.\nWARN you must parse the plist file yourself")
    args = parser.parse_args(argv)
    return _readingList(args)


if __name__ == "__main__":
    main()