#!/bin/sh
#
# Download every artefact listed in the Wayback Machine index ($DB) into the
# current directory. For each entry the script:
#   1. derives a local filename from the last path segment of the key,
#   2. skips it if a non-empty file of that name already exists,
#   3. fetches the capture from web.archive.org into "NAME.part",
#   4. verifies the byte count against the entry's "size" property,
#   5. applies the entry's "mtime", makes the file read-only, renames it to
#      its final name and appends its SHA-256 digest to ./SHA256.
#
# Requires: jq, curl, node, sha256sum.
set -e

# NOTE: 451.36 GB of free disk space is required to download
# each captured artefact successfully.
DB=~/Labs/modern.ie.vms.json/wayback-machine.json

# Set both the access and modification time of a file.
#   $1 - path of the file to touch
#   $2 - timestamp, in any form JavaScript's `new Date()` can parse
# Node throws (non-zero exit) when the timestamp does not parse; note that
# the `!+date` check also rejects the epoch itself (numeric value 0).
set_mtime(){
	node -e '
		const [, path, mtime] = process.argv;
		const date = new Date(mtime);
		if(!+date) throw new Error(`Invalid date: ${mtime}`);
		require("fs").utimesSync(path, date, date);
	' "$@"
}

# Print one property of one entry in $DB.
#   $1 - entry key; passed via --arg so quotes/backslashes in it cannot
#        alter the jq program
#   $2 - property name, spliced into the filter as `.$2`
file_prop(){
	jq -r --arg key "$1" ".[\$key] | .$2" "$DB"
}

# Capture the key list up front: as a plain assignment, a jq failure aborts
# the script under `set -e` instead of silently yielding an empty loop.
keys=$(jq -r 'keys_unsorted | .[]' "$DB")

# Keys are read one per line from fd 3, which avoids word-splitting and glob
# expansion of the keys and keeps stdin free for the commands in the loop.
while IFS= read -r key <&3; do
	# An empty key list still produces one blank here-document line.
	[ -n "$key" ] || continue

	ts=$(file_prop "$key" ts)

	# Local filename: last path segment of the key, with %20 decoded to spaces.
	name=${key##*/}
	case $name in
		*%20*) name=$(printf %s "$name" | sed 's/%20/ /g');;
		'') printf 'Invalid URL: %s\n' "$key" >&2; exit 1;;
	esac

	if test -s "$name"; then
		printf >&2 'Already downloaded: %s\n' "$name"
		continue
	fi

	url="https://web.archive.org/web/${ts}id_/$key"
	printf >&2 'Downloading: %s\n' "$url"

	# Download under a temporary name so an interrupted or failed transfer is
	# never mistaken for a finished one by the `test -s` check above.
	# -f makes curl fail on HTTP errors instead of saving the error page.
	part=$name.part
	curl -f -# "$url" > "$part"

	# Exact byte-count comparison; `tr` strips the padding some `wc`
	# implementations put around the number.
	expected=$(file_prop "$key" size)
	actual=$(wc -c < "$part" | tr -d '[:space:]')
	if [ "$actual" != "$expected" ]; then
		printf >&2 'Size mismatch for %s: expected %s bytes, got %s\n' \
			"$name" "$expected" "$actual"
		exit 1
	fi

	# Finalise the file before renaming it, so the final name only ever
	# refers to a complete, verified, timestamped artefact.
	set_mtime "$part" "$(file_prop "$key" mtime)"
	chmod -w "$part"
	mv -f "$part" "$name"
	sha256sum "$name" >> SHA256

	# Pause between requests to avoid hammering the archive.
	sleep 6
done 3<<EOF
$keys
EOF