#!/bin/sh # Modified by Stan Schwertly to download locally rather than to send to Posterous. # Github: https://github.com/Stantheman/Twitpic-Backup # Copyright 2010 Tim "burndive" of http://burndive.blogspot.com/ # This software is licensed under the Creative Commons GNU GPL version 2.0 or later. # License informattion: http://creativecommons.org/licenses/GPL/2.0/ # This script is a derivative of the original, obtained from here: # http://tuxbox.blogspot.com/2010/03/twitpic-to-posterous-export-script.html # Version1.2 [add retry] RUN_DATE=`date +%F--%H-%m-%S` TP_NAME=$1 WORKING_DIR=$2 IMG_DOWNLOAD=1 PREFIX=twitpic-$TP_NAME HTML_OUT=$PREFIX-all-$RUN_DATE.html #CURL_OPT='-f --retry 3 --retry-delay 5 --retry-max-time 60' CURL_OPT='--retry 3 --retry-delay 5 --retry-max-time 60' # Checks the user-supplied arguments if [ -z "$TP_NAME" ]; then echo "You must supply a TP_NAME." exit fi if [ ! -d "$WORKING_DIR" ]; then echo "You must supply a WORKING_DIR." exit fi cd $WORKING_DIR # Checks for the directories it needs if [ ! -d "images" ]; then mkdir images; fi if [ ! -d "html" ]; then mkdir html; fi if [ ! -d "logs" ]; then mkdir logs; fi PAGE=0 MAXRETRY=10 RETRY=0 LAST=`curl http://twitpic.com/photos/${TP_NAME} \ | grep "Last<" \ | sed "s/.*\?page=\([0-9]*\).*/\1/"` if [ -z "$LAST" ]; then NEXT=`curl http://twitpic.com/photos/${TP_NAME} \ | grep "Next<" \ | sed "s/.*\?page=\([0-9]*\).*/\1/"` if [ -z "$NEXT" ]; then PAGE=1 else PAGE=$NEXT fi else PAGE=$LAST fi while [ $PAGE -ne 0 ]; do echo PAGE: $PAGE FILENAME="html/$PREFIX-page-$PAGE.html" echo "FILENAME=" $FILENAME echo "0 curl http://twitpic.com/photos/${TP_NAME}?page=$PAGE -O $FILENAME" if [ ! -f "$FILENAME" ]; then echo "0" # wget http://twitpic.com/photos/${TP_NAME}?page=$PAGE -O $FILENAME echo "1 ${TP_NAME}?page=$PAGE -O $FILENAME" curl http://twitpic.com/photos/${TP_NAME}?page=$PAGE -o $FILENAME $CURL_OPT if [ $? -eq 22 -a $RETRY -le $MAXRETRY ]; then RETRY=`expr $RETRY + 1` sleep 1 else RETRY=0 PAGE=`expr $PAGE - 1` fi else RETRY=0 PAGE=`expr $PAGE - 1` fi done ALL_IDS=`cat html/$PREFIX-page-* | grep -Eo "" | grep -Eo "/[a-zA-Z0-9]+" | grep -Eo "[a-zA-Z0-9]+" | grep -v "sopapipa" | sort -r | uniq | xargs` COUNT=0 LOG_FILE=logs/$PREFIX-log-$RUN_DATE.txt echo $ALL_IDS | tee -a $LOG_FILE for ID in $ALL_IDS; do COUNT=`expr $COUNT + 1` echo $ID: $COUNT | tee -a $LOG_FILE echo "Processing $ID..." FULL_HTML="html/$PREFIX-$ID-full.html" # wget http://twitpic.com/$ID -O $FULL_HTML if [ ! -f "$FULL_HTML" ]; then RETRY=$MAXRETRY while [ $RETRY -ne 0 ]; do echo "2 " curl http://twitpic.com/$ID -O $FULL_HTML curl http://twitpic.com/$ID -o $FULL_HTML $CURL_OPT if [ $? -eq 22 ]; then RETRY=`expr $RETRY - 1` sleep 1 else RETRY=0 fi done fi FULL_URL=`grep "