#!/bin/sh
# Modified by Stan Schwertly to download locally rather than to send to Posterous.
# Github: https://github.com/Stantheman/Twitpic-Backup

# Copyright 2010 Tim "burndive" of http://burndive.blogspot.com/
# This software is licensed under the Creative Commons GNU GPL version 2.0 or later.
# License information: http://creativecommons.org/licenses/GPL/2.0/

# This script is a derivative of the original, obtained from here:
# http://tuxbox.blogspot.com/2010/03/twitpic-to-posterous-export-script.html

# Version 1.2 [add retry]

RUN_DATE=`date +%F--%H-%M-%S`

TP_NAME=$1
WORKING_DIR=$2
IMG_DOWNLOAD=1

PREFIX=twitpic-$TP_NAME
HTML_OUT=$PREFIX-all-$RUN_DATE.html

#CURL_OPT='-f --retry 3 --retry-delay 5 --retry-max-time 60'
CURL_OPT='--retry 3 --retry-delay 5 --retry-max-time 60'

# Check the user-supplied arguments
if [ -z "$TP_NAME" ]; then
  echo "You must supply a TP_NAME."
  exit 1
fi

if [ ! -d "$WORKING_DIR" ]; then
  echo "You must supply a WORKING_DIR."
  exit 1
fi

cd "$WORKING_DIR"

# Create the directories the script needs
if [ ! -d "images" ]; then
  mkdir images
fi
if [ ! -d "html" ]; then
  mkdir html
fi
if [ ! -d "logs" ]; then
  mkdir logs
fi

PAGE=0
MAXRETRY=10
RETRY=0

# Find the highest page number of the user's photo index:
# prefer the "Last" link, fall back to "Next", default to a single page.
LAST=`curl http://twitpic.com/photos/${TP_NAME} \
  | grep "<a href=.*>Last<" \
  | sed "s/.*\?page=\([0-9]*\).*/\1/"`

if [ -z "$LAST" ]; then
  NEXT=`curl http://twitpic.com/photos/${TP_NAME} \
    | grep "<a href=.*>Next<" \
    | sed "s/.*\?page=\([0-9]*\).*/\1/"`
  if [ -z "$NEXT" ]; then
    PAGE=1
  else
    PAGE=$NEXT
  fi
else
  PAGE=$LAST
fi

# Download every index page, counting down from the last page.
# A curl exit status of 22 (HTTP error) triggers a retry, up to MAXRETRY times.
while [ $PAGE -ne 0 ]; do
  echo "PAGE: $PAGE"
  FILENAME="html/$PREFIX-page-$PAGE.html"
  echo "FILENAME=" $FILENAME
  echo "0 curl http://twitpic.com/photos/${TP_NAME}?page=$PAGE -o $FILENAME"
  if [ ! -f "$FILENAME" ]; then
    echo "0"
    # wget http://twitpic.com/photos/${TP_NAME}?page=$PAGE -O $FILENAME
    echo "1 ${TP_NAME}?page=$PAGE -o $FILENAME"
    curl http://twitpic.com/photos/${TP_NAME}?page=$PAGE -o $FILENAME $CURL_OPT
    if [ $? -eq 22 -a $RETRY -le $MAXRETRY ]; then
      RETRY=`expr $RETRY + 1`
      sleep 1
    else
      RETRY=0
      PAGE=`expr $PAGE - 1`
    fi
  else
    RETRY=0
    PAGE=`expr $PAGE - 1`
  fi
done

# Extract the unique photo IDs from the saved index pages.
ALL_IDS=`cat html/$PREFIX-page-* \
  | grep -Eo "<a href=\"/[a-zA-Z0-9]+\">" \
  | grep -Eo "/[a-zA-Z0-9]+" \
  | grep -Eo "[a-zA-Z0-9]+" \
  | grep -v "sopapipa" \
  | sort -r | uniq | xargs`

COUNT=0
LOG_FILE=logs/$PREFIX-log-$RUN_DATE.txt
echo $ALL_IDS | tee -a $LOG_FILE

# Fetch each photo's page, then the full-size image it links to.
for ID in $ALL_IDS; do
  COUNT=`expr $COUNT + 1`
  echo $ID: $COUNT | tee -a $LOG_FILE
  echo "Processing $ID..."
  FULL_HTML="html/$PREFIX-$ID-full.html"
  # wget http://twitpic.com/$ID -O $FULL_HTML
  if [ ! -f "$FULL_HTML" ]; then
    RETRY=$MAXRETRY
    while [ $RETRY -ne 0 ]; do
      echo "2 curl http://twitpic.com/$ID -o $FULL_HTML"
      curl http://twitpic.com/$ID -o $FULL_HTML $CURL_OPT
      if [ $? -eq 22 ]; then
        RETRY=`expr $RETRY - 1`
        sleep 1
      else
        RETRY=0
      fi
    done
  fi
  FULL_URL=`grep "<img src" $FULL_HTML | grep -Eo "src=\"[^\"]*\"" | grep -Eo "https://[^\"]*"`
  if [ "$IMG_DOWNLOAD" -eq 1 ]; then
    # Work out the file extension from the image URL, with or without a query string.
    EXT=`echo "$FULL_URL" | grep -Eo "[a-zA-Z0-9]+\.[a-zA-Z0-9]+\?" | head -n1 | grep -Eo "\.[a-zA-Z0-9]+"`
    if [ -z "$EXT" ]; then
      EXT=`echo "$FULL_URL" | grep -Eo "\.[a-zA-Z0-9]+$"`
    fi
    FULL_FILE=$PREFIX-$ID-full$EXT
    # wget "$FULL_URL" -O "images/$FULL_FILE"
    if [ ! -f "images/$FULL_FILE" ]; then
      RETRY=$MAXRETRY
      while [ $RETRY -ne 0 ]; do
        echo "3 curl $FULL_URL -o images/$FULL_FILE"
        curl "$FULL_URL" -o "images/$FULL_FILE" $CURL_OPT
        if [ $? -eq 22 ]; then
          RETRY=`expr $RETRY - 1`
          sleep 1
        else
          RETRY=0
        fi
      done
    fi
  fi
done