@eligor13
Created October 27, 2014 05:36
twpicdl2.sh

#!/bin/sh

# Modified by Stan Schwertly to download locally rather than to send to Posterous.
# GitHub: https://github.com/Stantheman/Twitpic-Backup

# Copyright 2010 Tim "burndive" of http://burndive.blogspot.com/
# This software is licensed under the Creative Commons GNU GPL version 2.0 or later.
# License information: http://creativecommons.org/licenses/GPL/2.0/

# This script is a derivative of the original, obtained from here:
# http://tuxbox.blogspot.com/2010/03/twitpic-to-posterous-export-script.html

# Version 1.2 [add retry]

RUN_DATE=`date +%F--%H-%M-%S`

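# Usage (argument values below are only examples):
#   ./twpicdl2.sh <twitpic_username> <existing_working_dir>
#   e.g. ./twpicdl2.sh someuser /tmp/twitpic-backup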
TP_NAME=$1
WORKING_DIR=$2

IMG_DOWNLOAD=1
PREFIX=twitpic-$TP_NAME
HTML_OUT=$PREFIX-all-$RUN_DATE.html

#CURL_OPT='-f --retry 3 --retry-delay 5 --retry-max-time 60'
CURL_OPT='--retry 3 --retry-delay 5 --retry-max-time 60'
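# Note: curl only returns exit code 22 (checked by the retry loops below)
# when -f/--fail is set, as in the commented-out CURL_OPT variant above.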

# Checks the user-supplied arguments
if [ -z "$TP_NAME" ]; then
  echo "You must supply a TP_NAME."
  exit 1
fi

if [ ! -d "$WORKING_DIR" ]; then
  echo "You must supply an existing WORKING_DIR."
  exit 1
fi

cd "$WORKING_DIR"

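# Layout under WORKING_DIR: saved pages and per-photo HTML go in html/,
# downloaded images in images/, and per-run logs in logs/.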
# Checks for the directories it needs
if [ ! -d "images" ]; then
  mkdir images
fi

if [ ! -d "html" ]; then
  mkdir html
fi

if [ ! -d "logs" ]; then
  mkdir logs
fi

PAGE=0
MAXRETRY=10
RETRY=0

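# Discover how many pages of thumbnails the user has: scrape the profile
# page for a "Last" pagination link; if there is none, fall back to the
# "Next" link; if neither exists, assume a single page.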
LAST=`curl http://twitpic.com/photos/${TP_NAME} \
  | grep "<a href=.*>Last<" \
  | sed "s/.*\?page=\([0-9]*\).*/\1/"`
if [ -z "$LAST" ]; then
  NEXT=`curl http://twitpic.com/photos/${TP_NAME} \
    | grep "<a href=.*>Next<" \
    | sed "s/.*\?page=\([0-9]*\).*/\1/"`
  if [ -z "$NEXT" ]; then
    PAGE=1
  else
    PAGE=$NEXT
  fi
else
  PAGE=$LAST
fi

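# Download each page of thumbnails, counting down from the highest page.
# On an HTTP failure (curl exit 22) the same page is retried up to
# MAXRETRY times before moving on to the next one.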
while [ $PAGE -ne 0 ]; do
  echo "PAGE: $PAGE"
  FILENAME="html/$PREFIX-page-$PAGE.html"
  echo "FILENAME=$FILENAME"
  if [ ! -f "$FILENAME" ]; then
    # wget http://twitpic.com/${TP_NAME}?page=$PAGE -O $FILENAME
    echo "curl http://twitpic.com/${TP_NAME}?page=$PAGE -o $FILENAME"
    curl "http://twitpic.com/${TP_NAME}?page=$PAGE" -o "$FILENAME" $CURL_OPT
    if [ $? -eq 22 -a $RETRY -le $MAXRETRY ]; then
      RETRY=`expr $RETRY + 1`
      sleep 1
    else
      RETRY=0
      PAGE=`expr $PAGE - 1`
    fi
  else
    RETRY=0
    PAGE=`expr $PAGE - 1`
  fi
done

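# Extract the photo IDs: each thumbnail links to /<id>, so pull those
# hrefs out of the saved pages, reduce them to bare IDs, drop the
# "sopapipa" match, and de-duplicate the sorted list.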
ALL_IDS=`cat html/$PREFIX-page-* \
  | grep -Eo "<a href=\"/[a-zA-Z0-9]+\">" \
  | grep -Eo "/[a-zA-Z0-9]+" \
  | grep -Eo "[a-zA-Z0-9]+" \
  | grep -v "sopapipa" \
  | sort -r | uniq | xargs`

COUNT=0
LOG_FILE=logs/$PREFIX-log-$RUN_DATE.txt

echo "$ALL_IDS" | tee -a "$LOG_FILE"

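# For each ID: fetch the photo's detail page (with the same retry
# policy), scrape the full-size image URL from it, then download the
# image itself.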
for ID in $ALL_IDS; do
  COUNT=`expr $COUNT + 1`
  echo "$ID: $COUNT" | tee -a "$LOG_FILE"

  echo "Processing $ID..."
  FULL_HTML="html/$PREFIX-$ID-full.html"
  # wget http://twitpic.com/$ID -O $FULL_HTML
  if [ ! -f "$FULL_HTML" ]; then
    RETRY=$MAXRETRY
    while [ $RETRY -ne 0 ]; do
      echo "curl http://twitpic.com/$ID -o $FULL_HTML"
      curl "http://twitpic.com/$ID" -o "$FULL_HTML" $CURL_OPT
      if [ $? -eq 22 ]; then
        RETRY=`expr $RETRY - 1`
        sleep 1
      else
        RETRY=0
      fi
    done
  fi

  FULL_URL=`grep "<img src" "$FULL_HTML" | grep -Eo "src=\"[^\"]*\"" | grep -Eo "https://[^\"]*"`

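  # Determine the image file extension: first try "name.ext?" ahead of a
  # query string; if the URL has no query string, fall back to whatever
  # follows the last dot.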
    if [ "$IMG_DOWNLOAD" -eq 1 ]; then
    EXT=`echo "$FULL_URL" | grep -Eo "[a-zA-Z0-9]+\.[a-zA-Z0-9]+\?" | head -n1 | grep -Eo "\.[a-zA-Z0-9]+"`
    if [ -z "$EXT" ]; then
    EXT=`echo "$FULL_URL" | grep -Eo "\.[a-zA-Z0-9]+$"`
    fi
    FULL_FILE=$PREFIX-$ID-full$EXT
    # wget "$FULL_URL" -O "images/$FULL_FILE"
    if [ ! -f "images/$FULL_FILE" ]; then
    RETRY=$MAXRETRY
    while [ $RETRY -ne 0 ]; do
    echo "3 " curl "$FULL_URL" -O "images/$FULL_FILE"
    curl "$FULL_URL" -o "images/$FULL_FILE" $CURL_OPT
    if [ $? -eq 22 ]; then
    RETRY=`expr $RETRY - 1`
    sleep 1
    else
    RETRY=0
    fi
    done
    fi
    fi
    done