Skip to content

Instantly share code, notes, and snippets.

@srvanderplas
Created February 1, 2019 14:09
Show Gist options
  • Select an option

  • Save srvanderplas/f4551edaf2f9d2e256b809c40964bde9 to your computer and use it in GitHub Desktop.

Select an option

Save srvanderplas/f4551edaf2f9d2e256b809c40964bde9 to your computer and use it in GitHub Desktop.

Revisions

  1. srvanderplas created this gist Feb 1, 2019.
    225 changes: 225 additions & 0 deletions FullProcess.sh
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,225 @@
    #!/bin/bash

    # This uses GNU parallel: cite:
    # @article{Tange2011a,
    # title = {GNU Parallel - The Command-Line Power Tool},
    # author = {O. Tange},
    # address = {Frederiksberg, Denmark},
    # journal = {;login: The USENIX Magazine},
    # month = {Feb},
    # number = {1},
    # volume = {36},
    # url = {http://www.gnu.org/s/parallel},
    # year = {2011},
    # pages = {42-47}
    # }



    # Any changes must be propagated through format_picture(), which repeats
    # these paths because it is exported and run by GNU parallel, where the
    # script's unexported variables are not visible.
    ORIG_DIR="./photos/"
    PNG_DIR="./pngs/"
    CROP_DIR="./cropped/"
    EDGE_DIR="./edges/"
    SLICE_DIR_Color="./colorslice64/"
    SLICE_DIR_Color2="./colorslice128/"
    SLICE_DIR="./slice64/"
    SLICE_DIR2="./slice128/"
    SMALL_DIR="./rejects/"


    # NOTE(review): not read anywhere in this script; filter_images() sets its
    # own whiteThr. Kept in case something sourcing this file relies on it.
    whiteTHR=253

    # Create every output directory up front. -p makes this a no-op for
    # directories that already exist, so re-running the script is safe.
    mkdir -p -- \
      "$PNG_DIR" "$CROP_DIR" "$EDGE_DIR" \
      "$SLICE_DIR" "$SLICE_DIR2" \
      "$SLICE_DIR_Color" "$SLICE_DIR_Color2" \
      "$SMALL_DIR"

    # if [ ! -d "$NORM_DIR" ]; then
    # mkdir "$NORM_DIR"
    # fi

    # Set up manifest files
    usefulfile='useful_files.csv'
    unusefulfile='not_useful_files.csv'

    # Write the CSV header to manifest $1 unless that file already exists.
    # The manifests are regular files, so the test must be -e (a -d test is
    # always false for them and would truncate the manifest on every run).
    init_manifest() {
      if [ ! -e "$1" ]; then
        echo "file, size, mean, discard" > "$1"
      fi
    }

    init_manifest "$usefulfile"
    init_manifest "$unusefulfile"

    ##### Slice picture up and format appropriately ################################
    # Turn one photo into trimmed, edge-detected, offset and tiled PNG files.
    #
    # Arguments: $1 - file name (or path) of a photo; only its base name is
    #                 used, and the photo itself is read from ./photos/.
    # Outputs:   intermediates in ./pngs/, ./cropped/ and ./edges/; tiles in
    #            ./slice64/, ./slice128/, ./colorslice64/ and ./colorslice128/.
    # Requires:  ImageMagick `convert` and `identify`.
    # Each step is skipped when its output already exists.
    format_picture () {
      local origfile basenoext basefile
      origfile=$(basename -- "$1")
      basenoext=${origfile%.*}
      basefile="$basenoext.png"
      local offset64="offset64_"
      local offset32="offset32_"

      # Repeated from the top of the script: this function is exported and run
      # by GNU parallel, where the script's unexported variables are not set.
      local ORIG_DIR="./photos/"
      local PNG_DIR="./pngs/"
      local CROP_DIR="./cropped/"
      local EDGE_DIR="./edges/"
      local SLICE_DIR="./slice64/"
      local SLICE_DIR2="./slice128/"
      local SLICE_DIR_Color="./colorslice64/"
      local SLICE_DIR_Color2="./colorslice128/"

      # Convert to PNG
      if [ ! -f "$PNG_DIR$basefile" ]; then
        convert "$ORIG_DIR$origfile" "$PNG_DIR$basefile"
      fi

      # Crop
      if [ ! -f "$CROP_DIR$basefile" ]; then
        convert "$PNG_DIR$basefile" -trim "$CROP_DIR$basefile"
      fi

      # Normalize colors
      # ./redist -s Normal $CROP_DIR$basefile $NORM_DIR$basefile

      # Edge Detect
      if [ ! -f "$EDGE_DIR$basefile" ]; then
        convert "$CROP_DIR$basefile" -canny 0x1+3%+3% -negate -colorspace Gray "$EDGE_DIR$basefile"
      fi

      # Offset copies: extend the canvas by 32 and by 64 pixels in each
      # dimension (white background, north-east gravity), first for the edge
      # image and then for the colour (cropped) image.
      local dir px src padded
      for dir in "$EDGE_DIR" "$CROP_DIR"; do
        src="$dir$basefile"
        for px in 32 64; do
          padded="${dir}offset${px}_${basefile}"
          if [ ! -f "$padded" ]; then
            convert "$src" \
              -gravity northeast \
              -background white \
              -extent "$(identify -format "%[fx:W+${px}]x%[fx:H+${px}]" "$src")" \
              "$padded"
          fi
        done
      done

      # Actually slice images
      ### This is a crude way to avoid repeating slicing for images that have
      ### already been processed: skip when any tile of this image exists.
      ### The pattern includes the size tag so that e.g. "shoe" is not treated
      ### as done merely because tiles of "shoe2" are present.
      local n64pics n128pics
      n64pics=$(find "$SLICE_DIR" -maxdepth 1 -type f -name "${basenoext}_64_*" | wc -l)
      n128pics=$(find "$SLICE_DIR2" -maxdepth 1 -type f -name "${basenoext}_128_*" | wc -l)
      if (( n64pics < 1 )); then
        convert "$EDGE_DIR$basefile" -quiet -gravity Center -crop 64x64 "$SLICE_DIR${basenoext}_64_%03d.png"
        convert "$EDGE_DIR$offset32$basefile" -quiet -gravity Center -crop 64x64 "$SLICE_DIR${basenoext}_64_%03d.5.png"
      fi

      if (( n128pics < 1 )); then
        convert "$EDGE_DIR$basefile" -quiet -gravity Center -crop 128x128 "$SLICE_DIR2${basenoext}_128_%03d.png"
        convert "$EDGE_DIR$offset64$basefile" -quiet -gravity Center -crop 128x128 "$SLICE_DIR2${basenoext}_128_%03d.5.png"
      fi

      n64pics=$(find "$SLICE_DIR_Color" -maxdepth 1 -type f -name "${basenoext}_color64_*" | wc -l)
      n128pics=$(find "$SLICE_DIR_Color2" -maxdepth 1 -type f -name "${basenoext}_color128_*" | wc -l)
      if (( n64pics < 1 )); then
        convert "$CROP_DIR$basefile" -quiet -gravity Center -crop 64x64 "$SLICE_DIR_Color${basenoext}_color64_%03d.png"
        convert "$CROP_DIR$offset32$basefile" -quiet -gravity Center -crop 64x64 "$SLICE_DIR_Color${basenoext}_color64_%03d.5.png"
      fi

      if (( n128pics < 1 )); then
        convert "$CROP_DIR$basefile" -quiet -gravity Center -crop 128x128 "$SLICE_DIR_Color2${basenoext}_color128_%03d.png"
        convert "$CROP_DIR$offset64$basefile" -quiet -gravity Center -crop 128x128 "$SLICE_DIR_Color2${basenoext}_color128_%03d.5.png"
      fi
    }

    export -f format_picture


    ##### Remove Useless Images ####################################################
    # Classify one tile and record it in a manifest.
    #
    # Arguments: $1 - path to the tile image
    # A tile is rejected when its mean intensity (0-255 scale, rounded) is
    # above 253 or when it is not square. Rejected tiles are moved to
    # ./rejects/ and logged in not_useful_files.csv; all others are logged in
    # useful_files.csv. Each row is "file, W x H, mean, discard".
    # Requires:  ImageMagick `convert` and `identify`.
    # Returns:   non-zero (and touches nothing) if the image cannot be read.
    filter_images() {
      # Repeated here because the function is exported and run by GNU
      # parallel, where the script's unexported variables are not set.
      local usefulfile='useful_files.csv'
      local unusefulfile='not_useful_files.csv'
      local whiteThr=253

      local img=$1
      local imgval imgvalint imgsize imw imh filename
      local toowhite removefile savestr

      if ! imgval=$(convert "$img" -format "%[fx:mean*255]" info:) \
        || ! imgsize=$(identify -format "%[fx:w!=h]" "$img"); then
        echo "filter_images: cannot read $img" >&2
        return 1
      fi
      imgvalint=$(printf '%.0f' "${imgval:-0}")
      filename="./rejects/$(basename -- "$img")"
      imw=$(identify -format "%w" "$img")
      imh=$(identify -format "%h" "$img")

      toowhite=$(( imgvalint > whiteThr ))
      if (( toowhite == 1 )); then
        echo "removing $1: mean value $imgval"
      fi

      # imgsize is 1 when width != height, i.e. a partial tile from an edge.
      if (( imgsize == 1 )); then
        echo "moving $1 to small pics folder $filename"
      fi

      removefile=$(( toowhite + imgsize > 0 ))

      savestr="$(basename -- "$img"), $imw x $imh, $imgval, $removefile"

      if (( removefile == 1 )); then
        mv -- "$img" "$filename"
        printf '%s\n' "$savestr" >> "$unusefulfile"
      else
        printf '%s\n' "$savestr" >> "$usefulfile"
      fi
    }
    export -f filter_images

    ##### Actually do stuff ########################################################
    # Stage 1: build the PNG / crop / edge intermediates and the tiles for every
    # entry in ./photos. find with NUL delimiters replaces parsing `ls`, so
    # unusual file names reach format_picture intact; hidden entries are
    # skipped, as `ls` did. format_picture only uses the base name of its
    # argument, so passing the full path is fine.
    find ./photos -mindepth 1 -maxdepth 1 ! -name '.*' -print0 \
      | parallel -0 format_picture {}

    # Stage 2: drop near-white and non-square tiles from each tile directory
    # and record every tile in the manifests.
    find "$SLICE_DIR" -type f -print0 | parallel -0 filter_images
    find "$SLICE_DIR2" -type f -print0 | parallel -0 filter_images
    find "$SLICE_DIR_Color" -type f -print0 | parallel -0 filter_images
    find "$SLICE_DIR_Color2" -type f -print0 | parallel -0 filter_images
    266 changes: 266 additions & 0 deletions ParallelProcess.sh
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,266 @@
    #!/bin/bash

    # This uses GNU parallel: cite:
    # @article{Tange2011a,
    # title = {GNU Parallel - The Command-Line Power Tool},
    # author = {O. Tange},
    # address = {Frederiksberg, Denmark},
    # journal = {;login: The USENIX Magazine},
    # month = {Feb},
    # number = {1},
    # volume = {36},
    # url = {http://www.gnu.org/s/parallel},
    # year = {2011},
    # pages = {42-47}
    # }

    # Exit function
    # Print the message given as $1 on stderr, then terminate with status 1.
    die() {
      local message=$1
      >&2 printf '%s\n' "$message"
      exit 1
    }

    # Delete an image whose mean intensity shows it is almost entirely white.
    #
    # Arguments: $1 - path to the image
    # The mean is taken on a 0-255 scale and rounded. Paths containing the
    # text "color" are removed above 250, all other paths above 253.
    # Requires:  ImageMagick `convert`.
    # Returns:   non-zero (and deletes nothing) if the image cannot be read.
    filter_images() {
      local img=$1
      local whiteThr imgval imgvalint

      if [[ $img =~ color ]]; then
        whiteThr=250
      else
        whiteThr=253
      fi

      if ! imgval=$(convert "$img" -format "%[fx:mean*255]" info:); then
        echo "filter_images: cannot read $img" >&2
        return 1
      fi
      imgvalint=$(printf '%.0f' "${imgval:-0}")

      if (( imgvalint > whiteThr )); then
        echo "removing $1: mean value $imgval"
        rm -- "$img"
      fi
    }
    export -f filter_images

    # Function to process a single shoe
    # Convert one photo to PNG, trim it, and optionally mirror and/or
    # edge-detect it.
    #
    # Usage:    process_shoe [-h] [-e] [-m] [-x n] [-o [n]] [--overwrite]
    #                        [--out <outdir>] file.jpg
    # Outputs:  <outdir>/pngs/<name>.png and
    #           <outdir>/toslice/<name>[_flip][_edge].png
    # Requires: ImageMagick `convert`.
    # Returns:  0 on success or after -h; 1 when no input file is given.
    # NOTE: -x and -o are parsed, but they are only used by the slicing stage,
    # which is currently commented out at the end of this function.
    process_shoe() {
      local usage
      usage="$(basename "$0") [-h] [-e] [-m] [-x n] [-o n] [--overwrite] [--out <outdir>] file.jpg
    where:
    -h prints help
    -e uses canny edge detection on the image
    -m flips the image in x and y
    -x n sets the image chunk size to n x n
    -o n offsets the image by n x n pixels.
    --overwrite writes over previously generated intermediate images
    --out specifies the directory to store the intermediate and processed images.
    Defaults to ./processed/. Directory will be created if it does not already exist."


      #--- Read in arguments and set flag variables --------------------------------
      local -a POSITIONAL=()

      # Initialize option variables
      local SIZE=64
      local EDGE=0
      local OFFSET=0
      local MIRROR=0
      local OUTPATH="./processed"
      local OVERWRITE=0
      local offset_re='^[0-9]+$'
      local key

      # Assumes space separated, e.g. ./myscript.sh -e conf -s /etc -l /usr/lib /etc/hosts
      while [[ $# -gt 0 ]]; do
        key="$1"

        case "$key" in
          -h)
            echo "$usage"
            # return, not exit, so a direct caller's shell is not terminated
            return 0
            ;;
          -x|--size)
            SIZE=$2
            shift # past argument
            shift # past value
            ;;
          -e|--edge)
            EDGE=1
            shift # past argument
            ;;
          -o|--offset)
            # The value is optional: when the next word is not a number it is
            # left in place (it may be the file name) and 32 is used.
            if ! [[ ${2-} =~ $offset_re ]]; then
              OFFSET=32
              shift # past argument
            else
              OFFSET="$2"
              shift # past argument
              shift # past value
            fi
            ;;
          -m|--mirror)
            MIRROR=1
            shift
            ;;
          --out)
            OUTPATH="$2"
            shift # past argument
            shift # past value
            ;;
          --overwrite)
            OVERWRITE=1
            shift # past argument
            ;;
          *) # unknown option
            POSITIONAL+=("$1") # save it in an array for later
            shift # past argument
            ;;
        esac
      done
      set -- "${POSITIONAL[@]}" # restore positional parameters

      if [ "$#" -lt 1 ]; then
        echo "$usage" >&2
        return 1
      fi

      if [ "$#" -gt 1 ]; then
        echo "Too many left over arguments. Assuming $1 is the file path."
      fi

      # --- Set Script variables ---------------------------------------------------

      local origfile basenoext basefile wkfile wkfileprev
      origfile=$(basename -- "$1")
      basenoext=${origfile%.*}
      basefile="$basenoext.png"
      wkfile="$basenoext"

      # Ensure folders all exist
      local ORIG_DIR PNG_DIR TMP_DIR SLICE_DIR
      ORIG_DIR=$(dirname -- "$1")
      PNG_DIR="$OUTPATH/pngs"
      TMP_DIR="$OUTPATH/toslice"
      SLICE_DIR="$OUTPATH/slices" # only used by the disabled slicing stage

      # -p also creates $OUTPATH and does not fail when another parallel job
      # has just created the same directory.
      mkdir -p -- "$PNG_DIR" "$TMP_DIR"

      # if [ ! -d "$SLICE_DIR" ]; then
      # mkdir "$SLICE_DIR"
      # fi

      # Ensure the simple operations are done
      # Convert to PNG

      # echo $PNG_DIR/$basefile
      # echo $ORIG_DIR/$origfile

      # Each intermediate is regenerated only when it is missing or when
      # --overwrite was given (OVERWRITE is 0 or 1, so compare numerically).
      if [ ! -f "$PNG_DIR/$basefile" ] || [ "$OVERWRITE" -eq 1 ]; then
        convert "$ORIG_DIR/$origfile" "$PNG_DIR/$basefile"
      fi

      # Crop
      if [ ! -f "$TMP_DIR/$basefile" ] || [ "$OVERWRITE" -eq 1 ]; then
        convert "$PNG_DIR/$basefile" -trim "$TMP_DIR/$basefile"
      fi

      # Mirror: flip vertically and horizontally, tagging the name "_flip".
      wkfileprev=$wkfile
      if [ "$MIRROR" -eq 1 ]; then
        wkfile="${wkfileprev}_flip"
        if [ ! -f "$TMP_DIR/$wkfile.png" ] || [ "$OVERWRITE" -eq 1 ]; then
          convert "$TMP_DIR/$wkfileprev.png" -flip -flop "$TMP_DIR/$wkfile.png"
        fi
      fi

      # Edge detection on the current working image, tagging the name "_edge".
      wkfileprev=$wkfile
      if [ "$EDGE" -eq 1 ]; then
        wkfile="${wkfile}_edge"
        if [ ! -f "$TMP_DIR/$wkfile.png" ] || [ "$OVERWRITE" -eq 1 ]; then
          convert "$TMP_DIR/$wkfileprev.png" -canny 0x1+3%+3% -negate -colorspace Gray "$TMP_DIR/$wkfile.png"
        fi
      fi

    # --- Offset / crop / slice stage: currently disabled -----------------------
    # wkfileprev=$wkfile
    # if [ "$OFFSET" -gt "0" ]; then
    # wkfile=$wkfile'_offset'$OFFSET
    # if [ ! -f $TMP_DIR/$wkfile.png ] || [ "$OVERWRITE" ]; then
    # convert $TMP_DIR/$wkfileprev.png \
    # -gravity northeast \
    # -background white \
    # -extent $(identify -format '%[fx:W+$OFFSET]x%[fx:H+$OFFSET]' $TMP_DIR/$wkfileprev'.png' ) \
    # $TMP_DIR/$wkfile.png
    # fi
    # fi
    #
    # # Prepare for cropping image
    #
    # imw=$(identify -format "%w" $TMP_DIR/$wkfileprev.png)
    # imh=$(identify -format "%h" $TMP_DIR/$wkfileprev.png)
    #
    # full_tile_w=$(( $imw / $SIZE ))
    # full_tile_h=$(( $imh / $SIZE ))
    #
    # vcanv_w=$(( ($full_tile_w) * $SIZE ))
    # vcanv_h=$(( ($full_tile_h) * $SIZE ))
    #
    # cx=$(( ($vcanv_w - $imw ) / 2 ))
    # cy=$(( ($vcanv_h - $imh ) / 2 ))
    #
    # if [ "$cy" -ge "0" ]; then
    # cy='+'$cy
    # fi
    # if [ "$cx" -ge "0" ]; then
    # cx='+'$cx
    # fi
    #
    # vcanvszstr=$vcanv_w'x'$vcanv_h
    #
    # wkfileprev=$wkfile
    # wkfile=$wkfile'_crop'$vcanvszstr
    #
    # # echo "Full tiles: $full_tile_w x $full_tile_h, cropping image to $vcanvszstr from $cx, $cy"
    # # echo "convert $TMP_DIR/$wkfileprev.png -repage $vcanvszstr$cx$cy -crop $vcanvszstr $TMP_DIR/$wkfile.png"
    # # convert $TMP_DIR/$wkfileprev.png -repage $vcanvszstr$cx$cy -crop $vcanvszstr $TMP_DIR/$wkfile.png
    # convert $TMP_DIR/$wkfileprev.png -repage $vcanvszstr+0+0 -crop $vcanvszstr $TMP_DIR/$wkfile.png
    #
    #
    # wkfileprev=$wkfile
    # wkfile=$wkfile'_sz'$SIZE
    # szstr=$SIZE'x'$SIZE
    #
    # convert $TMP_DIR/$wkfileprev.png -quiet -crop $szstr $SLICE_DIR/$wkfile'_%03d.png'
    #
    # for i in $SLICE_DIR/$wkfile*.png; do
    # filter_images $i
    # done

    }
    export -f process_shoe

    # Test with single shoe
    # process_shoe photos/adidas-originals-gazelle-tactile-yellow-black-gold_product_8894439_color_695418.jpg
    # process_shoe -e photos/adidas-originals-gazelle-tactile-yellow-black-gold_product_8894439_color_695418.jpg
    # process_shoe -m photos/adidas-originals-gazelle-tactile-yellow-black-gold_product_8894439_color_695418.jpg
    # process_shoe -e -m photos/adidas-originals-gazelle-tactile-yellow-black-gold_product_8894439_color_695418.jpg

    # Run process_shoe once per file under ./photos, up to 40 jobs at a time,
    # with the job log written to /tmp/log64. NUL delimiters keep file names
    # containing newlines or other unusual characters intact.
    find ./photos -type f -print0 | parallel -0 -j40 --joblog /tmp/log64 process_shoe {}

    # # 64x64 chunks
    # find ./photos -type f | parallel -j40 --joblog /tmp/log64e process_shoe -e -x 64 {}
    # find ./photos -type f | parallel -j40 --joblog /tmp/log64 process_shoe -x 64 {}
    # find ./photos -type f | parallel -j40 --joblog /tmp/log64em process_shoe -m -e -x 64 {}
    # find ./photos -type f | parallel -j40 --joblog /tmp/log64m process_shoe -m -x 64 {}
    #
    # # 128x128 chunks
    # find ./photos -type f | parallel -j40 --joblog /tmp/log128e process_shoe -e -x 128 {}
    # find ./photos -type f | parallel -j40 --joblog /tmp/log128 process_shoe -x 128 {}
    # find ./photos -type f | parallel -j40 --joblog /tmp/log128em process_shoe -m -e -x 128 {}
    # find ./photos -type f | parallel -j40 --joblog /tmp/log128m process_shoe -m -x 128 {}
    #
    # # 256x256 chunks
    # find ./photos -type f | parallel -j40 --joblog /tmp/log256e process_shoe -e -x 256 {}
    # find ./photos -type f | parallel -j40 --joblog /tmp/log256 process_shoe -x 256 {}
    # find ./photos -type f | parallel -j40 --joblog /tmp/log256em process_shoe -m -e -x 256 {}
    # find ./photos -type f | parallel -j40 --joblog /tmp/log256m process_shoe -m -x 256 {}