Skip to content

Instantly share code, notes, and snippets.

@calebr
Created July 27, 2016 04:16
Show Gist options
  • Select an option

  • Save calebr/543a5dd538b10636ab9e0c5591ecb8a0 to your computer and use it in GitHub Desktop.

Select an option

Save calebr/543a5dd538b10636ab9e0c5591ecb8a0 to your computer and use it in GitHub Desktop.
#!/usr/bin/env bash
#
# Works on a single scanned image given as the first argument.
# Usage: <this script> <INPUT_IMAGE>
#
# This prologue validates the argument and records the image dimensions in
# WIDTH and HEIGHT (as reported by `convert ... -format "%w"/"%h" info:`).

INPUT=$1
# INPUT="/Users/kamola_greg/dev/identity/AUSTRALIAN_PASSPORT.jpg"

# INPUT is always *set* by the assignment above (to the empty string when no
# argument was given), so a "${INPUT+x}" set-ness test can never fail. Test for
# emptiness instead, report on stderr and exit non-zero.
if [[ -z "${INPUT}" ]]; then
  echo "Usage: $0 <INPUT_IMAGE>" >&2
  exit 1
fi

# Quoted so that image paths containing spaces or glob characters survive.
WIDTH=$(convert "${INPUT}" -format "%w" info:)
HEIGHT=$(convert "${INPUT}" -format "%h" info:)
# Prints $1 with every '.' replaced by ',' (followed by a newline).
to_decimal() {
  echo "${1//./,}"
}
#######################################
# Crops a rectangle out of ${INPUT} and writes it to ${INPUT}_<part>.jpg.
# NOTE: once defined, this function shadows the external `cut` utility for
# the remainder of the script.
# Globals:
#   INPUT (read) - path of the source image
# Arguments:
#   $1 - part name, used as the output file suffix
#   $2 - top-left x      $3 - top-left y
#   $4 - bottom-right x  $5 - bottom-right y
#   (coordinates are passed straight into the -crop geometry)
#######################################
cut() {
  local part_name=$1
  local crop_w crop_h

  # -crop wants WIDTHxHEIGHT+X+Y, so turn the two corners into a size.
  # bc is used instead of $(( )) so non-integer coordinates are accepted.
  crop_w=$(bc <<<"$4 - $2")
  crop_h=$(bc <<<"$5 - $3")

  # Every path is quoted: an unquoted ${INPUT} would be word-split on spaces.
  convert "${INPUT}" -crop "${crop_w}x${crop_h}+${2}+${3}" +repage \
    "${INPUT}_${part_name}.jpg"
}
#######################################
# Runs tesseract on the cropped part image ${INPUT}_<part>.jpg, prints the
# recognised text as "<part>: <text>" and appends the text to
# ${INPUT}_TEXT_RESULTS.txt.
# Globals:
#   INPUT (read) - path of the source image; prefix of every file used here
# Arguments:
#   $1 - part name (the same suffix that was used when cropping)
# Outputs:
#   stdout: "<part>: " followed by the contents of ${INPUT}_<part>.txt
#   files:  ${INPUT}_<part>.txt (written by tesseract),
#           ${INPUT}_<part>_PROCESSING.jpg, ${INPUT}_TEXT_RESULTS.txt
#######################################
ocr() {
  local part_name=$1
  local base="${INPUT}_${part_name}"

  # Preprocess and run OCR.
  # convert "${base}.jpg" -fuzz 50% -fill white +opaque black "${base}.jpg"
  # tesseract's stderr is discarded, as in the original script.
  tesseract "${base}.jpg" "${base}" \
    -c tessedit_write_images=true \
    -c textord_max_noise_size=10 2>/dev/null
  # textord_old_xheight 1
  # textord_min_xheight 35

  # Copy tesseract post processing results.
  # tessinput.tif is read from the current directory (presumably produced by
  # tessedit_write_images=true - confirm). It is copied verbatim, so the
  # ".jpg" destination still holds TIFF data.
  cp tessinput.tif "${base}_PROCESSING.jpg"

  # Display ocr results.
  printf '%s: ' "${part_name}"
  cat "${base}.txt"
  cat "${base}.txt" >>"${INPUT}_TEXT_RESULTS.txt"
}
#######################################
# Draws a blue, unfilled rectangle onto ${INPUT}_MASK.png, creating that file
# from ${INPUT} first when it does not exist yet.
# Globals:
#   INPUT (read) - path of the source image
# Arguments:
#   $1,$2 - first corner (x,y) of the rectangle
#   $3,$4 - opposite corner (x,y) of the rectangle
#######################################
mark() {
  local mask="${INPUT}_MASK.png"

  # Seed the mask from the source image the first time through.
  if [[ ! -f "${mask}" ]]; then
    convert "${INPUT}" "${mask}"
  fi

  # Paths are quoted so images whose names contain spaces work.
  # stderr of the drawing step is discarded, as in the original script.
  convert "${mask}" -strokewidth 1 -stroke blue -fill "rgba(0, 0, 0, 0)" \
    -draw "rectangle $1,$2 $3,$4" "${mask}" 2>/dev/null
}
#######################################
# Converts a box given in percent of the image size into absolute
# coordinates and outlines it on the mask image via mark().
# The crop and OCR steps are currently commented out.
# Globals:
#   WIDTH, HEIGHT (read) - dimensions of the input image
# Arguments:
#   $1 - part name (only used by the disabled cut/ocr steps)
#   $2 - top-left x in %      $3 - top-left y in %
#   $4 - bottom-right x in %  $5 - bottom-right y in %
#######################################
cut_and_ocr() {
  local part_name=$1
  local x1 y1 x2 y2

  # `convert xc: -format "%[fx:...]" info:` is used purely as a calculator:
  # it prints the value of the fx expression, which may be fractional.
  x1=$(convert xc: -format "%[fx:${WIDTH}*$2/100]" info:)
  y1=$(convert xc: -format "%[fx:${HEIGHT}*$3/100]" info:)
  x2=$(convert xc: -format "%[fx:${WIDTH}*$4/100]" info:)
  y2=$(convert xc: -format "%[fx:${HEIGHT}*$5/100]" info:)

  mark "${x1}" "${y1}" "${x2}" "${y2}"
  #cut "${part_name}" "${x1}" "${y1}" "${x2}" "${y2}"
  #ocr "${part_name}"
}
# Start from a clean slate: remove the annotated mask and the collected OCR
# text that an earlier run on the same image may have left behind.
rm -f "${INPUT}_MASK.png" "${INPUT}_TEXT_RESULTS.txt"

# One entry per field, formatted as "NAME x1 y1 x2 y2":
# top left x, top left y, bottom right x, bottom right y (all in %).
# The trailing comments record the boxes that were tried previously.
FIELD_BOXES=(
  "PASSPORT_NUMBER 75 7 99 20" # previously: 75 9 100 15
  "NAME 31 17 70 32"           # previously: 30 19 70 29
  "NATIONALITY 31 30 70 39"    # previously: 30 32 70 37
  "DATE_OF_BIRTH 31 38 70 47"  # previously: 30 40 70 45
  "PLACE_OF_BIRTH 65 45 97 55" # previously: 65 48 97 53
)

for field_box in "${FIELD_BOXES[@]}"; do
  # Split the entry back into the five positional arguments.
  read -r field_name box_x1 box_y1 box_x2 box_y2 <<<"${field_box}"
  cut_and_ocr "${field_name}" "${box_x1}" "${box_y1}" "${box_x2}" "${box_y2}"
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment