Skip to content

Instantly share code, notes, and snippets.

@smeschke
Last active December 26, 2024 17:25
Show Gist options
  • Select an option

  • Save smeschke/aa989df78551a9050a78e0d7a8c50495 to your computer and use it in GitHub Desktop.

Select an option

Save smeschke/aa989df78551a9050a78e0d7a8c50495 to your computer and use it in GitHub Desktop.
Aligns a scanned document by finding its optimal rotation
import cv2
import numpy as np
# Writer for the visualisation video: MJPG codec, 60 fps, 640x640 frames.
out = cv2.VideoWriter('/home/stephen/Desktop/smooth_pose.avi', cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 60, (640, 640))
# Read the scan as a single-channel image (flag 0). cv2.imread reports failure
# by returning None instead of raising, so check before doing arithmetic on it.
scan = cv2.imread('/home/stephen/Desktop/scan.jpg', 0)
if scan is None:
    raise FileNotFoundError("could not read image '/home/stephen/Desktop/scan.jpg'")
# Invert the grey levels (255 - pixel) so that dark pixels become bright.
src = 255 - scan
# One score per tested rotation angle, in the order the angles are tried.
scores = []
def rotate(img, angle):
    """Return a copy of ``img`` rotated by ``angle`` about its centre.

    ``img`` must be a 2-D (single-channel) array. ``angle`` is passed straight
    to ``cv2.getRotationMatrix2D`` with a scale of 1, and the result is warped
    onto a canvas of the same (cols, rows) size as the input.
    """
    height, width = img.shape
    centre = (width / 2, height / 2)
    transform = cv2.getRotationMatrix2D(centre, angle, 1)
    return cv2.warpAffine(img, transform, (width, height))
def sum_rows(img):
    """Return the per-row pixel sums of ``img``, scaled so the largest is 255.

    Args:
        img: 2-D array of pixel values (rows x columns).

    Returns:
        A 1-D float64 array with one entry per row except the last one, i.e.
        of length ``rows - 1`` (empty when ``img`` has fewer than two rows).
        If every summed row is zero the array is returned as all zeros rather
        than being divided by zero.
    """
    # NOTE(review): the final row is left out to keep the historical output
    # length of rows - 1 that display_data indexes into; confirm whether the
    # last row was meant to be included.
    rows = np.asarray(img)[:-1]
    if rows.shape[0] == 0:
        return np.zeros(0, dtype=np.float64)
    # Accumulate in float64: adding uint8 pixels in their own dtype can wrap
    # past 255 and corrupt the sums.
    row_sums = rows.sum(axis=1, dtype=np.float64)
    peak = row_sums.max()
    if peak == 0:
        # Nothing to normalise; avoid 0/0 producing NaN.
        return row_sums
    # Normalize range to (0,255)
    return (row_sums / peak) * 255
def display_data(roi, row_sums, buffer):
    """Show the row-sum profile next to the ROI and record the frame.

    A square canvas of side ``2 * buffer`` is filled row by row with the
    corresponding ``row_sums`` value; everything from one third of ``buffer``
    rightwards is then overwritten with the matching columns of ``roi``. The
    canvas is shown in the 'bg1' window and a 640x640 BGR copy is written to
    the module-level video writer ``out``.

    Returns the key code reported by ``cv2.waitKey(1)``.
    """
    side = buffer * 2
    canvas = np.zeros((side, side), np.uint8)
    # Paint each row (all but the last ROI row) with its row-sum value.
    for idx in range(roi.shape[0] - 1):
        canvas[idx:idx + 1, :] = row_sums[idx]
    # Columns right of this split show the ROI itself instead of the profile.
    split = int(buffer / 3)
    canvas[:, split:] = roi[:, split:]
    cv2.imshow('bg1', canvas)
    key = cv2.waitKey(1)
    frame = cv2.cvtColor(cv2.resize(canvas, (640, 640)), cv2.COLOR_GRAY2BGR)
    out.write(frame)
    return key
# Rotate the image around in a circle (0 to 360 in steps of 0.5) and keep the
# rotation whose row profile has the lowest score.
angle = 0
best_score = None
try:
    while angle <= 360:
        # Rotate the source image
        img = rotate(src, angle)
        # Crop a centred square from the image: buffer is roughly one third of
        # the shorter side, and the ROI extends buffer pixels either side of
        # the centre.
        h, w = img.shape
        buffer = min(h, w) - int(min(h, w) / 1.5)
        roi = img[int(h/2-buffer):int(h/2+buffer), int(w/2-buffer):int(w/2+buffer)]
        # Compute the sums of the rows
        row_sums = sum_rows(roi)
        # Score = number of rows with a non-zero sum; the lowest score wins
        score = np.count_nonzero(row_sums)
        scores.append(score)
        # Image has best rotation so far (ties go to the later angle)
        if best_score is None or score <= best_score:
            # Save the rotated image
            print('found optimal rotation')
            best_score = score
            best_rotation = img.copy()
        k = display_data(roi, row_sums, buffer)
        # Esc aborts the sweep early
        if k == 27:
            break
        # Increment angle and try again
        angle += .5
finally:
    # Release the writer so the video file is finalised, then close the
    # preview window, even if the sweep is interrupted by an error.
    out.release()
    cv2.destroyAllWindows()
def area_to_top_of_text(img):
    """Mark the region between the top edge and the topmost peak of each column.

    For every column of the 2-D array ``img`` the row of its maximum value is
    located (the first such row when there are ties), and all pixels from
    row 0 down to and including that row are marked.

    Args:
        img: 2-D image array. It is modified in place (marked pixels are set
            to 123), so pass a copy if the original must be preserved.

    Returns:
        ``(img, bg)`` where ``bg`` is a new array shaped like ``img`` holding
        255 at the marked pixels and 0 elsewhere.

    Side effects:
        Shows ``img`` in the 'img' window and blocks on ``cv2.waitKey(0)``
        until a key is pressed.
    """
    # Create a background to draw on
    bg = np.zeros_like(img)
    # Row of the top (first maximum) value in every column. The width comes
    # from the image itself, not from a module-level variable, and every
    # column is included.
    tops = np.argmax(img, axis=0)
    # True for each pixel at or above its column's top row: the same pixels a
    # one-pixel-wide vertical line from (x, 0) to (x, top) would cover.
    fill = np.arange(img.shape[0])[:, None] <= tops[None, :]
    img[fill] = 123
    bg[fill] = 255
    # Show and return
    cv2.imshow('img', img)
    cv2.waitKey(0)
    return img, bg
# Find the area from the top of the page to the top of the text
_, bg = area_to_top_of_text(best_rotation.copy())
# Accumulate in int64: the built-in sum() adds uint8 rows element-wise in
# uint8, which wraps modulo 256 and corrupts the area.
right_side_up = int(np.sum(bg, dtype=np.int64))
# Flip image and try again
best_rotation_flipped = rotate(best_rotation, 180)
_, bg = area_to_top_of_text(best_rotation_flipped.copy())
upside_down = int(np.sum(bg, dtype=np.int64))
# Keep the orientation with the smaller area above the text
if right_side_up < upside_down:
    aligned_image = best_rotation
else:
    aligned_image = best_rotation_flipped
# Save aligned image (inverted back with 255 - pixel)
cv2.imwrite('/home/stephen/Desktop/best_rotation.png', 255 - aligned_image)
cv2.destroyAllWindows()
@NUAAwanghe
Copy link

Hi, I came here from https://stackoverflow.com/questions/55654142/detect-if-a-text-image-is-upside-down. Could you provide some details about the function "area_to_top_of_text"? Thank you in advance.

@yamini1473
Copy link

Hey, by any chance, did you come up with a solution yet?

@mikegashler
Copy link

mikegashler commented Jul 27, 2020

For those asking, here's a snip of code that returns a positive number if there is more area in the margins above, and a negative number if there is more area in the margins below. (Disclaimer: This is untested code):

def top_bot_margin_ratio(image: np.ndarray) -> float:
    """Return the log of the ratio between top-margin and bottom-margin area.

    A pixel counts as dark when its value is below 128. For every column that
    contains a dark pixel, the number of rows above the first dark pixel is
    added to the top margin and the number of rows below the last dark pixel
    to the bottom margin.

    Args:
        image: 2-D greyscale array, or a 3-D array; one with more than one
            channel is converted with ``cv2.COLOR_BGR2GRAY`` first.

    Returns:
        ``log(above / below)``: positive when the top margin is larger,
        negative when the bottom margin is larger. When both margins are zero
        (including an image with no dark pixels) the result is ``0.0``; when
        only the bottom margin is zero it is ``inf``, and when only the top
        margin is zero it is ``-inf``.
    """
    import math  # local import: the script's import block does not include math

    if len(image.shape) > 2 and image.shape[2] > 1:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    if len(image.shape) > 2:
        # Single-channel 3-D input: drop the trailing channel axis.
        image = image.reshape(image.shape[0], image.shape[1])

    dark = image < 128
    has_dark = dark.any(axis=0)
    # argmax on a boolean column gives the index of its first True value.
    first_from_top = dark.argmax(axis=0)
    first_from_bottom = dark[::-1].argmax(axis=0)
    # Only columns that actually contain a dark pixel contribute.
    above = int(first_from_top[has_dark].sum())
    below = int(first_from_bottom[has_dark].sum())

    if above == 0 and below == 0:
        return 0.0
    if below == 0:
        return math.inf
    if above == 0:
        return -math.inf
    return math.log(above / below)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment