Skip to content

Instantly share code, notes, and snippets.

@jalotra
Created February 18, 2026 16:24
Show Gist options
  • Select an option

  • Save jalotra/7ca6a2ca859478fd7ccea4e8fcb905c0 to your computer and use it in GitHub Desktop.

Select an option

Save jalotra/7ca6a2ca859478fd7ccea4e8fcb905c0 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
"""
Morphology-based ID Card Extraction from PDF
Extracts ID cards using OpenCV morphology operations to detect grid structure.
"""
import fitz # PyMuPDF
import cv2
import numpy as np
import json
import os
from pathlib import Path
from typing import List, Dict, Tuple
import argparse
def pdf_to_images(pdf_path: str, output_dir: str, dpi: int = 200) -> List[str]:
    """Render every page of a PDF to a PNG file using PyMuPDF.

    Args:
        pdf_path: Path of the PDF document to open.
        output_dir: Directory that receives the page images; it is created
            (after the PDF has been opened successfully) if missing.
        dpi: Rendering resolution. Pages are scaled by ``dpi / 72`` on both
            axes.

    Returns:
        Paths of the written images in page order, named
        ``page_0000.png``, ``page_0001.png``, ...
    """
    print(f"Converting PDF to images at {dpi} DPI...")
    image_paths = []
    doc = fitz.open(pdf_path)
    try:
        # Create the directory only once the PDF is known to open, so a bad
        # path does not leave an empty output directory behind.
        os.makedirs(output_dir, exist_ok=True)
        page_count = len(doc)
        # The zoom matrix is identical for every page, so build it once.
        mat = fitz.Matrix(dpi / 72, dpi / 72)
        for page_num in range(page_count):
            pix = doc[page_num].get_pixmap(matrix=mat)
            image_path = os.path.join(output_dir, f"page_{page_num:04d}.png")
            pix.save(image_path)
            image_paths.append(image_path)
            if (page_num + 1) % 10 == 0:
                print(f" Processed {page_num + 1}/{page_count} pages...")
    finally:
        # Release the document even when rendering or saving a page fails.
        doc.close()
    print(f"✓ Converted {len(image_paths)} pages to images")
    return image_paths
def preprocess_image(image_path: str) -> Tuple[np.ndarray, np.ndarray]:
    """Read an image from disk and binarize it for the morphology steps.

    Args:
        image_path: Path of the image file to read.

    Returns:
        ``(img, binary)`` where ``img`` is the image exactly as loaded and
        ``binary`` is the adaptive-threshold result computed on the inverted
        grayscale version of it.

    Raises:
        ValueError: If the image cannot be loaded.
    """
    source = cv2.imread(image_path)
    if source is None:
        raise ValueError(f"Could not load image: {image_path}")

    # A three-dimensional array carries colour channels and is reduced to a
    # single channel; anything else is used as it is.
    has_channels = source.ndim == 3
    gray = cv2.cvtColor(source, cv2.COLOR_BGR2GRAY) if has_channels else source

    # Invert first so dark strokes on a light page become the bright
    # foreground that the later erode/dilate steps operate on.
    inverted = ~gray
    block_size = 15
    constant = -2
    thresholded = cv2.adaptiveThreshold(
        inverted,
        255,
        cv2.ADAPTIVE_THRESH_MEAN_C,
        cv2.THRESH_BINARY,
        block_size,
        constant,
    )
    return source, thresholded
def extract_grid_lines(binary: np.ndarray,
                       scale: int = 30) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Isolate long horizontal and vertical strokes from a binary image.

    Each direction is processed with an erode followed by a dilate using a
    one-pixel-thick rectangular kernel, so only strokes at least as long as
    the kernel survive.

    Args:
        binary: Two-dimensional binary image with bright foreground.
        scale: Divisor applied to the image width/height to obtain the
            kernel length (``width // scale`` and ``height // scale``).

    Returns:
        ``(grid, horizontal, vertical)`` where ``grid`` is the bitwise OR of
        the two directional line images.

    Raises:
        ValueError: If ``scale`` is not positive.
    """
    if scale <= 0:
        raise ValueError(f"scale must be positive, got {scale}")

    height, width = binary.shape

    # Clamp to 1: on images smaller than `scale` pixels the integer division
    # yields 0, which is not a valid structuring-element size.
    horizontal_size = max(1, width // scale)
    vertical_size = max(1, height // scale)

    # --- Horizontal lines: wide, one-pixel-high kernel ---
    horizontal_structure = cv2.getStructuringElement(
        cv2.MORPH_RECT, (horizontal_size, 1)
    )
    # erode/dilate return new arrays, so `binary` itself is never modified
    # and no defensive copy is needed.
    horizontal = cv2.erode(binary, horizontal_structure)
    horizontal = cv2.dilate(horizontal, horizontal_structure)

    # --- Vertical lines: tall, one-pixel-wide kernel ---
    vertical_structure = cv2.getStructuringElement(
        cv2.MORPH_RECT, (1, vertical_size)
    )
    vertical = cv2.erode(binary, vertical_structure)
    vertical = cv2.dilate(vertical, vertical_structure)

    # The grid is every pixel that belongs to either kind of line.
    grid = cv2.bitwise_or(horizontal, vertical)
    return grid, horizontal, vertical
def find_cells_from_grid(grid: np.ndarray, img_shape: Tuple[int, ...],
                         rows: int = 8, cols: int = 2,
                         margin: int = 10) -> List[Tuple[int, int, int, int]]:
    """Derive equally sized cell boxes from the extent of the grid lines.

    Strategy: take the bounding box of all bright grid pixels, widen it by
    ``margin`` (clipped to the image), then split it into ``rows`` x ``cols``
    cells using integer division. Any remainder pixels at the right/bottom
    edge are therefore not part of a cell.

    Args:
        grid: Two-dimensional image in which grid-line pixels are > 128.
        img_shape: Image shape; only the first two entries (height, width)
            are used.
        rows: Number of cell rows to produce.
        cols: Number of cell columns to produce.
        margin: Pixels added around the detected line extent.

    Returns:
        ``(x, y, w, h)`` tuples of plain ints in row-major order, or an
        empty list when the grid image contains no line pixels.

    Raises:
        ValueError: If ``rows`` or ``cols`` is smaller than 1.
    """
    if rows < 1 or cols < 1:
        raise ValueError(f"rows and cols must be >= 1, got {rows}x{cols}")

    height, width = img_shape[:2]

    # Project the line mask onto each axis instead of materialising the
    # coordinates of every bright pixel.
    line_mask = grid > 128
    hit_rows = np.flatnonzero(line_mask.any(axis=1))
    if hit_rows.size == 0:
        return []
    hit_cols = np.flatnonzero(line_mask.any(axis=0))

    # Convert to plain ints so neither the printed bbox nor the returned
    # tuples carry NumPy scalar types.
    x_min = max(0, int(hit_cols[0]) - margin)
    y_min = max(0, int(hit_rows[0]) - margin)
    x_max = min(width, int(hit_cols[-1]) + margin)
    y_max = min(height, int(hit_rows[-1]) + margin)

    grid_bbox = (x_min, y_min, x_max - x_min, y_max - y_min)
    print(f" Found grid boundary from lines: {grid_bbox}")

    gx, gy, gw, gh = grid_bbox
    cell_width = gw // cols
    cell_height = gh // rows

    # Subdivide into `cols` columns × `rows` rows, row by row.
    cells = [
        (gx + col * cell_width, gy + row * cell_height, cell_width, cell_height)
        for row in range(rows)
        for col in range(cols)
    ]
    print(f" Subdivided grid into {len(cells)} cells ({cols}×{rows})")
    return cells
def sort_contours_grid(cells: List[Tuple[int, int, int, int]]) -> List[Tuple[int, int, int, int]]:
    """Sort cells in reading order (top-to-bottom, then left-to-right).

    Cells are grouped into rows by walking them in ascending ``y``: a cell
    joins the current row while its ``y`` lies within 60% of the average
    cell height of that row's first cell, otherwise it starts a new row.
    Each row is then ordered by ``x``.

    Grouping relative to the row's first cell (rather than rounding
    ``y / tolerance`` into fixed buckets) keeps nearly aligned cells together
    even when they fall on either side of a bucket boundary, and avoids a
    division by zero when every cell has height 0.

    Args:
        cells: ``(x, y, w, h)`` boxes.

    Returns:
        A new list with the same boxes in reading order; empty input gives
        an empty list.
    """
    if not cells:
        return []

    # Allow some vertical variation between cells of the same row.
    avg_height = sum(h for _, _, _, h in cells) / len(cells)
    y_tolerance = avg_height * 0.6

    by_position = sorted(cells, key=lambda cell: (cell[1], cell[0]))

    grouped_rows: List[List[Tuple[int, int, int, int]]] = []
    row_top = 0
    for cell in by_position:
        if grouped_rows and cell[1] - row_top <= y_tolerance:
            grouped_rows[-1].append(cell)
        else:
            grouped_rows.append([cell])
            row_top = cell[1]

    return [
        cell
        for row in grouped_rows
        for cell in sorted(row, key=lambda cell: cell[0])
    ]
def extract_cards_from_boxes(image_path: str, output_dir: str, boxes: List[Tuple],
                             padding: int = 5) -> List[Dict]:
    """Crop each box out of the page image and save it as its own PNG.

    Args:
        image_path: Page image to crop from.
        output_dir: Directory that receives the card images; created if
            it does not exist.
        boxes: ``(x, y, w, h)`` boxes to crop.
        padding: Number of pixels each box is shrunk by on every side, so
            the cell border lines are left out of the crop.

    Returns:
        One dict per card that was actually written, with the keys
        ``card_number`` (1-based box index), ``bbox`` (``[x1, y1, x2, y2]``
        after padding and clipping), ``filename``, ``width`` and ``height``.
        Returns an empty list when the page image cannot be loaded. Boxes
        that are empty after padding, smaller than 50 px on either side, or
        that fail to save are skipped.
    """
    img = cv2.imread(image_path)
    if img is None:
        return []

    os.makedirs(output_dir, exist_ok=True)
    base_name = Path(image_path).stem
    img_height, img_width = img.shape[:2]

    card_info = []
    for i, (x, y, w, h) in enumerate(boxes):
        # Shrink the box inwards by `padding` and clip it to the image.
        x1 = max(0, x + padding)
        y1 = max(0, y + padding)
        x2 = min(img_width, x + w - padding)
        y2 = min(img_height, y + h - padding)
        if x2 <= x1 or y2 <= y1:
            continue

        card = img[y1:y2, x1:x2]
        # Skip if too small to be a real card.
        if card.shape[0] < 50 or card.shape[1] < 50:
            continue

        # The file name keeps the 0-based box index; card_number is 1-based.
        card_filename = f"{base_name}_card_{i:03d}.png"
        card_path = os.path.join(output_dir, card_filename)
        # imwrite reports failure through its return value instead of
        # raising; do not list a card whose file was never written.
        if not cv2.imwrite(card_path, card):
            print(f" Warning: could not write {card_path}; skipping card")
            continue

        card_info.append({
            'card_number': i + 1,
            'bbox': [int(x1), int(y1), int(x2), int(y2)],
            'filename': card_filename,
            'width': int(x2 - x1),
            'height': int(y2 - y1)
        })
    return card_info
def visualize_detection(image_path: str, boxes: List[Tuple],
                        output_path: str) -> None:
    """Write a copy of the page image with every box outlined and numbered.

    Args:
        image_path: Page image to draw on (re-read from disk).
        boxes: ``(x, y, w, h)`` boxes; they are labelled 1, 2, ... in the
            order given.
        output_path: Destination file for the annotated image.

    Does nothing when the page image cannot be loaded.
    """
    img = cv2.imread(image_path)
    if img is None:
        return

    green = (0, 255, 0)
    for number, (x, y, w, h) in enumerate(boxes, start=1):
        # Use plain ints so the drawing calls do not depend on how the
        # OpenCV binding handles NumPy scalar coordinates.
        x, y, w, h = int(x), int(y), int(w), int(h)
        cv2.rectangle(img, (x, y), (x + w, y + h), green, 3)
        cv2.putText(img, str(number), (x + 10, y + 40),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.2, green, 3)

    # Only claim success when the file was actually written.
    if cv2.imwrite(output_path, img):
        print(f" ✓ Saved visualization to {output_path}")
    else:
        print(f" Warning: could not save visualization to {output_path}")
def process_page(image_path: str, cards_output_dir: str,
                 debug_dir: str = None) -> Dict:
    """Run the full extraction pipeline on one page image.

    Steps: binarize the page, extract the grid lines, derive the cell boxes,
    order them, optionally dump intermediate images, then crop and save the
    cards.

    Args:
        image_path: Page image to process.
        cards_output_dir: Directory that receives the cropped card images.
        debug_dir: When given (truthy), the binary/grid/horizontal/vertical
            images and an annotated detection image are written there.

    Returns:
        Dict with the keys ``page`` (image file name), ``image_size``
        (``[width, height]``), ``cards_found`` and ``cards`` (the per-card
        dicts returned by ``extract_cards_from_boxes``).

    Raises:
        ValueError: Propagated from ``preprocess_image`` when the image
            cannot be loaded.
    """
    page_name = Path(image_path).name
    print(f"\nProcessing {page_name}...")

    # Preprocess
    img, binary = preprocess_image(image_path)
    height, width = img.shape[:2]
    print(f" Image size: {width}x{height}")

    # Extract grid lines
    grid, horizontal, vertical = extract_grid_lines(binary)
    print(" Grid extracted")

    # Find cells from grid and put them in reading order
    cells = find_cells_from_grid(grid, img.shape)
    print(f" Found {len(cells)} valid cells")
    boxes = sort_contours_grid(cells)

    # Save debug images if requested
    if debug_dir:
        os.makedirs(debug_dir, exist_ok=True)
        base_name = Path(image_path).stem
        stages = (
            ('binary', binary),
            ('grid', grid),
            ('horizontal', horizontal),
            ('vertical', vertical),
        )
        for suffix, stage_image in stages:
            cv2.imwrite(os.path.join(debug_dir, f"{base_name}_{suffix}.png"), stage_image)
        viz_path = os.path.join(debug_dir, f"{base_name}_detected.png")
        visualize_detection(image_path, boxes, viz_path)

    # Extract cards
    card_info = extract_cards_from_boxes(image_path, cards_output_dir, boxes)
    print(f" ✓ Extracted {len(card_info)} cards")

    return {
        'page': page_name,
        'image_size': [width, height],
        'cards_found': len(card_info),
        'cards': card_info
    }
def main():
    """Command-line entry point: PDF -> page images -> card crops -> JSON.

    Page images found in ``<output-dir>/images`` from an earlier run are
    reused; the PDF is only rendered when no PNG page images are present
    there. Results are written to ``<output-dir>/bounding_boxes.json``.
    """
    parser = argparse.ArgumentParser(
        description='Extract ID cards from PDF using morphology operations'
    )
    parser.add_argument('pdf_path', help='Path to input PDF file')
    # %(default)s keeps the help text in sync with the actual default.
    parser.add_argument('--pages', type=int, default=625,
                        help='Number of pages to process '
                             '(default: %(default)s, use -1 for all)')
    parser.add_argument('--dpi', type=int, default=200,
                        help='DPI for PDF rendering (default: %(default)s)')
    parser.add_argument('--output-dir', default='output',
                        help='Output directory (default: %(default)s)')
    parser.add_argument('--debug', action='store_true',
                        help='Save debug images showing detection steps')
    args = parser.parse_args()

    # Setup directories
    images_dir = os.path.join(args.output_dir, 'images')
    cards_dir = os.path.join(args.output_dir, 'cards')
    debug_dir = os.path.join(args.output_dir, 'debug') if args.debug else None
    os.makedirs(args.output_dir, exist_ok=True)

    print("=" * 60)
    print("ID Card Extraction - Morphology-Based Approach")
    print("=" * 60)

    # Step 1: reuse previously rendered pages, otherwise render the PDF.
    # Only .png files are considered so that stray files in the directory
    # (e.g. OS metadata files) are not fed to the image loader.
    image_paths = []
    if os.path.isdir(images_dir):
        image_paths = sorted(
            os.path.join(images_dir, name)
            for name in os.listdir(images_dir)
            if name.lower().endswith('.png')
        )
    if image_paths:
        # NOTE: cached pages are reused regardless of the current
        # pdf_path/--dpi values; delete the images directory to re-render.
        print(f"Found {len(image_paths)} images in {images_dir}")
    else:
        print("\n📄 STEP 1: Converting PDF to images...")
        image_paths = pdf_to_images(args.pdf_path, images_dir, args.dpi)

    # Limit pages if specified (zero or negative means "all pages").
    if args.pages > 0:
        image_paths = image_paths[:args.pages]
        print(f"Processing first {len(image_paths)} pages...")

    # Step 2: Process each page
    print("\n🔍 STEP 2: Detecting and extracting ID cards...")
    all_results = []
    for i, image_path in enumerate(image_paths, 1):
        all_results.append(process_page(image_path, cards_dir, debug_dir))
        if i % 5 == 0:
            print(f"\nProgress: {i}/{len(image_paths)} pages processed")

    total_pages = len(all_results)
    total_cards = sum(r['cards_found'] for r in all_results)

    # Step 3: Save results
    print("\n💾 STEP 3: Saving results...")
    results_file = os.path.join(args.output_dir, 'bounding_boxes.json')
    with open(results_file, 'w', encoding='utf-8') as f:
        json.dump({
            'total_pages': total_pages,
            'total_cards': total_cards,
            'pages': all_results
        }, f, indent=2)
    print(f"✓ Saved bounding boxes to {results_file}")

    # Summary
    print("\n" + "=" * 60)
    print("EXTRACTION COMPLETE")
    print("=" * 60)
    print(f"Pages processed: {total_pages}")
    print(f"Total cards extracted: {total_cards}")
    if all_results:
        print(f"Average cards per page: {total_cards / total_pages:.1f}")
    print("\nOutput directories:")
    print(f" - Images: {images_dir}")
    print(f" - Cards: {cards_dir}")
    if debug_dir:
        print(f" - Debug: {debug_dir}")
    print(f" - Results: {results_file}")
# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment