Created
February 18, 2026 16:24
-
-
Save jalotra/7ca6a2ca859478fd7ccea4e8fcb905c0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| Morphology-based ID Card Extraction from PDF | |
| Extracts ID cards using OpenCV morphology operations to detect grid structure. | |
| """ | |
| import fitz # PyMuPDF | |
| import cv2 | |
| import numpy as np | |
| import json | |
| import os | |
| from pathlib import Path | |
| from typing import List, Dict, Tuple | |
| import argparse | |
def pdf_to_images(pdf_path: str, output_dir: str, dpi: int = 200) -> List[str]:
    """Render each page of a PDF to a PNG image.

    Args:
        pdf_path: Path to the input PDF file.
        output_dir: Directory where page images are written (created if missing).
        dpi: Render resolution; PyMuPDF's base unit is 72 DPI, so the zoom
            matrix is dpi/72 on both axes.

    Returns:
        List of written image paths, in page order.
    """
    print(f"Converting PDF to images at {dpi} DPI...")
    image_paths: List[str] = []
    os.makedirs(output_dir, exist_ok=True)
    # Context manager guarantees the document is closed even if rendering
    # a page raises (the original leaked the handle on error).
    with fitz.open(pdf_path) as doc:
        total = len(doc)
        zoom = fitz.Matrix(dpi / 72, dpi / 72)
        for page_num in range(total):
            pix = doc[page_num].get_pixmap(matrix=zoom)
            image_path = os.path.join(output_dir, f"page_{page_num:04d}.png")
            pix.save(image_path)
            image_paths.append(image_path)
            if (page_num + 1) % 10 == 0:
                print(f" Processed {page_num + 1}/{total} pages...")
    # Original printed a mojibake character here; fixed to a checkmark.
    print(f"✓ Converted {len(image_paths)} pages to images")
    return image_paths
def preprocess_image(image_path: str) -> Tuple[np.ndarray, np.ndarray]:
    """Load an image and build an inverted adaptive-threshold binary map.

    Args:
        image_path: Path to the page image on disk.

    Returns:
        Tuple of (original BGR image, binary image in which dark ink
        appears as white foreground).

    Raises:
        ValueError: If the file cannot be read as an image.
    """
    img = cv2.imread(image_path)
    if img is None:
        raise ValueError(f"Could not load image: {image_path}")

    # Collapse to one channel when the image is colour; pass through if
    # it is already grayscale.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if len(img.shape) == 3 else img

    # Invert first (~gray) so dark strokes become bright foreground, then
    # threshold adaptively (mean of a 15px neighbourhood, offset C=-2).
    binary = cv2.adaptiveThreshold(~gray, 255,
                                   cv2.ADAPTIVE_THRESH_MEAN_C,
                                   cv2.THRESH_BINARY,
                                   15, -2)
    return img, binary
def extract_grid_lines(binary: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Isolate the table's horizontal and vertical rules via morphology.

    Args:
        binary: Inverted binary image (lines are white on black).

    Returns:
        Tuple (grid, horizontal, vertical): the OR-combined grid mask plus
        the horizontal-only and vertical-only line masks.
    """
    height, width = binary.shape

    # Kernel lengths scale with the image so that short strokes (text) are
    # erased while long table rules survive. max(1, ...) guards small
    # images, where the integer division would yield a zero-sized kernel
    # and make getStructuringElement raise.
    horizontal_size = max(1, width // 30)
    vertical_size = max(1, height // 30)

    # Erode removes everything shorter than the kernel; dilate restores
    # the survivors to roughly their original extent (a morphological
    # opening along each axis).
    h_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (horizontal_size, 1))
    horizontal = cv2.dilate(cv2.erode(binary, h_kernel), h_kernel)

    v_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, vertical_size))
    vertical = cv2.dilate(cv2.erode(binary, v_kernel), v_kernel)

    # Union of both line sets reconstructs the full grid.
    grid = cv2.bitwise_or(horizontal, vertical)
    return grid, horizontal, vertical
def find_cells_from_grid(grid: np.ndarray, img_shape: Tuple[int, int],
                         rows: int = 8, cols: int = 2) -> List[Tuple[int, int, int, int]]:
    """Derive per-cell bounding boxes from the detected grid lines.

    Strategy: find the extent of all grid-line pixels, pad it slightly,
    then subdivide that rectangle into a uniform cols x rows lattice.

    Args:
        grid: Binary grid-line mask (white lines on black).
        img_shape: Shape of the page image; only the first two entries
            (height, width) are used, so a 3-channel shape is accepted.
        rows: Number of card rows on the page (default 8, the layout the
            script was written for).
        cols: Number of card columns on the page (default 2).

    Returns:
        List of (x, y, w, h) cell boxes, or [] when no grid pixels exist.
    """
    height, width = img_shape[:2]

    # Bounding box of every grid-line pixel gives the table's extent.
    white_pixels = np.where(grid > 128)
    if len(white_pixels[0]) == 0:
        return []
    y_min, y_max = white_pixels[0].min(), white_pixels[0].max()
    x_min, x_max = white_pixels[1].min(), white_pixels[1].max()

    # Pad slightly so border lines are fully inside, clamped to the page.
    margin = 10
    x_min = max(0, x_min - margin)
    y_min = max(0, y_min - margin)
    x_max = min(width, x_max + margin)
    y_max = min(height, y_max + margin)
    grid_bbox = (x_min, y_min, x_max - x_min, y_max - y_min)
    print(f" Found grid boundary from lines: {grid_bbox}")

    gx, gy, gw, gh = grid_bbox
    cell_width = gw // cols
    cell_height = gh // rows
    cells = [
        (gx + col * cell_width, gy + row * cell_height, cell_width, cell_height)
        for row in range(rows)
        for col in range(cols)
    ]
    # Mojibake multiplication sign in the original print fixed.
    print(f" Subdivided grid into {len(cells)} cells ({cols}x{rows})")
    return cells
def sort_contours_grid(cells: List[Tuple[int, int, int, int]]) -> List[Tuple[int, int, int, int]]:
    """Order cell boxes in reading order: rows top-to-bottom, then left-to-right."""
    if not cells:
        return []
    # Bucket boxes into rows by quantising y against a tolerance derived
    # from the mean box height, so slight vertical jitter within one row
    # does not change the ordering.
    mean_height = sum(box[3] for box in cells) / len(cells)
    band = mean_height * 0.6
    return sorted(cells, key=lambda box: (int(round(box[1] / band)), box[0]))
def extract_cards_from_boxes(image_path: str, output_dir: str, boxes: List[Tuple],
                             padding: int = 5) -> List[Dict]:
    """Crop each bounding box out of the page image and save it as a PNG.

    Each box is inset by `padding` on every side to trim grid borders.
    Crops that collapse after padding or are smaller than 50x50 px are
    skipped.

    Args:
        image_path: Page image to crop from.
        output_dir: Directory for card PNGs (created if missing).
        boxes: (x, y, w, h) boxes in page coordinates.
        padding: Pixels shaved from every edge of each box.

    Returns:
        One metadata dict per saved card (number, bbox, filename, size);
        [] when the page image cannot be read.
    """
    img = cv2.imread(image_path)
    if img is None:
        return []
    os.makedirs(output_dir, exist_ok=True)
    base_name = Path(image_path).stem
    page_h, page_w = img.shape[:2]

    card_info: List[Dict] = []
    for i, (x, y, w, h) in enumerate(boxes):
        # Inset by the padding, clamped to the page bounds.
        x1, y1 = max(0, x + padding), max(0, y + padding)
        x2, y2 = min(page_w, x + w - padding), min(page_h, y + h - padding)
        if x2 <= x1 or y2 <= y1:
            continue
        card = img[y1:y2, x1:x2]
        if card.shape[0] < 50 or card.shape[1] < 50:
            continue
        card_filename = f"{base_name}_card_{i:03d}.png"
        cv2.imwrite(os.path.join(output_dir, card_filename), card)
        card_info.append({
            'card_number': i + 1,
            'bbox': [int(x1), int(y1), int(x2), int(y2)],
            'filename': card_filename,
            'width': int(x2 - x1),
            'height': int(y2 - y1)
        })
    return card_info
def visualize_detection(image_path: str, boxes: List[Tuple],
                        output_path: str) -> None:
    """Draw numbered green rectangles for each detected box and save the image.

    Args:
        image_path: Page image to annotate (silently skipped if unreadable).
        boxes: (x, y, w, h) boxes to outline.
        output_path: Destination path for the annotated image.
    """
    img = cv2.imread(image_path)
    if img is None:
        return
    green = (0, 255, 0)  # BGR
    for i, (x, y, w, h) in enumerate(boxes):
        cv2.rectangle(img, (x, y), (x + w, y + h), green, 3)
        # 1-based label drawn just inside the box's top-left corner.
        cv2.putText(img, str(i + 1), (x + 10, y + 40),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.2, green, 3)
    cv2.imwrite(output_path, img)
    # Original printed a mojibake character here; fixed to a checkmark.
    print(f" ✓ Saved visualization to {output_path}")
def process_page(image_path: str, cards_output_dir: str,
                 debug_dir: str = None) -> Dict:
    """Detect the card grid on one page image and extract every card.

    Args:
        image_path: Page PNG rendered from the PDF.
        cards_output_dir: Where cropped card images are written.
        debug_dir: When set, intermediate masks and a detection overlay
            are saved there for inspection.

    Returns:
        Summary dict: page name, image size, card count, per-card info.
    """
    print(f"\nProcessing {Path(image_path).name}...")

    # Preprocess and measure the page.
    img, binary = preprocess_image(image_path)
    height, width = img.shape[:2]
    print(f" Image size: {width}x{height}")

    # Grid detection -> cell subdivision -> reading-order sort.
    grid, horizontal, vertical = extract_grid_lines(binary)
    print(" Grid extracted")
    cells = find_cells_from_grid(grid, img.shape)
    print(f" Found {len(cells)} valid cells")
    boxes = sort_contours_grid(cells)

    if debug_dir:
        os.makedirs(debug_dir, exist_ok=True)
        base_name = Path(image_path).stem
        # Dump every intermediate stage so detection failures can be diagnosed.
        for suffix, stage in (("binary", binary), ("grid", grid),
                              ("horizontal", horizontal), ("vertical", vertical)):
            cv2.imwrite(os.path.join(debug_dir, f"{base_name}_{suffix}.png"), stage)
        viz_path = os.path.join(debug_dir, f"{base_name}_detected.png")
        visualize_detection(image_path, boxes, viz_path)

    card_info = extract_cards_from_boxes(image_path, cards_output_dir, boxes)
    # Original printed a mojibake character here; fixed to a checkmark.
    print(f" ✓ Extracted {len(card_info)} cards")
    return {
        'page': Path(image_path).name,
        'image_size': [width, height],
        'cards_found': len(card_info),
        'cards': card_info
    }
def main():
    """CLI entry point: PDF -> page images -> per-page card extraction -> JSON."""
    parser = argparse.ArgumentParser(
        description='Extract ID cards from PDF using morphology operations'
    )
    parser.add_argument('pdf_path', help='Path to input PDF file')
    # Help text said "default: 5" but the actual default was 625; the help
    # now matches the code.
    parser.add_argument('--pages', type=int, default=625,
                        help='Number of pages to process (default: 625, use -1 for all)')
    parser.add_argument('--dpi', type=int, default=200,
                        help='DPI for PDF rendering (default: 200)')
    parser.add_argument('--output-dir', default='output',
                        help='Output directory (default: output)')
    parser.add_argument('--debug', action='store_true',
                        help='Save debug images showing detection steps')
    args = parser.parse_args()

    # Setup directories
    images_dir = os.path.join(args.output_dir, 'images')
    cards_dir = os.path.join(args.output_dir, 'cards')
    debug_dir = os.path.join(args.output_dir, 'debug') if args.debug else None
    os.makedirs(args.output_dir, exist_ok=True)

    print("=" * 60)
    print("ID Card Extraction - Morphology-Based Approach")
    print("=" * 60)

    if not os.path.exists(images_dir):
        # Step 1: Convert PDF to images (mojibake emoji removed from banner).
        print("\nSTEP 1: Converting PDF to images...")
        image_paths = pdf_to_images(args.pdf_path, images_dir, args.dpi)
    else:
        # Re-use previously rendered pages. Only pick up PNGs so stray
        # files in the directory cannot break processing.
        image_paths = sorted(
            os.path.join(images_dir, f)
            for f in os.listdir(images_dir)
            if f.lower().endswith('.png')
        )
        print(f"Found {len(image_paths)} images in {images_dir}")

    # Limit pages if specified (non-positive means "all").
    if args.pages > 0:
        image_paths = image_paths[:args.pages]
        print(f"Processing first {args.pages} pages...")

    # Step 2: Process each page.
    print("\nSTEP 2: Detecting and extracting ID cards...")
    all_results = []
    for i, image_path in enumerate(image_paths, 1):
        all_results.append(process_page(image_path, cards_dir, debug_dir))
        if i % 5 == 0:
            print(f"\nProgress: {i}/{len(image_paths)} pages processed")

    # Step 3: Save results.
    print("\nSTEP 3: Saving results...")
    total_cards = sum(r['cards_found'] for r in all_results)
    results_file = os.path.join(args.output_dir, 'bounding_boxes.json')
    with open(results_file, 'w') as f:
        json.dump({
            'total_pages': len(all_results),
            'total_cards': total_cards,
            'pages': all_results
        }, f, indent=2)
    print(f"✓ Saved bounding boxes to {results_file}")

    # Summary
    print("\n" + "=" * 60)
    print("EXTRACTION COMPLETE")
    print("=" * 60)
    print(f"Pages processed: {len(all_results)}")
    print(f"Total cards extracted: {total_cards}")
    if all_results:
        print(f"Average cards per page: {total_cards / len(all_results):.1f}")
    print(f"\nOutput directories:")
    print(f" - Images: {images_dir}")
    print(f" - Cards: {cards_dir}")
    if debug_dir:
        print(f" - Debug: {debug_dir}")
    print(f" - Results: {results_file}")


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment