Created
February 18, 2026 16:24
-
-
Save jalotra/7ca6a2ca859478fd7ccea4e8fcb905c0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| Morphology-based ID Card Extraction from PDF | |
| Extracts ID cards using OpenCV morphology operations to detect grid structure. | |
| """ | |
| import fitz # PyMuPDF | |
| import cv2 | |
| import numpy as np | |
| import json | |
| import os | |
| from pathlib import Path | |
| from typing import List, Dict, Tuple | |
| import argparse | |
def pdf_to_images(pdf_path: str, output_dir: str, dpi: int = 200) -> List[str]:
    """Render each page of a PDF to a PNG image.

    Args:
        pdf_path: Path to the input PDF file.
        output_dir: Directory where page images are written (created if missing).
        dpi: Render resolution; PyMuPDF's base unit is 72 DPI, so the zoom
            matrix is dpi/72 on both axes.

    Returns:
        List of written image paths, in page order.
    """
    print(f"Converting PDF to images at {dpi} DPI...")
    image_paths: List[str] = []
    os.makedirs(output_dir, exist_ok=True)
    # Context manager guarantees the document is closed even if rendering
    # a page raises (the original leaked the handle on error).
    with fitz.open(pdf_path) as doc:
        total = len(doc)
        zoom = fitz.Matrix(dpi / 72, dpi / 72)
        for page_num in range(total):
            pix = doc[page_num].get_pixmap(matrix=zoom)
            image_path = os.path.join(output_dir, f"page_{page_num:04d}.png")
            pix.save(image_path)
            image_paths.append(image_path)
            if (page_num + 1) % 10 == 0:
                print(f" Processed {page_num + 1}/{total} pages...")
    # Original printed a mojibake character here; fixed to a checkmark.
    print(f"✓ Converted {len(image_paths)} pages to images")
    return image_paths
def preprocess_image(image_path: str) -> Tuple[np.ndarray, np.ndarray]:
    """Load an image and build an inverted adaptive-threshold binary map.

    Args:
        image_path: Path to the page image on disk.

    Returns:
        Tuple of (original BGR image, binary image in which dark ink
        appears as white foreground).

    Raises:
        ValueError: If the file cannot be read as an image.
    """
    img = cv2.imread(image_path)
    if img is None:
        raise ValueError(f"Could not load image: {image_path}")

    # Collapse to one channel when the image is colour; pass through if
    # it is already grayscale.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if len(img.shape) == 3 else img

    # Invert first (~gray) so dark strokes become bright foreground, then
    # threshold adaptively (mean of a 15px neighbourhood, offset C=-2).
    binary = cv2.adaptiveThreshold(~gray, 255,
                                   cv2.ADAPTIVE_THRESH_MEAN_C,
                                   cv2.THRESH_BINARY,
                                   15, -2)
    return img, binary
def extract_grid_lines(binary: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Isolate the table's horizontal and vertical rules via morphology.

    Args:
        binary: Inverted binary image (lines are white on black).

    Returns:
        Tuple (grid, horizontal, vertical): the OR-combined grid mask plus
        the horizontal-only and vertical-only line masks.
    """
    height, width = binary.shape

    # Kernel lengths scale with the image so that short strokes (text) are
    # erased while long table rules survive. max(1, ...) guards small
    # images, where the integer division would yield a zero-sized kernel
    # and make getStructuringElement raise.
    horizontal_size = max(1, width // 30)
    vertical_size = max(1, height // 30)

    # Erode removes everything shorter than the kernel; dilate restores
    # the survivors to roughly their original extent (a morphological
    # opening along each axis).
    h_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (horizontal_size, 1))
    horizontal = cv2.dilate(cv2.erode(binary, h_kernel), h_kernel)

    v_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, vertical_size))
    vertical = cv2.dilate(cv2.erode(binary, v_kernel), v_kernel)

    # Union of both line sets reconstructs the full grid.
    grid = cv2.bitwise_or(horizontal, vertical)
    return grid, horizontal, vertical
def find_cells_from_grid(grid: np.ndarray, img_shape: Tuple[int, int],
                         rows: int = 8, cols: int = 2) -> List[Tuple[int, int, int, int]]:
    """Derive per-cell bounding boxes from the detected grid lines.

    Strategy: find the extent of all grid-line pixels, pad it slightly,
    then subdivide that rectangle into a uniform cols x rows lattice.

    Args:
        grid: Binary grid-line mask (white lines on black).
        img_shape: Shape of the page image; only the first two entries
            (height, width) are used, so a 3-channel shape is accepted.
        rows: Number of card rows on the page (default 8, the layout the
            script was written for).
        cols: Number of card columns on the page (default 2).

    Returns:
        List of (x, y, w, h) cell boxes, or [] when no grid pixels exist.
    """
    height, width = img_shape[:2]

    # Bounding box of every grid-line pixel gives the table's extent.
    white_pixels = np.where(grid > 128)
    if len(white_pixels[0]) == 0:
        return []
    y_min, y_max = white_pixels[0].min(), white_pixels[0].max()
    x_min, x_max = white_pixels[1].min(), white_pixels[1].max()

    # Pad slightly so border lines are fully inside, clamped to the page.
    margin = 10
    x_min = max(0, x_min - margin)
    y_min = max(0, y_min - margin)
    x_max = min(width, x_max + margin)
    y_max = min(height, y_max + margin)
    grid_bbox = (x_min, y_min, x_max - x_min, y_max - y_min)
    print(f" Found grid boundary from lines: {grid_bbox}")

    gx, gy, gw, gh = grid_bbox
    cell_width = gw // cols
    cell_height = gh // rows
    cells = [
        (gx + col * cell_width, gy + row * cell_height, cell_width, cell_height)
        for row in range(rows)
        for col in range(cols)
    ]
    # Mojibake multiplication sign in the original print fixed.
    print(f" Subdivided grid into {len(cells)} cells ({cols}x{rows})")
    return cells
def sort_contours_grid(cells: List[Tuple[int, int, int, int]]) -> List[Tuple[int, int, int, int]]:
    """Order cell boxes in reading order: rows top-to-bottom, then left-to-right."""
    if not cells:
        return []
    # Bucket boxes into rows by quantising y against a tolerance derived
    # from the mean box height, so slight vertical jitter within one row
    # does not change the ordering.
    mean_height = sum(box[3] for box in cells) / len(cells)
    band = mean_height * 0.6
    return sorted(cells, key=lambda box: (int(round(box[1] / band)), box[0]))
def extract_cards_from_boxes(image_path: str, output_dir: str, boxes: List[Tuple],
                             padding: int = 5) -> List[Dict]:
    """Crop each bounding box out of the page image and save it as a PNG.

    Each box is inset by `padding` on every side to trim grid borders.
    Crops that collapse after padding or are smaller than 50x50 px are
    skipped.

    Args:
        image_path: Page image to crop from.
        output_dir: Directory for card PNGs (created if missing).
        boxes: (x, y, w, h) boxes in page coordinates.
        padding: Pixels shaved from every edge of each box.

    Returns:
        One metadata dict per saved card (number, bbox, filename, size);
        [] when the page image cannot be read.
    """
    img = cv2.imread(image_path)
    if img is None:
        return []
    os.makedirs(output_dir, exist_ok=True)
    base_name = Path(image_path).stem
    page_h, page_w = img.shape[:2]

    card_info: List[Dict] = []
    for i, (x, y, w, h) in enumerate(boxes):
        # Inset by the padding, clamped to the page bounds.
        x1, y1 = max(0, x + padding), max(0, y + padding)
        x2, y2 = min(page_w, x + w - padding), min(page_h, y + h - padding)
        if x2 <= x1 or y2 <= y1:
            continue
        card = img[y1:y2, x1:x2]
        if card.shape[0] < 50 or card.shape[1] < 50:
            continue
        card_filename = f"{base_name}_card_{i:03d}.png"
        cv2.imwrite(os.path.join(output_dir, card_filename), card)
        card_info.append({
            'card_number': i + 1,
            'bbox': [int(x1), int(y1), int(x2), int(y2)],
            'filename': card_filename,
            'width': int(x2 - x1),
            'height': int(y2 - y1)
        })
    return card_info
def visualize_detection(image_path: str, boxes: List[Tuple],
                        output_path: str) -> None:
    """Draw numbered green rectangles for each detected box and save the image.

    Args:
        image_path: Page image to annotate (silently skipped if unreadable).
        boxes: (x, y, w, h) boxes to outline.
        output_path: Destination path for the annotated image.
    """
    img = cv2.imread(image_path)
    if img is None:
        return
    green = (0, 255, 0)  # BGR
    for i, (x, y, w, h) in enumerate(boxes):
        cv2.rectangle(img, (x, y), (x + w, y + h), green, 3)
        # 1-based label drawn just inside the box's top-left corner.
        cv2.putText(img, str(i + 1), (x + 10, y + 40),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.2, green, 3)
    cv2.imwrite(output_path, img)
    # Original printed a mojibake character here; fixed to a checkmark.
    print(f" ✓ Saved visualization to {output_path}")
def process_page(image_path: str, cards_output_dir: str,
                 debug_dir: str = None) -> Dict:
    """Detect the card grid on one page image and extract every card.

    Args:
        image_path: Page PNG rendered from the PDF.
        cards_output_dir: Where cropped card images are written.
        debug_dir: When set, intermediate masks and a detection overlay
            are saved there for inspection.

    Returns:
        Summary dict: page name, image size, card count, per-card info.
    """
    print(f"\nProcessing {Path(image_path).name}...")

    # Preprocess and measure the page.
    img, binary = preprocess_image(image_path)
    height, width = img.shape[:2]
    print(f" Image size: {width}x{height}")

    # Grid detection -> cell subdivision -> reading-order sort.
    grid, horizontal, vertical = extract_grid_lines(binary)
    print(" Grid extracted")
    cells = find_cells_from_grid(grid, img.shape)
    print(f" Found {len(cells)} valid cells")
    boxes = sort_contours_grid(cells)

    if debug_dir:
        os.makedirs(debug_dir, exist_ok=True)
        base_name = Path(image_path).stem
        # Dump every intermediate stage so detection failures can be diagnosed.
        for suffix, stage in (("binary", binary), ("grid", grid),
                              ("horizontal", horizontal), ("vertical", vertical)):
            cv2.imwrite(os.path.join(debug_dir, f"{base_name}_{suffix}.png"), stage)
        viz_path = os.path.join(debug_dir, f"{base_name}_detected.png")
        visualize_detection(image_path, boxes, viz_path)

    card_info = extract_cards_from_boxes(image_path, cards_output_dir, boxes)
    # Original printed a mojibake character here; fixed to a checkmark.
    print(f" ✓ Extracted {len(card_info)} cards")
    return {
        'page': Path(image_path).name,
        'image_size': [width, height],
        'cards_found': len(card_info),
        'cards': card_info
    }
def main():
    """CLI entry point: PDF -> page images -> per-page card extraction -> JSON."""
    parser = argparse.ArgumentParser(
        description='Extract ID cards from PDF using morphology operations'
    )
    parser.add_argument('pdf_path', help='Path to input PDF file')
    # Help text said "default: 5" but the actual default was 625; the help
    # now matches the code.
    parser.add_argument('--pages', type=int, default=625,
                        help='Number of pages to process (default: 625, use -1 for all)')
    parser.add_argument('--dpi', type=int, default=200,
                        help='DPI for PDF rendering (default: 200)')
    parser.add_argument('--output-dir', default='output',
                        help='Output directory (default: output)')
    parser.add_argument('--debug', action='store_true',
                        help='Save debug images showing detection steps')
    args = parser.parse_args()

    # Setup directories
    images_dir = os.path.join(args.output_dir, 'images')
    cards_dir = os.path.join(args.output_dir, 'cards')
    debug_dir = os.path.join(args.output_dir, 'debug') if args.debug else None
    os.makedirs(args.output_dir, exist_ok=True)

    print("=" * 60)
    print("ID Card Extraction - Morphology-Based Approach")
    print("=" * 60)

    if not os.path.exists(images_dir):
        # Step 1: Convert PDF to images (mojibake emoji removed from banner).
        print("\nSTEP 1: Converting PDF to images...")
        image_paths = pdf_to_images(args.pdf_path, images_dir, args.dpi)
    else:
        # Re-use previously rendered pages. Only pick up PNGs so stray
        # files in the directory cannot break processing.
        image_paths = sorted(
            os.path.join(images_dir, f)
            for f in os.listdir(images_dir)
            if f.lower().endswith('.png')
        )
        print(f"Found {len(image_paths)} images in {images_dir}")

    # Limit pages if specified (non-positive means "all").
    if args.pages > 0:
        image_paths = image_paths[:args.pages]
        print(f"Processing first {args.pages} pages...")

    # Step 2: Process each page.
    print("\nSTEP 2: Detecting and extracting ID cards...")
    all_results = []
    for i, image_path in enumerate(image_paths, 1):
        all_results.append(process_page(image_path, cards_dir, debug_dir))
        if i % 5 == 0:
            print(f"\nProgress: {i}/{len(image_paths)} pages processed")

    # Step 3: Save results.
    print("\nSTEP 3: Saving results...")
    total_cards = sum(r['cards_found'] for r in all_results)
    results_file = os.path.join(args.output_dir, 'bounding_boxes.json')
    with open(results_file, 'w') as f:
        json.dump({
            'total_pages': len(all_results),
            'total_cards': total_cards,
            'pages': all_results
        }, f, indent=2)
    print(f"✓ Saved bounding boxes to {results_file}")

    # Summary
    print("\n" + "=" * 60)
    print("EXTRACTION COMPLETE")
    print("=" * 60)
    print(f"Pages processed: {len(all_results)}")
    print(f"Total cards extracted: {total_cards}")
    if all_results:
        print(f"Average cards per page: {total_cards / len(all_results):.1f}")
    print(f"\nOutput directories:")
    print(f" - Images: {images_dir}")
    print(f" - Cards: {cards_dir}")
    if debug_dir:
        print(f" - Debug: {debug_dir}")
    print(f" - Results: {results_file}")


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment