Created
March 27, 2026 17:32
-
-
Save mfaani/80ecd310dbcba145698e37562925db47 to your computer and use it in GitHub Desktop.
Extract wwdc
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env zsh
set -euo pipefail

# Extract high-resolution screenshots from WWDC 2021 session 10126
# Requires: ffmpeg, yt-dlp

VIDEO_URL="https://developer.apple.com/videos/play/wwdc2021/10126/"

# Store outputs under the discoverable-design folder
POST_DIR="$PWD/content/posts/design/discoverable-design"
OUT_DIR="$POST_DIR/images"
VID_DIR="$POST_DIR/videos"
mkdir -p "$OUT_DIR" "$VID_DIR"

# Download the best-quality HLS stream locally (if not already downloaded) under the post folder
VIDEO_MP4="$VID_DIR/wwdc2021_10126.mp4"
if [[ ! -f "$VIDEO_MP4" ]]; then
  command -v yt-dlp >/dev/null 2>&1 || { echo "yt-dlp not found. Install via: brew install yt-dlp"; exit 1; }
  command -v ffmpeg >/dev/null 2>&1 || { echo "ffmpeg not found. Install via: brew install ffmpeg"; exit 1; }
  echo "Downloading video..."
  # Try best muxed, else best video+audio combination.
  # --merge-output-format mp4 keeps the merged container at .mp4; without it,
  # yt-dlp may emit .mkv for some codec combinations, so the -f existence
  # check above would never find "$VIDEO_MP4" and we'd re-download every run.
  yt-dlp -f "bv*+ba/best" --merge-output-format mp4 -o "$VIDEO_MP4" "$VIDEO_URL" || {
    echo "Format selection failed. Listing available formats..."
    yt-dlp -F "$VIDEO_URL"
    echo "You can override the format by setting YTDLP_FORMAT. Example: YTDLP_FORMAT=137+140"
    fmt=${YTDLP_FORMAT:-}
    if [[ -n "$fmt" ]]; then
      echo "Attempting with format: $fmt"
      yt-dlp -f "$fmt" --merge-output-format mp4 -o "$VIDEO_MP4" "$VIDEO_URL"
    else
      echo "No YTDLP_FORMAT provided; aborting download so you can choose a format above."
      exit 1
    fi
  }
fi

# Get video duration in whole seconds (cut strips the fractional part)
DURATION=$(ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 "$VIDEO_MP4" | cut -d. -f1)
if [[ -z "$DURATION" ]]; then
  echo "Could not determine video duration via ffprobe."
  exit 1
fi
echo "Video duration: ${DURATION}s"

# Capture every 3 seconds throughout the entire video
echo "Capturing frames every 3 seconds..."
for (( t=0; t<=DURATION; t+=3 )); do
  fname=$(printf "%s/wwdc10126_%04d.jpg" "$OUT_DIR" "$t")
  if [[ -f "$fname" ]]; then
    echo "Exists: $fname"
    continue
  fi
  echo "Capturing t=$t -> $fname"
  # -ss before -i: fast input-side seek; grab one high-quality frame (-q:v 2)
  ffmpeg -hide_banner -loglevel error -ss "$t" -i "$VIDEO_MP4" -frames:v 1 -q:v 2 "$fname"
done

echo "Done. Images in: $OUT_DIR"
echo "Tip: Delete any frames with speakers; keep slides, icons, or text-only frames."
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| import os | |
| import sys | |
| import cv2 | |
| import numpy as np | |
| from collections import defaultdict | |
def compute_image_hash(img_path, hash_size=8):
    """Compute an average-hash (aHash) fingerprint for duplicate detection.

    The image is loaded as grayscale, shrunk to hash_size x hash_size, and
    each pixel is compared against the mean brightness.

    Returns:
        A tuple of hash_size*hash_size booleans, or None if the image
        could not be read.
    """
    grayscale = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if grayscale is None:
        return None
    # Shrinking to a tiny square makes the hash insensitive to fine detail
    # and compression noise, so near-identical frames collide.
    small = cv2.resize(grayscale, (hash_size, hash_size), interpolation=cv2.INTER_AREA)
    threshold = small.mean()
    # Each bit records whether that pixel is brighter than the overall mean.
    return tuple((small > threshold).flatten())
def remove_duplicates(images_dir):
    """Remove duplicate/near-duplicate images using perceptual hashing.

    Images are grouped by their average-hash; within each group the first
    file in sorted-name order (earliest timestamp, given the frame-naming
    scheme) is kept and the rest are deleted from disk.

    Args:
        images_dir: Directory containing .jpg/.png frames.

    Returns:
        Tuple of (kept, deleted) counts.
    """
    print("\n=== Deduplicating images ===")
    hash_to_files = defaultdict(list)
    # Compute hashes for all images (sorted so the kept representative
    # within each hash group is deterministic).
    for name in sorted(os.listdir(images_dir)):
        if not name.lower().endswith(('.jpg', '.png')):
            continue
        path = os.path.join(images_dir, name)
        img_hash = compute_image_hash(path)
        # Explicit None check: the unreadable-image sentinel is None, and a
        # valid hash is a non-empty tuple (always truthy), so truthiness
        # testing is the wrong idiom here.
        if img_hash is not None:
            hash_to_files[img_hash].append((name, path))
    # Keep the first image of each hash group, delete the rest.
    kept, deleted = 0, 0
    for files in hash_to_files.values():
        kept += 1
        if len(files) > 1:
            print(f"✓ Kept: {files[0][0]} (representative)")
            # Delete duplicates, tolerating per-file failures.
            for name, path in files[1:]:
                try:
                    os.remove(path)
                    deleted += 1
                    print(f"❌ Deleted (duplicate): {name}")
                except Exception as e:
                    print(f"Error deleting {name}: {e}")
    print(f"Deduplication: Kept {kept} unique, Deleted {deleted} duplicates\n")
    return kept, deleted
def has_face(img_path, face_cascade):
    """Return True if the given Haar cascade detects at least one face.

    Unreadable images are treated as containing no face.
    """
    frame = cv2.imread(img_path)
    if frame is None:
        return False
    # Haar cascades operate on grayscale input.
    grayscale = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    detections = face_cascade.detectMultiScale(
        grayscale, scaleFactor=1.1, minNeighbors=5, minSize=(60, 60)
    )
    return len(detections) > 0
def has_text_or_ui(img_path):
    """Heuristically detect text/UI content via edge density.

    Text and UI chrome produce dense, high-contrast edges, so a high
    fraction of Canny-edge pixels suggests a slide or screenshot rather
    than a plain speaker shot.

    Returns:
        Tuple (likely_has_content, edge_percentage); an unreadable image
        yields (False, 0.0).
    """
    frame = cv2.imread(img_path)
    if frame is None:
        return False, 0.0
    grayscale = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    edge_map = cv2.Canny(grayscale, 50, 150)
    # Fraction of pixels that are edges, as a percentage.
    edge_percentage = (np.count_nonzero(edge_map) / edge_map.size) * 100
    # More than 2% edge pixels is treated as "contains text/UI".
    return edge_percentage > 2.0, edge_percentage
def main():
    """Deduplicate frames in a directory, then delete person-only frames."""
    # Guard clauses: require exactly one existing-directory argument.
    if len(sys.argv) < 2:
        print("Usage: filter_non_person_frames.py <images_dir>")
        sys.exit(1)
    images_dir = sys.argv[1]
    if not os.path.isdir(images_dir):
        print(f"Not a directory: {images_dir}")
        sys.exit(1)

    # Step 1: collapse exact/near-duplicate frames first so the face
    # filter below has fewer images to process.
    remove_duplicates(images_dir)

    # Load the Haar cascade bundled with OpenCV for frontal-face detection.
    face_cascade = cv2.CascadeClassifier(
        cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
    )
    if face_cascade.empty():
        print("Failed to load Haar cascade for face detection.")
        sys.exit(1)

    # Step 2: drop frames showing only a person (face, but no text/UI edges).
    print("=== Filtering person-only frames ===")
    kept, deleted = 0, 0
    for name in sorted(os.listdir(images_dir)):
        # Only .jpg/.png frames are considered.
        if not name.lower().endswith(('.jpg', '.png')):
            continue
        path = os.path.join(images_dir, name)
        try:
            has_person = has_face(path, face_cascade)
            has_content, edge_pct = has_text_or_ui(path)
            # Keep: app screenshots, text, or text+person.
            # Delete: person-only (face detected but no text/UI).
            if has_person and not has_content:
                os.remove(path)
                deleted += 1
                print(f"❌ Deleted (person only): {name} [face=yes, edges={edge_pct:.2f}%]")
            else:
                kept += 1
                face_flag = "face=yes" if has_person else "face=no"
                print(f"✓ Kept: {name} [{face_flag}, edges={edge_pct:.2f}%]")
        except Exception as e:
            print(f"Error processing {name}: {e}")

    print("\n=== Summary ===")
    print(f"Person-only filter: Kept {kept}, Deleted {deleted}")


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment