Skip to content

Instantly share code, notes, and snippets.

@mfaani
Created March 27, 2026 17:32
Show Gist options
  • Select an option

  • Save mfaani/80ecd310dbcba145698e37562925db47 to your computer and use it in GitHub Desktop.

Select an option

Save mfaani/80ecd310dbcba145698e37562925db47 to your computer and use it in GitHub Desktop.
Extract wwdc
#!/usr/bin/env zsh
set -euo pipefail
# Extract high-resolution screenshots from WWDC 2021 session 10126.
#
# Downloads the session video once (skipped when the MP4 is already on disk)
# and then saves one JPEG frame every 3 seconds. Frames that already exist are
# skipped, so the script can be re-run after an interruption.
#
# Requires: ffmpeg (the Homebrew package also ships ffprobe), and yt-dlp for
# the download step only.
VIDEO_URL="https://developer.apple.com/videos/play/wwdc2021/10126/"

# Store outputs under the discoverable-design folder
POST_DIR="$PWD/content/posts/design/discoverable-design"
OUT_DIR="$POST_DIR/images"
VID_DIR="$POST_DIR/videos"
mkdir -p "$OUT_DIR" "$VID_DIR"

# ffmpeg and ffprobe are needed below even when the video is already
# downloaded, so verify them up front rather than only in the download branch.
for tool in ffmpeg ffprobe; do
  command -v "$tool" >/dev/null 2>&1 || { echo "$tool not found. Install via: brew install ffmpeg" >&2; exit 1; }
done

# Download the video locally (if not already downloaded) under the post folder
VIDEO_MP4="$VID_DIR/wwdc2021_10126.mp4"
if [[ ! -f "$VIDEO_MP4" ]]; then
  command -v yt-dlp >/dev/null 2>&1 || { echo "yt-dlp not found. Install via: brew install yt-dlp" >&2; exit 1; }
  echo "Downloading video..."
  # Prefer best video merged with best audio; otherwise fall back to the best
  # single pre-muxed format.
  yt-dlp -f "bv*+ba/best" -o "$VIDEO_MP4" "$VIDEO_URL" || {
    echo "Format selection failed. Listing available formats..."
    # The listing is informational only; a failure here must not prevent the
    # YTDLP_FORMAT override below from being tried.
    yt-dlp -F "$VIDEO_URL" || echo "Could not list formats." >&2
    echo "You can override the format by setting YTDLP_FORMAT. Example: YTDLP_FORMAT=137+140"
    fmt=${YTDLP_FORMAT:-}
    if [[ -n "$fmt" ]]; then
      echo "Attempting with format: $fmt"
      yt-dlp -f "$fmt" -o "$VIDEO_MP4" "$VIDEO_URL"
    else
      echo "No YTDLP_FORMAT provided; aborting download so you can choose a format above." >&2
      exit 1
    fi
  }
fi

# Get video duration in whole seconds (fractional part dropped by cut)
DURATION=$(ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 "$VIDEO_MP4" | cut -d. -f1)
# Guard the arithmetic loop below against an empty or non-numeric duration.
if [[ ! "$DURATION" =~ ^[0-9]+$ ]]; then
  echo "Could not determine video duration (got: '${DURATION}')." >&2
  exit 1
fi
echo "Video duration: ${DURATION}s"

# Capture every 3 seconds throughout the entire video
echo "Capturing frames every 3 seconds..."
for (( t = 0; t <= DURATION; t += 3 )); do
  # Zero-padded timestamp in the name keeps frames in chronological order.
  fname=$(printf "%s/wwdc10126_%04d.jpg" "$OUT_DIR" "$t")
  if [[ -f "$fname" ]]; then
    echo "Exists: $fname"
    continue
  fi
  echo "Capturing t=$t -> $fname"
  # -ss before -i seeks on the input; -frames:v 1 writes a single frame and
  # -q:v 2 requests high JPEG quality.
  ffmpeg -hide_banner -loglevel error -ss "$t" -i "$VIDEO_MP4" -frames:v 1 -q:v 2 "$fname"
done

echo "Done. Images in: $OUT_DIR"
echo "Tip: Delete any frames with speakers; keep slides, icons, or text-only frames."
#!/usr/bin/env python3
import os
import sys
import cv2
import numpy as np
from collections import defaultdict
def compute_image_hash(img_path, hash_size=8):
    """Return an average-hash fingerprint for the image at *img_path*.

    The image is loaded as grayscale, shrunk to ``hash_size`` x ``hash_size``
    pixels, and every pixel is compared against the thumbnail's mean
    brightness. The result is a flat tuple with one boolean per pixel, usable
    as a dictionary key. Returns ``None`` when ``cv2.imread`` yields no image.
    """
    gray = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if gray is None:
        return None

    thumbnail = cv2.resize(
        gray, (hash_size, hash_size), interpolation=cv2.INTER_AREA
    )
    # One bit per pixel: is it brighter than the thumbnail's average?
    brighter_than_mean = thumbnail > thumbnail.mean()
    return tuple(brighter_than_mean.ravel())
def remove_duplicates(images_dir):
    """Delete images whose perceptual hash equals that of an earlier image.

    Every ``.jpg``/``.png`` file directly inside *images_dir* is hashed with
    ``compute_image_hash``. Files are visited in sorted-name order; within
    each group sharing a hash the first file is kept and the others are
    removed from disk. Files for which no hash could be computed are left
    untouched and are not counted.

    Args:
        images_dir: Directory containing the extracted frames.

    Returns:
        A ``(kept, deleted)`` tuple: the number of distinct hashes found and
        the number of files actually removed.
    """
    print("\n=== Deduplicating images ===")
    hash_to_files = defaultdict(list)

    # Group files by hash; sorted() makes "first in group" deterministic.
    for name in sorted(os.listdir(images_dir)):
        if not name.lower().endswith(('.jpg', '.png')):
            continue
        path = os.path.join(images_dir, name)
        img_hash = compute_image_hash(path)
        # Explicit None check: "no hash" is signalled by None, not falsiness.
        if img_hash is not None:
            hash_to_files[img_hash].append((name, path))

    kept, deleted = 0, 0
    for files in hash_to_files.values():
        # Exactly one representative survives per hash group.
        kept += 1
        if len(files) == 1:
            continue
        print(f"✓ Kept: {files[0][0]} (representative)")
        for name, path in files[1:]:
            try:
                os.remove(path)
            except OSError as e:
                # Report and carry on; one undeletable file should not stop
                # the rest of the clean-up.
                print(f"Error deleting {name}: {e}")
            else:
                deleted += 1
                print(f"❌ Deleted (duplicate): {name}")

    print(f"Deduplication: Kept {kept} unique, Deleted {deleted} duplicates\n")
    return kept, deleted
def has_face(img_path, face_cascade):
    """Report whether *face_cascade* finds at least one face in the image.

    The image is read from *img_path*, converted to grayscale and passed to
    the cascade's ``detectMultiScale``. Returns ``False`` when the image
    cannot be loaded.
    """
    frame = cv2.imread(img_path)
    if frame is None:
        return False

    detections = face_cascade.detectMultiScale(
        cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY),
        scaleFactor=1.1,
        minNeighbors=5,
        minSize=(60, 60),
    )
    return len(detections) > 0
def has_text_or_ui(img_path, *, edge_threshold_pct=2.0, canny_low=50,
                   canny_high=150):
    """Estimate whether an image contains text or UI from its edge density.

    The image is converted to grayscale and run through Canny edge
    detection; the share of edge pixels is then compared with a threshold.
    The heuristic is that text and UI elements produce many strong edges.

    Args:
        img_path: Path of the image to inspect.
        edge_threshold_pct: Edge-pixel percentage above which the image is
            considered to contain text/UI. Defaults to 2.0.
        canny_low: First threshold passed to ``cv2.Canny``. Defaults to 50.
        canny_high: Second threshold passed to ``cv2.Canny``. Defaults to 150.

    Returns:
        A ``(has_content, edge_percentage)`` tuple. ``(False, 0.0)`` is
        returned when the image cannot be loaded.
    """
    img = cv2.imread(img_path)
    if img is None:
        return False, 0.0

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, canny_low, canny_high)

    # Percentage of pixels the detector marked as edges.
    edge_percentage = (np.count_nonzero(edges) / edges.size) * 100
    return edge_percentage > edge_threshold_pct, edge_percentage
# Main processing function
def main():
    """Deduplicate a frame directory, then delete frames showing only a person.

    Usage: ``filter_non_person_frames.py <images_dir>``

    Steps:
      1. Validate the directory argument and load the Haar face cascade.
         Both happen before any file is deleted, so a bad setup cannot leave
         the directory half-processed.
      2. Remove duplicate frames via ``remove_duplicates``.
      3. For each remaining ``.jpg``/``.png``, delete it when a face is
         detected but the edge-density check finds no text/UI; keep it
         otherwise.

    Exits with status 1 and a message on stderr when the argument is missing,
    is not a directory, or the cascade cannot be loaded.
    """
    # Validate command line arguments. sys.exit(<str>) prints the message to
    # stderr and exits with status 1.
    if len(sys.argv) < 2:
        sys.exit("Usage: filter_non_person_frames.py <images_dir>")
    images_dir = sys.argv[1]
    if not os.path.isdir(images_dir):
        sys.exit(f"Not a directory: {images_dir}")

    # Load the Haar cascade classifier before touching any files.
    cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
    face_cascade = cv2.CascadeClassifier(cascade_path)
    if face_cascade.empty():
        sys.exit("Failed to load Haar cascade for face detection.")

    # Step 1: Remove duplicates first
    remove_duplicates(images_dir)

    # Step 2: Filter person-only images
    print("=== Filtering person-only frames ===")
    kept, deleted = 0, 0
    for name in sorted(os.listdir(images_dir)):
        # Skip non-image files
        if not name.lower().endswith(('.jpg', '.png')):
            continue
        path = os.path.join(images_dir, name)
        try:
            has_person = has_face(path, face_cascade)
            has_content, edge_pct = has_text_or_ui(path)
            # Keep: app screenshots, text, or text+person
            # Delete: person-only (face detected but no text/UI)
            if has_person and not has_content:
                os.remove(path)
                deleted += 1
                print(f"❌ Deleted (person only): {name} [face=yes, edges={edge_pct:.2f}%]")
            else:
                kept += 1
                face_flag = "yes" if has_person else "no"
                print(f"✓ Kept: {name} [face={face_flag}, edges={edge_pct:.2f}%]")
        except Exception as e:
            # Per-file boundary of a batch job: report the failure and move
            # on to the next image instead of aborting the whole run.
            print(f"Error processing {name}: {e}")

    print("\n=== Summary ===")
    print(f"Person-only filter: Kept {kept}, Deleted {deleted}")


# execute main function
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment