Last active
August 25, 2025 14:10
-
-
Save playday3008/c05899baeaf9c613dca89e33c44b850d to your computer and use it in GitHub Desktop.
Google Takeout postprocess
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from dataclasses import dataclass | |
| from typing import Optional, Any | |
| from datetime import datetime | |
| from pathlib import Path | |
| from glob import glob | |
| import os | |
| import json | |
| import re | |
| from pydantic import BaseModel | |
class NTime(BaseModel):
    """A Takeout timestamp pair: machine-parseable value plus display string."""

    timestamp: datetime  # parsed by pydantic from the sidecar's timestamp field
    formatted: str  # human-readable rendering as written by Takeout
class GeoData(BaseModel):
    """Geolocation block of a Takeout sidecar (coordinates and spans)."""

    latitude: float
    longitude: float
    altitude: float
    latitudeSpan: float
    longitudeSpan: float
class People(BaseModel):
    """A single person tag attached to a photo."""

    name: str
class DeviceFolder(BaseModel):
    """On-device folder a mobile upload originated from."""

    localFolderName: str
class MobileUpload(BaseModel):
    """Origin details for items uploaded from a mobile device."""

    deviceFolder: Optional[DeviceFolder] = None
    deviceType: Optional[str] = None
class WebUpload(BaseModel):
    """Origin details for items uploaded via the web UI.

    The ``computerUpload`` payload schema is not pinned here — kept as a
    free-form dict, matching whatever Takeout emits.
    """

    computerUpload: dict[str, Any]
class DriveDesktopUploader(BaseModel):
    """Origin details for items uploaded by the desktop Drive client."""

    version: str
class Composition(BaseModel):
    """Origin details for auto-generated compositions (e.g. collages)."""

    type: str
class GooglePhotosOrigin(BaseModel):
    """How the item entered Google Photos.

    All variants are optional; which one is populated depends on the upload
    path recorded by Takeout (presumably at most one per item — not enforced
    here).
    """

    driveSync: Optional[dict[str, Any]] = None
    mobileUpload: Optional[MobileUpload] = None
    webUpload: Optional[WebUpload] = None
    driveDesktopUploader: Optional[DriveDesktopUploader] = None
    composition: Optional[Composition] = None
class AppSource(BaseModel):
    """Android app that produced the media item."""

    androidPackageName: str
class GooglePhotosMetadata(BaseModel):
    """Top-level schema of a per-photo Takeout sidecar ``*.json`` file."""

    title: str  # original media file name; used to locate the media file on disk
    description: str
    imageViews: int
    creationTime: NTime  # when the item was added to Google Photos
    photoTakenTime: NTime  # capture time; applied to the media file's mtime/atime
    geoData: GeoData
    geoDataExif: Optional[GeoData] = None
    people: Optional[list[People]] = None
    archived: Optional[bool] = None
    favorited: Optional[bool] = None
    url: str
    googlePhotosOrigin: Optional[GooglePhotosOrigin] = None
    appSource: Optional[AppSource] = None
@dataclass
class Item:
    """A media file paired with its sidecar file and the parsed metadata."""

    # camelCase field names kept to mirror the Takeout JSON naming style.
    mediaFile: Path
    metadataFile: Path
    metadata: GooglePhotosMetadata
def _snapshot_media(base_path: Path) -> dict[Path, os.stat_result]:
    """Map every non-JSON file under *base_path* (recursive) to its stat result."""
    return {
        Path(p): Path(p).stat()
        for p in glob(os.path.join(base_path, "**/*.*"), recursive=True)
        if Path(p).suffix != ".json"
    }


def _load_metadata(metafile: Path) -> Optional[GooglePhotosMetadata]:
    """Parse a sidecar JSON file; return None (after logging) if it fails validation."""
    with open(metafile, "r", encoding="utf-8") as f:
        data: Any = json.load(f)
    try:
        return GooglePhotosMetadata.model_validate(data)
    except Exception as e:  # pydantic ValidationError; kept broad like the original
        print(f"Error parsing '{metafile}': {e}")
        return None


def google_photos(base_path: Path = Path(".")) -> None:
    """Post-process a Google Takeout "Google Photos" export in place.

    For each sidecar ``*.json`` metadata file under *base_path* this:
      * finds the matching media file (handling Takeout's file-name
        truncation, ``(N)`` duplicate markers and ``-edited`` variants),
      * sets the media file's atime/mtime to ``photoTakenTime``,
      * deletes the sidecar file,
      * renames the media file back to the original title when they differ.

    Media files whose stat did not change are reported as unprocessed.
    Exits the process (codes 1/2) when *base_path* is missing or not a
    directory.
    """
    # Sidecar files that describe albums/the whole library, not a single photo.
    blacklist_files: list[str] = [
        "metadata.json",  # metadata of each album
        "print-subscriptions.json",  # metadata of whole library
        "shared_album_comments.json",
        "user-generated-memory-titles.json",
    ]

    if not base_path.exists():
        print(f"Base path {base_path} does not exist")
        raise SystemExit(1)
    if not base_path.is_dir():
        print(f"Base path {base_path} is not a directory")
        raise SystemExit(2)
    print(f"Processing files in {base_path}")

    # Snapshot stats of all media now so untouched files can be detected later;
    # reuse the snapshot's keys instead of globbing the tree a second time.
    media_before: dict[Path, os.stat_result] = _snapshot_media(base_path)
    all_media: set[Path] = set(media_before)
    all_meta: set[Path] = {
        Path(p)
        for p in glob(os.path.join(base_path, "**/*.json"), recursive=True)
        if Path(p).name not in blacklist_files
    }
    print(f"Found {len(all_media)} media files and {len(all_meta)} metadata files")

    # Matches "<stem>[.supplemental-metadata][(N)].json" where the
    # ".supplemental-metadata" suffix may be truncated at any point
    # (Takeout cuts long file names); group(2) captures the "(N)" part.
    metadata_match: re.Pattern[str] = re.compile(
        r"^(.+?)(?:|\.(?:|s(?:|u(?:|p(?:|p(?:|l(?:|e(?:|m(?:|e(?:|n(?:|t(?:|a(?:|l(?:|-(?:|m(?:|e(?:|t(?:|a(?:|d(?:|a(?:|t(?:|a))))))))))))))))))))))(|\((?:|\d+?(?:|\))))\.json$"
    )

    max_len = 51  # Takeout truncates media file names to 51 chars (incl. extension)
    items: list[Item] = []
    for metafile in all_meta:
        meta_match: Optional[re.Match[str]] = metadata_match.match(metafile.name)
        if not meta_match:
            print(f"Could not match regex for '{metafile}'")
            continue
        meta: Optional[GooglePhotosMetadata] = _load_metadata(metafile)
        if meta is None:
            continue

        # Reconstruct the (possibly truncated) media file name from the title,
        # re-attaching any "(N)" duplicate marker captured from the sidecar.
        media_origin: Path = Path(meta.title)
        media: Path = metafile.parent / (
            media_origin.stem[0 : max_len - len(media_origin.suffix)]
            + meta_match.group(2)
            + media_origin.suffix
        )
        if not media.exists():
            # Fall back to a case-insensitive prefix search in the same folder.
            media_candidates: list[Path] = [
                p
                for p in metafile.parent.glob(
                    meta.title[0:max_len] + "*",
                    case_sensitive=False,  # requires Python 3.12+
                )
                if p.suffix != ".json"
            ]
            if len(media_candidates) == 0:
                print(f"No media file found for '{metafile}' with title '{meta.title}'")
                continue
            if len(media_candidates) > 1:
                print(
                    f"Multiple media files found for '{metafile}' with title '{meta.title}':"
                )
                for mc in media_candidates:
                    print(f" - {mc}")
                continue
            media = media_candidates[0]
        items.append(Item(mediaFile=media, metadataFile=metafile, metadata=meta))

        # An "-edited" variant shares this sidecar; track it as a second item.
        edited_file: Path = metafile.parent / (
            (media_origin.stem + "-edited")[0 : max_len - len(media_origin.suffix)]
            + meta_match.group(2)
            + media_origin.suffix
        )
        if edited_file.exists() and edited_file != media:
            items.append(
                Item(mediaFile=edited_file, metadataFile=metafile, metadata=meta)
            )
    print(f"Matched {len(items)} items")

    # Stamp each media file with photoTakenTime, then drop its sidecar.
    for item in items:
        new_time: datetime = item.metadata.photoTakenTime.timestamp
        mod_time: float = new_time.timestamp()
        os.utime(item.mediaFile, (mod_time, mod_time))
        try:
            os.remove(item.metadataFile)
        except OSError:
            # Best-effort: a sidecar shared by an "-edited" variant was
            # already removed when its first item was processed.
            pass

    # Report files whose stats are unchanged, i.e. never touched above.
    media_after: dict[Path, os.stat_result] = _snapshot_media(base_path)
    for file, stats in media_before.items():
        if media_after.get(file) == stats:
            print(f"Unprocessed file: '{file}'")

    # Rename media files back to the metadata title, preserving any
    # "-edited" (and "(N)") suffix that Takeout appended to the stem.
    edited_match = re.compile(r"^(.+?)((?:-edited)(?:\(\d+?\))?)$")
    for item in items:
        if item.mediaFile.name == item.metadata.title:
            continue
        new_media: Path = item.mediaFile.parent / item.metadata.title
        edited: Optional[re.Match[str]] = edited_match.match(item.mediaFile.stem)
        if edited:
            title = Path(item.metadata.title)
            new_media = item.mediaFile.parent / (
                title.stem + edited.group(2) + title.suffix
            )
        if new_media == item.mediaFile:
            continue
        if new_media.exists():
            print(
                f"Cannot rename '{item.mediaFile}' to '{new_media}' as it already exists"
            )
        elif len(Path(item.mediaFile.name).suffix) != 0:
            item.mediaFile.rename(new_media)
            item.mediaFile = new_media
        else:
            print(
                f"Cannot rename '{item.mediaFile}' to '{new_media}' as it has no suffix"
            )
if __name__ == "__main__":
    # Entry point: process the default Takeout layout relative to the CWD.
    google_photos(Path("./Takeout/Google Photos/"))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment