Skip to content

Instantly share code, notes, and snippets.

@playday3008
Last active August 25, 2025 14:10
Show Gist options
  • Select an option

  • Save playday3008/c05899baeaf9c613dca89e33c44b850d to your computer and use it in GitHub Desktop.

Select an option

Save playday3008/c05899baeaf9c613dca89e33c44b850d to your computer and use it in GitHub Desktop.
Google Takeout postprocess
from dataclasses import dataclass
from typing import Optional, Any
from datetime import datetime
from pathlib import Path
from glob import glob
import os
import json
import re
from pydantic import BaseModel
class NTime(BaseModel):
    """A time entry from the metadata JSON: a parsed value plus its text form."""

    # Validated/parsed into a ``datetime`` by pydantic.
    timestamp: datetime
    # Textual rendering stored next to the timestamp; kept as a plain string.
    formatted: str
class GeoData(BaseModel):
    """Geographic position block of a metadata file; every field is a required float."""

    # Position.
    latitude: float
    longitude: float
    altitude: float
    # Spans accompanying the position (units not shown here — TODO confirm).
    latitudeSpan: float
    longitudeSpan: float
class People(BaseModel):
    """One entry of the ``people`` list in a metadata file."""

    # The only field modelled for a person entry.
    name: str
class DeviceFolder(BaseModel):
    """The ``deviceFolder`` object nested inside a mobile-upload origin."""

    # Folder name as recorded in the metadata (presumably on the uploading device).
    localFolderName: str
class MobileUpload(BaseModel):
    """The ``mobileUpload`` origin object; both of its fields may be absent."""

    deviceFolder: DeviceFolder | None = None
    deviceType: str | None = None
class WebUpload(BaseModel):
    """The ``webUpload`` origin object."""

    # Required, but its inner structure is not modelled: any string-keyed dict.
    computerUpload: dict[str, Any]
class DriveDesktopUploader(BaseModel):
    """The ``driveDesktopUploader`` origin object."""

    # Version string carried by the origin object.
    version: str
class Composition(BaseModel):
    """The ``composition`` origin object."""

    # Free-form type label; the set of possible values is not constrained here.
    type: str
class GooglePhotosOrigin(BaseModel):
    """The ``googlePhotosOrigin`` object: a bag of optional origin descriptors.

    Every field defaults to ``None``, so any subset may be present in the JSON.
    """

    # Unmodelled payload: accepted as an arbitrary string-keyed dict.
    driveSync: dict[str, Any] | None = None
    mobileUpload: MobileUpload | None = None
    webUpload: WebUpload | None = None
    driveDesktopUploader: DriveDesktopUploader | None = None
    composition: Composition | None = None
class AppSource(BaseModel):
    """The ``appSource`` object of a metadata file."""

    # Android package name recorded for the item.
    androidPackageName: str
class GooglePhotosMetadata(BaseModel):
    """Schema of a per-media metadata ``.json`` file.

    ``google_photos`` validates each metadata file against this model and then
    uses ``title`` (to locate/rename the media file) and ``photoTakenTime``
    (to set the media file's timestamps).
    """

    # --- required fields ---------------------------------------------------
    # Media file name recorded in the metadata.
    title: str
    description: str
    imageViews: int
    creationTime: NTime
    # Source of the timestamp applied to the media file.
    photoTakenTime: NTime
    geoData: GeoData

    # --- optional fields (default to ``None`` when missing) ----------------
    geoDataExif: GeoData | None = None
    people: list[People] | None = None
    archived: bool | None = None
    favorited: bool | None = None

    # Required even though it sits between optional fields in the schema.
    url: str

    googlePhotosOrigin: GooglePhotosOrigin | None = None
    appSource: AppSource | None = None
@dataclass
class Item:
    """A media file paired with the metadata file (and parsed content) describing it."""

    # Path of the photo/video on disk; reassigned by ``google_photos`` after a rename.
    mediaFile: Path
    # Path of the ``.json`` file the metadata was loaded from.
    metadataFile: Path
    # Validated content of ``metadataFile``.
    metadata: GooglePhotosMetadata
def _snapshot_media(base_path: Path) -> dict[Path, os.stat_result]:
    """Return ``{path: stat}`` for every non-JSON ``*.*`` entry below *base_path*."""
    return {
        Path(p): Path(p).stat()
        for p in glob(os.path.join(base_path, "**/*.*"), recursive=True)
        if Path(p).suffix != ".json"
    }


def google_photos(base_path: Path = Path(".")) -> None:
    """Apply Takeout metadata to the media files below *base_path*.

    Steps, in order:

    1. Pair every metadata ``.json`` (except library/album level files) with
       the media file it describes, plus an ``-edited`` sibling if one exists.
    2. Set access/modification time of each paired media file to the
       metadata's ``photoTakenTime`` and delete the metadata file.
    3. Report media files whose ``stat`` did not change (i.e. not processed).
    4. Rename media files whose name differs from the metadata ``title``.

    Progress and problems are reported with ``print``. A problem with a single
    file (unreadable/invalid JSON, no matching media, failed timestamp update,
    failed deletion) is reported and skipped; it does not abort the run.

    Args:
        base_path: Directory to scan recursively.

    Raises:
        SystemExit: With code 1 if *base_path* does not exist, code 2 if it is
            not a directory.
    """
    # Function-scope import: the module only imports ``glob.glob`` by name.
    from glob import escape as glob_escape

    blacklist_files = frozenset(
        {
            # Metadata of each album
            "metadata.json",
            # Metadatas of whole Google Photos library
            "print-subscriptions.json",
            "shared_album_comments.json",
            "user-generated-memory-titles.json",
        }
    )
    # Length budget used to rebuild (possibly truncated) media file names.
    # NOTE(review): 51 is an empirical value taken over from the original code.
    max_len = 51

    # Check base path. ``raise SystemExit`` instead of ``exit()``: the latter
    # is a helper injected by ``site`` and is not guaranteed to exist.
    if not base_path.exists():
        print(f"Base path {base_path} does not exist")
        raise SystemExit(1)
    if not base_path.is_dir():
        print(f"Base path {base_path} is not a directory")
        raise SystemExit(2)
    print(f"Processing files in {base_path}")

    # Stat snapshot of all non-JSON files (for the later "unprocessed" check).
    media_before = _snapshot_media(base_path)
    # Sorted so that processing order and log output are deterministic.
    all_meta: list[Path] = sorted(
        Path(p)
        for p in glob(os.path.join(base_path, "**/*.json"), recursive=True)
        if Path(p).name not in blacklist_files
    )
    print(f"Found {len(media_before)} media files and {len(all_meta)} metadata files")

    # Group 1: media name part. Then any truncation of ".supplemental-metadata".
    # Group 2: optional duplicate counter "(N)", which may itself be truncated.
    metadata_match: re.Pattern[str] = re.compile(
        r"^(.+?)(?:|\.(?:|s(?:|u(?:|p(?:|p(?:|l(?:|e(?:|m(?:|e(?:|n(?:|t(?:|a(?:|l(?:|-(?:|m(?:|e(?:|t(?:|a(?:|d(?:|a(?:|t(?:|a))))))))))))))))))))))(|\((?:|\d+?(?:|\))))\.json$"
    )

    # Try to match media files to metadata files
    items: list[Item] = []
    for metafile in all_meta:
        meta_match = metadata_match.match(metafile.name)
        if not meta_match:
            print(f"Could not match regex for '{metafile}'")
            continue

        # Read the JSON as UTF-8 regardless of the platform's locale encoding;
        # an unreadable or malformed file is reported and skipped.
        try:
            with open(metafile, encoding="utf-8") as f:
                data: Any = json.load(f)
        except (OSError, ValueError) as e:
            print(f"Error parsing '{metafile}': {e}")
            continue
        # Broad on purpose: the validation error type is not imported by this
        # module, and one bad file must not stop the run.
        try:
            meta = GooglePhotosMetadata.model_validate(data)
        except Exception as e:
            print(f"Error parsing '{metafile}': {e}")
            continue

        # Rebuild the expected media name: truncated stem + counter + suffix.
        media_origin = Path(meta.title)
        stem_budget = max_len - len(media_origin.suffix)
        counter = meta_match.group(2)
        media = metafile.parent / (
            media_origin.stem[0:stem_budget] + counter + media_origin.suffix
        )
        if not media.exists():
            # Fall back to a case-insensitive prefix search. The title is
            # escaped so that "[", "]", "*" and "?" in it are matched
            # literally instead of being interpreted as glob syntax.
            media_candidates = [
                p
                for p in metafile.parent.glob(
                    glob_escape(meta.title[0:max_len]) + "*",
                    case_sensitive=False,
                )
                if p.suffix != ".json"
            ]
            if not media_candidates:
                print(f"No media file found for '{metafile}' with title '{meta.title}'")
                continue
            if len(media_candidates) > 1:
                print(
                    f"Multiple media files found for '{metafile}' with title '{meta.title}':"
                )
                for mc in media_candidates:
                    print(f" - {mc}")
                continue
            media = media_candidates[0]
        items.append(Item(mediaFile=media, metadataFile=metafile, metadata=meta))

        # Handle `-edited` suffix: an edited copy shares the same metadata.
        edited_file = metafile.parent / (
            (media_origin.stem + "-edited")[0:stem_budget]
            + counter
            + media_origin.suffix
        )
        if edited_file.exists() and edited_file != media:
            items.append(
                Item(mediaFile=edited_file, metadataFile=metafile, metadata=meta)
            )
    print(f"Matched {len(items)} items")

    # Set access/modification time of each media file to photoTakenTime.
    for item in items:
        mod_time = item.metadata.photoTakenTime.timestamp.timestamp()
        try:
            os.utime(item.mediaFile, (mod_time, mod_time))
        except OSError as e:
            # Keep the metadata file so the item can be retried later.
            print(f"Could not set time of '{item.mediaFile}': {e}")
            continue
        # Delete metadata file after processing. It may already be gone when
        # an original and its `-edited` copy share it, hence `missing_ok`.
        try:
            item.metadataFile.unlink(missing_ok=True)
        except OSError as e:
            print(f"Could not delete '{item.metadataFile}': {e}")

    # Find non JSON files with unchanged stat (i.e. not processed)
    media_after = _snapshot_media(base_path)
    for file, stats in media_before.items():
        if media_after.get(file) == stats:
            print(f"Unprocessed file: '{file}'")

    # Rename media files to match metadata title (if different)
    edited_match = re.compile(r"^(.+?)((?:-edited)(?:\(\d+?\))?)$")
    for item in items:
        if item.mediaFile.name == item.metadata.title:
            continue
        new_media = item.mediaFile.parent / item.metadata.title
        edited = edited_match.match(item.mediaFile.stem)
        if edited:
            # Keep the "-edited" / "-edited(N)" marker in the new name.
            title = Path(item.metadata.title)
            new_media = item.mediaFile.parent / (
                title.stem + edited.group(2) + title.suffix
            )
            if new_media == item.mediaFile:
                continue
        if new_media.exists():
            print(
                f"Cannot rename '{item.mediaFile}' to '{new_media}' as it already exists"
            )
        elif item.mediaFile.suffix:
            item.mediaFile.rename(new_media)
            item.mediaFile = new_media
        else:
            print(
                f"Cannot rename '{item.mediaFile}' to '{new_media}' as it has no suffix"
            )
if __name__ == "__main__":
    # Script entry point: process the Google Photos folder of a Takeout
    # export located relative to the current working directory.
    google_photos(Path("./Takeout/Google Photos/"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment