Skip to content

Instantly share code, notes, and snippets.

@ronnycoding
Last active May 25, 2025 21:12
Show Gist options
  • Select an option

  • Save ronnycoding/725146cba0761a179b8a0d3c2f49f75c to your computer and use it in GitHub Desktop.

Select an option

Save ronnycoding/725146cba0761a179b8a0d3c2f49f75c to your computer and use it in GitHub Desktop.
πŸš€ Google Drive Backup Script πŸ—‚οΈβœ¨
#!/usr/bin/env python3
"""
Google Drive Backup Script
Downloads and backs up files from a shared Google Drive or My Drive folder to local storage.
"""
import os
import io
import json
from pathlib import Path
from datetime import datetime
import logging
from typing import List, Dict, Optional
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
from googleapiclient.errors import HttpError
# Google Drive API OAuth scopes: read-only Drive access is all a backup needs.
SCOPES = ['https://www.googleapis.com/auth/drive.readonly']
class GoogleDriveBackup:
    """Recursively back up a Google Drive folder (My Drive or Shared Drive) to local storage."""

    def __init__(self, backup_path: str, credentials_file: str = 'credentials.json'):
        """
        Initialize the Google Drive backup tool.

        Args:
            backup_path: Path to the external hard drive or backup location
            credentials_file: Path to the Google API credentials JSON file
        """
        self.backup_path = Path(backup_path)
        self.credentials_file = credentials_file
        self.service = None  # Drive API client; populated by authenticate()
        # Running counters reported by print_stats() at the end of a run.
        self.stats = {
            'files_downloaded': 0,
            'folders_created': 0,
            'skipped_files': 0,
            'errors': 0,
            'total_size': 0
        }
        # Setup logging
        self.setup_logging()
        # Ensure backup directory exists
        self.backup_path.mkdir(parents=True, exist_ok=True)

    def setup_logging(self):
        """Setup logging configuration (file + console)."""
        log_format = '%(asctime)s - %(levelname)s - %(message)s'
        logging.basicConfig(
            level=logging.INFO,
            format=log_format,
            handlers=[
                logging.FileHandler('gdrive_backup.log'),
                logging.StreamHandler()
            ]
        )
        self.logger = logging.getLogger(__name__)

    def authenticate(self):
        """
        Authenticate with Google Drive API via OAuth 2.0.

        Loads a cached token from token.json if present; otherwise refreshes
        an expired token or runs the local-server OAuth flow, then caches the
        resulting credentials for the next run.

        Raises:
            FileNotFoundError: If the client-secrets file is missing and an
                interactive OAuth flow is required.
        """
        creds = None
        token_file = 'token.json'
        # Load existing token if available
        if os.path.exists(token_file):
            creds = Credentials.from_authorized_user_file(token_file, SCOPES)
        # If no valid credentials, refresh or run the OAuth flow
        if not creds or not creds.valid:
            if creds and creds.expired and creds.refresh_token:
                creds.refresh(Request())
            else:
                if not os.path.exists(self.credentials_file):
                    raise FileNotFoundError(
                        f"Credentials file '{self.credentials_file}' not found. "
                        "Please download it from Google Cloud Console."
                    )
                flow = InstalledAppFlow.from_client_secrets_file(
                    self.credentials_file, SCOPES
                )
                creds = flow.run_local_server(port=0)
            # Save credentials for next run
            with open(token_file, 'w') as token:
                token.write(creds.to_json())
        # Build the service
        self.service = build('drive', 'v3', credentials=creds)
        self.logger.info("Successfully authenticated with Google Drive API")

    def get_folder_id_from_url(self, folder_url: str) -> str:
        """
        Extract folder ID from a Google Drive folder URL.

        Args:
            folder_url: Google Drive folder URL (".../folders/<id>" or "?id=<id>"
                style), or a bare folder ID.

        Returns:
            Folder ID string
        """
        if '/folders/' in folder_url:
            return folder_url.split('/folders/')[-1].split('?')[0]
        elif 'id=' in folder_url:
            return folder_url.split('id=')[-1].split('&')[0]
        else:
            # Assume the input is already a folder ID
            return folder_url

    def list_files_in_folder(self, folder_id: str) -> List[Dict]:
        """
        List all files and folders in a Google Drive folder, including items in
        Shared Drives, and handle pagination.

        Args:
            folder_id: Google Drive folder ID

        Returns:
            List of file/folder metadata dictionaries (id, name, mimeType,
            size, modifiedTime). Returns whatever was collected before the
            first error, if any.
        """
        items: List[Dict] = []
        page_token: Optional[str] = None
        try:
            while True:
                response = self.service.files().list(
                    q=f"'{folder_id}' in parents and trashed=false",
                    fields="nextPageToken, files(id, name, mimeType, size, modifiedTime)",
                    supportsAllDrives=True,
                    includeItemsFromAllDrives=True,
                    pageSize=1000,
                    pageToken=page_token,
                ).execute()
                items.extend(response.get("files", []))
                page_token = response.get("nextPageToken")
                if not page_token:
                    break
        except HttpError as error:
            self.logger.error(f"Error listing files in folder {folder_id}: {error}")
            self.stats["errors"] += 1
        return items

    def download_file(self, file_id: str, file_name: str, local_path: Path) -> bool:
        """
        Download a file from Google Drive.

        Fetches the remote metadata once (size for the skip check, mimeType
        for dispatch), skips files that already exist locally with a matching
        size, then delegates to the regular-download or Workspace-export path.

        Args:
            file_id: Google Drive file ID
            file_name: Name of the file
            local_path: Local path where file should be saved

        Returns:
            True if download successful (or skipped as up to date), False otherwise
        """
        try:
            # Single metadata round-trip; the previous version fetched
            # metadata twice (once for size, once for mimeType).
            file_metadata = self.service.files().get(
                fileId=file_id, fields='mimeType,size', supportsAllDrives=True
            ).execute()
            mime_type = file_metadata.get('mimeType', '')

            # Skip re-downloading identical files. Google Workspace files
            # report no 'size', so they are always re-exported.
            if local_path.exists() and 'google-apps' not in mime_type:
                try:
                    remote_size = int(file_metadata.get('size', 0))
                    local_size = local_path.stat().st_size
                    if local_size == remote_size:
                        self.logger.info(f"File already exists with same size, skipping: {file_name}")
                        self.stats['skipped_files'] += 1
                        return True
                    self.logger.info(f"File exists but size differs ({local_size} vs {remote_size}), re-downloading: {file_name}")
                except (OSError, ValueError) as size_error:
                    # Can't compare sizes -> re-download to be safe.
                    # (Was a bare `except:` that swallowed everything.)
                    self.logger.warning(f"Could not compare sizes for {file_name}: {size_error}")

            # Handle Google Workspace files (Docs, Sheets, etc.) via export.
            if 'google-apps' in mime_type:
                return self.download_google_workspace_file(file_id, file_name, local_path, mime_type)
            return self.download_regular_file(file_id, file_name, local_path)
        except HttpError as error:
            self.logger.error(f"Error downloading file {file_name}: {error}")
            self.stats['errors'] += 1
            return False

    def download_regular_file(self, file_id: str, file_name: str, local_path: Path) -> bool:
        """
        Download a regular (non-Google-Workspace) file.

        Files over 50 MB stream straight to disk; smaller ones buffer in memory.

        Returns:
            True on success, False otherwise.
        """
        try:
            # Get file size for progress tracking and strategy selection
            file_metadata = self.service.files().get(fileId=file_id, fields='size', supportsAllDrives=True).execute()
            file_size = int(file_metadata.get('size', 0))
            request = self.service.files().get_media(fileId=file_id)
            # For large files (like MP4), write directly to disk instead of memory
            if file_size > 50 * 1024 * 1024:  # 50MB threshold
                return self.download_large_file(request, file_name, local_path, file_size)
            else:
                return self.download_small_file(request, file_name, local_path, file_size)
        except HttpError as error:
            self.logger.error(f"Error downloading regular file {file_name}: {error}")
            self.stats['errors'] += 1
            return False

    def download_large_file(self, request, file_name: str, local_path: Path, file_size: int) -> bool:
        """
        Download large files directly to disk with progress tracking.

        A partially written file is removed on failure so a later run
        re-downloads it rather than skipping on a bogus size match.
        """
        try:
            with open(local_path, 'wb') as f:
                downloader = MediaIoBaseDownload(f, request, chunksize=10 * 1024 * 1024)  # 10MB chunks
                done = False
                while not done:
                    status, done = downloader.next_chunk()
                    if status:
                        progress = int(status.progress() * 100)
                        downloaded_mb = (file_size * status.progress()) / (1024 * 1024)
                        total_mb = file_size / (1024 * 1024)
                        self.logger.info(f"Downloading {file_name}: {progress}% ({downloaded_mb:.1f}/{total_mb:.1f} MB)")
            actual_size = local_path.stat().st_size
            self.stats['total_size'] += actual_size
            self.stats['files_downloaded'] += 1
            self.logger.info(f"Downloaded: {file_name} ({actual_size / (1024*1024):.2f} MB)")
            return True
        except Exception as error:
            self.logger.error(f"Error downloading large file {file_name}: {error}")
            if local_path.exists():
                local_path.unlink()  # Remove partial file
            return False

    def download_small_file(self, request, file_name: str, local_path: Path, file_size: int) -> bool:
        """Download small files to memory then write to disk."""
        try:
            file_io = io.BytesIO()
            downloader = MediaIoBaseDownload(file_io, request)
            done = False
            while not done:
                status, done = downloader.next_chunk()
                if status and file_size > 5 * 1024 * 1024:  # Show progress for files > 5MB
                    progress = int(status.progress() * 100)
                    self.logger.info(f"Downloading {file_name}: {progress}%")
            # Write to file
            with open(local_path, 'wb') as f:
                f.write(file_io.getvalue())
            actual_size = local_path.stat().st_size
            self.stats['total_size'] += actual_size
            self.stats['files_downloaded'] += 1
            self.logger.info(f"Downloaded: {file_name} ({actual_size / (1024*1024):.2f} MB)")
            return True
        except Exception as error:
            self.logger.error(f"Error downloading small file {file_name}: {error}")
            return False

    def download_google_workspace_file(self, file_id: str, file_name: str, local_path: Path, mime_type: str) -> bool:
        """
        Download a Google Workspace file by exporting it to an Office/PNG format.

        Docs -> .docx, Sheets -> .xlsx, Slides -> .pptx, Drawings -> .png.
        Unsupported Workspace types are counted as skipped.
        """
        export_formats = {
            'application/vnd.google-apps.document': ('application/vnd.openxmlformats-officedocument.wordprocessingml.document', '.docx'),
            'application/vnd.google-apps.spreadsheet': ('application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', '.xlsx'),
            'application/vnd.google-apps.presentation': ('application/vnd.openxmlformats-officedocument.presentationml.presentation', '.pptx'),
            'application/vnd.google-apps.drawing': ('image/png', '.png'),
        }
        if mime_type not in export_formats:
            self.logger.warning(f"Unsupported Google Workspace file type: {mime_type} for {file_name}")
            self.stats['skipped_files'] += 1
            return False
        export_mime_type, extension = export_formats[mime_type]
        # Add appropriate extension if not present
        if not local_path.suffix:
            local_path = local_path.with_suffix(extension)
        try:
            # BUGFIX: files().export_media takes only fileId and mimeType;
            # passing supportsAllDrives=True (as before) raises TypeError in
            # the discovery-based client, breaking every Workspace export.
            request = self.service.files().export_media(
                fileId=file_id,
                mimeType=export_mime_type,
            )
            file_io = io.BytesIO()
            downloader = MediaIoBaseDownload(file_io, request)
            done = False
            while not done:
                status, done = downloader.next_chunk()
            # Write to file
            with open(local_path, 'wb') as f:
                f.write(file_io.getvalue())
            file_size = local_path.stat().st_size
            self.stats['total_size'] += file_size
            self.stats['files_downloaded'] += 1
            self.logger.info(f"Downloaded (exported): {file_name} as {local_path.name} ({file_size} bytes)")
            return True
        except HttpError as error:
            self.logger.error(f"Error downloading Google Workspace file {file_name}: {error}")
            self.stats['errors'] += 1
            return False

    def backup_folder(self, folder_id: str, local_folder_path: Path, folder_name: str = ""):
        """
        Recursively backup a folder and all its contents.

        Note: 'folders_created' counts every folder visited, whether or not
        it already existed locally.

        Args:
            folder_id: Google Drive folder ID
            local_folder_path: Local path where folder should be backed up
            folder_name: Name of the folder (for logging)
        """
        # Create local folder if it doesn't exist
        local_folder_path.mkdir(parents=True, exist_ok=True)
        self.stats['folders_created'] += 1
        self.logger.info(f"Backing up folder: {folder_name or 'Root'} -> {local_folder_path}")
        # Get all files and folders in this directory
        items = self.list_files_in_folder(folder_id)
        for item in items:
            item_name = item['name']
            item_id = item['id']
            item_type = item['mimeType']
            # Clean filename for file system compatibility
            safe_name = self.sanitize_filename(item_name)
            local_item_path = local_folder_path / safe_name
            if item_type == 'application/vnd.google-apps.folder':
                # It's a folder - recurse into it
                self.backup_folder(item_id, local_item_path, item_name)
            else:
                # It's a file - download it
                self.download_file(item_id, item_name, local_item_path)

    def sanitize_filename(self, filename: str) -> str:
        """
        Sanitize filename for file system compatibility.

        Replaces characters invalid on common filesystems with '_', trims
        leading/trailing dots and spaces, and never returns an empty name.

        Args:
            filename: Original filename

        Returns:
            Sanitized filename
        """
        # Replace problematic characters
        invalid_chars = '<>:"/\\|?*'
        for char in invalid_chars:
            filename = filename.replace(char, '_')
        # Trim whitespace and dots from ends
        filename = filename.strip('. ')
        # Ensure filename isn't empty
        if not filename:
            filename = 'unnamed_file'
        return filename

    def print_stats(self):
        """Print backup statistics."""
        print("\n" + "=" * 50)
        print("BACKUP COMPLETED")
        print("=" * 50)
        print(f"Files downloaded: {self.stats['files_downloaded']}")
        print(f"Folders created: {self.stats['folders_created']}")
        print(f"Files skipped: {self.stats['skipped_files']}")
        print(f"Errors encountered: {self.stats['errors']}")
        print(f"Total data downloaded: {self.format_file_size(self.stats['total_size'])}")
        print("=" * 50)

    def format_file_size(self, size_bytes: int) -> str:
        """Format file size in human readable format (B / KB / MB / GB)."""
        if size_bytes < 1024:
            return f"{size_bytes} B"
        elif size_bytes < 1024**2:
            return f"{size_bytes/1024:.1f} KB"
        elif size_bytes < 1024**3:
            return f"{size_bytes/(1024**2):.1f} MB"
        else:
            return f"{size_bytes/(1024**3):.2f} GB"

    def run_backup(self, folder_url_or_id: str, backup_folder_name: Optional[str] = None):
        """
        Run the complete backup process.

        Args:
            folder_url_or_id: Google Drive folder URL or ID
            backup_folder_name: Fallback name for the backup folder if the
                remote folder's name cannot be fetched (optional)

        Raises:
            Exception: Re-raises whatever aborted the backup, after logging.
        """
        try:
            self.logger.info("Starting Google Drive backup process...")
            # Authenticate with Google Drive
            self.authenticate()
            # Extract folder ID from URL
            folder_id = self.get_folder_id_from_url(folder_url_or_id)
            # Get folder information
            try:
                folder_info = self.service.files().get(fileId=folder_id, supportsAllDrives=True).execute()
                folder_name = folder_info['name']
            except HttpError:
                folder_name = backup_folder_name or "SharedFolder"
            # Create backup subfolder with timestamp
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            backup_subfolder = self.backup_path / f"{folder_name}_{timestamp}"
            self.logger.info(f"Backing up '{folder_name}' to {backup_subfolder}")
            # Start the backup process
            self.backup_folder(folder_id, backup_subfolder, folder_name)
            # Print final statistics
            self.print_stats()
            self.logger.info("Backup process completed successfully!")
        except Exception as e:
            self.logger.error(f"Backup process failed: {str(e)}")
            raise
def main():
    """Entry point: read configuration, build the backup tool, and run it."""
    # Configuration - MODIFY THESE VALUES
    BACKUP_PATH = "/Volumes/EXTERNAL_HARDRIVE/courses"  # Change this to your external drive path
    CREDENTIALS_FILE = "google_credentials.json"  # Path to your Google API credentials file
    FOLDER_URL = "https://drive.google.com/drive/folders/YOUR_FOLDER_ID"  # Google Drive folder URL

    # Environment variables, when set, override the hard-coded defaults.
    dest = os.getenv('BACKUP_PATH', BACKUP_PATH)
    src_url = os.getenv('FOLDER_URL', FOLDER_URL)
    # Resolve the credentials file relative to this script's own directory.
    creds_path = os.path.join(os.path.dirname(__file__), CREDENTIALS_FILE)

    try:
        tool = GoogleDriveBackup(dest, creds_path)
        tool.run_backup(src_url)
    except KeyboardInterrupt:
        print("\nBackup interrupted by user.")
    except Exception as e:
        print(f"Error: {str(e)}")
        print("Please check the log file 'gdrive_backup.log' for more details.")


if __name__ == "__main__":
    main()
@ronnycoding
Copy link
Copy Markdown
Author

ronnycoding commented May 25, 2025

πŸš€ Google Drive Backup Script πŸ—‚οΈβœ¨

Need a digital vacuum cleaner for your Drive clutter? πŸ§ΉπŸ’Ύ
This CLI sidekick slurps an entire folder (My Drive or Shared Drive) onto your diskβ€”perfect for weekend-warrior backups or paranoid cron-jobs. πŸ€–πŸ”

Why it’s awesome 😎
β€’ πŸ” OAuth 2.0 wizardry (token lives in token.json) β€” scope: drive.readonly
β€’ πŸͺ† Inception-style recursion: dives into sub-folders and Shared Drives (supportsAllDrives=True)
β€’ πŸ‹οΈ Heavy-lifter: streams chunky files (> 50 MB) & exports Google Docs/Sheets/Slides/Doodles to DOCX/XLSX/PPTX/PNG πŸ•
β€’ πŸ”„ Idempotent AF: skips stuff already downloaded (size-match) 🚦
β€’ πŸ“Š Verbose logger: all drama in gdrive_backup.log
β€’ πŸͺ„ Usage: set BACKUP_PATH, google_credentials.json, FOLDER_URL β†’ python3 google_drive_backup.py 🎩

Ideal for hoarding MP4s, memes, and β€œtotally-not-sensitive” spreadsheets on your trusty external drive. πŸ•΅οΈβ€β™‚οΈπŸ’Ώ

Key Features:

Recursive Download: Downloads all files and subfolders from a shared Google Drive folder
File Type Support: Handles regular files and Google Workspace files (Docs, Sheets, Slides)
Authentication: Uses OAuth2 for secure Google Drive API access
Progress Tracking: Logs download progress and provides statistics
Error Handling: Robust error handling and logging
File Safety: Sanitizes filenames for file system compatibility
Duplicate Prevention: Skips files that already exist locally

Setup Instructions:

  1. Install Required Packages:
    pip install google-auth google-auth-oauthlib google-auth-httplib2 google-api-python-client
  2. Get Google Drive API Credentials:

Go to Google Cloud Console
Create a new project or select existing one
Enable the Google Drive API
Go to "Credentials" β†’ "Create Credentials" β†’ "OAuth client ID"
Choose "Desktop application"
Download the JSON file and save it as google_credentials.json (the filename the script's main() expects)

  3. Configure the Script:
    Modify these variables in the main() function:

BACKUP_PATH: Path to your external hard drive
CREDENTIALS_FILE: Path to your credentials.json file
FOLDER_URL: Google Drive folder URL you want to backup

  4. Run the Script:
    python gdrive_backup.py
    Usage Examples:
    You can also use environment variables:
    export BACKUP_PATH="/media/external_drive/backups"
    export FOLDER_URL="https://drive.google.com/drive/folders/1abc123def456..."
    python gdrive_backup.py
    Features:

Smart File Handling: Exports Google Docs as .docx, Sheets as .xlsx, etc.
Logging: Creates detailed logs in gdrive_backup.log
Statistics: Shows download progress and final summary
Timestamped Backups: Each backup gets a unique timestamped folder
Resume Capability: Skips already downloaded files on subsequent runs

The script will authenticate with Google (opening a browser window the first time), then systematically download all files and folders while maintaining the original folder structure on your external drive.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment