Last active
May 25, 2025 21:12
-
-
Save ronnycoding/725146cba0761a179b8a0d3c2f49f75c to your computer and use it in GitHub Desktop.
π Google Drive Backup Script ποΈβ¨
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| Google Drive Backup Script | |
| Downloads and backs up files from a shared Google Drive or My Drive folder to local storage. | |
| """ | |
| import os | |
| import io | |
| import json | |
| from pathlib import Path | |
| from datetime import datetime | |
| import logging | |
| from typing import List, Dict, Optional | |
| from google.auth.transport.requests import Request | |
| from google.oauth2.credentials import Credentials | |
| from google_auth_oauthlib.flow import InstalledAppFlow | |
| from googleapiclient.discovery import build | |
| from googleapiclient.http import MediaIoBaseDownload | |
| from googleapiclient.errors import HttpError | |
| # Google Drive API scopes | |
| SCOPES = ['https://www.googleapis.com/auth/drive.readonly'] | |
class GoogleDriveBackup:
    """Backs up a Google Drive folder tree (My Drive or Shared Drive) to local storage."""

    def __init__(self, backup_path: str, credentials_file: str = 'credentials.json'):
        """
        Initialize the Google Drive backup tool.

        Args:
            backup_path: Path to the external hard drive or backup location
            credentials_file: Path to the Google API credentials JSON file
        """
        self.backup_path = Path(backup_path)
        self.credentials_file = credentials_file
        self.service = None  # populated by authenticate()
        # Run counters reported by print_stats() at the end of a backup.
        self.stats = dict.fromkeys(
            ('files_downloaded', 'folders_created', 'skipped_files', 'errors', 'total_size'),
            0,
        )
        self.setup_logging()
        # Make sure the destination root exists before any download starts.
        self.backup_path.mkdir(parents=True, exist_ok=True)
| def setup_logging(self): | |
| """Setup logging configuration.""" | |
| log_format = '%(asctime)s - %(levelname)s - %(message)s' | |
| logging.basicConfig( | |
| level=logging.INFO, | |
| format=log_format, | |
| handlers=[ | |
| logging.FileHandler('gdrive_backup.log'), | |
| logging.StreamHandler() | |
| ] | |
| ) | |
| self.logger = logging.getLogger(__name__) | |
| def authenticate(self): | |
| """Authenticate with Google Drive API.""" | |
| creds = None | |
| token_file = 'token.json' | |
| # Load existing token if available | |
| if os.path.exists(token_file): | |
| creds = Credentials.from_authorized_user_file(token_file, SCOPES) | |
| # If no valid credentials, run OAuth flow | |
| if not creds or not creds.valid: | |
| if creds and creds.expired and creds.refresh_token: | |
| creds.refresh(Request()) | |
| else: | |
| if not os.path.exists(self.credentials_file): | |
| raise FileNotFoundError( | |
| f"Credentials file '{self.credentials_file}' not found. " | |
| "Please download it from Google Cloud Console." | |
| ) | |
| flow = InstalledAppFlow.from_client_secrets_file( | |
| self.credentials_file, SCOPES | |
| ) | |
| creds = flow.run_local_server(port=0) | |
| # Save credentials for next run | |
| with open(token_file, 'w') as token: | |
| token.write(creds.to_json()) | |
| # Build the service | |
| self.service = build('drive', 'v3', credentials=creds) | |
| self.logger.info("Successfully authenticated with Google Drive API") | |
| def get_folder_id_from_url(self, folder_url: str) -> str: | |
| """ | |
| Extract folder ID from Google Drive folder URL. | |
| Args: | |
| folder_url: Google Drive folder URL | |
| Returns: | |
| Folder ID string | |
| """ | |
| if '/folders/' in folder_url: | |
| return folder_url.split('/folders/')[-1].split('?')[0] | |
| elif 'id=' in folder_url: | |
| return folder_url.split('id=')[-1].split('&')[0] | |
| else: | |
| # Assume the input is already a folder ID | |
| return folder_url | |
| def list_files_in_folder(self, folder_id: str) -> List[Dict]: | |
| """ | |
| List all files and folders in a Google Drive folder, including items in | |
| Shared Drives, and handle pagination. | |
| Args: | |
| folder_id: Google Drive folder ID | |
| Returns: | |
| List of file/folder metadata dictionaries | |
| """ | |
| items: List[Dict] = [] | |
| page_token: Optional[str] = None | |
| try: | |
| while True: | |
| response = self.service.files().list( | |
| q=f"'{folder_id}' in parents and trashed=false", | |
| fields="nextPageToken, files(id, name, mimeType, size, modifiedTime)", | |
| supportsAllDrives=True, | |
| includeItemsFromAllDrives=True, | |
| pageSize=1000, | |
| pageToken=page_token, | |
| ).execute() | |
| items.extend(response.get("files", [])) | |
| page_token = response.get("nextPageToken") | |
| if not page_token: | |
| break | |
| except HttpError as error: | |
| self.logger.error(f"Error listing files in folder {folder_id}: {error}") | |
| self.stats["errors"] += 1 | |
| return items | |
| def download_file(self, file_id: str, file_name: str, local_path: Path) -> bool: | |
| """ | |
| Download a file from Google Drive. | |
| Args: | |
| file_id: Google Drive file ID | |
| file_name: Name of the file | |
| local_path: Local path where file should be saved | |
| Returns: | |
| True if download successful, False otherwise | |
| """ | |
| try: | |
| # Check if file already exists and compare sizes | |
| if local_path.exists(): | |
| try: | |
| # Get remote file size | |
| file_metadata = self.service.files().get(fileId=file_id, fields='size', supportsAllDrives=True).execute() | |
| remote_size = int(file_metadata.get('size', 0)) | |
| local_size = local_path.stat().st_size | |
| if local_size == remote_size: | |
| self.logger.info(f"File already exists with same size, skipping: {file_name}") | |
| self.stats['skipped_files'] += 1 | |
| return True | |
| else: | |
| self.logger.info(f"File exists but size differs ({local_size} vs {remote_size}), re-downloading: {file_name}") | |
| except: | |
| # If we can't get size info, just re-download to be safe | |
| pass | |
| # Get file metadata | |
| file_metadata = self.service.files().get(fileId=file_id, fields='mimeType,size', supportsAllDrives=True).execute() | |
| # Handle Google Workspace files (Docs, Sheets, etc.) | |
| mime_type = file_metadata.get('mimeType', '') | |
| if 'google-apps' in mime_type: | |
| return self.download_google_workspace_file(file_id, file_name, local_path, mime_type) | |
| else: | |
| return self.download_regular_file(file_id, file_name, local_path) | |
| except HttpError as error: | |
| self.logger.error(f"Error downloading file {file_name}: {error}") | |
| self.stats['errors'] += 1 | |
| return False | |
| def download_regular_file(self, file_id: str, file_name: str, local_path: Path) -> bool: | |
| """Download a regular file (not Google Workspace).""" | |
| try: | |
| # Get file size for progress tracking | |
| file_metadata = self.service.files().get(fileId=file_id, fields='size', supportsAllDrives=True).execute() | |
| file_size = int(file_metadata.get('size', 0)) | |
| request = self.service.files().get_media(fileId=file_id) | |
| # For large files (like MP4), write directly to disk instead of memory | |
| if file_size > 50 * 1024 * 1024: # 50MB threshold | |
| return self.download_large_file(request, file_name, local_path, file_size) | |
| else: | |
| return self.download_small_file(request, file_name, local_path, file_size) | |
| except HttpError as error: | |
| self.logger.error(f"Error downloading regular file {file_name}: {error}") | |
| self.stats['errors'] += 1 | |
| return False | |
| def download_large_file(self, request, file_name: str, local_path: Path, file_size: int) -> bool: | |
| """Download large files directly to disk with progress tracking.""" | |
| try: | |
| with open(local_path, 'wb') as f: | |
| downloader = MediaIoBaseDownload(f, request, chunksize=10*1024*1024) # 10MB chunks | |
| done = False | |
| while done is False: | |
| status, done = downloader.next_chunk() | |
| if status: | |
| progress = int(status.progress() * 100) | |
| downloaded_mb = (file_size * status.progress()) / (1024*1024) | |
| total_mb = file_size / (1024*1024) | |
| self.logger.info(f"Downloading {file_name}: {progress}% ({downloaded_mb:.1f}/{total_mb:.1f} MB)") | |
| actual_size = local_path.stat().st_size | |
| self.stats['total_size'] += actual_size | |
| self.stats['files_downloaded'] += 1 | |
| self.logger.info(f"Downloaded: {file_name} ({actual_size / (1024*1024):.2f} MB)") | |
| return True | |
| except Exception as error: | |
| self.logger.error(f"Error downloading large file {file_name}: {error}") | |
| if local_path.exists(): | |
| local_path.unlink() # Remove partial file | |
| return False | |
| def download_small_file(self, request, file_name: str, local_path: Path, file_size: int) -> bool: | |
| """Download small files to memory then write to disk.""" | |
| try: | |
| file_io = io.BytesIO() | |
| downloader = MediaIoBaseDownload(file_io, request) | |
| done = False | |
| while done is False: | |
| status, done = downloader.next_chunk() | |
| if status and file_size > 5*1024*1024: # Show progress for files > 5MB | |
| progress = int(status.progress() * 100) | |
| self.logger.info(f"Downloading {file_name}: {progress}%") | |
| # Write to file | |
| with open(local_path, 'wb') as f: | |
| f.write(file_io.getvalue()) | |
| actual_size = local_path.stat().st_size | |
| self.stats['total_size'] += actual_size | |
| self.stats['files_downloaded'] += 1 | |
| self.logger.info(f"Downloaded: {file_name} ({actual_size / (1024*1024):.2f} MB)") | |
| return True | |
| except Exception as error: | |
| self.logger.error(f"Error downloading small file {file_name}: {error}") | |
| return False | |
| def download_google_workspace_file(self, file_id: str, file_name: str, local_path: Path, mime_type: str) -> bool: | |
| """Download Google Workspace files with appropriate export format.""" | |
| export_formats = { | |
| 'application/vnd.google-apps.document': ('application/vnd.openxmlformats-officedocument.wordprocessingml.document', '.docx'), | |
| 'application/vnd.google-apps.spreadsheet': ('application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', '.xlsx'), | |
| 'application/vnd.google-apps.presentation': ('application/vnd.openxmlformats-officedocument.presentationml.presentation', '.pptx'), | |
| 'application/vnd.google-apps.drawing': ('image/png', '.png'), | |
| } | |
| if mime_type not in export_formats: | |
| self.logger.warning(f"Unsupported Google Workspace file type: {mime_type} for {file_name}") | |
| self.stats['skipped_files'] += 1 | |
| return False | |
| export_mime_type, extension = export_formats[mime_type] | |
| # Add appropriate extension if not present | |
| if not local_path.suffix: | |
| local_path = local_path.with_suffix(extension) | |
| try: | |
| request = self.service.files().export_media( | |
| fileId=file_id, | |
| mimeType=export_mime_type, | |
| supportsAllDrives=True, | |
| ) | |
| file_io = io.BytesIO() | |
| downloader = MediaIoBaseDownload(file_io, request) | |
| done = False | |
| while done is False: | |
| status, done = downloader.next_chunk() | |
| # Write to file | |
| with open(local_path, 'wb') as f: | |
| f.write(file_io.getvalue()) | |
| file_size = local_path.stat().st_size | |
| self.stats['total_size'] += file_size | |
| self.stats['files_downloaded'] += 1 | |
| self.logger.info(f"Downloaded (exported): {file_name} as {local_path.name} ({file_size} bytes)") | |
| return True | |
| except HttpError as error: | |
| self.logger.error(f"Error downloading Google Workspace file {file_name}: {error}") | |
| self.stats['errors'] += 1 | |
| return False | |
| def backup_folder(self, folder_id: str, local_folder_path: Path, folder_name: str = ""): | |
| """ | |
| Recursively backup a folder and all its contents. | |
| Args: | |
| folder_id: Google Drive folder ID | |
| local_folder_path: Local path where folder should be backed up | |
| folder_name: Name of the folder (for logging) | |
| """ | |
| # Create local folder if it doesn't exist | |
| local_folder_path.mkdir(parents=True, exist_ok=True) | |
| self.stats['folders_created'] += 1 | |
| self.logger.info(f"Backing up folder: {folder_name or 'Root'} -> {local_folder_path}") | |
| # Get all files and folders in this directory | |
| items = self.list_files_in_folder(folder_id) | |
| for item in items: | |
| item_name = item['name'] | |
| item_id = item['id'] | |
| item_type = item['mimeType'] | |
| # Clean filename for file system compatibility | |
| safe_name = self.sanitize_filename(item_name) | |
| local_item_path = local_folder_path / safe_name | |
| if item_type == 'application/vnd.google-apps.folder': | |
| # It's a folder - recurse into it | |
| self.backup_folder(item_id, local_item_path, item_name) | |
| else: | |
| # It's a file - download it | |
| self.download_file(item_id, item_name, local_item_path) | |
| def sanitize_filename(self, filename: str) -> str: | |
| """ | |
| Sanitize filename for file system compatibility. | |
| Args: | |
| filename: Original filename | |
| Returns: | |
| Sanitized filename | |
| """ | |
| # Replace problematic characters | |
| invalid_chars = '<>:"/\\|?*' | |
| for char in invalid_chars: | |
| filename = filename.replace(char, '_') | |
| # Trim whitespace and dots from ends | |
| filename = filename.strip('. ') | |
| # Ensure filename isn't empty | |
| if not filename: | |
| filename = 'unnamed_file' | |
| return filename | |
| def print_stats(self): | |
| """Print backup statistics.""" | |
| print("\n" + "="*50) | |
| print("BACKUP COMPLETED") | |
| print("="*50) | |
| print(f"Files downloaded: {self.stats['files_downloaded']}") | |
| print(f"Folders created: {self.stats['folders_created']}") | |
| print(f"Files skipped: {self.stats['skipped_files']}") | |
| print(f"Errors encountered: {self.stats['errors']}") | |
| print(f"Total data downloaded: {self.format_file_size(self.stats['total_size'])}") | |
| print("="*50) | |
| def format_file_size(self, size_bytes: int) -> str: | |
| """Format file size in human readable format.""" | |
| if size_bytes < 1024: | |
| return f"{size_bytes} B" | |
| elif size_bytes < 1024**2: | |
| return f"{size_bytes/1024:.1f} KB" | |
| elif size_bytes < 1024**3: | |
| return f"{size_bytes/(1024**2):.1f} MB" | |
| else: | |
| return f"{size_bytes/(1024**3):.2f} GB" | |
| def run_backup(self, folder_url_or_id: str, backup_folder_name: str = None): | |
| """ | |
| Run the complete backup process. | |
| Args: | |
| folder_url_or_id: Google Drive folder URL or ID | |
| backup_folder_name: Name for the backup folder (optional) | |
| """ | |
| try: | |
| self.logger.info("Starting Google Drive backup process...") | |
| # Authenticate with Google Drive | |
| self.authenticate() | |
| # Extract folder ID from URL | |
| folder_id = self.get_folder_id_from_url(folder_url_or_id) | |
| # Get folder information | |
| try: | |
| folder_info = self.service.files().get(fileId=folder_id, supportsAllDrives=True).execute() | |
| folder_name = folder_info['name'] | |
| except HttpError: | |
| folder_name = backup_folder_name or "SharedFolder" | |
| # Create backup subfolder with timestamp | |
| timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") | |
| backup_subfolder = self.backup_path / f"{folder_name}_{timestamp}" | |
| self.logger.info(f"Backing up '{folder_name}' to {backup_subfolder}") | |
| # Start the backup process | |
| self.backup_folder(folder_id, backup_subfolder, folder_name) | |
| # Print final statistics | |
| self.print_stats() | |
| self.logger.info("Backup process completed successfully!") | |
| except Exception as e: | |
| self.logger.error(f"Backup process failed: {str(e)}") | |
| raise | |
def main():
    """Entry point: resolve configuration and run the backup."""
    # Configuration defaults - MODIFY THESE VALUES or use environment variables.
    BACKUP_PATH = "/Volumes/EXTERNAL_HARDRIVE/courses"  # Change this to your external drive path
    CREDENTIALS_FILE = "google_credentials.json"  # Path to your Google API credentials file
    FOLDER_URL = "https://drive.google.com/drive/folders/YOUR_FOLDER_ID"  # Google Drive folder URL

    # Environment variables override the hard-coded defaults above.
    backup_path = os.getenv('BACKUP_PATH', BACKUP_PATH)
    folder_url = os.getenv('FOLDER_URL', FOLDER_URL)
    # Resolve the credentials file relative to this script (abspath guards
    # against dirname(__file__) being '' when run from the script's own
    # directory).  CREDENTIALS_FILE env var overrides it, matching the other
    # two settings.
    default_credentials = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), CREDENTIALS_FILE
    )
    credentials_file = os.getenv('CREDENTIALS_FILE', default_credentials)

    try:
        # Create backup instance
        backup_tool = GoogleDriveBackup(backup_path, credentials_file)
        # Run the backup
        backup_tool.run_backup(folder_url)
    except KeyboardInterrupt:
        print("\nBackup interrupted by user.")
    except Exception as e:
        print(f"Error: {str(e)}")
        print("Please check the log file 'gdrive_backup.log' for more details.")


if __name__ == "__main__":
    main()
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
π Google Drive Backup Script ποΈβ¨
Need a digital vacuum cleaner for your Drive clutter? π§ΉπΎ
This CLI sidekick slurps an entire folder (My Drive or Shared Drive) onto your diskβperfect for weekend-warrior backups or paranoid cron-jobs. π€π
Why itβs awesome π
β’ π OAuth 2.0 wizardry (token lives in token.json) β scope: drive.readonly
β’ πͺ Inception-style recursion: dives into sub-folders and Shared Drives (supportsAllDrives=True)
β’ ποΈ Heavy-lifter: streams chunky files (> 50 MB) & exports Google Docs/Sheets/Slides/Doodles to DOCX/XLSX/PPTX/PNG π
β’ π Idempotent AF: skips stuff already downloaded (size-match) π¦
β’ π Verbose logger: all drama in gdrive_backup.log
β’ πͺ Usage: set BACKUP_PATH, google_credentials.json, FOLDER_URL β python3 google_drive_backup.py π©
Ideal for hoarding MP4s, memes, and βtotally-not-sensitiveβ spreadsheets on your trusty external drive. π΅οΈββοΈπΏ
Key Features:
Recursive Download: Downloads all files and subfolders from a shared Google Drive folder
File Type Support: Handles regular files and Google Workspace files (Docs, Sheets, Slides)
Authentication: Uses OAuth2 for secure Google Drive API access
Progress Tracking: Logs download progress and provides statistics
Error Handling: Robust error handling and logging
File Safety: Sanitizes filenames for file system compatibility
Duplicate Prevention: Skips files that already exist locally
Setup Instructions:
```bash
pip install google-auth google-auth-oauthlib google-auth-httplib2 google-api-python-client
```
Go to Google Cloud Console
Create a new project or select existing one
Enable the Google Drive API
Go to "Credentials" β "Create Credentials" β "OAuth client ID"
Choose "Desktop application"
Download the JSON file and save it as google_credentials.json (the filename the script's main() expects), or point CREDENTIALS_FILE at wherever you saved it
Modify these variables in the main() function:
BACKUP_PATH: Path to your external hard drive
CREDENTIALS_FILE: Path to your credentials.json file
FOLDER_URL: Google Drive folder URL you want to backup
```bash
python gdrive_backup.py
```
Usage Examples:
You can also use environment variables:
```bash
export BACKUP_PATH="/media/external_drive/backups"
export FOLDER_URL="https://drive.google.com/drive/folders/1abc123def456..."
python gdrive_backup.py
Features:
Smart File Handling: Exports Google Docs as .docx, Sheets as .xlsx, etc.
Logging: Creates detailed logs in gdrive_backup.log
Statistics: Shows download progress and final summary
Timestamped Backups: Each backup gets a unique timestamped folder
Resume Capability: Skips already downloaded files on subsequent runs
The script will authenticate with Google (opening a browser window the first time), then systematically download all files and folders while maintaining the original folder structure on your external drive.