Skip to content

Instantly share code, notes, and snippets.

@ronnycoding
Last active May 25, 2025 21:12
Show Gist options
  • Select an option

  • Save ronnycoding/725146cba0761a179b8a0d3c2f49f75c to your computer and use it in GitHub Desktop.

Select an option

Save ronnycoding/725146cba0761a179b8a0d3c2f49f75c to your computer and use it in GitHub Desktop.
πŸš€ Google Drive Backup Script πŸ—‚οΈβœ¨
#!/usr/bin/env python3
"""
Google Drive Backup Script
Downloads and backs up files from a shared Google Drive or My Drive folder to local storage.
"""
import os
import io
import json
from pathlib import Path
from datetime import datetime
import logging
from typing import List, Dict, Optional
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
from googleapiclient.errors import HttpError
# Google Drive API OAuth scopes: read-only Drive access is all a backup needs.
SCOPES = ['https://www.googleapis.com/auth/drive.readonly']
class GoogleDriveBackup:
    """Recursively back up a Google Drive folder (My Drive or Shared Drive) to local storage."""

    def __init__(self, backup_path: str, credentials_file: str = 'credentials.json'):
        """
        Initialize the Google Drive backup tool.

        Args:
            backup_path: Path to the external hard drive or backup location
            credentials_file: Path to the Google API credentials JSON file
        """
        self.backup_path = Path(backup_path)
        self.credentials_file = credentials_file
        self.service = None  # Drive API client; populated by authenticate()
        # Running counters reported by print_stats() at the end of a run.
        self.stats = {
            'files_downloaded': 0,
            'folders_created': 0,
            'skipped_files': 0,
            'errors': 0,
            'total_size': 0
        }
        # Setup logging
        self.setup_logging()
        # Ensure backup directory exists
        self.backup_path.mkdir(parents=True, exist_ok=True)

    def setup_logging(self):
        """Setup logging configuration (file + console)."""
        log_format = '%(asctime)s - %(levelname)s - %(message)s'
        logging.basicConfig(
            level=logging.INFO,
            format=log_format,
            handlers=[
                logging.FileHandler('gdrive_backup.log'),
                logging.StreamHandler()
            ]
        )
        self.logger = logging.getLogger(__name__)

    def authenticate(self):
        """
        Authenticate with Google Drive API via OAuth 2.0.

        Loads a cached token from token.json if present; otherwise refreshes
        an expired token or runs the local-server OAuth flow, then caches the
        resulting credentials for the next run.

        Raises:
            FileNotFoundError: If the client-secrets file is missing and an
                interactive OAuth flow is required.
        """
        creds = None
        token_file = 'token.json'
        # Load existing token if available
        if os.path.exists(token_file):
            creds = Credentials.from_authorized_user_file(token_file, SCOPES)
        # If no valid credentials, refresh or run the OAuth flow
        if not creds or not creds.valid:
            if creds and creds.expired and creds.refresh_token:
                creds.refresh(Request())
            else:
                if not os.path.exists(self.credentials_file):
                    raise FileNotFoundError(
                        f"Credentials file '{self.credentials_file}' not found. "
                        "Please download it from Google Cloud Console."
                    )
                flow = InstalledAppFlow.from_client_secrets_file(
                    self.credentials_file, SCOPES
                )
                creds = flow.run_local_server(port=0)
            # Save credentials for next run
            with open(token_file, 'w') as token:
                token.write(creds.to_json())
        # Build the service
        self.service = build('drive', 'v3', credentials=creds)
        self.logger.info("Successfully authenticated with Google Drive API")

    def get_folder_id_from_url(self, folder_url: str) -> str:
        """
        Extract folder ID from a Google Drive folder URL.

        Args:
            folder_url: Google Drive folder URL (".../folders/<id>" or "?id=<id>"
                style), or a bare folder ID.

        Returns:
            Folder ID string
        """
        if '/folders/' in folder_url:
            return folder_url.split('/folders/')[-1].split('?')[0]
        elif 'id=' in folder_url:
            return folder_url.split('id=')[-1].split('&')[0]
        else:
            # Assume the input is already a folder ID
            return folder_url

    def list_files_in_folder(self, folder_id: str) -> List[Dict]:
        """
        List all files and folders in a Google Drive folder, including items in
        Shared Drives, and handle pagination.

        Args:
            folder_id: Google Drive folder ID

        Returns:
            List of file/folder metadata dictionaries (id, name, mimeType,
            size, modifiedTime). Returns whatever was collected before the
            first error, if any.
        """
        items: List[Dict] = []
        page_token: Optional[str] = None
        try:
            while True:
                response = self.service.files().list(
                    q=f"'{folder_id}' in parents and trashed=false",
                    fields="nextPageToken, files(id, name, mimeType, size, modifiedTime)",
                    supportsAllDrives=True,
                    includeItemsFromAllDrives=True,
                    pageSize=1000,
                    pageToken=page_token,
                ).execute()
                items.extend(response.get("files", []))
                page_token = response.get("nextPageToken")
                if not page_token:
                    break
        except HttpError as error:
            self.logger.error(f"Error listing files in folder {folder_id}: {error}")
            self.stats["errors"] += 1
        return items

    def download_file(self, file_id: str, file_name: str, local_path: Path) -> bool:
        """
        Download a file from Google Drive.

        Fetches the remote metadata once (size for the skip check, mimeType
        for dispatch), skips files that already exist locally with a matching
        size, then delegates to the regular-download or Workspace-export path.

        Args:
            file_id: Google Drive file ID
            file_name: Name of the file
            local_path: Local path where file should be saved

        Returns:
            True if download successful (or skipped as up to date), False otherwise
        """
        try:
            # Single metadata round-trip; the previous version fetched
            # metadata twice (once for size, once for mimeType).
            file_metadata = self.service.files().get(
                fileId=file_id, fields='mimeType,size', supportsAllDrives=True
            ).execute()
            mime_type = file_metadata.get('mimeType', '')

            # Skip re-downloading identical files. Google Workspace files
            # report no 'size', so they are always re-exported.
            if local_path.exists() and 'google-apps' not in mime_type:
                try:
                    remote_size = int(file_metadata.get('size', 0))
                    local_size = local_path.stat().st_size
                    if local_size == remote_size:
                        self.logger.info(f"File already exists with same size, skipping: {file_name}")
                        self.stats['skipped_files'] += 1
                        return True
                    self.logger.info(f"File exists but size differs ({local_size} vs {remote_size}), re-downloading: {file_name}")
                except (OSError, ValueError) as size_error:
                    # Can't compare sizes -> re-download to be safe.
                    # (Was a bare `except:` that swallowed everything.)
                    self.logger.warning(f"Could not compare sizes for {file_name}: {size_error}")

            # Handle Google Workspace files (Docs, Sheets, etc.) via export.
            if 'google-apps' in mime_type:
                return self.download_google_workspace_file(file_id, file_name, local_path, mime_type)
            return self.download_regular_file(file_id, file_name, local_path)
        except HttpError as error:
            self.logger.error(f"Error downloading file {file_name}: {error}")
            self.stats['errors'] += 1
            return False

    def download_regular_file(self, file_id: str, file_name: str, local_path: Path) -> bool:
        """
        Download a regular (non-Google-Workspace) file.

        Files over 50 MB stream straight to disk; smaller ones buffer in memory.

        Returns:
            True on success, False otherwise.
        """
        try:
            # Get file size for progress tracking and strategy selection
            file_metadata = self.service.files().get(fileId=file_id, fields='size', supportsAllDrives=True).execute()
            file_size = int(file_metadata.get('size', 0))
            request = self.service.files().get_media(fileId=file_id)
            # For large files (like MP4), write directly to disk instead of memory
            if file_size > 50 * 1024 * 1024:  # 50MB threshold
                return self.download_large_file(request, file_name, local_path, file_size)
            else:
                return self.download_small_file(request, file_name, local_path, file_size)
        except HttpError as error:
            self.logger.error(f"Error downloading regular file {file_name}: {error}")
            self.stats['errors'] += 1
            return False

    def download_large_file(self, request, file_name: str, local_path: Path, file_size: int) -> bool:
        """
        Download large files directly to disk with progress tracking.

        A partially written file is removed on failure so a later run
        re-downloads it rather than skipping on a bogus size match.
        """
        try:
            with open(local_path, 'wb') as f:
                downloader = MediaIoBaseDownload(f, request, chunksize=10 * 1024 * 1024)  # 10MB chunks
                done = False
                while not done:
                    status, done = downloader.next_chunk()
                    if status:
                        progress = int(status.progress() * 100)
                        downloaded_mb = (file_size * status.progress()) / (1024 * 1024)
                        total_mb = file_size / (1024 * 1024)
                        self.logger.info(f"Downloading {file_name}: {progress}% ({downloaded_mb:.1f}/{total_mb:.1f} MB)")
            actual_size = local_path.stat().st_size
            self.stats['total_size'] += actual_size
            self.stats['files_downloaded'] += 1
            self.logger.info(f"Downloaded: {file_name} ({actual_size / (1024*1024):.2f} MB)")
            return True
        except Exception as error:
            self.logger.error(f"Error downloading large file {file_name}: {error}")
            if local_path.exists():
                local_path.unlink()  # Remove partial file
            return False

    def download_small_file(self, request, file_name: str, local_path: Path, file_size: int) -> bool:
        """Download small files to memory then write to disk."""
        try:
            file_io = io.BytesIO()
            downloader = MediaIoBaseDownload(file_io, request)
            done = False
            while not done:
                status, done = downloader.next_chunk()
                if status and file_size > 5 * 1024 * 1024:  # Show progress for files > 5MB
                    progress = int(status.progress() * 100)
                    self.logger.info(f"Downloading {file_name}: {progress}%")
            # Write to file
            with open(local_path, 'wb') as f:
                f.write(file_io.getvalue())
            actual_size = local_path.stat().st_size
            self.stats['total_size'] += actual_size
            self.stats['files_downloaded'] += 1
            self.logger.info(f"Downloaded: {file_name} ({actual_size / (1024*1024):.2f} MB)")
            return True
        except Exception as error:
            self.logger.error(f"Error downloading small file {file_name}: {error}")
            return False

    def download_google_workspace_file(self, file_id: str, file_name: str, local_path: Path, mime_type: str) -> bool:
        """
        Download a Google Workspace file by exporting it to an Office/PNG format.

        Docs -> .docx, Sheets -> .xlsx, Slides -> .pptx, Drawings -> .png.
        Unsupported Workspace types are counted as skipped.
        """
        export_formats = {
            'application/vnd.google-apps.document': ('application/vnd.openxmlformats-officedocument.wordprocessingml.document', '.docx'),
            'application/vnd.google-apps.spreadsheet': ('application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', '.xlsx'),
            'application/vnd.google-apps.presentation': ('application/vnd.openxmlformats-officedocument.presentationml.presentation', '.pptx'),
            'application/vnd.google-apps.drawing': ('image/png', '.png'),
        }
        if mime_type not in export_formats:
            self.logger.warning(f"Unsupported Google Workspace file type: {mime_type} for {file_name}")
            self.stats['skipped_files'] += 1
            return False
        export_mime_type, extension = export_formats[mime_type]
        # Add appropriate extension if not present
        if not local_path.suffix:
            local_path = local_path.with_suffix(extension)
        try:
            # BUGFIX: files().export_media takes only fileId and mimeType;
            # passing supportsAllDrives=True (as before) raises TypeError in
            # the discovery-based client, breaking every Workspace export.
            request = self.service.files().export_media(
                fileId=file_id,
                mimeType=export_mime_type,
            )
            file_io = io.BytesIO()
            downloader = MediaIoBaseDownload(file_io, request)
            done = False
            while not done:
                status, done = downloader.next_chunk()
            # Write to file
            with open(local_path, 'wb') as f:
                f.write(file_io.getvalue())
            file_size = local_path.stat().st_size
            self.stats['total_size'] += file_size
            self.stats['files_downloaded'] += 1
            self.logger.info(f"Downloaded (exported): {file_name} as {local_path.name} ({file_size} bytes)")
            return True
        except HttpError as error:
            self.logger.error(f"Error downloading Google Workspace file {file_name}: {error}")
            self.stats['errors'] += 1
            return False

    def backup_folder(self, folder_id: str, local_folder_path: Path, folder_name: str = ""):
        """
        Recursively backup a folder and all its contents.

        Note: 'folders_created' counts every folder visited, whether or not
        it already existed locally.

        Args:
            folder_id: Google Drive folder ID
            local_folder_path: Local path where folder should be backed up
            folder_name: Name of the folder (for logging)
        """
        # Create local folder if it doesn't exist
        local_folder_path.mkdir(parents=True, exist_ok=True)
        self.stats['folders_created'] += 1
        self.logger.info(f"Backing up folder: {folder_name or 'Root'} -> {local_folder_path}")
        # Get all files and folders in this directory
        items = self.list_files_in_folder(folder_id)
        for item in items:
            item_name = item['name']
            item_id = item['id']
            item_type = item['mimeType']
            # Clean filename for file system compatibility
            safe_name = self.sanitize_filename(item_name)
            local_item_path = local_folder_path / safe_name
            if item_type == 'application/vnd.google-apps.folder':
                # It's a folder - recurse into it
                self.backup_folder(item_id, local_item_path, item_name)
            else:
                # It's a file - download it
                self.download_file(item_id, item_name, local_item_path)

    def sanitize_filename(self, filename: str) -> str:
        """
        Sanitize filename for file system compatibility.

        Replaces characters invalid on common filesystems with '_', trims
        leading/trailing dots and spaces, and never returns an empty name.

        Args:
            filename: Original filename

        Returns:
            Sanitized filename
        """
        # Replace problematic characters
        invalid_chars = '<>:"/\\|?*'
        for char in invalid_chars:
            filename = filename.replace(char, '_')
        # Trim whitespace and dots from ends
        filename = filename.strip('. ')
        # Ensure filename isn't empty
        if not filename:
            filename = 'unnamed_file'
        return filename

    def print_stats(self):
        """Print backup statistics."""
        print("\n" + "=" * 50)
        print("BACKUP COMPLETED")
        print("=" * 50)
        print(f"Files downloaded: {self.stats['files_downloaded']}")
        print(f"Folders created: {self.stats['folders_created']}")
        print(f"Files skipped: {self.stats['skipped_files']}")
        print(f"Errors encountered: {self.stats['errors']}")
        print(f"Total data downloaded: {self.format_file_size(self.stats['total_size'])}")
        print("=" * 50)

    def format_file_size(self, size_bytes: int) -> str:
        """Format file size in human readable format (B / KB / MB / GB)."""
        if size_bytes < 1024:
            return f"{size_bytes} B"
        elif size_bytes < 1024**2:
            return f"{size_bytes/1024:.1f} KB"
        elif size_bytes < 1024**3:
            return f"{size_bytes/(1024**2):.1f} MB"
        else:
            return f"{size_bytes/(1024**3):.2f} GB"

    def run_backup(self, folder_url_or_id: str, backup_folder_name: Optional[str] = None):
        """
        Run the complete backup process.

        Args:
            folder_url_or_id: Google Drive folder URL or ID
            backup_folder_name: Fallback name for the backup folder if the
                remote folder's name cannot be fetched (optional)

        Raises:
            Exception: Re-raises whatever aborted the backup, after logging.
        """
        try:
            self.logger.info("Starting Google Drive backup process...")
            # Authenticate with Google Drive
            self.authenticate()
            # Extract folder ID from URL
            folder_id = self.get_folder_id_from_url(folder_url_or_id)
            # Get folder information
            try:
                folder_info = self.service.files().get(fileId=folder_id, supportsAllDrives=True).execute()
                folder_name = folder_info['name']
            except HttpError:
                folder_name = backup_folder_name or "SharedFolder"
            # Create backup subfolder with timestamp
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            backup_subfolder = self.backup_path / f"{folder_name}_{timestamp}"
            self.logger.info(f"Backing up '{folder_name}' to {backup_subfolder}")
            # Start the backup process
            self.backup_folder(folder_id, backup_subfolder, folder_name)
            # Print final statistics
            self.print_stats()
            self.logger.info("Backup process completed successfully!")
        except Exception as e:
            self.logger.error(f"Backup process failed: {str(e)}")
            raise
def main():
    """Entry point: read configuration, build the backup tool, and run it."""
    # Configuration - MODIFY THESE VALUES
    BACKUP_PATH = "/Volumes/EXTERNAL_HARDRIVE/courses"  # Change this to your external drive path
    CREDENTIALS_FILE = "google_credentials.json"  # Path to your Google API credentials file
    FOLDER_URL = "https://drive.google.com/drive/folders/YOUR_FOLDER_ID"  # Google Drive folder URL

    # Environment variables, when set, override the hard-coded defaults.
    dest = os.getenv('BACKUP_PATH', BACKUP_PATH)
    src_url = os.getenv('FOLDER_URL', FOLDER_URL)
    # Resolve the credentials file relative to this script's own directory.
    creds_path = os.path.join(os.path.dirname(__file__), CREDENTIALS_FILE)

    try:
        tool = GoogleDriveBackup(dest, creds_path)
        tool.run_backup(src_url)
    except KeyboardInterrupt:
        print("\nBackup interrupted by user.")
    except Exception as e:
        print(f"Error: {str(e)}")
        print("Please check the log file 'gdrive_backup.log' for more details.")


if __name__ == "__main__":
    main()
@ronnycoding
Copy link
Copy Markdown
Author

ronnycoding commented May 25, 2025

πŸš€ Google Drive Backup Script πŸ—‚οΈβœ¨

Need a digital vacuum cleaner for your Drive clutter? πŸ§ΉπŸ’Ύ
This CLI sidekick slurps an entire folder (My Drive or Shared Drive) onto your diskβ€”perfect for weekend-warrior backups or paranoid cron-jobs. πŸ€–πŸ”

Why it’s awesome 😎
β€’ πŸ” OAuth 2.0 wizardry (token lives in token.json) β€” scope: drive.readonly
β€’ πŸͺ† Inception-style recursion: dives into sub-folders and Shared Drives (supportsAllDrives=True)
β€’ πŸ‹οΈ Heavy-lifter: streams chunky files (> 50 MB) & exports Google Docs/Sheets/Slides/Doodles to DOCX/XLSX/PPTX/PNG πŸ•
β€’ πŸ”„ Idempotent AF: skips stuff already downloaded (size-match) 🚦
β€’ πŸ“Š Verbose logger: all drama in gdrive_backup.log
β€’ πŸͺ„ Usage: set BACKUP_PATH, google_credentials.json, FOLDER_URL β†’ python3 google_drive_backup.py 🎩

Ideal for hoarding MP4s, memes, and β€œtotally-not-sensitive” spreadsheets on your trusty external drive. πŸ•΅οΈβ€β™‚οΈπŸ’Ώ

Key Features:

Recursive Download: Downloads all files and subfolders from a shared Google Drive folder
File Type Support: Handles regular files and Google Workspace files (Docs, Sheets, Slides)
Authentication: Uses OAuth2 for secure Google Drive API access
Progress Tracking: Logs download progress and provides statistics
Error Handling: Robust error handling and logging
File Safety: Sanitizes filenames for file system compatibility
Duplicate Prevention: Skips files that already exist locally

Setup Instructions:

  1. Install Required Packages:
    pip install google-auth google-auth-oauthlib google-auth-httplib2 google-api-python-client
  2. Get Google Drive API Credentials:

Go to Google Cloud Console
Create a new project or select existing one
Enable the Google Drive API
Go to "Credentials" β†’ "Create Credentials" β†’ "OAuth client ID"
Choose "Desktop application"
Download the JSON file and save it as google_credentials.json (the filename the script's main() expects)

  3. Configure the Script:
    Modify these variables in the main() function:

BACKUP_PATH: Path to your external hard drive
CREDENTIALS_FILE: Path to your credentials.json file
FOLDER_URL: Google Drive folder URL you want to backup

  4. Run the Script:
    python gdrive_backup.py
    Usage Examples:
    You can also use environment variables:
    export BACKUP_PATH="/media/external_drive/backups"
    export FOLDER_URL="https://drive.google.com/drive/folders/1abc123def456..."
    python gdrive_backup.py
    Features:

Smart File Handling: Exports Google Docs as .docx, Sheets as .xlsx, etc.
Logging: Creates detailed logs in gdrive_backup.log
Statistics: Shows download progress and final summary
Timestamped Backups: Each backup gets a unique timestamped folder
Resume Capability: Skips already downloaded files on subsequent runs

The script will authenticate with Google (opening a browser window the first time), then systematically download all files and folders while maintaining the original folder structure on your external drive.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment