#!/usr/bin/env python3
"""
IBM Cloud Object Storage - HMAC Credential Bucket Access Tester

This script tests HMAC credentials to determine which buckets you can
actually read from and download files. Useful when credentials may only
work for certain buckets due to regional restrictions or permissions.

Credentials are prompted for interactively. Any field left blank falls
back to the COS_ENDPOINT, COS_ACCESS_KEY and COS_SECRET_KEY environment
variables.

Usage:
    python test_bucket_access.py
"""

import getpass
import os
import sys
import traceback

import ibm_boto3
from ibm_botocore.client import Config
from ibm_botocore.exceptions import ClientError


def format_size(size_bytes):
    """
    Convert bytes to a human-readable format.

    Args:
        size_bytes: Size in bytes

    Returns:
        Formatted string (e.g., "1.50 GB", "256.00 MB", "3.20 KB").
        Zero is reported as "0 B (empty)"; anything of 1 GiB or more is
        reported in GB.
    """
    if size_bytes == 0:
        return "0 B (empty)"
    elif size_bytes < 1024:
        return f"{size_bytes} B"
    elif size_bytes < 1024 * 1024:
        return f"{size_bytes / 1024:.2f} KB"
    elif size_bytes < 1024 * 1024 * 1024:
        return f"{size_bytes / (1024 * 1024):.2f} MB"
    else:
        return f"{size_bytes / (1024 * 1024 * 1024):.2f} GB"


def create_cos_client(endpoint, access_key, secret_key):
    """
    Create an IBM Cloud Object Storage client using HMAC credentials.

    HMAC (Hash-based Message Authentication Code) credentials are an
    alternative to API key authentication. They consist of an access key
    and secret key pair.

    Explicit arguments take precedence. An argument that is empty or None
    falls back to the matching environment variable (COS_ENDPOINT,
    COS_ACCESS_KEY, COS_SECRET_KEY).

    Args:
        endpoint: The COS endpoint URL
            (e.g., "s3.us-south.cloud-object-storage.appdomain.cloud")
        access_key: HMAC access key ID
        secret_key: HMAC secret access key

    Returns:
        An IBM COS client object configured with the resolved credentials

    Raises:
        ValueError: If any value is missing from both the arguments and
            the environment.
    """
    # Arguments win; the environment is only a fallback. Previously the
    # environment always overwrote the arguments, so credentials passed by
    # the caller were ignored and an unset variable crashed on None.
    endpoint = endpoint or os.getenv('COS_ENDPOINT')
    access_key = access_key or os.getenv('COS_ACCESS_KEY')
    secret_key = secret_key or os.getenv('COS_SECRET_KEY')

    resolved = (
        ('endpoint', endpoint),
        ('access_key', access_key),
        ('secret_key', secret_key),
    )
    missing = [name for name, value in resolved if not value]
    if missing:
        raise ValueError(
            f"Missing COS credential value(s): {', '.join(missing)}"
        )

    # Ensure the endpoint has a URL scheme; default to https://
    if not endpoint.startswith(("http://", "https://")):
        endpoint = f"https://{endpoint}"

    # Create the client with S3-compatible signature version 4
    # This is required for IBM Cloud Object Storage HMAC authentication
    client = ibm_boto3.client(
        's3',
        ibm_api_key_id=None,  # We're using HMAC, not API key
        ibm_service_instance_id=None,  # Not needed for HMAC auth
        config=Config(signature_version='s3v4'),  # S3 signature version 4
        endpoint_url=endpoint,
        aws_access_key_id=access_key,  # HMAC access key
        aws_secret_access_key=secret_key,  # HMAC secret key
    )
    return client


def list_all_buckets(client):
    """
    List all buckets that are visible with the current credentials.

    Note: Just because a bucket is visible doesn't mean you can read from
    it. Read access is tested separately for each bucket.

    Args:
        client: IBM COS client object

    Returns:
        List of bucket names (strings). An empty list is returned when the
        listing fails; the error is printed rather than raised.
    """
    try:
        # Call the list_buckets API
        response = client.list_buckets()
        # Extract bucket names from the response
        return [bucket['Name'] for bucket in response.get('Buckets', [])]
    except ClientError as e:
        # If we can't even list buckets, the credentials are likely invalid
        error = e.response.get('Error', {})
        print(f"❌ Error listing buckets: {error.get('Code', 'Unknown')}")
        print(f" Message: {error.get('Message', '')}")
        return []
    except Exception as e:
        print(f"❌ Unexpected error: {str(e)}")
        return []


def test_bucket_read_access(client, bucket_name):
    """
    Test if we can actually read from a specific bucket.

    This attempts to list objects in the bucket. If this succeeds, we have
    read access. If it fails, the error code tells us why.

    Common error codes:
    - AccessDenied: You don't have permission (often means wrong region)
    - NoSuchBucket: The bucket doesn't exist
    - 403: Forbidden - bucket exists but you can't access it

    Args:
        client: IBM COS client object
        bucket_name: Name of the bucket to test

    Returns:
        Tuple of (success: bool, message: str, object_count: int).
        Because only one key is requested, object_count is at most 1; it
        indicates whether the bucket is non-empty, not its total size.
    """
    try:
        # Try to list objects in the bucket.
        # We only request 1 object to make this test fast.
        response = client.list_objects_v2(
            Bucket=bucket_name,
            MaxKeys=1,  # Just need to know if we CAN list
        )
    except ClientError as e:
        # We got an error - parse it to understand what went wrong
        error = e.response.get('Error', {})
        error_code = error.get('Code', 'Unknown')
        error_msg = error.get('Message', '')

        if error_code == 'NoSuchBucket':
            return False, "❌ Bucket does not exist", 0
        elif error_code == 'AccessDenied':
            # This usually means the bucket is in a different region
            # or your credentials don't have permission
            return False, "❌ ACCESS DENIED (likely wrong region or insufficient permissions)", 0
        elif error_code == '403':
            return False, "❌ FORBIDDEN (bucket exists but credentials lack access)", 0
        else:
            return False, f"❌ ERROR: {error_code} - {error_msg}", 0
    except Exception as e:
        return False, f"❌ UNEXPECTED ERROR: {str(e)}", 0

    # If we got here without an exception, we have read access!
    if 'Contents' in response:
        # There are objects in the bucket
        return True, "✅ READ ACCESS CONFIRMED", response.get('KeyCount', 0)

    # Bucket is empty, but we can read from it
    return True, "✅ READ ACCESS CONFIRMED (empty bucket)", 0


def get_bucket_stats(client, bucket_name):
    """
    Get the total number of objects and total size for a bucket.

    This function iterates through ALL objects in the bucket using
    pagination to count them and sum up their sizes. This can take a while
    for large buckets. Keys ending in "/" (directory markers) are skipped.

    Args:
        client: IBM COS client object
        bucket_name: Name of the bucket

    Returns:
        Tuple of (total_objects: int, total_size_bytes: int). If the
        listing fails, a warning is printed and (0, 0) is returned.
    """
    try:
        print(" 📊 Counting objects and calculating size...")

        # Use a paginator to handle buckets with many objects
        # This automatically handles the continuation tokens for us
        paginator = client.get_paginator('list_objects_v2')

        total_objects = 0
        total_size = 0

        # Iterate through all pages
        for page in paginator.paginate(Bucket=bucket_name):
            for obj in page.get('Contents', []):
                # Skip directory markers (objects ending with /)
                if not obj['Key'].endswith('/'):
                    total_objects += 1
                    total_size += obj['Size']

        return total_objects, total_size
    except ClientError as e:
        # The bucket is still accessible, we just can't count everything.
        # Say so explicitly so the zeros are not mistaken for "empty".
        error_code = e.response.get('Error', {}).get('Code', 'Unknown')
        print(f" ⚠️ Could not collect statistics ({error_code}); reporting 0")
        return 0, 0
    except Exception as e:
        print(f" ⚠️ Could not collect statistics ({str(e)}); reporting 0")
        return 0, 0


def get_bucket_location(client, bucket_name):
    """
    Try to determine which region a bucket is in.

    This helps identify why you might not have access - the bucket could
    be in a different region than your credentials are configured for.

    Args:
        client: IBM COS client object
        bucket_name: Name of the bucket

    Returns:
        The LocationConstraint reported for the bucket, 'us-east-1' when
        the response has none, or "unknown (no access)" if the lookup
        fails.
    """
    try:
        response = client.get_bucket_location(Bucket=bucket_name)
        # LocationConstraint is the region
        region = response.get('LocationConstraint', 'us-east-1')
        return region if region else 'us-east-1'
    except Exception:
        # If we can't get the location, it's often because we don't have
        # access. Only Exception is caught so Ctrl+C still propagates.
        return "unknown (no access)"


def _prompt_for_credentials():
    """Prompt for endpoint and HMAC keys, falling back to COS_* env vars."""
    print("Enter your IBM COS credentials:")
    print("(Leave a field blank to use the COS_ENDPOINT, COS_ACCESS_KEY")
    print(" or COS_SECRET_KEY environment variable.)")
    print()

    endpoint = input(
        "Endpoint (e.g., s3.us-south.cloud-object-storage.appdomain.cloud): "
    ).strip()
    access_key = input("HMAC Access Key: ").strip()
    # getpass keeps the secret from being echoed to the terminal
    secret_key = getpass.getpass("HMAC Secret Key: ").strip()

    endpoint = endpoint or os.getenv('COS_ENDPOINT', '').strip()
    access_key = access_key or os.getenv('COS_ACCESS_KEY', '').strip()
    secret_key = secret_key or os.getenv('COS_SECRET_KEY', '').strip()
    return endpoint, access_key, secret_key


def _print_summary(buckets, accessible_buckets, inaccessible_buckets):
    """Print the final report: totals, per-bucket results and next steps."""
    print("=" * 70)
    print("SUMMARY")
    print("=" * 70)
    print()
    print(f"Total buckets found: {len(buckets)}")
    print(f"Accessible buckets: {len(accessible_buckets)} ✅")
    print(f"Inaccessible buckets: {len(inaccessible_buckets)} ❌")

    # Calculate totals across all accessible buckets
    if accessible_buckets:
        total_objects_all = sum(b['object_count'] for b in accessible_buckets)
        total_size_all = sum(b['total_size'] for b in accessible_buckets)
        print(f"Total objects: {total_objects_all:,} 📦")
        print(f"Total size: {format_size(total_size_all)} 💾")
    print()

    if accessible_buckets:
        print("🎉 ACCESSIBLE BUCKETS (You can download from these):")
        print("-" * 70)
        for bucket_info in accessible_buckets:
            print(f" ✅ {bucket_info['name']}")
            print(f" Region: {bucket_info['region']}")
            print(f" Objects: {bucket_info['object_count']:,} | Size: {format_size(bucket_info['total_size'])}")
        print()

    if inaccessible_buckets:
        print("❌ INACCESSIBLE BUCKETS (Cannot download from these):")
        print("-" * 70)
        for bucket in inaccessible_buckets:
            print(f" ❌ {bucket}")
        print()
        print("💡 TIP: Inaccessible buckets are often in different regions.")
        print(" You may need different credentials for those regions.")
        print()

    # Final recommendations
    if accessible_buckets:
        print("=" * 70)
        print("NEXT STEPS")
        print("=" * 70)
        print()
        print("You can now download files from the accessible buckets using:")
        print()
        print("1. rclone (fastest for large syncs)")
        print("2. MinIO client (mc) (good for quick downloads)")
        print("3. boto3 (Python library, good for programmatic access)")
        print()
        print("Would you like me to help you set up a download script? 😊")
        print()


def main():
    """
    Main function - prompts for credentials and tests bucket access.

    Exits with status 1 if a credential is missing, the client cannot be
    created, or no buckets can be listed.
    """
    print("=" * 70)
    print("IBM Cloud Object Storage - Bucket Access Tester")
    print("=" * 70)
    print()
    print("This script will test your HMAC credentials to see which buckets")
    print("you can actually read from and download files.")
    print()

    # Prompt for credentials
    endpoint, access_key, secret_key = _prompt_for_credentials()

    print()
    print("=" * 70)
    print("Testing credentials...")
    print("=" * 70)
    print()

    # Validate inputs
    if not endpoint or not access_key or not secret_key:
        print("❌ Error: All fields are required!")
        sys.exit(1)

    # Create the COS client
    try:
        client = create_cos_client(endpoint, access_key, secret_key)
        print(f"✅ Created COS client for endpoint: {endpoint}")
    except Exception as e:
        print(f"❌ Failed to create COS client: {str(e)}")
        sys.exit(1)
    print()

    # Step 1: List all buckets
    print("STEP 1: Listing all visible buckets...")
    print("-" * 70)
    buckets = list_all_buckets(client)

    if not buckets:
        print("❌ No buckets found or unable to list buckets.")
        print()
        print("This could mean:")
        print(" - Your credentials are invalid")
        print(" - Your credentials don't have any bucket access")
        print(" - There's a network connectivity issue")
        sys.exit(1)

    print(f"✅ Found {len(buckets)} bucket(s)")
    print()

    # Step 2: Test read access for each bucket
    print("STEP 2: Testing read access for each bucket...")
    print("-" * 70)
    print()

    # Track results
    accessible_buckets = []
    inaccessible_buckets = []

    for i, bucket in enumerate(buckets, 1):
        print(f"[{i}/{len(buckets)}] Testing bucket: {bucket}")

        # Test if we can read from this bucket. The probe's object count
        # is ignored because the full statistics are gathered below.
        can_read, message, _ = test_bucket_read_access(client, bucket)
        print(f" {message}")

        if can_read:
            # We have access! Try to get the region
            region = get_bucket_location(client, bucket)
            print(f" 📍 Region: {region}")

            # Get detailed statistics (object count and total size)
            total_objects, total_size = get_bucket_stats(client, bucket)
            print(f" 📦 Objects: {total_objects:,}")
            print(f" 💾 Total Size: {format_size(total_size)}")

            accessible_buckets.append({
                'name': bucket,
                'region': region,
                'has_objects': total_objects > 0,
                'object_count': total_objects,
                'total_size': total_size,
            })
        else:
            inaccessible_buckets.append(bucket)
        print()

    # Step 3: Summary
    _print_summary(buckets, accessible_buckets, inaccessible_buckets)


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("\n\n⚠️ Test cancelled by user")
        sys.exit(0)
    except Exception as e:
        print(f"\n\n❌ Unexpected error: {str(e)}")
        traceback.print_exc()
        sys.exit(1)