#!/usr/bin/env bash
#
# DuckDB AWS SSO CA Certificate Fix - Reproduction & Verification Script
#
# This script demonstrates the SSO CA certificate issue in the DuckDB AWS extension
# and verifies that the fix works correctly.
#
# Problem: When the DuckDB AWS extension is built on one Linux distribution (e.g.,
# manylinux/RHEL with certs at /etc/pki/tls/certs/ca-bundle.crt) and run on another
# (e.g., Ubuntu/Debian with certs at /etc/ssl/certs/ca-certificates.crt), the SSO
# credential provider fails with SSL certificate verification errors because it
# doesn't receive the runtime-detected CA certificate path.
#
# The fix passes SELECTED_CURL_CERT_PATH to SSOCredentialsProvider via ClientConfiguration.
#
# Prerequisites:
# - Docker installed and running
# - Active AWS SSO session (run: aws sso login --profile <profile-name>)
# - ~/.aws directory with valid SSO configuration
#
# IMPORTANT: The builds take ~10-15 minutes each. AWS SSO sessions may expire
# during this time. If the tests fail with "Failed to generate secret", refresh
# your SSO session (aws sso login --profile <profile-name>) and re-run the script.
# The script will reuse already-built extensions, so subsequent runs are faster.
#
# Usage:
#   ./test_sso_ca_fix.sh
#
# Author: Al Johri
# Date: 2025-01-23

set -euo pipefail

###############################################################################
# CONFIGURATION - Modify these values for your environment
###############################################################################

# AWS SSO profile name (must have an active SSO session)
AWS_PROFILE="${AWS_PROFILE:-seltz-dev-al}"

# S3 path to test (must be accessible with the SSO profile above)
# Set to empty string to skip S3 access test
S3_TEST_PATH="${S3_TEST_PATH:-s3://seltz-dev-al/outputs/simple_qa_10/exa/auto/search_results.parquet}"

# Git commits to test
# Main branch commit (without the fix) - latest main as of 2025-01-23
readonly MAIN_COMMIT="7cacaf6"
# Fix commit (with the CA certificate fix)
readonly FIX_COMMIT="f53439c"

# Repository URL (fork with the fix)
readonly REPO_URL="https://github.com/AlJohri/duckdb-aws.git"

# DuckDB version (must match the version the extension is built for)
readonly DUCKDB_VERSION="v1.4.0"

# Docker images: build on manylinux (RHEL-style cert path), test on Ubuntu
# (Debian-style cert path) to reproduce the cross-distro mismatch.
readonly MANYLINUX_IMAGE="quay.io/pypa/manylinux_2_28_x86_64"
readonly TEST_IMAGE="ubuntu:22.04"

# Working directory for the test
WORK_DIR="${WORK_DIR:-/tmp/duckdb-aws-sso-test}"

###############################################################################
# HELPER FUNCTIONS
###############################################################################

# Print a section header
print_header() {
  echo ""
  echo "=============================================================================="
  echo " $1"
  echo "=============================================================================="
  echo ""
}

# Print a subsection header
print_subheader() {
  echo ""
  echo "--- $1 ---"
  echo ""
}

# Print success message in green
print_success() {
  echo -e "\033[32m✓ $1\033[0m"
}

# Print failure message in red
print_failure() {
  echo -e "\033[31m✗ $1\033[0m"
}

# Print info message in blue
print_info() {
  echo -e "\033[34mℹ $1\033[0m"
}

# Check if a command exists; exit the script if it does not.
# Arguments: $1 = command name
check_command() {
  if ! command -v "$1" &> /dev/null; then
    print_failure "Required command '$1' not found. Please install it first."
    exit 1
  fi
}

# Clean up a directory using Docker (handles root-owned files from container builds)
# Arguments: $1 = directory to remove
docker_cleanup() {
  local dir="$1"
  if [ -d "$dir" ]; then
    docker run --rm -v "$(dirname "$dir"):/cleanup" alpine rm -rf "/cleanup/$(basename "$dir")" 2>/dev/null || true
  fi
}

# Build the extension in a manylinux container.
# Arguments: $1 = source directory, $2 = output subdirectory name
# Outputs:   copies aws.duckdb_extension to ${WORK_DIR}/extensions/$2/
build_extension() {
  local src_dir="$1"
  local subdir="$2"

  # Check if extension already exists (allows re-running after SSO refresh without rebuilding)
  if [ -f "${WORK_DIR}/extensions/${subdir}/aws.duckdb_extension" ]; then
    print_success "Extension already built at extensions/${subdir}/aws.duckdb_extension (skipping build)"
    return 0
  fi

  print_info "Building extension in manylinux container (this may take several minutes)..."

  docker run --rm \
    -v "${src_dir}:/workspace" \
    -w /workspace \
    "${MANYLINUX_IMAGE}" bash -c "
      # Install build dependencies
      yum install -y -q gcc-toolset-12 cmake ninja-build git perl-IPC-Cmd openssl-devel libcurl-devel zip > /dev/null 2>&1
      source /opt/rh/gcc-toolset-12/enable

      # Configure git for submodules
      git config --global --add safe.directory /workspace
      git config --global --add safe.directory /workspace/duckdb
      git config --global --add safe.directory /workspace/extension-ci-tools

      # Initialize submodules
      git submodule update --init --recursive 2>/dev/null

      # Clone and bootstrap vcpkg
      if [ ! -d /vcpkg ]; then
        git clone -q https://github.com/microsoft/vcpkg.git /vcpkg
      fi
      cd /vcpkg
      git checkout -q 5e5d0e1cd7785623065e77eff011afdeec1a3574
      ./bootstrap-vcpkg.sh > /dev/null 2>&1

      # Build the extension
      cd /workspace
      make clean > /dev/null 2>&1 || true
      VCPKG_TOOLCHAIN_PATH=/vcpkg/scripts/buildsystems/vcpkg.cmake GEN=ninja make release 2>&1 | tail -20
    "

  # 'make release | tail -20' inside the container masks a failed build (tail's
  # exit status wins and the container shell has no pipefail), so verify the
  # artifact exists before copying instead of failing later with a cryptic cp error.
  if [ ! -f "${src_dir}/build/release/extension/aws/aws.duckdb_extension" ]; then
    print_failure "Build did not produce aws.duckdb_extension (see build output above)"
    exit 1
  fi

  # Copy the built extension to a subdirectory (keep filename as aws.duckdb_extension)
  # DuckDB derives the init function name from the filename, so we can't rename it
  mkdir -p "${WORK_DIR}/extensions/${subdir}"
  cp "${src_dir}/build/release/extension/aws/aws.duckdb_extension" \
     "${WORK_DIR}/extensions/${subdir}/aws.duckdb_extension"

  print_success "Extension built and saved to extensions/${subdir}/aws.duckdb_extension"
}

# Test SSO authentication in a container.
# Arguments: $1 = extension subdirectory, $2 = test name
# Returns:   0 if successful, 1 if failed
test_sso_auth() {
  local ext_subdir="$1"
  local test_name="$2"

  print_info "Testing SSO authentication with ${test_name}..."

  # Run the test and capture output; '|| true' keeps set -e from aborting on a
  # failed container run (the output is inspected below instead).
  local output
  output=$(docker run --rm \
    -v "${WORK_DIR}/extensions:/extensions:ro" \
    -v "${HOME}/.aws:/root/.aws:ro" \
    -e "AWS_PROFILE=${AWS_PROFILE}" \
    -w /tmp \
    "${TEST_IMAGE}" bash -c "
      # Install dependencies quietly
      apt-get update -qq > /dev/null 2>&1
      apt-get install -y -qq wget unzip ca-certificates > /dev/null 2>&1

      # Download DuckDB CLI
      wget -q https://github.com/duckdb/duckdb/releases/download/${DUCKDB_VERSION}/duckdb_cli-linux-amd64.zip
      unzip -q duckdb_cli-linux-amd64.zip

      # Run the SSO test
      ./duckdb -unsigned -c \"
        INSTALL httpfs;
        LOAD '/extensions/${ext_subdir}/aws.duckdb_extension';
        LOAD httpfs;
        CREATE SECRET my_sso (TYPE S3, PROVIDER CREDENTIAL_CHAIN, CHAIN 'sso', PROFILE '${AWS_PROFILE}');
        SELECT * FROM duckdb_secrets();
      \" 2>&1
    " 2>&1) || true

  echo "$output"

  # Check for certificate errors (common patterns)
  if echo "$output" | grep -qiE "certificate|SSL|TLS|curl.*error|CURLE_|ca-bundle|ca-certificates"; then
    print_failure "Certificate/SSL error detected"
    return 1
  fi

  # Check for successful secret creation
  if echo "$output" | grep -q "credential_chain" && echo "$output" | grep -q "my_sso"; then
    print_success "SSO secret created successfully"
    return 0
  fi

  print_failure "SSO authentication failed (unknown error)"
  return 1
}

# Test S3 access in a container.
# Arguments: $1 = extension subdirectory, $2 = test name
# Returns:   0 if successful, 1 if failed
test_s3_access() {
  local ext_subdir="$1"
  local test_name="$2"

  if [ -z "${S3_TEST_PATH}" ]; then
    print_info "Skipping S3 access test (S3_TEST_PATH not configured)"
    return 0
  fi

  print_info "Testing S3 access with ${test_name}..."

  # '|| true' keeps set -e from aborting on a failed container run; the output
  # is inspected below instead.
  local output
  output=$(docker run --rm \
    -v "${WORK_DIR}/extensions:/extensions:ro" \
    -v "${HOME}/.aws:/root/.aws:ro" \
    -e "AWS_PROFILE=${AWS_PROFILE}" \
    -w /tmp \
    "${TEST_IMAGE}" bash -c "
      # Install dependencies quietly
      apt-get update -qq > /dev/null 2>&1
      apt-get install -y -qq wget unzip ca-certificates > /dev/null 2>&1

      # Download DuckDB CLI
      wget -q https://github.com/duckdb/duckdb/releases/download/${DUCKDB_VERSION}/duckdb_cli-linux-amd64.zip
      unzip -q duckdb_cli-linux-amd64.zip

      # Run the S3 query
      ./duckdb -unsigned -c \"
        INSTALL httpfs;
        LOAD '/extensions/${ext_subdir}/aws.duckdb_extension';
        LOAD httpfs;
        CREATE SECRET my_sso (TYPE S3, PROVIDER CREDENTIAL_CHAIN, CHAIN 'sso', PROFILE '${AWS_PROFILE}');
        SELECT COUNT(*) as row_count FROM '${S3_TEST_PATH}';
      \" 2>&1
    " 2>&1) || true

  echo "$output"

  # Check for successful query (should show a count)
  if echo "$output" | grep -q "row_count"; then
    print_success "S3 query executed successfully"
    return 0
  fi

  print_failure "S3 access failed"
  return 1
}

###############################################################################
# MAIN SCRIPT
###############################################################################

print_header "DuckDB AWS SSO CA Certificate Fix - Test Script"

echo "Configuration:"
echo "  AWS Profile:    ${AWS_PROFILE}"
echo "  S3 Test Path:   ${S3_TEST_PATH:-(not configured)}"
echo "  DuckDB Version: ${DUCKDB_VERSION}"
echo "  Work Directory: ${WORK_DIR}"
echo "  Main Commit:    ${MAIN_COMMIT} (without fix)"
echo "  Fix Commit:     ${FIX_COMMIT} (with fix)"

# Check prerequisites
print_subheader "Checking Prerequisites"
check_command docker
check_command git
check_command aws
print_success "Required commands available"

# Verify Docker is running
if ! docker info > /dev/null 2>&1; then
  print_failure "Docker is not running. Please start Docker first."
  exit 1
fi
print_success "Docker is running"

# Verify AWS SSO session
if ! aws sts get-caller-identity --profile "${AWS_PROFILE}" > /dev/null 2>&1; then
  print_failure "AWS SSO session not active. Please run: aws sso login --profile ${AWS_PROFILE}"
  exit 1
fi
print_success "AWS SSO session is active for profile '${AWS_PROFILE}'"

# Set up work directory
# If extensions already exist, keep them (allows quick re-runs after SSO refresh)
print_subheader "Setting Up Work Directory"
if [ -f "${WORK_DIR}/extensions/main/aws.duckdb_extension" ] && [ -f "${WORK_DIR}/extensions/fix/aws.duckdb_extension" ]; then
  print_info "Extensions already built - keeping existing work directory"
  print_info "To force rebuild, run: docker run --rm -v /tmp:/tmp alpine rm -rf ${WORK_DIR}"
else
  docker_cleanup "${WORK_DIR}"
  mkdir -p "${WORK_DIR}/extensions"
  print_success "Created ${WORK_DIR}"
fi

###############################################################################
# PART 1: Demonstrate the problem with main branch commit
###############################################################################

print_header "PART 1: Demonstrating the Problem (commit ${MAIN_COMMIT})"

print_subheader "Cloning repository for main commit (without fix)"
if [ -d "${WORK_DIR}/duckdb-aws-main" ]; then
  print_info "Repository already cloned for main commit"
else
  git clone -q "${REPO_URL}" "${WORK_DIR}/duckdb-aws-main"
  git -C "${WORK_DIR}/duckdb-aws-main" checkout -q "${MAIN_COMMIT}"
fi
print_success "Using commit ${MAIN_COMMIT}"
# Note: WORK_DIR is quoted here; the original unquoted expansion broke on
# paths containing spaces (SC2086). 'git -C' avoids a cd/cd- dance entirely.
echo "Commit info: $(git -C "${WORK_DIR}/duckdb-aws-main" log -1 --oneline)"

print_subheader "Building extension from main commit"
build_extension "${WORK_DIR}/duckdb-aws-main" "main"

print_subheader "Testing SSO with main commit extension"
echo ""
echo "EXPECTED: This should FAIL with a certificate error because the SSO provider"
echo "          doesn't receive the CA certificate path when built on manylinux"
echo "          and run on Ubuntu."
echo ""
main_sso_result=0
test_sso_auth "main" "main commit" || main_sso_result=$?
if [ "$main_sso_result" -ne 0 ]; then
  print_success "Main commit correctly shows the SSO CA certificate bug"
else
  print_info "Unexpected: Main commit SSO worked (bug may already be fixed in this commit)"
fi

###############################################################################
# PART 2: Verify the fix
###############################################################################

print_header "PART 2: Verifying the Fix (commit ${FIX_COMMIT})"

print_subheader "Cloning repository for fix commit"
if [ -d "${WORK_DIR}/duckdb-aws-fix" ]; then
  print_info "Repository already cloned for fix commit"
else
  git clone -q "${REPO_URL}" "${WORK_DIR}/duckdb-aws-fix"
  git -C "${WORK_DIR}/duckdb-aws-fix" checkout -q "${FIX_COMMIT}"
fi
print_success "Using commit ${FIX_COMMIT}"
# WORK_DIR is quoted (the original unquoted expansion broke on paths with
# spaces, SC2086); 'git -C' replaces the cd/cd- dance.
echo "Commit info: $(git -C "${WORK_DIR}/duckdb-aws-fix" log -1 --oneline)"

echo ""
echo "The fix adds CA certificate path configuration to SSOCredentialsProvider:"
# Run in a subshell so a failing 'git show' cannot strand the script in the
# wrong working directory (the original 'cd A && git show ... && cd -' skipped
# the return cd whenever git show failed).
(cd "${WORK_DIR}/duckdb-aws-fix" && git show --stat HEAD | head -10)

print_subheader "Building extension from fix commit"
build_extension "${WORK_DIR}/duckdb-aws-fix" "fix"

print_subheader "Testing SSO with fix commit extension"
echo ""
echo "EXPECTED: This should SUCCEED because the fix passes the CA certificate"
echo "          path to the SSO provider."
echo ""
fix_sso_result=0
test_sso_auth "fix" "fix commit" || fix_sso_result=$?
if (( fix_sso_result == 0 )); then
  print_success "Fix commit correctly resolves the SSO CA certificate bug"
else
  print_failure "Fix commit still has SSO issues"
fi

# Test S3 access only when SSO worked and a test path is configured
if (( fix_sso_result == 0 )) && [ -n "${S3_TEST_PATH}" ]; then
  print_subheader "Testing S3 access with fix commit extension"
  test_s3_access "fix" "fix commit"
fi

###############################################################################
# SUMMARY
###############################################################################

print_header "Test Summary"

# Print one table row with fixed column widths.
# Arguments: $1 = test label, $2 = PASSED/FAILED, $3 = trailing note
# Using printf column formatting instead of hand-padded echo literals keeps
# the four result rows aligned without duplicating whitespace by hand.
summary_row() {
  printf '| %-34s | %-8s |%s\n' "$1" "$2" "$3"
}

summary_row "Test" "Result" ""
echo "|------------------------------------|----------|"
if (( main_sso_result != 0 )); then
  summary_row "Main commit SSO (should fail)" "FAILED" " <- Expected (demonstrates bug)"
else
  summary_row "Main commit SSO (should fail)" "PASSED" " <- Unexpected"
fi
if (( fix_sso_result == 0 )); then
  summary_row "Fix commit SSO (should pass)" "PASSED" " <- Expected (fix works)"
else
  summary_row "Fix commit SSO (should pass)" "FAILED" " <- Unexpected"
fi

echo ""
echo "Commits tested:"
echo "  Main (broken):  ${MAIN_COMMIT}"
echo "  Fix (working):  ${FIX_COMMIT}"
echo ""
echo "Extension files saved in: ${WORK_DIR}/extensions/"
echo ""
echo "To clean up: docker run --rm -v /tmp:/tmp alpine rm -rf ${WORK_DIR}"
echo ""

# Final verdict: the test "passes" when the bug reproduces on main AND the fix works.
if (( main_sso_result != 0 && fix_sso_result == 0 )); then
  print_success "TEST PASSED: Bug reproduced and fix verified!"
  exit 0
elif (( main_sso_result == 0 && fix_sso_result == 0 )); then
  print_info "Both commits work - bug may have been fixed in the main commit already"
  exit 0
else
  print_failure "TEST FAILED: Unexpected results"
  exit 1
fi