Skip to content

Instantly share code, notes, and snippets.

@gounthar
Last active March 12, 2026 13:20
Show Gist options
  • Select an option

  • Save gounthar/d8c8670da8ca6e1c52ca4b645425eff4 to your computer and use it in GitHub Desktop.

Select an option

Save gounthar/d8c8670da8ca6e1c52ca4b645425eff4 to your computer and use it in GitHub Desktop.
K3 full data collection: sysinfo + build + benchmark + backup
#!/bin/bash
# K3 Full Data Collection Script
# Run this on a fresh BianbuCloud K3 instance to collect ALL benchmark data.
# Designed for a wiped machine — installs deps, builds, benchmarks, backs up.
#
# Usage:
# chmod +x k3-full-run.sh
# nohup ./k3-full-run.sh 2>&1 | tee k3-full-run.log &
#
# Or inside tmux (recommended — instances drop connections):
# tmux new -s bench
# ./k3-full-run.sh 2>&1 | tee k3-full-run.log
set -uo pipefail
# NOTE: intentionally no -e (errexit) — benchmark failures must not kill the script.
# Critical failures (like build) are checked explicitly via $? or exit codes.
# --- Global configuration, read by every phase below ---
WORKDIR="$HOME/k3-lab"                 # root for clone, models, checkpoints, logs
RESULTS="$WORKDIR/results"             # per-phase output files; archived in phase 10
TIMESTAMP=$(date +%Y%m%d-%H%M%S)       # run id embedded in log and archive names
LOGFILE="$WORKDIR/k3-full-run-${TIMESTAMP}.log"   # appended to by log()
MODEL_DIR="$WORKDIR/models"            # downloaded .gguf models (phase 4)
THREAD_COUNTS="1 4 8"                  # thread counts swept in every benchmark phase
# Creating RESULTS also creates WORKDIR, so LOGFILE's directory exists
# before the first log() call writes to it.
mkdir -p "$RESULTS" "$MODEL_DIR"
# Timestamped logger: prints "[HH:MM:SS] message" to stdout and appends the
# same line to $LOGFILE. printf avoids echo's flag/backslash edge cases.
log() {
  printf '[%s] %s\n' "$(date '+%H:%M:%S')" "$*" | tee -a "$LOGFILE"
}
# Save results incrementally — the instance can die at any time, so each
# phase snapshots $WORKDIR/results into a timestamped tarball.
# Arguments: $1 - checkpoint tag embedded in the archive name
save_checkpoint() {
  local label="$1"
  local stamp
  stamp=$(date +%H%M%S)
  # Best-effort: a failed tar must never abort the run.
  tar czf "$WORKDIR/checkpoint-${label}-${stamp}.tar.gz" \
    -C "$WORKDIR" results/ 2>/dev/null || true
  log "CHECKPOINT saved: $label"
}
########################################
# PHASE 1: System Info
########################################
# Captures a full hardware/software inventory into $RESULTS so benchmark
# numbers can later be tied to the exact machine configuration.
log "====== PHASE 1: SYSTEM INFO ======"
log "--- OS ---"
uname -a | tee "$RESULTS/uname.txt"
cat /etc/os-release | tee "$RESULTS/os-release.txt"
log "--- CPU ---"
lscpu 2>&1 | tee "$RESULTS/lscpu.txt"
cat /proc/cpuinfo | tee "$RESULTS/cpuinfo.txt"
# Deduplicated ISA lines (/proc/cpuinfo repeats them once per hart)
grep -i isa /proc/cpuinfo | sort -u | tee "$RESULTS/isa-extensions.txt"
log "--- Memory ---"
free -h | tee "$RESULTS/memory.txt"
cat /proc/meminfo > "$RESULTS/meminfo.txt"
log "--- Storage ---"
df -h | tee "$RESULTS/storage.txt"
log "--- Compilers & tools ---"
# One "tool: version-or-NOT FOUND" line per tool, all captured to tools.txt.
{
gcc --version 2>&1 | head -1 || echo "gcc: NOT FOUND"
g++ --version 2>&1 | head -1 || echo "g++: NOT FOUND"
cmake --version 2>&1 | head -1 || echo "cmake: NOT FOUND"
git --version 2>&1 | head -1 || echo "git: NOT FOUND"
python3 --version 2>&1 | head -1 || echo "python3: NOT FOUND"
for tool in make ninja node rustc cargo docker wget curl htop perf tmux; do
if command -v "$tool" &>/dev/null; then
echo "$tool: $($tool --version 2>&1 | head -1)"
else
echo "$tool: NOT FOUND"
fi
done
} | tee "$RESULTS/tools.txt"
log "--- AI core scheduling ---"
# /proc/set_ai_thread is the (presumably vendor-specific) knob later phases
# write PIDs into to migrate a process onto the A100 AI cores — TODO confirm
# semantics against platform docs.
{
if [ -f /proc/set_ai_thread ]; then
echo "/proc/set_ai_thread exists"
ls -la /proc/set_ai_thread
else
echo "/proc/set_ai_thread NOT FOUND"
fi
} | tee "$RESULTS/ai-cores.txt"
log "--- CPU topology ---"
{
for cpu in /sys/devices/system/cpu/cpu*/topology; do
cpuid=$(echo "$cpu" | grep -oP 'cpu\K[0-9]+')
coreid=$(cat "$cpu/core_id" 2>/dev/null || echo "?")
echo "cpu${cpuid}: core_id=${coreid}"
done
} | tee "$RESULTS/cpu-topology.txt"
log "--- Kernel RVV config ---"
zcat /proc/config.gz 2>/dev/null | grep -iE "riscv|rvv|vector" > "$RESULTS/kernel-rvv.txt" || echo "kernel config not accessible" > "$RESULTS/kernel-rvv.txt"
log "--- vlen measurement ---"
# Tiny C probe reading the vlenb CSR (vector register length in bytes) when
# built with RVV; the heredoc delimiter is quoted so nothing expands.
cat > /tmp/vlenb.c << 'EOF'
#include <stdio.h>
int main() {
unsigned long vlenb = 0;
#if defined(__riscv_v)
__asm__ volatile("csrr %0, vlenb" : "=r"(vlenb));
printf("vlenb=%lu bytes, vlen=%lu bits\n", vlenb, vlenb * 8);
#else
printf("RVV not enabled at compile time\n");
#endif
return 0;
}
EOF
# Run the probe on the default (X100) cores, then again from a child shell
# migrated onto the A100 cores if /proc/set_ai_thread exists. A failed
# compile (no RVV toolchain) silently skips the whole group.
gcc -march=rv64gcv -O2 -o /tmp/vlenb /tmp/vlenb.c 2>/dev/null && {
echo "X100 cores:"; /tmp/vlenb
if [ -f /proc/set_ai_thread ]; then
echo "A100 cores:"; bash -c 'echo $$ > /proc/set_ai_thread 2>/dev/null; /tmp/vlenb'
fi
} | tee "$RESULTS/vlen.txt"
rm -f /tmp/vlenb /tmp/vlenb.c
log "--- SpacemiT packages ---"
apt-cache search spacemit 2>/dev/null | tee "$RESULTS/spacemit-packages.txt" || true
dpkg -l | grep -i spacemit 2>/dev/null | tee "$RESULTS/spacemit-installed.txt" || true
dpkg -l | grep -i llama 2>/dev/null | tee -a "$RESULTS/spacemit-installed.txt" || true
save_checkpoint "phase1-sysinfo"
########################################
# PHASE 2: Install Dependencies
########################################
# Verifies the toolchain needed by later phases. The script deliberately
# does not install anything itself; it only warns so the operator can
# install the missing tool and re-run.
log "====== PHASE 2: DEPENDENCIES ======"
# gcc/g++ are required by the Phase 3 build, so check them here too
# (the original loop only covered cmake/git/tmux).
for dep in cmake git tmux gcc g++; do
  if ! command -v "$dep" &>/dev/null; then
    log "WARNING: $dep not found — install it manually and re-run"
  fi
done
# Log versions only when the tool exists, so a missing tool doesn't emit a
# confusing "cmake: " line plus a stderr error.
if command -v cmake &>/dev/null; then log "cmake: $(cmake --version | head -1)"; fi
if command -v gcc &>/dev/null; then log "gcc: $(gcc --version | head -1)"; fi
########################################
# PHASE 3: Clone & Build llama.cpp
########################################
log "====== PHASE 3: BUILD LLAMA.CPP ======"
# Fail fast if the work directory is unreachable — everything below depends
# on running from the right place (the original cd was unchecked).
cd "$WORKDIR" || { log "FATAL: cannot cd to $WORKDIR"; exit 1; }
if [ ! -d "llama.cpp" ]; then
log "Cloning llama.cpp..."
{ time git clone --depth 1 https://github.com/ggml-org/llama.cpp.git 2>&1 ; } 2>&1 | tee "$RESULTS/git-clone.txt"
fi
# If the clone failed the directory is missing; bail out explicitly instead
# of running cmake against the wrong directory.
cd "$WORKDIR/llama.cpp" || { log "FATAL: llama.cpp checkout missing (clone failed?)"; exit 1; }
git log --oneline -1 | tee "$RESULTS/llama-cpp-version.txt"
NPROC=$(nproc)
log "Building with $NPROC cores (GGML_NATIVE=ON)..."
# Configure — GGML_NATIVE=ON lets cmake probe the host CPU's features
{ time cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=ON 2>&1 ; } 2>&1 | tee "$RESULTS/cmake-configure.txt"
# Save the detected march flags so they can be compared with the A100
# configure later (see the A100 comparison step below)
grep -r "march" build/ggml/src/CMakeFiles/ggml-cpu.dir/flags.make 2>/dev/null | tee "$RESULTS/march-flags.txt" || true
cp build/CMakeCache.txt "$RESULTS/CMakeCache-x100.txt" 2>/dev/null || true
# Build
log "Building..."
{ time cmake --build build --config Release -j "$NPROC" 2>&1 ; } 2>&1 | tee "$RESULTS/build-log.txt"
# With pipefail (set at the top), $? reflects cmake's status, not tee's.
BUILD_STATUS=$?
if [ $BUILD_STATUS -ne 0 ]; then
log "BUILD FAILED (exit $BUILD_STATUS)"
save_checkpoint "phase3-build-failed"
exit 1
fi
log "Build successful"
ls -la build/bin/llama-bench build/bin/llama-cli build/bin/llama-server 2>/dev/null | tee "$RESULTS/build-binaries.txt"
# Also configure on A100 to compare flags — GGML_NATIVE=ON probes whatever
# CPU cmake runs on, so configuring from an A100-scheduled shell may detect
# different -march flags than the X100 configure above.
if [ -f /proc/set_ai_thread ]; then
log "Configuring on A100 cores for comparison..."
# Pass the path as $1 instead of splicing it into the -c string — safe even
# if WORKDIR ever contains spaces or shell metacharacters.
bash -c 'echo $$ > /proc/set_ai_thread 2>/dev/null; cd "$1" && cmake -B build-a100 -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=ON' _ "$WORKDIR/llama.cpp" 2>&1 | tee "$RESULTS/cmake-configure-a100.txt"
cp build-a100/CMakeCache.txt "$RESULTS/CMakeCache-a100.txt" 2>/dev/null || true
grep -r "march" build-a100/ggml/src/CMakeFiles/ggml-cpu.dir/flags.make 2>/dev/null | tee "$RESULTS/march-flags-a100.txt" || true
log "CMakeCache diff (X100 vs A100):"
# BUGFIX: diff exits 1 when the files DIFFER, so the original
#   diff ... | tee file || echo "No differences" | tee file
# overwrote any real differences with "No differences" (and wrote nothing
# when the caches matched). Branch on diff's exit status instead.
if diff <(grep -E 'GGML_|MARCH|march|NATIVE|RVV|RISCV' "$RESULTS/CMakeCache-x100.txt") \
        <(grep -E 'GGML_|MARCH|march|NATIVE|RVV|RISCV' "$RESULTS/CMakeCache-a100.txt") \
        > "$RESULTS/cmake-diff.txt" 2>/dev/null; then
  echo "No differences" | tee -a "$RESULTS/cmake-diff.txt"
else
  # echo the captured diff so it still appears in the console log
  cat "$RESULTS/cmake-diff.txt"
fi
fi
save_checkpoint "phase3-build"
########################################
# PHASE 4: Download Models
########################################
log "====== PHASE 4: DOWNLOAD MODELS ======"
cd "$WORKDIR" || { log "FATAL: cannot cd to $WORKDIR"; exit 1; }
# Download one model, mirror first (HuggingFace is blocked from China),
# huggingface.co as fallback. curl -f makes HTTP errors (404 etc.) fail the
# transfer instead of saving an HTML error page as a .gguf — in the original
# ||-chain a 404 body counted as success, so the fallback URL never ran and
# a corrupt "model" was left on disk.
# Args: $1 output path, $2 label for log messages, $3 primary URL, $4 fallback URL
# Returns non-zero if both downloads fail (partial file is removed).
fetch_model() {
  local out="$1" label="$2" primary="$3" fallback="$4"
  curl -fL --connect-timeout 30 -o "$out" "$primary" 2>&1 \
    || curl -fL --connect-timeout 30 -o "$out" "$fallback" 2>&1 \
    || { log "FAILED to download $label"; rm -f "$out"; return 1; }
}
# TinyLlama 1.1B Q4_0 (~600MB) — same as Part 1
TINYLLAMA="$MODEL_DIR/tinyllama-1.1b-chat-v1.0.Q4_0.gguf"
if [ ! -f "$TINYLLAMA" ]; then
log "Downloading TinyLlama 1.1B Q4_0..."
fetch_model "$TINYLLAMA" "TinyLlama" \
  "https://hf-mirror.com/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_0.gguf" \
  "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_0.gguf"
fi
# Qwen2.5 0.5B Q4_0 (~400MB) — same as SpacemiT's benchmark
QWEN="$MODEL_DIR/qwen2.5-0.5b-instruct-q4_0.gguf"
if [ ! -f "$QWEN" ]; then
log "Downloading Qwen2.5 0.5B Q4_0..."
fetch_model "$QWEN" "Qwen2.5" \
  "https://hf-mirror.com/Qwen/Qwen2.5-0.5B-Instruct-GGUF/resolve/main/qwen2.5-0.5b-instruct-q4_0.gguf" \
  "https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct-GGUF/resolve/main/qwen2.5-0.5b-instruct-q4_0.gguf"
fi
# Qwen3.5 0.8B Q4_0 (~507MB) — latest generation, smallest Qwen3.5
QWEN35="$MODEL_DIR/Qwen3.5-0.8B-Q4_0.gguf"
if [ ! -f "$QWEN35" ]; then
log "Downloading Qwen3.5 0.8B Q4_0..."
fetch_model "$QWEN35" "Qwen3.5" \
  "https://hf-mirror.com/unsloth/Qwen3.5-0.8B-GGUF/resolve/main/Qwen3.5-0.8B-Q4_0.gguf" \
  "https://huggingface.co/unsloth/Qwen3.5-0.8B-GGUF/resolve/main/Qwen3.5-0.8B-Q4_0.gguf"
fi
ls -lh "$MODEL_DIR"/*.gguf 2>/dev/null | tee "$RESULTS/models.txt"
save_checkpoint "phase4-models"
########################################
# PHASE 5: Benchmarks — X100 cores
########################################
# Baseline runs on the default (X100) cores. For every downloaded model and
# every thread count, llama-bench runs twice: prompt processing only
# (-p 512 -n 0) and token generation only (-p 0 -n 128), 3 repetitions each.
log "====== PHASE 5: BENCHMARKS X100 ======"
BENCH="$WORKDIR/llama.cpp/build/bin/llama-bench"
for model in "$MODEL_DIR"/*.gguf; do
# an unmatched glob leaves the literal pattern behind — skip it
[ -f "$model" ] || continue
MODEL_NAME=$(basename "$model" .gguf)
log "--- Benchmarking $MODEL_NAME on X100 ---"
for threads in $THREAD_COUNTS; do
log " X100 / $MODEL_NAME / ${threads}t"
{
echo "=== X100 / $MODEL_NAME / ${threads} threads ==="
echo "Timestamp: $(date)"
# || true: a single failed bench must not abort the collection run
"$BENCH" -m "$model" -p 512 -n 0 -r 3 -t "$threads" 2>&1 || true
echo ""
"$BENCH" -m "$model" -p 0 -n 128 -r 3 -t "$threads" 2>&1 || true
echo ""
} | tee -a "$RESULTS/bench-x100-${MODEL_NAME}.txt"
done
# checkpoint after each model — the instance can disappear mid-run
save_checkpoint "phase5-x100-${MODEL_NAME}"
done
########################################
# PHASE 6: Benchmarks — A100 cores (standard RVV)
########################################
# Each run goes through a child bash that first writes its own PID into
# /proc/set_ai_thread, migrating the process onto the A100 AI cores, then
# execs llama-bench. Same prompt/generation split as phase 5.
if [ -f /proc/set_ai_thread ]; then
log "====== PHASE 6: BENCHMARKS A100 (standard RVV) ======"
for model in "$MODEL_DIR"/*.gguf; do
[ -f "$model" ] || continue
MODEL_NAME=$(basename "$model" .gguf)
log "--- Benchmarking $MODEL_NAME on A100 (standard RVV) ---"
for threads in $THREAD_COUNTS; do
log " A100-RVV / $MODEL_NAME / ${threads}t"
{
echo "=== A100 standard RVV / $MODEL_NAME / ${threads} threads ==="
echo "Timestamp: $(date)"
# Pass the bench command as argv ($0 + "$@") instead of interpolating
# $BENCH/$model/$threads unquoted into the -c string — paths with spaces
# or shell metacharacters stay intact.
bash -c 'echo $$ > /proc/set_ai_thread 2>/dev/null; exec "$0" "$@"' \
  "$BENCH" -m "$model" -p 512 -n 0 -r 3 -t "$threads" 2>&1 || true
echo ""
bash -c 'echo $$ > /proc/set_ai_thread 2>/dev/null; exec "$0" "$@"' \
  "$BENCH" -m "$model" -p 0 -n 128 -r 3 -t "$threads" 2>&1 || true
echo ""
} | tee -a "$RESULTS/bench-a100-rvv-${MODEL_NAME}.txt"
done
save_checkpoint "phase6-a100-rvv-${MODEL_NAME}"
done
fi
########################################
# PHASE 7: SpacemiT IME2 binary
########################################
# Locate the vendor-patched llama-bench shipped by the
# llama.cpp-tools-spacemit package (installed under /usr/bin).
log "====== PHASE 7: SPACEMIT IME2 ======"
SPACEMIT_BENCH=""
# Check if SpacemiT package is already installed
if ! dpkg -l llama.cpp-tools-spacemit 2>/dev/null | grep -q '^ii'; then
log "WARNING: llama.cpp-tools-spacemit not installed — install manually:"
log " sudo apt-get install -y llama.cpp-tools-spacemit || sudo dpkg --force-overwrite -i /var/cache/apt/archives/llama.cpp-tools-spacemit_*.deb"
log " (apt downloads the .deb but fails on file conflict; dpkg --force-overwrite finishes the job)"
fi
# A single -x test replaces the original command -v branch plus its redundant
# [ -f ] fallback: the fallback could select a non-executable file that every
# later benchmark invocation would then fail on, and it also skipped the
# "found" log line and the version capture.
if [ -x /usr/bin/llama-bench ]; then
SPACEMIT_BENCH="/usr/bin/llama-bench"
log "SpacemiT llama-bench found: $SPACEMIT_BENCH"
/usr/bin/llama-cli --version 2>&1 | tee "$RESULTS/spacemit-version.txt" || true
fi
# Benchmark each model with the SpacemiT binary, skipping models its older
# llama.cpp fork cannot load. SPACEMIT_BENCH is set by the detection step
# above (empty string means "not found").
if [ -n "$SPACEMIT_BENCH" ]; then
for model in "$MODEL_DIR"/*.gguf; do
# an unmatched glob leaves the literal pattern behind — skip it
[ -f "$model" ] || continue
MODEL_NAME=$(basename "$model" .gguf)
log "--- Benchmarking $MODEL_NAME with SpacemiT IME2 ---"
# Test if SpacemiT binary can load this model (old fork may not support newer architectures)
if ! "$SPACEMIT_BENCH" -m "$model" -p 1 -n 0 -r 1 -t 1 &>/dev/null; then
log "SKIPPING $MODEL_NAME — SpacemiT binary cannot load this model (unsupported architecture)"
echo "=== SpacemiT IME2 / $MODEL_NAME: SKIPPED (model not supported by SpacemiT binary) ===" \
>> "$RESULTS/bench-a100-ime2-${MODEL_NAME}.txt"
continue
fi
# SpacemiT binary auto-migrates to A100 cores
for threads in $THREAD_COUNTS; do
log " IME2 / $MODEL_NAME / ${threads}t"
{
echo "=== SpacemiT IME2 / $MODEL_NAME / ${threads} threads ==="
echo "Timestamp: $(date)"
# prompt-processing run (-p 512 -n 0), then generation run (-p 0 -n 128);
# || true so a failed bench doesn't abort the collection
"$SPACEMIT_BENCH" -m "$model" -p 512 -n 0 -r 3 -t "$threads" 2>&1 || true
echo ""
"$SPACEMIT_BENCH" -m "$model" -p 0 -n 128 -r 3 -t "$threads" 2>&1 || true
echo ""
} | tee -a "$RESULTS/bench-a100-ime2-${MODEL_NAME}.txt"
done
save_checkpoint "phase7-ime2-${MODEL_NAME}"
done
# Also test without explicit AI core scheduling to confirm auto-migration
log "--- Auto-migration test (no /proc/set_ai_thread) ---"
FIRST_MODEL=$(ls "$MODEL_DIR"/*.gguf 2>/dev/null | head -1)
if [ -n "$FIRST_MODEL" ]; then
{
echo "=== SpacemiT IME2 auto-migration test ==="
"$SPACEMIT_BENCH" -m "$FIRST_MODEL" -p 512 -n 128 -r 3 -t 8 2>&1
} | tee "$RESULTS/ime2-auto-migration.txt"
fi
else
log "SpacemiT IME2 binary not available — skipping"
fi
save_checkpoint "phase7-ime2"
########################################
# PHASE 8: Fork release binary test
########################################
# Downloads the latest riscv64 release assets from the fork and benchmarks
# any llama-bench they contain.
log "====== PHASE 8: FORK RELEASE BINARY ======"
REPO="gounthar/llama.cpp"
RELEASE_DIR="$WORKDIR/release-test"
mkdir -p "$RELEASE_DIR"
# Fetch the release metadata ONCE — the original hit the GitHub API twice,
# doubling unauthenticated rate-limit usage and risking a tag/asset mismatch
# if a release is published between the two calls.
RELEASE_JSON=$(curl -s "https://api.github.com/repos/$REPO/releases/latest" 2>/dev/null)
RELEASE_TAG=$(printf '%s\n' "$RELEASE_JSON" | grep '"tag_name"' | cut -d'"' -f4)
log "Latest release: ${RELEASE_TAG:-NONE}"
if [ -n "$RELEASE_TAG" ]; then
# riscv64 asset URLs extracted from the cached JSON (newline-separated;
# URLs contain no spaces, so unquoted expansion in the for loop is fine)
ASSETS=$(printf '%s\n' "$RELEASE_JSON" \
| grep '"browser_download_url"' \
| grep -i 'riscv64' \
| cut -d'"' -f4)
for asset_url in $ASSETS; do
asset_file=$(basename "$asset_url")
label=$(echo "$asset_file" | grep -oP '(native|generic)' || echo "unknown")
extract_dir="$RELEASE_DIR/$label"
mkdir -p "$extract_dir"
log "Downloading $asset_file..."
# -f: treat HTTP errors as failures instead of saving an error page
curl -fL --connect-timeout 30 -o "$RELEASE_DIR/$asset_file" "$asset_url" 2>&1
if [[ "$asset_file" == *.tar.gz ]]; then
tar xzf "$RELEASE_DIR/$asset_file" -C "$extract_dir" 2>&1
elif [[ "$asset_file" == *.zip ]]; then
unzip -o "$RELEASE_DIR/$asset_file" -d "$extract_dir" 2>&1
fi
RELEASE_BENCH=$(find "$extract_dir" -name "llama-bench" -type f | head -1)
if [ -n "$RELEASE_BENCH" ]; then
chmod +x "$RELEASE_BENCH"
log "--- Release $label binary info ---"
file "$RELEASE_BENCH" 2>/dev/null | tee -a "$RESULTS/release-binary-info.txt"
ldd "$RELEASE_BENCH" 2>/dev/null | tee -a "$RESULTS/release-binary-info.txt"
# Parenthesize the -o expression so the implicit -print clearly covers both patterns
find "$extract_dir" \( -name "*.so" -o -name "libggml*" \) 2>/dev/null | tee -a "$RESULTS/release-variants.txt"
FIRST_MODEL=$(ls "$MODEL_DIR"/*.gguf 2>/dev/null | head -1)
if [ -n "$FIRST_MODEL" ]; then
log "--- Release $label benchmark (X100, 8t) ---"
{
echo "=== Release $label / X100 / 8 threads ==="
"$RELEASE_BENCH" -m "$FIRST_MODEL" -p 512 -n 0 -r 3 -t 8 2>&1 || true
echo ""
"$RELEASE_BENCH" -m "$FIRST_MODEL" -p 0 -n 128 -r 3 -t 8 2>&1 || true
} | tee -a "$RESULTS/bench-release-${label}.txt"
fi
fi
done
else
log "No release found on $REPO — skipping"
fi
save_checkpoint "phase8-release"
########################################
# PHASE 9: llama-server interactive test
########################################
# Smoke-test the OpenAI-compatible HTTP API of the freshly built server.
log "====== PHASE 9: LLAMA-SERVER TEST ======"
SERVER="$WORKDIR/llama.cpp/build/bin/llama-server"
FIRST_MODEL=$(ls "$MODEL_DIR"/*.gguf 2>/dev/null | head -1)
if [ -f "$SERVER" ] && [ -n "$FIRST_MODEL" ]; then
log "Starting llama-server (X100, 4t)..."
"$SERVER" -m "$FIRST_MODEL" -t 4 --host 127.0.0.1 --port 8080 &
SERVER_PID=$!
# Poll the server's /health endpoint instead of the original fixed
# "sleep 5": model load time varies with model size and disk speed, and a
# not-yet-ready server made the API test silently return nothing.
# Allows up to ~60s (30 polls x 2s).
SERVER_READY=0
for _ in {1..30}; do
kill -0 $SERVER_PID 2>/dev/null || break
if curl -s -o /dev/null "http://127.0.0.1:8080/health"; then
SERVER_READY=1
break
fi
sleep 2
done
if [ "$SERVER_READY" -eq 1 ]; then
log "Server running (PID $SERVER_PID), testing API..."
{
echo "=== llama-server OpenAI-compatible API test ==="
curl -s http://127.0.0.1:8080/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{"model":"tinyllama","messages":[{"role":"user","content":"What is RISC-V in one sentence?"}],"max_tokens":50}'
} | tee "$RESULTS/llama-server-test.txt"
kill $SERVER_PID 2>/dev/null
wait $SERVER_PID 2>/dev/null || true
else
log "Server failed to start"
# Reap the process if it is alive but never became healthy — the original
# leaked it in this branch.
kill $SERVER_PID 2>/dev/null || true
wait $SERVER_PID 2>/dev/null || true
fi
else
log "llama-server or model not available — skipping"
fi
save_checkpoint "phase9-server"
########################################
# PHASE 10: Final Backup
########################################
log "====== PHASE 10: FINAL BACKUP ======"
# The log-file glob below is relative to $WORKDIR — make sure we are there.
cd "$WORKDIR" || { log "FATAL: cannot cd to $WORKDIR"; exit 1; }
# Summary of all results
log "--- Results summary ---"
ls -lhR "$RESULTS/" | tee "$RESULTS/results-index.txt"
# Create final archive (everything except model files and build objects).
# --exclude options are placed BEFORE the member operands: GNU tar happens to
# permute trailing options, but other tar implementations (and POSIXLY_CORRECT
# mode) treat them as file names.
ARCHIVE="k3-results-final-${TIMESTAMP}.tar.gz"
tar czf "$HOME/$ARCHIVE" \
--exclude='*.gguf' \
--exclude='build*/' \
-C "$WORKDIR" \
results/ \
k3-full-run-*.log \
2>/dev/null
# Only size and upload the archive if tar actually produced it; the original
# ran du/curl unconditionally and left UPLOAD_URL unset on the failure path.
UPLOAD_URL=""
if [ -f "$HOME/$ARCHIVE" ]; then
ARCHIVE_SIZE=$(du -h "$HOME/$ARCHIVE" | cut -f1)
log "Final archive: ~/$ARCHIVE ($ARCHIVE_SIZE)"
# Upload to 0x0.st (works from China, no auth needed)
log "--- Uploading to 0x0.st ---"
UPLOAD_URL=$(curl -F"file=@$HOME/$ARCHIVE" https://0x0.st 2>/dev/null)
if [ -n "$UPLOAD_URL" ]; then
log "DOWNLOAD URL: $UPLOAD_URL"
log "(valid for 30 days, grab it soon)"
else
log "0x0.st upload failed — use SCP instead"
fi
else
log "FINAL ARCHIVE MISSING — tar failed; results remain in $RESULTS/"
fi
log ""
log "========================================"
log " ALL DONE"
log " Results in: $RESULTS/"
log " Archive: ~/$ARCHIVE"
if [ -n "$UPLOAD_URL" ]; then
log " Download: $UPLOAD_URL"
fi
log "========================================"
log ""
log "Fallback download (from WSL):"
log " scp -o HostKeyAlgorithms=+ssh-rsa -o PubkeyAcceptedAlgorithms=+ssh-rsa \\"
log " bianbu@Pre65M12K3SUB2-06.gdriscv.com:~/$ARCHIVE \\"
log " /mnt/c/support/users/dev/riscv/spacemit-k3-lab/experiments/"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment