Skip to content

Instantly share code, notes, and snippets.

@carlosroman
Last active April 24, 2026 11:59
Show Gist options
  • Select an option

  • Save carlosroman/068fcfdf00bd5691bac0b03a8fc644bc to your computer and use it in GitHub Desktop.

Select an option

Save carlosroman/068fcfdf00bd5691bac0b03a8fc644bc to your computer and use it in GitHub Desktop.
Strix Halo rnd benchmarks

Run 1

Results

model size params backend ngl n_batch n_ubatch type_k type_v fa mmap test t/s
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d16384 98.24 ± 0.32
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d16384 16.43 ± 0.09
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d32768 58.65 ± 0.09
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d32768 14.93 ± 0.01
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d131072 16.53 ± 0.12
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d131072 9.75 ± 0.01

build: ff5ef8278 (1100)

Build Args

#!/usr/bin/env sh
# Configure and build into ./build for GPU target gfx1151 with
# GGML_HIP_ROCWMMA_FATTN=ON.
#
# Required environment:
#   LLVM_BIN  - directory that contains the clang / clang++ binaries
#   ROCM_PATH - value handed to the HIP compiler through --rocm-path

# Abort on the first failing command (a failed configure must not fall
# through to the build step) and treat unset variables as errors.
set -eu

: "${LLVM_BIN:?LLVM_BIN must be set}"
: "${ROCM_PATH:?ROCM_PATH must be set}"

echo "Setting up..."
# --rocm-path reads ROCM_PATH; the previous spelling "ROCM_PATHf" named a
# different variable, so the flag presumably expanded to an empty path.
cmake -S . \
  -B build \
  -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_C_COMPILER="${LLVM_BIN}/clang" \
  -DCMAKE_CXX_COMPILER="${LLVM_BIN}/clang++" \
  -DCMAKE_HIP_COMPILER="${LLVM_BIN}/clang" \
  -DCMAKE_HIP_FLAGS="--rocm-path=${ROCM_PATH} -mllvm --amdgpu-unroll-threshold-local=600" \
  -DGGML_HIP_ROCWMMA_FATTN=ON \
  -DGGML_HIP=ON \
  -DGGML_HIPBLAS=ON \
  -DGPU_TARGETS=gfx1151 \
  -DHIP_PLATFORM=amd \
  -DLLAMA_BUILD_EXAMPLES=OFF \
  -DLLAMA_BUILD_TESTS=OFF \
  -DLLAMA_OPENSSL=ON \
  --fresh

echo "Building..."
# Everything after "--" goes to the native build tool; one job per CPU.
cmake --build build \
  --clean-first \
  --config Release -- "-j$(nproc)"

Run 2

Results

model size params backend ngl n_batch n_ubatch type_k type_v fa mmap test t/s
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d16384 269.90 ± 0.44
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d16384 16.47 ± 0.02
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d32768 220.99 ± 0.41
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d32768 14.94 ± 0.01
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d131072 100.91 ± 0.23
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d131072 9.75 ± 0.00

build: ff5ef8278 (1100)

Build Args

#!/usr/bin/env sh
# Configure and build into ./build for GPU target gfx1151 with
# GGML_HIP_ROCWMMA_FATTN=OFF.
#
# Required environment:
#   LLVM_BIN  - directory that contains the clang / clang++ binaries
#   ROCM_PATH - value handed to the HIP compiler through --rocm-path
#
# NOTE(review): no -DCMAKE_BUILD_TYPE is passed in this run, and
# "--config Release" is only honoured by multi-config generators, so the
# effective build type depends on the project's default - confirm.

# Abort on the first failing command (a failed configure must not fall
# through to the build step) and treat unset variables as errors.
set -eu

: "${LLVM_BIN:?LLVM_BIN must be set}"
: "${ROCM_PATH:?ROCM_PATH must be set}"

echo "Setting up..."
# --rocm-path reads ROCM_PATH; the previous spelling "ROCM_PATHf" named a
# different variable, so the flag presumably expanded to an empty path.
cmake -S . \
  -B build \
  -DCMAKE_C_COMPILER="${LLVM_BIN}/clang" \
  -DCMAKE_CXX_COMPILER="${LLVM_BIN}/clang++" \
  -DCMAKE_HIP_COMPILER="${LLVM_BIN}/clang" \
  -DCMAKE_HIP_FLAGS="--rocm-path=${ROCM_PATH} -mllvm --amdgpu-unroll-threshold-local=600" \
  -DGGML_HIP_ROCWMMA_FATTN=OFF \
  -DGGML_HIP=ON \
  -DGGML_HIPBLAS=ON \
  -DGPU_TARGETS="gfx1151" \
  -DHIP_PLATFORM=amd \
  -DLLAMA_BUILD_EXAMPLES=OFF \
  -DLLAMA_BUILD_TESTS=OFF \
  -DLLAMA_OPENSSL=ON \
  --fresh

echo "Building..."
# Everything after "--" goes to the native build tool; one job per CPU.
cmake --build build \
  --clean-first \
  --config Release -- "-j$(nproc)"

Run 3

Results

model size params backend ngl n_batch n_ubatch type_k type_v fa mmap test t/s
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d16384 257.73 ± 2.07
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d16384 16.37 ± 0.14
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d32768 220.84 ± 1.66
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d32768 14.95 ± 0.02
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d131072 104.69 ± 0.22
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d131072 9.75 ± 0.00

build: ff5ef8278 (1100)

Build Args

#!/usr/bin/env sh
# Configure and build into ./build for GPU target gfx1151 with
# GGML_HIP_NO_VMM=OFF and GGML_HIP_ROCWMMA_FATTN=OFF.
#
# Required environment:
#   LLVM_BIN  - directory that contains the clang / clang++ binaries
#   ROCM_PATH - value handed to the HIP compiler through --rocm-path
#
# NOTE(review): no -DCMAKE_BUILD_TYPE is passed in this run, and
# "--config Release" is only honoured by multi-config generators, so the
# effective build type depends on the project's default - confirm.

# Abort on the first failing command (a failed configure must not fall
# through to the build step) and treat unset variables as errors.
set -eu

: "${LLVM_BIN:?LLVM_BIN must be set}"
: "${ROCM_PATH:?ROCM_PATH must be set}"

echo "Setting up..."
# --rocm-path reads ROCM_PATH; the previous spelling "ROCM_PATHf" named a
# different variable, so the flag presumably expanded to an empty path.
cmake -S . \
  -B build \
  -DCMAKE_C_COMPILER="${LLVM_BIN}/clang" \
  -DCMAKE_CXX_COMPILER="${LLVM_BIN}/clang++" \
  -DCMAKE_HIP_COMPILER="${LLVM_BIN}/clang" \
  -DCMAKE_HIP_FLAGS="--rocm-path=${ROCM_PATH} -mllvm --amdgpu-unroll-threshold-local=600" \
  -DGGML_HIP_NO_VMM=OFF \
  -DGGML_HIP_ROCWMMA_FATTN=OFF \
  -DGGML_HIP=ON \
  -DGGML_HIPBLAS=ON \
  -DGPU_TARGETS="gfx1151" \
  -DHIP_PLATFORM=amd \
  -DLLAMA_BUILD_EXAMPLES=OFF \
  -DLLAMA_BUILD_TESTS=OFF \
  -DLLAMA_OPENSSL=ON \
  --fresh

echo "Building..."
# Everything after "--" goes to the native build tool; one job per CPU.
cmake --build build \
  --clean-first \
  --config Release -- "-j$(nproc)"

Run 4

Results

model size params backend ngl n_batch n_ubatch type_k type_v fa mmap test t/s
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d16384 268.95 ± 0.65
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d16384 16.47 ± 0.02
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d32768 220.80 ± 1.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d32768 14.93 ± 0.01
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d131072 104.33 ± 0.18
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d131072 9.71 ± 0.04

build: ff5ef8278 (1100)

Build Args

#!/usr/bin/env sh
# Configure and build into ./build for GPU target gfx1151 with
# GGML_HIP_NO_VMM=OFF, GGML_HIP_ROCWMMA_FATTN=OFF and GGML_HIPBLAS=OFF.
#
# Required environment:
#   LLVM_BIN  - directory that contains the clang / clang++ binaries
#   ROCM_PATH - value handed to the HIP compiler through --rocm-path

# Abort on the first failing command (a failed configure must not fall
# through to the build step) and treat unset variables as errors.
set -eu

# Without these checks an unset variable would silently become "/clang"
# or an empty --rocm-path.
: "${LLVM_BIN:?LLVM_BIN must be set}"
: "${ROCM_PATH:?ROCM_PATH must be set}"

echo "Setting up..."
cmake -S . \
  -B build \
  -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_C_COMPILER="${LLVM_BIN}/clang" \
  -DCMAKE_CXX_COMPILER="${LLVM_BIN}/clang++" \
  -DCMAKE_HIP_COMPILER="${LLVM_BIN}/clang" \
  -DCMAKE_HIP_FLAGS="--rocm-path=${ROCM_PATH} -mllvm --amdgpu-unroll-threshold-local=600" \
  -DGGML_HIP_NO_VMM=OFF \
  -DGGML_HIP_ROCWMMA_FATTN=OFF \
  -DGGML_HIP=ON \
  -DGGML_HIPBLAS=OFF \
  -DGPU_TARGETS="gfx1151" \
  -DHIP_PLATFORM=amd \
  -DLLAMA_BUILD_EXAMPLES=OFF \
  -DLLAMA_BUILD_TESTS=OFF \
  -DLLAMA_OPENSSL=ON \
  --fresh

echo "Building..."
# Everything after "--" goes to the native build tool; one job per CPU.
cmake --build build \
  --clean-first \
  --config Release -- "-j$(nproc)"

Run 5

Results

model size params backend ngl n_batch n_ubatch type_k type_v fa mmap test t/s
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d16384 271.34 ± 0.44
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d16384 16.47 ± 0.02
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d32768 221.16 ± 0.94
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d32768 14.93 ± 0.02
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d131072 103.40 ± 0.26
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d131072 9.74 ± 0.01

build: ff5ef8278 (1100)

Build Args

#!/usr/bin/env sh
# Configure and build into ./build for GPU target gfx1151 with
# GGML_CUDA_FA_ALL_QUANTS=ON and GGML_HIP_ROCWMMA_FATTN=OFF.
#
# Required environment:
#   LLVM_BIN  - directory that contains the clang / clang++ binaries
#   ROCM_PATH - value handed to the HIP compiler through --rocm-path

# Abort on the first failing command (a failed configure must not fall
# through to the build step) and treat unset variables as errors.
set -eu

# Without these checks an unset variable would silently become "/clang"
# or an empty --rocm-path.
: "${LLVM_BIN:?LLVM_BIN must be set}"
: "${ROCM_PATH:?ROCM_PATH must be set}"

echo "Setting up..."
cmake -S . \
  -B build \
  -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_C_COMPILER="${LLVM_BIN}/clang" \
  -DCMAKE_CXX_COMPILER="${LLVM_BIN}/clang++" \
  -DCMAKE_HIP_COMPILER="${LLVM_BIN}/clang" \
  -DCMAKE_HIP_FLAGS="--rocm-path=${ROCM_PATH} -mllvm --amdgpu-unroll-threshold-local=600" \
  -DGGML_CUDA_FA_ALL_QUANTS=ON \
  -DGGML_HIP_ROCWMMA_FATTN=OFF \
  -DGGML_HIP=ON \
  -DGPU_TARGETS="gfx1151" \
  -DHIP_PLATFORM=amd \
  -DLLAMA_BUILD_EXAMPLES=OFF \
  -DLLAMA_BUILD_TESTS=OFF \
  -DLLAMA_OPENSSL=ON \
  --fresh

echo "Building..."
# Everything after "--" goes to the native build tool; one job per CPU.
cmake --build build \
  --clean-first \
  --config Release -- "-j$(nproc)"

Run 6

Results

model size params backend ngl n_batch n_ubatch type_k type_v fa mmap test t/s
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d16384 265.93 ± 0.26
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d16384 16.46 ± 0.01
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d32768 220.91 ± 0.88
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d32768 14.94 ± 0.01
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d131072 104.26 ± 0.18
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d131072 9.74 ± 0.01

build: ff5ef8278 (1100)

Build Args

#!/usr/bin/env sh
# Configure and build into ./build for GPU target gfx1151, with host C/C++
# code compiled as "-O3 -march=znver5 -mtune=znver5", GGML_NATIVE=OFF and
# the individual GGML_AVX*/BMI2/F16C/FMA options switched on explicitly.
#
# Required environment:
#   LLVM_BIN  - directory that contains the clang / clang++ binaries
#   ROCM_PATH - value handed to the HIP compiler through --rocm-path

# Abort on the first failing command (a failed configure must not fall
# through to the build step) and treat unset variables as errors.
set -eu

# Without these checks an unset variable would silently become "/clang"
# or an empty --rocm-path.
: "${LLVM_BIN:?LLVM_BIN must be set}"
: "${ROCM_PATH:?ROCM_PATH must be set}"

echo "Setting up..."
# Both AMDGPU_TARGETS and GPU_TARGETS are passed with the same value.
cmake -S . \
  -B build \
  -DAMDGPU_TARGETS="gfx1151" \
  -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_C_COMPILER="${LLVM_BIN}/clang" \
  -DCMAKE_C_FLAGS="-O3 -march=znver5 -mtune=znver5" \
  -DCMAKE_CXX_COMPILER="${LLVM_BIN}/clang++" \
  -DCMAKE_CXX_FLAGS="-O3 -march=znver5 -mtune=znver5" \
  -DCMAKE_HIP_COMPILER="${LLVM_BIN}/clang" \
  -DCMAKE_HIP_FLAGS="--rocm-path=${ROCM_PATH} -mllvm --amdgpu-unroll-threshold-local=600" \
  -DGGML_AVX=ON \
  -DGGML_AVX2=ON \
  -DGGML_AVX512_VBMI=ON \
  -DGGML_AVX512_VNNI=ON \
  -DGGML_AVX512=ON \
  -DGGML_BMI2=ON \
  -DGGML_CUDA_FA_ALL_QUANTS=ON \
  -DGGML_F16C=ON \
  -DGGML_FMA=ON \
  -DGGML_HIP_ROCWMMA_FATTN=OFF \
  -DGGML_HIP=ON \
  -DGGML_NATIVE=OFF \
  -DGPU_TARGETS="gfx1151" \
  -DLLAMA_BUILD_EXAMPLES=OFF \
  -DLLAMA_BUILD_TESTS=OFF \
  -DLLAMA_OPENSSL=ON \
  --fresh

echo "Building..."
# Everything after "--" goes to the native build tool; one job per CPU.
cmake --build build \
  --clean-first \
  --config Release -- "-j$(nproc)"

Run 7

Results

model size params backend ngl n_batch n_ubatch type_k type_v fa mmap test t/s
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d16384 269.02 ± 1.10
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d16384 18.31 ± 0.25
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d32768 219.73 ± 0.76
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d32768 16.52 ± 0.02
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d131072 104.34 ± 0.51
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d131072 10.31 ± 0.01

build: ff5ef8278 (1100)

Build Args

#!/usr/bin/env sh
# Configure and build into ./build for GPU target gfx1151, with host C/C++
# code compiled as "-O3 -march=znver5 -mtune=znver5", GGML_NATIVE=ON,
# GGML_HIP_GRAPHS=ON, GGML_OPENMP=ON and GGML_RPC=ON.
#
# Required environment:
#   LLVM_BIN  - directory that contains the clang / clang++ binaries
#   ROCM_PATH - value handed to the HIP compiler through --rocm-path
#
# NOTE(review): GGML_CUDA_ENABLE_UNIFIED_MEMORY=ON is passed together with
# GGML_HIP_UMA=OFF - confirm both are recognised CMake options for this
# revision and that the combination is intended.

# Abort on the first failing command (a failed configure must not fall
# through to the build step) and treat unset variables as errors.
set -eu

# Without these checks an unset variable would silently become "/clang"
# or an empty --rocm-path.
: "${LLVM_BIN:?LLVM_BIN must be set}"
: "${ROCM_PATH:?ROCM_PATH must be set}"

echo "Setting up..."
# Both AMDGPU_TARGETS and GPU_TARGETS are passed with the same value.
cmake -S . \
  -B build \
  -DAMDGPU_TARGETS="gfx1151" \
  -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_C_COMPILER="${LLVM_BIN}/clang" \
  -DCMAKE_C_FLAGS="-O3 -march=znver5 -mtune=znver5" \
  -DCMAKE_CXX_COMPILER="${LLVM_BIN}/clang++" \
  -DCMAKE_CXX_FLAGS="-O3 -march=znver5 -mtune=znver5" \
  -DCMAKE_HIP_COMPILER="${LLVM_BIN}/clang" \
  -DCMAKE_HIP_FLAGS="--rocm-path=${ROCM_PATH} -mllvm --amdgpu-unroll-threshold-local=600" \
  -DGGML_CUDA_ENABLE_UNIFIED_MEMORY=ON \
  -DGGML_CUDA_FA_ALL_QUANTS=ON \
  -DGGML_CUDA_FA=ON \
  -DGGML_CUDA_FORCE_CUBLAS=OFF \
  -DGGML_CUDA_FORCE_MMQ=OFF \
  -DGGML_HIP_GRAPHS=ON \
  -DGGML_HIP_ROCWMMA_FATTN=OFF \
  -DGGML_HIP_UMA=OFF \
  -DGGML_HIP=ON \
  -DGGML_NATIVE=ON \
  -DGGML_OPENMP=ON \
  -DGGML_RPC=ON \
  -DGPU_TARGETS="gfx1151" \
  -DHIP_PLATFORM=amd \
  -DLLAMA_BUILD_EXAMPLES=OFF \
  -DLLAMA_BUILD_TESTS=OFF \
  -DLLAMA_OPENSSL=ON \
  --fresh

echo "Building..."
# Everything after "--" goes to the native build tool; one job per CPU.
cmake --build build \
  --clean-first \
  --config Release -- "-j$(nproc)"

Run 8

Results

model size params backend ngl n_batch n_ubatch type_k type_v fa mmap test t/s
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d16384 270.73 ± 0.60
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d16384 18.45 ± 0.03
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d32768 221.20 ± 1.34
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d32768 16.54 ± 0.02
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d131072 102.17 ± 0.19
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d131072 10.32 ± 0.01

build: ff5ef8278 (1100)

Build Args

#!/usr/bin/env sh
# Configure and build into ./build for GPU target gfx1151, adding
# "-I${ROCM_PATH}/include" to the host C/C++ flags, with GGML_NATIVE=ON,
# GGML_HIP_GRAPHS=ON, GGML_OPENMP=ON and GGML_RPC=ON.
#
# Required environment:
#   LLVM_BIN  - directory that contains the clang / clang++ binaries
#   ROCM_PATH - ROCm root; used for --rocm-path and the include directory
#
# NOTE(review): GGML_CUDA_ENABLE_UNIFIED_MEMORY=ON is passed together with
# GGML_HIP_UMA=OFF - confirm both are recognised CMake options for this
# revision and that the combination is intended.

# Abort on the first failing command (a failed configure must not fall
# through to the build step) and treat unset variables as errors.
set -eu

# Without these checks an unset variable would silently become "/clang",
# "-I/include" or an empty --rocm-path.
: "${LLVM_BIN:?LLVM_BIN must be set}"
: "${ROCM_PATH:?ROCM_PATH must be set}"

echo "Setting up..."
# AMDGPU_TARGETS and CMAKE_BUILD_TYPE are each passed once; the repeated
# definitions carried identical values and were redundant.
cmake -S . \
  -B build \
  -DAMDGPU_TARGETS="gfx1151" \
  -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_C_COMPILER="${LLVM_BIN}/clang" \
  -DCMAKE_C_FLAGS="-I${ROCM_PATH}/include" \
  -DCMAKE_CXX_COMPILER="${LLVM_BIN}/clang++" \
  -DCMAKE_CXX_FLAGS="-I${ROCM_PATH}/include" \
  -DCMAKE_HIP_COMPILER="${LLVM_BIN}/clang" \
  -DCMAKE_HIP_FLAGS="--rocm-path=${ROCM_PATH} -mllvm --amdgpu-unroll-threshold-local=600" \
  -DGGML_CUDA_ENABLE_UNIFIED_MEMORY=ON \
  -DGGML_CUDA_FA_ALL_QUANTS=ON \
  -DGGML_CUDA_FA=ON \
  -DGGML_CUDA_FORCE_CUBLAS=OFF \
  -DGGML_CUDA_FORCE_MMQ=OFF \
  -DGGML_HIP_GRAPHS=ON \
  -DGGML_HIP_ROCWMMA_FATTN=OFF \
  -DGGML_HIP_UMA=OFF \
  -DGGML_HIP=ON \
  -DGGML_NATIVE=ON \
  -DGGML_OPENMP=ON \
  -DGGML_RPC=ON \
  -DGPU_TARGETS="gfx1151" \
  -DHIP_PLATFORM=amd \
  -DLLAMA_BUILD_EXAMPLES=OFF \
  -DLLAMA_BUILD_TESTS=OFF \
  -DLLAMA_OPENSSL=ON \
  --fresh

echo "Building..."
# Everything after "--" goes to the native build tool; one job per CPU.
cmake --build build \
  --clean-first \
  --config Release -- "-j$(nproc)"

model size params backend ngl n_batch n_ubatch type_k type_v fa mmap test t/s
qwen35moe 122B.A10B Q4_K - Medium 70.35 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d16384 269.89 ± 0.80
qwen35moe 122B.A10B Q4_K - Medium 70.35 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d16384 19.81 ± 0.03
qwen35moe 122B.A10B Q4_K - Medium 70.35 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d32768 220.79 ± 0.58
qwen35moe 122B.A10B Q4_K - Medium 70.35 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d32768 17.60 ± 0.02
qwen35moe 122B.A10B Q4_K - Medium 70.35 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d131072 104.86 ± 0.09
qwen35moe 122B.A10B Q4_K - Medium 70.35 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d131072 10.72 ± 0.01
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d16384 257.08 ± 1.82
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d16384 17.76 ± 1.13
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d32768 218.64 ± 0.46
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d32768 16.52 ± 0.02
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d131072 104.35 ± 0.45
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d131072 10.32 ± 0.01
qwen35moe 122B.A10B Q6_K 98.40 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d16384 228.76 ± 0.57
qwen35moe 122B.A10B Q6_K 98.40 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d16384 17.56 ± 0.15
qwen35moe 122B.A10B Q6_K 98.40 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d32768 198.65 ± 1.29
qwen35moe 122B.A10B Q6_K 98.40 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d32768 15.89 ± 0.01
qwen35moe 122B.A10B Q6_K 98.40 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d131072 99.73 ± 0.31
qwen35moe 122B.A10B Q6_K 98.40 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d131072 10.06 ± 0.01
gemma4 ?B Q8_0 30.38 GiB 30.70 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d16384 186.24 ± 0.94
gemma4 ?B Q8_0 30.38 GiB 30.70 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d16384 5.49 ± 0.00
gemma4 ?B Q8_0 30.38 GiB 30.70 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d32768 141.30 ± 0.19
gemma4 ?B Q8_0 30.38 GiB 30.70 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d32768 4.99 ± 0.00
gemma4 ?B Q8_0 30.38 GiB 30.70 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d131072 57.17 ± 0.03
gemma4 ?B Q8_0 30.38 GiB 30.70 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d131072 3.27 ± 0.00
gpt-oss 120B Q8_0 59.02 GiB 116.83 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d16384 413.42 ± 4.46
gpt-oss 120B Q8_0 59.02 GiB 116.83 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d16384 39.95 ± 0.03
gpt-oss 120B Q8_0 59.02 GiB 116.83 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d32768 299.38 ± 1.86
gpt-oss 120B Q8_0 59.02 GiB 116.83 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d32768 33.76 ± 0.03
gpt-oss 120B Q8_0 59.02 GiB 116.83 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d131072 112.78 ± 0.38
gpt-oss 120B Q8_0 59.02 GiB 116.83 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d131072 15.19 ± 0.01
gpt-oss 20B Q8_0 11.27 GiB 20.91 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d16384 833.43 ± 6.48
gpt-oss 20B Q8_0 11.27 GiB 20.91 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d16384 57.12 ± 0.02
gpt-oss 20B Q8_0 11.27 GiB 20.91 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d32768 550.54 ± 3.29
gpt-oss 20B Q8_0 11.27 GiB 20.91 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d32768 48.63 ± 0.01
gpt-oss 20B Q8_0 11.27 GiB 20.91 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d131072 182.05 ± 0.07
gpt-oss 20B Q8_0 11.27 GiB 20.91 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d131072 22.34 ± 0.01
deepseek2 30B.A3B Q8_0 33.17 GiB 29.94 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d16384 203.74 ± 0.03
deepseek2 30B.A3B Q8_0 33.17 GiB 29.94 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d16384 23.16 ± 0.01
deepseek2 30B.A3B Q8_0 33.17 GiB 29.94 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d32768 113.75 ± 0.02
deepseek2 30B.A3B Q8_0 33.17 GiB 29.94 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d32768 17.68 ± 0.01
deepseek2 30B.A3B Q8_0 33.17 GiB 29.94 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d131072 31.37 ± 0.00
deepseek2 30B.A3B Q8_0 33.17 GiB 29.94 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d131072 6.84 ± 0.00
nemotron_h_moe 120B.A12B Q5_K - Medium 90.27 GiB 120.67 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d16384 245.93 ± 0.30
nemotron_h_moe 120B.A12B Q5_K - Medium 90.27 GiB 120.67 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d16384 16.71 ± 0.13
nemotron_h_moe 120B.A12B Q5_K - Medium 90.27 GiB 120.67 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d32768 227.73 ± 0.07
nemotron_h_moe 120B.A12B Q5_K - Medium 90.27 GiB 120.67 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d32768 16.29 ± 0.11
nemotron_h_moe 120B.A12B Q5_K - Medium 90.27 GiB 120.67 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d131072 160.49 ± 0.45
nemotron_h_moe 120B.A12B Q5_K - Medium 90.27 GiB 120.67 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d131072 13.48 ± 0.02
nemotron_h_moe 120B.A12B Q4_K - Medium 81.37 GiB 120.67 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d16384 252.98 ± 0.31
nemotron_h_moe 120B.A12B Q4_K - Medium 81.37 GiB 120.67 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d16384 17.96 ± 0.43
nemotron_h_moe 120B.A12B Q4_K - Medium 81.37 GiB 120.67 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d32768 237.76 ± 0.48
nemotron_h_moe 120B.A12B Q4_K - Medium 81.37 GiB 120.67 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d32768 17.71 ± 0.03
nemotron_h_moe 120B.A12B Q4_K - Medium 81.37 GiB 120.67 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d131072 165.08 ± 1.04
nemotron_h_moe 120B.A12B Q4_K - Medium 81.37 GiB 120.67 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d131072 14.38 ± 0.02

build: ff5ef8278 (1100)

model size params backend ngl n_batch n_ubatch type_k type_v fa mmap test t/s
qwen35moe 122B.A10B Q4_K - Medium 69.83 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d16384 270.71 ± 0.00
qwen35moe 122B.A10B Q4_K - Medium 69.83 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d16384 17.84 ± 0.00
qwen35moe 122B.A10B Q4_K - Medium 69.83 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d32768 220.98 ± 0.00
qwen35moe 122B.A10B Q4_K - Medium 69.83 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d32768 16.10 ± 0.00
qwen35moe 122B.A10B Q4_K - Medium 69.83 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d131072 101.11 ± 0.00
qwen35moe 122B.A10B Q4_K - Medium 69.83 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d131072 10.22 ± 0.00
qwen35moe 122B.A10B Q4_K - Medium 70.35 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d16384 258.95 ± 0.00
qwen35moe 122B.A10B Q4_K - Medium 70.35 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d16384 16.76 ± 0.00
qwen35moe 122B.A10B Q4_K - Medium 70.35 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d32768 220.73 ± 0.00
qwen35moe 122B.A10B Q4_K - Medium 70.35 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d32768 15.98 ± 0.00
qwen35moe 122B.A10B Q4_K - Medium 70.35 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d131072 104.99 ± 0.00
qwen35moe 122B.A10B Q4_K - Medium 70.35 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d131072 10.18 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 81.72 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d16384 255.86 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 81.72 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d16384 16.38 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 81.72 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d32768 219.97 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 81.72 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d32768 15.11 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 81.72 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d131072 104.22 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 81.72 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d131072 9.81 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d16384 258.46 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d16384 16.44 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d32768 217.79 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d32768 14.92 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d131072 104.26 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d131072 9.74 ± 0.00
qwen35moe 122B.A10B Q6_K 98.40 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d16384 222.98 ± 0.00
qwen35moe 122B.A10B Q6_K 98.40 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d16384 14.91 ± 0.00
qwen35moe 122B.A10B Q6_K 98.40 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d32768 197.51 ± 0.00
qwen35moe 122B.A10B Q6_K 98.40 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d32768 14.39 ± 0.00
qwen35moe 122B.A10B Q6_K 98.40 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d131072 98.88 ± 0.00
qwen35moe 122B.A10B Q6_K 98.40 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d131072 9.52 ± 0.00
qwen35moe 122B.A10B Q6_K 98.74 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d16384 227.98 ± 0.00
qwen35moe 122B.A10B Q6_K 98.74 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d16384 15.27 ± 0.00
qwen35moe 122B.A10B Q6_K 98.74 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d32768 189.36 ± 0.00
qwen35moe 122B.A10B Q6_K 98.74 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d32768 14.16 ± 0.00
qwen35moe 122B.A10B Q6_K 98.74 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d131072 92.13 ± 0.00
qwen35moe 122B.A10B Q6_K 98.74 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d131072 9.27 ± 0.00

build: 429391906 (1058)

New run - 2026-04-23 17:48:18

model size params backend ngl n_batch n_ubatch type_k type_v fa mmap test t/s
qwen35 27B bartowski:Q8_0 26.69 GiB 26.90 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d16384 288.15 ± 1.40
qwen35 27B bartowski:Q8_0 26.69 GiB 26.90 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d16384 7.10 ± 0.01
qwen35 27B bartowski:Q8_0 26.69 GiB 26.90 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d32768 228.82 ± 0.60
qwen35 27B bartowski:Q8_0 26.69 GiB 26.90 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d32768 6.68 ± 0.00
qwen35 27B bartowski:Q8_0 26.69 GiB 26.90 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d131072 101.69 ± 0.07
qwen35 27B bartowski:Q8_0 26.69 GiB 26.90 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d131072 5.09 ± 0.00
qwen35 27B unsloth:Q8_K_XL 32.89 GiB 26.90 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d16384 285.58 ± 1.29
qwen35 27B unsloth:Q8_K_XL 32.89 GiB 26.90 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d16384 6.06 ± 0.00
qwen35 27B unsloth:Q8_K_XL 32.89 GiB 26.90 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d32768 225.11 ± 0.18
qwen35 27B unsloth:Q8_K_XL 32.89 GiB 26.90 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d32768 5.75 ± 0.00
qwen35 27B unsloth:Q8_K_XL 32.89 GiB 26.90 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d131072 101.65 ± 0.14
qwen35 27B unsloth:Q8_K_XL 32.89 GiB 26.90 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d131072 4.53 ± 0.00
qwen35 27B unsloth:Q8_0 26.62 GiB 26.90 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d16384 288.79 ± 0.97
qwen35 27B unsloth:Q8_0 26.62 GiB 26.90 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d16384 7.09 ± 0.00
qwen35 27B unsloth:Q8_0 26.62 GiB 26.90 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d32768 227.47 ± 0.27
qwen35 27B unsloth:Q8_0 26.62 GiB 26.90 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d32768 6.67 ± 0.00
qwen35 27B unsloth:Q8_0 26.62 GiB 26.90 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d131072 102.24 ± 0.15
qwen35 27B unsloth:Q8_0 26.62 GiB 26.90 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d131072 5.09 ± 0.00
qwen35 27B bartowski:Q4_K_L 17.20 GiB 26.90 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d16384 280.51 ± 1.03
qwen35 27B bartowski:Q4_K_L 17.20 GiB 26.90 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d16384 10.24 ± 0.01
qwen35 27B bartowski:Q4_K_L 17.20 GiB 26.90 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d32768 223.90 ± 0.47
qwen35 27B bartowski:Q4_K_L 17.20 GiB 26.90 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d32768 9.40 ± 0.01
qwen35 27B bartowski:Q4_K_L 17.20 GiB 26.90 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d131072 100.95 ± 0.18
qwen35 27B bartowski:Q4_K_L 17.20 GiB 26.90 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d131072 6.53 ± 0.00
qwen35 27B unsloth:Q4_K_XL 16.39 GiB 26.90 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d16384 282.86 ± 0.64
qwen35 27B unsloth:Q4_K_XL 16.39 GiB 26.90 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d16384 10.37 ± 0.01
qwen35 27B unsloth:Q4_K_XL 16.39 GiB 26.90 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d32768 225.45 ± 0.37
qwen35 27B unsloth:Q4_K_XL 16.39 GiB 26.90 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d32768 9.51 ± 0.01
qwen35 27B unsloth:Q4_K_XL 16.39 GiB 26.90 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d131072 101.40 ± 0.13
qwen35 27B unsloth:Q4_K_XL 16.39 GiB 26.90 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d131072 6.58 ± 0.00

build: 6217b4958 (1230)

New run - 2026-04-23 23:55:35

model size params backend ngl n_batch n_ubatch type_k type_v fa mmap test t/s
qwen35moe 35B.A3B bartowski:Q8_0 34.37 GiB 34.66 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d16384 828.99 ± 6.07
qwen35moe 35B.A3B bartowski:Q8_0 34.37 GiB 34.66 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d16384 41.42 ± 0.11
qwen35moe 35B.A3B bartowski:Q8_0 34.37 GiB 34.66 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d32768 638.03 ± 0.90
qwen35moe 35B.A3B bartowski:Q8_0 34.37 GiB 34.66 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d32768 38.22 ± 0.08
qwen35moe 35B.A3B bartowski:Q8_0 34.37 GiB 34.66 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d131072 263.09 ± 2.55
qwen35moe 35B.A3B bartowski:Q8_0 34.37 GiB 34.66 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d131072 25.91 ± 0.04
qwen35moe 35B.A3B unsloth:Q8_K_XL 35.80 GiB 34.66 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d16384 790.91 ± 1.64
qwen35moe 35B.A3B unsloth:Q8_K_XL 35.80 GiB 34.66 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d16384 35.08 ± 0.01
qwen35moe 35B.A3B unsloth:Q8_K_XL 35.80 GiB 34.66 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d32768 603.61 ± 25.26
qwen35moe 35B.A3B unsloth:Q8_K_XL 35.80 GiB 34.66 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d32768 32.81 ± 0.00
qwen35moe 35B.A3B unsloth:Q8_K_XL 35.80 GiB 34.66 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d131072 259.00 ± 3.64
qwen35moe 35B.A3B unsloth:Q8_K_XL 35.80 GiB 34.66 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d131072 23.17 ± 0.01
qwen35moe 35B.A3B unsloth:Q8_0 34.36 GiB 34.66 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d16384 833.32 ± 3.19
qwen35moe 35B.A3B unsloth:Q8_0 34.36 GiB 34.66 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d16384 41.13 ± 0.10
qwen35moe 35B.A3B unsloth:Q8_0 34.36 GiB 34.66 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d32768 637.03 ± 0.61
qwen35moe 35B.A3B unsloth:Q8_0 34.36 GiB 34.66 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d32768 37.95 ± 0.07
qwen35moe 35B.A3B unsloth:Q8_0 34.36 GiB 34.66 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d131072 263.93 ± 1.40
qwen35moe 35B.A3B unsloth:Q8_0 34.36 GiB 34.66 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d131072 25.77 ± 0.04
qwen35moe 35B.A3B bartowski:Q4_K_L 20.26 GiB 34.66 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d16384 767.66 ± 2.68
qwen35moe 35B.A3B bartowski:Q4_K_L 20.26 GiB 34.66 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d16384 48.71 ± 0.13
qwen35moe 35B.A3B bartowski:Q4_K_L 20.26 GiB 34.66 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d32768 601.58 ± 1.48
qwen35moe 35B.A3B bartowski:Q4_K_L 20.26 GiB 34.66 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d32768 44.32 ± 0.08
qwen35moe 35B.A3B bartowski:Q4_K_L 20.26 GiB 34.66 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d131072 251.36 ± 0.54
qwen35moe 35B.A3B bartowski:Q4_K_L 20.26 GiB 34.66 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d131072 28.56 ± 0.05
qwen35moe 35B.A3B unsloth:Q4_K_XL 20.81 GiB 34.66 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d16384 803.28 ± 0.92
qwen35moe 35B.A3B unsloth:Q4_K_XL 20.81 GiB 34.66 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d16384 44.42 ± 0.93
qwen35moe 35B.A3B unsloth:Q4_K_XL 20.81 GiB 34.66 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d32768 625.22 ± 3.31
qwen35moe 35B.A3B unsloth:Q4_K_XL 20.81 GiB 34.66 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d32768 41.15 ± 0.09
qwen35moe 35B.A3B unsloth:Q4_K_XL 20.81 GiB 34.66 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d131072 252.17 ± 0.28
qwen35moe 35B.A3B unsloth:Q4_K_XL 20.81 GiB 34.66 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d131072 27.22 ± 0.04

build: 6217b4958 (1230)

model size params backend ngl n_batch n_ubatch type_k type_v fa mmap test t/s
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 2048 1024 q8_0 q8_0 1 0 pp512 @ d16384 96.71 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 2048 1024 q8_0 q8_0 1 0 tg128 @ d16384 15.90 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 2048 1024 q8_0 q8_0 1 0 pp512 @ d32768 57.69 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 2048 1024 q8_0 q8_0 1 0 tg128 @ d32768 14.18 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 2048 1024 q8_0 q8_0 1 0 pp512 @ d131072 16.48 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 2048 1024 q8_0 q8_0 1 0 tg128 @ d131072 7.84 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 2048 512 q8_0 q8_0 1 0 pp512 @ d16384 97.63 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 2048 512 q8_0 q8_0 1 0 tg128 @ d16384 15.85 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 2048 512 q8_0 q8_0 1 0 pp512 @ d32768 57.33 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 2048 512 q8_0 q8_0 1 0 tg128 @ d32768 14.15 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 2048 512 q8_0 q8_0 1 0 pp512 @ d131072 16.55 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 2048 512 q8_0 q8_0 1 0 tg128 @ d131072 7.81 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 2048 256 q8_0 q8_0 1 0 pp512 @ d16384 86.07 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 2048 256 q8_0 q8_0 1 0 tg128 @ d16384 15.87 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 2048 256 q8_0 q8_0 1 0 pp512 @ d32768 52.13 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 2048 256 q8_0 q8_0 1 0 tg128 @ d32768 14.16 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 2048 256 q8_0 q8_0 1 0 pp512 @ d131072 15.46 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 2048 256 q8_0 q8_0 1 0 tg128 @ d131072 7.83 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d16384 98.23 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d16384 15.87 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d32768 57.69 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d32768 14.15 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 pp512 @ d131072 16.58 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 1024 q8_0 q8_0 1 0 tg128 @ d131072 7.84 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 512 q8_0 q8_0 1 0 pp512 @ d16384 97.69 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 512 q8_0 q8_0 1 0 tg128 @ d16384 15.88 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 512 q8_0 q8_0 1 0 pp512 @ d32768 57.42 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 512 q8_0 q8_0 1 0 tg128 @ d32768 14.15 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 512 q8_0 q8_0 1 0 pp512 @ d131072 16.53 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 512 q8_0 q8_0 1 0 tg128 @ d131072 7.81 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 256 q8_0 q8_0 1 0 pp512 @ d16384 86.60 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 256 q8_0 q8_0 1 0 tg128 @ d16384 15.84 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 256 q8_0 q8_0 1 0 pp512 @ d32768 51.91 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 256 q8_0 q8_0 1 0 tg128 @ d32768 14.16 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 256 q8_0 q8_0 1 0 pp512 @ d131072 15.45 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 256 q8_0 q8_0 1 0 tg128 @ d131072 7.83 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 256 q8_0 q8_0 1 0 pp512 @ d16384 85.42 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 256 q8_0 q8_0 1 0 tg128 @ d16384 15.87 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 256 q8_0 q8_0 1 0 pp512 @ d32768 51.75 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 256 q8_0 q8_0 1 0 tg128 @ d32768 14.15 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 256 q8_0 q8_0 1 0 pp512 @ d131072 15.31 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 256 q8_0 q8_0 1 0 tg128 @ d131072 7.84 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 128 q8_0 q8_0 1 0 pp512 @ d16384 73.28 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 128 q8_0 q8_0 1 0 tg128 @ d16384 15.88 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 128 q8_0 q8_0 1 0 pp512 @ d32768 46.74 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 128 q8_0 q8_0 1 0 tg128 @ d32768 14.07 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 128 q8_0 q8_0 1 0 pp512 @ d131072 14.56 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 1024 128 q8_0 q8_0 1 0 tg128 @ d131072 7.84 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 512 256 q8_0 q8_0 1 0 pp512 @ d16384 85.46 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 512 256 q8_0 q8_0 1 0 tg128 @ d16384 15.89 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 512 256 q8_0 q8_0 1 0 pp512 @ d32768 51.55 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 512 256 q8_0 q8_0 1 0 tg128 @ d32768 14.14 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 512 256 q8_0 q8_0 1 0 pp512 @ d131072 15.34 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 512 256 q8_0 q8_0 1 0 tg128 @ d131072 7.84 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 512 128 q8_0 q8_0 1 0 pp512 @ d16384 73.37 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 512 128 q8_0 q8_0 1 0 tg128 @ d16384 15.90 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 512 128 q8_0 q8_0 1 0 pp512 @ d32768 46.31 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 512 128 q8_0 q8_0 1 0 tg128 @ d32768 14.10 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 512 128 q8_0 q8_0 1 0 pp512 @ d131072 14.54 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 512 128 q8_0 q8_0 1 0 tg128 @ d131072 7.84 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 256 256 q8_0 q8_0 1 0 pp512 @ d16384 85.75 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 256 256 q8_0 q8_0 1 0 tg128 @ d16384 15.89 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 256 256 q8_0 q8_0 1 0 pp512 @ d32768 51.72 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 256 256 q8_0 q8_0 1 0 tg128 @ d32768 14.14 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 256 256 q8_0 q8_0 1 0 pp512 @ d131072 15.28 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 256 256 q8_0 q8_0 1 0 tg128 @ d131072 7.84 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 256 128 q8_0 q8_0 1 0 pp512 @ d16384 72.31 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 256 128 q8_0 q8_0 1 0 tg128 @ d16384 15.87 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 256 128 q8_0 q8_0 1 0 pp512 @ d32768 46.71 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 256 128 q8_0 q8_0 1 0 tg128 @ d32768 14.10 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 256 128 q8_0 q8_0 1 0 pp512 @ d131072 14.53 ± 0.00
qwen35moe 122B.A10B Q5_K - Medium 82.16 GiB 122.11 B ROCm 999 256 128 q8_0 q8_0 1 0 tg128 @ d131072 7.84 ± 0.00

build: 2b86e5cae (947)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment