| model | size | params | backend | ngl | n_batch | n_ubatch | type_k | type_v | fa | mmap | test | t/s |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | pp512 @ d16384 | 98.24 ± 0.32 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | tg128 @ d16384 | 16.43 ± 0.09 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | pp512 @ d32768 | 58.65 ± 0.09 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | tg128 @ d32768 | 14.93 ± 0.01 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | pp512 @ d131072 | 16.53 ± 0.12 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | tg128 @ d131072 | 9.75 ± 0.01 |
build: ff5ef8278 (1100)
#!/usr/bin/env sh
echo "Setting up..."
cmake -S . \
-B build \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_C_COMPILER="${LLVM_BIN}/clang" \
-DCMAKE_CXX_COMPILER="${LLVM_BIN}/clang++" \
-DCMAKE_HIP_COMPILER="${LLVM_BIN}/clang" \
-DCMAKE_HIP_FLAGS="--rocm-path=${ROCM_PATHf} -mllvm --amdgpu-unroll-threshold-local=600" \
-DGGML_HIP_ROCWMMA_FATTN=ON \
-DGGML_HIP=ON \
-DGGML_HIPBLAS=ON \
-DGPU_TARGETS=gfx1151 \
-DHIP_PLATFORM=amd \
-DLLAMA_BUILD_EXAMPLES=OFF \
-DLLAMA_BUILD_TESTS=OFF \
-DLLAMA_OPENSSL=ON \
--fresh
echo "Building..."
cmake --build build \
--clean-first \
--config Release -- -j$(nproc)| model | size | params | backend | ngl | n_batch | n_ubatch | type_k | type_v | fa | mmap | test | t/s |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | pp512 @ d16384 | 269.90 ± 0.44 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | tg128 @ d16384 | 16.47 ± 0.02 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | pp512 @ d32768 | 220.99 ± 0.41 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | tg128 @ d32768 | 14.94 ± 0.01 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | pp512 @ d131072 | 100.91 ± 0.23 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | tg128 @ d131072 | 9.75 ± 0.00 |
build: ff5ef8278 (1100)
#!/usr/bin/env sh
echo "Setting up..."
cmake -S . \
-B build \
-DCMAKE_C_COMPILER="${LLVM_BIN}/clang" \
-DCMAKE_CXX_COMPILER="${LLVM_BIN}/clang++" \
-DCMAKE_HIP_COMPILER="${LLVM_BIN}/clang" \
-DCMAKE_HIP_FLAGS="--rocm-path=${ROCM_PATHf} -mllvm --amdgpu-unroll-threshold-local=600" \
-DGGML_HIP_ROCWMMA_FATTN=OFF \
-DGGML_HIP=ON \
-DGGML_HIPBLAS=ON \
-DGPU_TARGETS="gfx1151" \
-DHIP_PLATFORM=amd \
-DLLAMA_BUILD_EXAMPLES=OFF \
-DLLAMA_BUILD_TESTS=OFF \
-DLLAMA_OPENSSL=ON \
--fresh
echo "Building..."
cmake --build build \
--clean-first \
--config Release -- -j$(nproc)| model | size | params | backend | ngl | n_batch | n_ubatch | type_k | type_v | fa | mmap | test | t/s |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | pp512 @ d16384 | 257.73 ± 2.07 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | tg128 @ d16384 | 16.37 ± 0.14 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | pp512 @ d32768 | 220.84 ± 1.66 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | tg128 @ d32768 | 14.95 ± 0.02 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | pp512 @ d131072 | 104.69 ± 0.22 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | tg128 @ d131072 | 9.75 ± 0.00 |
build: ff5ef8278 (1100)
#!/usr/bin/env sh
echo "Setting up..."
cmake -S . \
-B build \
-DCMAKE_C_COMPILER="${LLVM_BIN}/clang" \
-DCMAKE_CXX_COMPILER="${LLVM_BIN}/clang++" \
-DCMAKE_HIP_COMPILER="${LLVM_BIN}/clang" \
-DCMAKE_HIP_FLAGS="--rocm-path=${ROCM_PATHf} -mllvm --amdgpu-unroll-threshold-local=600" \
-DGGML_HIP_NO_VMM=OFF \
-DGGML_HIP_ROCWMMA_FATTN=OFF \
-DGGML_HIP=ON \
-DGGML_HIPBLAS=ON \
-DGPU_TARGETS="gfx1151" \
-DHIP_PLATFORM=amd \
-DLLAMA_BUILD_EXAMPLES=OFF \
-DLLAMA_BUILD_TESTS=OFF \
-DLLAMA_OPENSSL=ON \
--fresh
echo "Building..."
cmake --build build \
--clean-first \
--config Release -- -j$(nproc)| model | size | params | backend | ngl | n_batch | n_ubatch | type_k | type_v | fa | mmap | test | t/s |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | pp512 @ d16384 | 268.95 ± 0.65 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | tg128 @ d16384 | 16.47 ± 0.02 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | pp512 @ d32768 | 220.80 ± 1.00 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | tg128 @ d32768 | 14.93 ± 0.01 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | pp512 @ d131072 | 104.33 ± 0.18 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | tg128 @ d131072 | 9.71 ± 0.04 |
build: ff5ef8278 (1100)
#!/usr/bin/env sh
echo "Setting up..."
cmake -S . \
-B build \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_C_COMPILER="${LLVM_BIN}/clang" \
-DCMAKE_CXX_COMPILER="${LLVM_BIN}/clang++" \
-DCMAKE_HIP_COMPILER="${LLVM_BIN}/clang" \
-DCMAKE_HIP_FLAGS="--rocm-path=${ROCM_PATH} -mllvm --amdgpu-unroll-threshold-local=600" \
-DGGML_HIP_NO_VMM=OFF \
-DGGML_HIP_ROCWMMA_FATTN=OFF \
-DGGML_HIP=ON \
-DGGML_HIPBLAS=OFF \
-DGPU_TARGETS="gfx1151" \
-DHIP_PLATFORM=amd \
-DLLAMA_BUILD_EXAMPLES=OFF \
-DLLAMA_BUILD_TESTS=OFF \
-DLLAMA_OPENSSL=ON \
--fresh
echo "Building..."
cmake --build build \
--clean-first \
--config Release -- -j$(nproc)| model | size | params | backend | ngl | n_batch | n_ubatch | type_k | type_v | fa | mmap | test | t/s |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | pp512 @ d16384 | 271.34 ± 0.44 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | tg128 @ d16384 | 16.47 ± 0.02 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | pp512 @ d32768 | 221.16 ± 0.94 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | tg128 @ d32768 | 14.93 ± 0.02 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | pp512 @ d131072 | 103.40 ± 0.26 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | tg128 @ d131072 | 9.74 ± 0.01 |
build: ff5ef8278 (1100)
#!/usr/bin/env sh
echo "Setting up..."
cmake -S . \
-B build \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_C_COMPILER="${LLVM_BIN}/clang" \
-DCMAKE_CXX_COMPILER="${LLVM_BIN}/clang++" \
-DCMAKE_HIP_COMPILER="${LLVM_BIN}/clang" \
-DCMAKE_HIP_FLAGS="--rocm-path=${ROCM_PATH} -mllvm --amdgpu-unroll-threshold-local=600" \
-DGGML_CUDA_FA_ALL_QUANTS=ON \
-DGGML_HIP_ROCWMMA_FATTN=OFF \
-DGGML_HIP=ON \
-DGPU_TARGETS="gfx1151" \
-DHIP_PLATFORM=amd \
-DLLAMA_BUILD_EXAMPLES=OFF \
-DLLAMA_BUILD_TESTS=OFF \
-DLLAMA_OPENSSL=ON \
--fresh
echo "Building..."
cmake --build build \
--clean-first \
--config Release -- -j$(nproc)| model | size | params | backend | ngl | n_batch | n_ubatch | type_k | type_v | fa | mmap | test | t/s |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | pp512 @ d16384 | 265.93 ± 0.26 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | tg128 @ d16384 | 16.46 ± 0.01 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | pp512 @ d32768 | 220.91 ± 0.88 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | tg128 @ d32768 | 14.94 ± 0.01 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | pp512 @ d131072 | 104.26 ± 0.18 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | tg128 @ d131072 | 9.74 ± 0.01 |
build: ff5ef8278 (1100)
#!/usr/bin/env sh
echo "Setting up..."
cmake -S . \
-B build \
-DAMDGPU_TARGETS="gfx1151" \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_C_COMPILER="${LLVM_BIN}/clang" \
-DCMAKE_C_FLAGS="-O3 -march=znver5 -mtune=znver5" \
-DCMAKE_CXX_COMPILER="${LLVM_BIN}/clang++" \
-DCMAKE_CXX_FLAGS="-O3 -march=znver5 -mtune=znver5" \
-DCMAKE_HIP_COMPILER="${LLVM_BIN}/clang" \
-DCMAKE_HIP_FLAGS="--rocm-path=${ROCM_PATH} -mllvm --amdgpu-unroll-threshold-local=600" \
-DGGML_AVX=ON \
-DGGML_AVX2=ON \
-DGGML_AVX512_VBMI=ON \
-DGGML_AVX512_VNNI=ON \
-DGGML_AVX512=ON \
-DGGML_BMI2=ON \
-DGGML_CUDA_FA_ALL_QUANTS=ON \
-DGGML_F16C=ON \
-DGGML_FMA=ON \
-DGGML_HIP_ROCWMMA_FATTN=OFF \
-DGGML_HIP=ON \
-DGGML_NATIVE=OFF \
-DGPU_TARGETS="gfx1151" \
-DLLAMA_BUILD_EXAMPLES=OFF \
-DLLAMA_BUILD_TESTS=OFF \
-DLLAMA_OPENSSL=ON \
--fresh
echo "Building..."
cmake --build build \
--clean-first \
--config Release -- -j$(nproc)| model | size | params | backend | ngl | n_batch | n_ubatch | type_k | type_v | fa | mmap | test | t/s |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | pp512 @ d16384 | 269.02 ± 1.10 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | tg128 @ d16384 | 18.31 ± 0.25 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | pp512 @ d32768 | 219.73 ± 0.76 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | tg128 @ d32768 | 16.52 ± 0.02 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | pp512 @ d131072 | 104.34 ± 0.51 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | tg128 @ d131072 | 10.31 ± 0.01 |
build: ff5ef8278 (1100)
#!/usr/bin/env sh
echo "Setting up..."
cmake -S . \
-B build \
-DAMDGPU_TARGETS="gfx1151" \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_C_COMPILER="${LLVM_BIN}/clang" \
-DCMAKE_C_FLAGS="-O3 -march=znver5 -mtune=znver5" \
-DCMAKE_CXX_COMPILER="${LLVM_BIN}/clang++" \
-DCMAKE_CXX_FLAGS="-O3 -march=znver5 -mtune=znver5" \
-DCMAKE_HIP_COMPILER="${LLVM_BIN}/clang" \
-DCMAKE_HIP_FLAGS="--rocm-path=${ROCM_PATH} -mllvm --amdgpu-unroll-threshold-local=600" \
-DGGML_CUDA_ENABLE_UNIFIED_MEMORY=ON \
-DGGML_CUDA_FA_ALL_QUANTS=ON \
-DGGML_CUDA_FA=ON \
-DGGML_CUDA_FORCE_CUBLAS=OFF \
-DGGML_CUDA_FORCE_MMQ=OFF \
-DGGML_HIP_GRAPHS=ON \
-DGGML_HIP_ROCWMMA_FATTN=OFF \
-DGGML_HIP_UMA=OFF \
-DGGML_HIP=ON \
-DGGML_NATIVE=ON \
-DGGML_OPENMP=ON \
-DGGML_RPC=ON \
-DGPU_TARGETS="gfx1151" \
-DHIP_PLATFORM=amd \
-DLLAMA_BUILD_EXAMPLES=OFF \
-DLLAMA_BUILD_TESTS=OFF \
-DLLAMA_OPENSSL=ON \
--fresh
echo "Building..."
cmake --build build \
--clean-first \
--config Release -- -j$(nproc)| model | size | params | backend | ngl | n_batch | n_ubatch | type_k | type_v | fa | mmap | test | t/s |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | pp512 @ d16384 | 270.73 ± 0.60 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | tg128 @ d16384 | 18.45 ± 0.03 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | pp512 @ d32768 | 221.20 ± 1.34 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | tg128 @ d32768 | 16.54 ± 0.02 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | pp512 @ d131072 | 102.17 ± 0.19 |
| qwen35moe 122B.A10B Q5_K - Medium | 82.16 GiB | 122.11 B | ROCm | 999 | 1024 | 1024 | q8_0 | q8_0 | 1 | 0 | tg128 @ d131072 | 10.32 ± 0.01 |
build: ff5ef8278 (1100)
#!/usr/bin/env sh
# Configure a fresh CMake build tree in ./build and compile it in Release mode
# with GGML_HIP=ON for gfx1151. This variant sets GGML_NATIVE=ON,
# GGML_HIP_GRAPHS=ON, GGML_OPENMP=ON and GGML_RPC=ON, and adds
# "${ROCM_PATH}/include" to the C and C++ include search path.
#
# Environment:
#   LLVM_BIN   directory containing the clang / clang++ binaries (required)
#   ROCM_PATH  forwarded to the HIP compiler as --rocm-path and used to build
#              the -I include flag for the C/C++ compilers

# Abort on the first failing command so the build step never runs after a
# failed configure.
set -e

# An empty LLVM_BIN would silently turn the compiler paths into "/clang".
: "${LLVM_BIN:?LLVM_BIN must be set to the directory containing clang}"

echo "Setting up..."
# AMDGPU_TARGETS and CMAKE_BUILD_TYPE are each passed once; repeating a -D
# definition with the same value has no additional effect.
cmake -S . \
  -B build \
  -DAMDGPU_TARGETS="gfx1151" \
  -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_C_COMPILER="${LLVM_BIN}/clang" \
  -DCMAKE_C_FLAGS="-I${ROCM_PATH}/include" \
  -DCMAKE_CXX_COMPILER="${LLVM_BIN}/clang++" \
  -DCMAKE_CXX_FLAGS="-I${ROCM_PATH}/include" \
  -DCMAKE_HIP_COMPILER="${LLVM_BIN}/clang" \
  -DCMAKE_HIP_FLAGS="--rocm-path=${ROCM_PATH} -mllvm --amdgpu-unroll-threshold-local=600" \
  -DGGML_CUDA_ENABLE_UNIFIED_MEMORY=ON \
  -DGGML_CUDA_FA_ALL_QUANTS=ON \
  -DGGML_CUDA_FA=ON \
  -DGGML_CUDA_FORCE_CUBLAS=OFF \
  -DGGML_CUDA_FORCE_MMQ=OFF \
  -DGGML_HIP_GRAPHS=ON \
  -DGGML_HIP_ROCWMMA_FATTN=OFF \
  -DGGML_HIP_UMA=OFF \
  -DGGML_HIP=ON \
  -DGGML_NATIVE=ON \
  -DGGML_OPENMP=ON \
  -DGGML_RPC=ON \
  -DGPU_TARGETS="gfx1151" \
  -DHIP_PLATFORM=amd \
  -DLLAMA_BUILD_EXAMPLES=OFF \
  -DLLAMA_BUILD_TESTS=OFF \
  -DLLAMA_OPENSSL=ON \
  --fresh

echo "Building..."
# Everything after "--" is handed to the native build tool.
cmake --build build \
  --clean-first \
  --config Release -- -j"$(nproc)"