|
version = 1 |
|
|
|
# Global |
|
[*] |
|
n-gpu-layers = 999 |
|
threads = -1 |
|
fit = on |
|
mlock = 1 |
|
no-mmap = 1 |
|
flash-attn = on |
|
direct-io = 1 |
|
|
|
# Cache |
|
cache-prompt = 1 |
|
cache-type-k = q8_0 |
|
cache-type-v = q8_0 |
|
|
|
batch-size = 1024 |
|
ubatch-size = 1024 |
|
|
|
# unsloth suggests --ctx-size 16384 (16k), which seems low
|
# Suggested to use --ctx-size 32768 (32k) for fast coding
|
# Suggested to use --ctx-size 65536 (64k) for Multi-file work or big refactor |
|
# Suggested (extreme) --ctx-size 131072 (128k) for one-shot analysis (project dump)
|
# Some models support --ctx-size 204800 (200k) which is HUGE |
|
ctx-size = 131072 |
|
|
|
# GLM-4.7-Flash |
|
[glm-4.7-flash] |
|
# hf-repo = unsloth/GLM-4.7-Flash-GGUF:Q8_0 |
|
# hf-repo = unsloth/GLM-4.7-Flash-GGUF:Q6_K_XL |
|
hf-repo = unsloth/GLM-4.7-Flash-GGUF:Q8_K_XL |
|
seed = 3407 |
|
temp = 1.0 |
|
top-p = 0.95 |
|
min-p = 0.01 |
|
repeat-penalty = 1.0 |
|
ctx-size = 131072 |
|
|
|
# [glm-4.7-flash-16k] |
|
# hf-repo = unsloth/GLM-4.7-Flash-GGUF:Q8_0 |
|
# hf-repo = unsloth/GLM-4.7-Flash-GGUF:Q6_K_XL |
|
# hf-repo = unsloth/GLM-4.7-Flash-GGUF:Q8_K_XL |
|
# seed = 3407 |
|
# temp = 1.0 |
|
# top-p = 0.95 |
|
# min-p = 0.01 |
|
# repeat-penalty = 1.0 |
|
# ctx-size = 16384
|
|
|
# [glm-4.7-flash-32k] |
|
# hf-repo = unsloth/GLM-4.7-Flash-GGUF:Q8_0 |
|
# hf-repo = unsloth/GLM-4.7-Flash-GGUF:Q6_K_XL |
|
# hf-repo = unsloth/GLM-4.7-Flash-GGUF:Q8_K_XL |
|
# seed = 3407 |
|
# temp = 1.0 |
|
# top-p = 0.95 |
|
# min-p = 0.01 |
|
# repeat-penalty = 1.0 |
|
# ctx-size = 32768 |
|
|
|
# [glm-4.7-flash-64k] |
|
# hf-repo = unsloth/GLM-4.7-Flash-GGUF:Q8_0 |
|
# hf-repo = unsloth/GLM-4.7-Flash-GGUF:Q6_K_XL |
|
# hf-repo = unsloth/GLM-4.7-Flash-GGUF:Q8_K_XL |
|
# seed = 3407 |
|
# temp = 1.0 |
|
# top-p = 0.95 |
|
# min-p = 0.01 |
|
# repeat-penalty = 1.0 |
|
# ctx-size = 65536 |
|
|
|
# gpt-oss-120b-GGUF |
|
[gpt-oss-120b] |
|
# hf-repo = ggml-org/gpt-oss-120b-GGUF |
|
# hf-repo = unsloth/gpt-oss-120b-GGUF:Q8_0 |
|
hf-repo = unsloth/gpt-oss-120b-GGUF:Q8_K_XL |
|
temp = 1.0 |
|
min-p = 0.0 |
|
top-p = 1.0 |
|
top-k = 0
|
ctx-size = 131072 |
|
|
|
# gpt-oss-20b-GGUF |
|
[gpt-oss-20b] |
|
# hf-repo = ggml-org/gpt-oss-20b-GGUF |
|
# hf-repo = unsloth/gpt-oss-20b-GGUF:Q6_K_XL |
|
# hf-repo = unsloth/gpt-oss-20b-GGUF:Q8_0 |
|
hf-repo = unsloth/gpt-oss-20b-GGUF:Q8_K_XL |
|
temp = 1.0 |
|
min-p = 0.0 |
|
top-p = 1.0 |
|
top-k = 0
|
|
|
[nemotron-3-super-120B-A12B] |
|
hf-repo = bartowski/nvidia_Nemotron-3-Super-120B-A12B-GGUF:Q5_K_M |
|
temp = 1.0 |
|
top-p = 1.0 |
|
ctx-size = 131072 |
|
|
|
# Qwen 3.x-122b |
|
[qwen3-122b-coder] |
|
# Not recommended:
|
# hf-repo = unsloth/Qwen3.5-122B-A10B-GGUF:UD-Q4_K_XL |
|
# hf-repo = bartowski/Qwen_Qwen3.5-122B-A10B-GGUF:Q4_K_M |
|
# hf-repo = bartowski/Qwen_Qwen3.5-122B-A10B-GGUF:Q4_K_M |
|
# hf-repo = bartowski/Qwen_Qwen3.5-122B-A10B-GGUF:Q5_K_M |
|
# hf-repo = bartowski/Qwen_Qwen3.5-122B-A10B-GGUF:Q6_K_L |
|
# Recommended: |
|
# Best value, surprisingly fast pp, slightly better quality than Q4_K_M, only 70.4 GiB |
|
# hf-repo = bartowski/Qwen_Qwen3.5-122B-A10B-GGUF:Q4_K_L |
|
# Sweet spot, balanced quality/speed, ~82 GiB |
|
hf-repo = bartowski/Qwen_Qwen3.5-122B-A10B-GGUF:Q5_K_L |
|
# Max quality, best quality with no pp regression penalty, ~98.4 GiB |
|
# hf-repo = bartowski/Qwen_Qwen3.5-122B-A10B-GGUF:Q6_K |
|
temp = 0.6 |
|
top-p = 0.95 |
|
top-k = 20 |
|
min-p = 0.00 |
|
repeat-penalty = 1.0 |
|
presence-penalty = 0.00 |
|
image-min-tokens = 1024 |
|
ctx-size = 131072 |
|
|
|
[qwen3-122b-tasks] |
|
# hf-repo = unsloth/Qwen3.5-122B-A10B-GGUF:UD-Q4_K_XL |
|
hf-repo = bartowski/Qwen_Qwen3.5-122B-A10B-GGUF:Q5_K_L |
|
temp = 1.0 |
|
top-p = 0.95 |
|
top-k = 20 |
|
min-p = 0.00 |
|
image-min-tokens = 1024 |
|
ctx-size = 131072 |
|
|
|
# Qwen3.x-27B |
|
[qwen3-27b-coder] |
|
# hf-repo = unsloth/Qwen3.6-27B-GGUF:UD-Q8_K_XL |
|
hf-repo = bartowski/Qwen_Qwen3.6-27B-GGUF:Q8_0 |
|
temp = 0.6 |
|
top-p = 0.95
|
top-k = 20 |
|
min-p = 0.00 |
|
repeat-penalty = 1.0 |
|
presence-penalty = 0.00 |
|
|
|
[qwen3-27b-tasks] |
|
# hf-repo = unsloth/Qwen3.6-27B-GGUF:UD-Q8_K_XL |
|
hf-repo = bartowski/Qwen_Qwen3.6-27B-GGUF:Q8_0 |
|
temp = 1.0 |
|
top-p = 0.95
|
top-k = 20 |
|
min-p = 0.00 |
|
repeat-penalty = 1.0 |
|
presence-penalty = 1.5 |
|
|
|
# Qwen3.x-35B-A3B |
|
[qwen3-35b-a3b-coder] |
|
# hf-repo = unsloth/Qwen3.5-35B-A3B-GGUF:UD-Q8_K_XL |
|
hf-repo = bartowski/Qwen_Qwen3.6-35B-A3B-GGUF:Q8_0 |
|
temp = 0.6 |
|
top-p = 0.95
|
top-k = 20 |
|
min-p = 0.00 |
|
repeat-penalty = 1.0 |
|
presence-penalty = 0.00 |
|
|
|
[qwen3-35b-a3b-tasks] |
|
# hf-repo = unsloth/Qwen3.5-35B-A3B-GGUF:UD-Q8_K_XL |
|
hf-repo = bartowski/Qwen_Qwen3.6-35B-A3B-GGUF:Q8_0 |
|
temp = 1.0 |
|
top-p = 0.95
|
top-k = 20 |
|
min-p = 0.00 |
|
repeat-penalty = 1.0 |
|
presence-penalty = 1.5 |
|
|
|
# Qwen3-Coder-30B-A3B-Instruct
|
[qwen3-coder-30b] |
|
# hf-repo = unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF:Q6_K_XL |
|
# hf-repo = unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF:Q8_0 |
|
hf-repo = unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF:Q8_K_XL |
|
# hf-repo = ggml-org/Qwen3-Coder-30B-A3B-Instruct-Q8_0-GGUF |
|
ngl = 99 |
|
temp = 0.7 |
|
min-p = 0.0 |
|
top-p = 0.80 |
|
top-k = 20 |
|
repeat-penalty = 1.05 |
|
ctx-size = 131072 |
|
|
|
# Qwen3-Coder-Next |
|
[qwen3-coder-next] |
|
hf-repo = unsloth/Qwen3-Coder-Next-GGUF:Q8_K_XL |
|
seed = 3407 |
|
temp = 1.0 |
|
top-p = 0.95 |
|
min-p = 0.01 |
|
top-k = 40 |
|
|
|
[qwen3-coder-next-q6-k] |
|
hf-repo = unsloth/Qwen3-Coder-Next-GGUF:Q6_K |
|
seed = 3407 |
|
temp = 1.0 |
|
top-p = 0.95 |
|
min-p = 0.01 |
|
top-k = 40 |
|
|
|
# [qwen3-coder-next-q8-0] |
|
# hf-repo = unsloth/Qwen3-Coder-Next-GGUF:Q8_0 |
|
# seed = 3407 |
|
# temp = 1.0 |
|
# top-p = 0.95 |
|
# min-p = 0.01 |
|
# top-k = 40 |
|
|
|
# Qwen3-Next-80B-A3B-Instruct |
|
[qwen3-next-instruct] |
|
hf-repo = unsloth/Qwen3-Next-80B-A3B-Instruct-GGUF:Q8_K_XL |
|
ngl = 99 |
|
temp = 0.7 |
|
min-p = 0.0 |
|
top-p = 0.80 |
|
top-k = 20 |
|
presence-penalty = 1.0 |
|
|
|
[qwen3-next-instruct-q6-k] |
|
hf-repo = unsloth/Qwen3-Next-80B-A3B-Instruct-GGUF:Q6_K |
|
ngl = 99 |
|
temp = 0.7 |
|
min-p = 0.0 |
|
top-p = 0.80 |
|
top-k = 20 |
|
presence-penalty = 1.0 |
|
|
|
[qwen3-next-instruct-q8-0] |
|
hf-repo = unsloth/Qwen3-Next-80B-A3B-Instruct-GGUF:Q8_0 |
|
ngl = 99 |
|
temp = 0.7 |
|
min-p = 0.0 |
|
top-p = 0.80 |
|
top-k = 20 |
|
presence-penalty = 1.0 |
|
|
|
# Gemma 3 |
|
[gemma3] |
|
# hf-repo = unsloth/gemma-3-27b-it-GGUF:Q6_K_XL |
|
hf-repo = unsloth/gemma-3-27b-it-GGUF:Q8_K_XL |
|
seed = 3407 |
|
prio = 2 |
|
temp = 1.0 |
|
repeat-penalty = 1.0 |
|
min-p = 0.01 |
|
top-k = 64 |
|
top-p = 0.95 |
|
|
|
[gemma3-12b] |
|
# hf-repo = unsloth/gemma-3-27b-it-GGUF:Q6_K_XL |
|
hf-repo = unsloth/gemma-3-12b-it-GGUF:UD-Q8_K_XL |
|
seed = 3407 |
|
prio = 2 |
|
temp = 1.0 |
|
repeat-penalty = 1.0 |
|
min-p = 0.01 |
|
top-k = 64 |
|
top-p = 0.95 |
|
|
|
# Gemma 4 |
|
[gemma4] |
|
# hf-repo = unsloth/gemma-4-31B-it-GGUF:UD-Q8_K_XL |
|
# hf-repo = bartowski/google_gemma-4-31B-it-GGUF:Q6_K_L |
|
hf-repo = bartowski/google_gemma-4-31B-it-GGUF:Q8_0 |
|
temp = 1.0 |
|
top-k = 64 |
|
top-p = 0.95 |
|
ctx-size = 131072 |
|
|
|
# Devstral Small 2 24B
|
[devstral-small-2-24b] |
|
# hf-repo = ggml-org/Devstral-Small-2-24B-Instruct-2512-GGUF |
|
hf-repo = unsloth/Devstral-Small-2-24B-Instruct-2512-GGUF:Q8_K_XL |
|
seed = 3407 |
|
prio = 3 |
|
temp = 0.15 |
|
min-p = 0.01 |
|
|
|
# Devstral 2 123B
|
[devstral-2-123b] |
|
hf-repo = unsloth/Devstral-2-123B-Instruct-2512-GGUF:Q4_K_XL |
|
# hf-repo = unsloth/Devstral-2-123B-Instruct-2512-GGUF:Q5_K_XL |
|
# hf-repo = unsloth/Devstral-2-123B-Instruct-2512-GGUF:Q6_K |
|
# hf-repo = unsloth/Devstral-2-123B-Instruct-2512-GGUF:Q6_K_XL |
|
seed = 3407 |
|
prio = 3 |
|
temp = 0.15 |
|
min-p = 0.01 |