Created
January 23, 2026 04:37
-
-
Save kiuckhuang/a6549856b1030d23b58f02b937f23091 to your computer and use it in GitHub Desktop.
Batch file to run GLM 4.6V
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@echo off
REM Launch llama.cpp's llama-server with the GLM-4.6V-Flash vision model.
REM
REM One-time setup:
REM   Model:  https://huggingface.co/unsloth/GLM-4.6V-Flash-GGUF/resolve/main/GLM-4.6V-Flash-UD-Q8_K_XL.gguf?download=true
REM   mmproj: https://huggingface.co/unsloth/GLM-4.6V-Flash-GGUF/resolve/main/mmproj-BF16.gguf?download=true
REM           (rename to mmproj-GLM-4.6V-Flash-BF16.gguf)
REM   Put both .gguf files in %BASE_DRIVE%\ai\models
REM   llama.cpp build: https://github.com/ggml-org/llama.cpp/releases/download/b7814/llama-b7814-bin-win-cuda-13.1-x64.zip
REM           (unzip into %BASE_DRIVE%\ai\llama_bin)

REM Update the drive letter if necessary.
SET BASE_DRIVE=C:
SET BASE_PATH=%BASE_DRIVE%\ai
SET LLAMA_CPP_PATH=%BASE_PATH%\llama_bin

REM llama-server reads LLAMA_ARG_* environment variables as defaults for its
REM command-line flags, so these replace the equivalent CLI options below.
SET LLAMA_ARG_HOST=localhost
SET LLAMA_ARG_PORT=8080
SET LLAMA_ARG_JINJA=true
SET LLAMA_ARG_FLASH_ATTN=on
SET LLAMA_ARG_CACHE_TYPE_K=q8_0
SET LLAMA_ARG_CACHE_TYPE_V=q8_0
SET LLAMA_ARG_N_GPU_LAYERS=999
SET LLAMA_ARG_NO_MMAP=1
REM 128K context. NOTE(review): original said 131272, which is not a standard
REM context size; 128 * 1024 = 131072 is almost certainly what was intended.
SET LLAMA_ARG_CTX_SIZE=131072
SET LLAMA_ARG_MODEL=GLM-4.6V-Flash-UD-Q8_K_XL.gguf
SET LLAMA_ARG_ALIAS=GLM-4.6V-Flash-UD-Q8KL

SET PATH=%LLAMA_CPP_PATH%;%PATH%

REM /D switches drive as well as directory (replaces the bare %BASE_DRIVE%
REM drive-change trick); quoting protects paths containing spaces. Bail out
REM instead of launching the server from the wrong working directory.
cd /d "%BASE_PATH%\models" || (
    echo ERROR: model directory not found: %BASE_PATH%\models 1>&2
    exit /b 1
)

@echo on
"%LLAMA_CPP_PATH%\llama-server.exe" ^
  --prio -1 ^
  --temp 0.8 ^
  --top-k 2 ^
  --top-p 0.6 ^
  --min-p 0.0 ^
  --repeat-penalty 1.1 ^
  --mmproj mmproj-GLM-4.6V-Flash-BF16.gguf
@echo off
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment