Created
May 12, 2026 02:48
-
-
Save ashvinnihalani/952342f092a11f43edd48abe20959a5c to your computer and use it in GitHub Desktop.
Slime GLM-5.1-tiny
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Model hyperparameters for the tiny GLM-5.1-style configuration.
#
# This fragment only defines variables; it runs no commands. It sets a
# handful of size constants and then builds MODEL_ARGS, a Bash array of
# command-line flags.
# NOTE(review): presumably this file is sourced by a training launcher that
# expands "${MODEL_ARGS[@]}" onto its command line -- confirm against caller.

# Mixture-of-experts sizing.
MOE_ROUTED_EXPERTS=256
MOE_ACTIVE_ROUTED_EXPERTS=8
MOE_SHARED_EXPERTS=1

# Hidden / feed-forward widths.
NHIDDEN=8
MOE_FFN_HIDDEN=32
# Derived: per-expert FFN width multiplied by the number of shared experts.
MOE_SHARED_EXPERT_INTERMEDIATE_SIZE=$((MOE_FFN_HIDDEN * MOE_SHARED_EXPERTS))
FFN_HIDDEN=32

# Layer counts; --num-layers below is the sum of the two.
N_DENSE_LAYERS=1
N_MOE_LAYERS=1

NHEADS=8

MODEL_ARGS=(
  --spec "slime_plugins.models.glm5.glm5" "get_glm5_spec"

  # The layer-frequency value must reach the consumer as the literal text
  # "[0]*<dense>+[1]*<moe>" (it looks like a list expression the consumer
  # evaluates -- TODO confirm). It has to be quoted: unquoted, "[0]", "[1]"
  # and "*" are glob syntax, so a matching file name in the current directory
  # (or the nullglob/failglob shell options) would replace, drop, or abort
  # on this argument.
  --moe-layer-freq "[0]*${N_DENSE_LAYERS}+[1]*${N_MOE_LAYERS}"
  --num-experts "${MOE_ROUTED_EXPERTS}"
  --moe-shared-expert-intermediate-size "${MOE_SHARED_EXPERT_INTERMEDIATE_SIZE}"
  --moe-router-topk "${MOE_ACTIVE_ROUTED_EXPERTS}"
  --moe-grouped-gemm
  --moe-permute-fusion
  --moe-ffn-hidden-size "${MOE_FFN_HIDDEN}"
  --moe-router-score-function sigmoid
  --moe-router-pre-softmax
  --moe-router-enable-expert-bias
  --moe-router-bias-update-rate 0
  --moe-router-load-balancing-type seq_aux_loss
  --moe-router-topk-scaling-factor 2.5
  --moe-aux-loss-coeff 0
  --moe-router-dtype fp32

  --make-vocab-size-divisible-by 16
  # Total depth = dense layers + MoE layers.
  --num-layers "$((N_DENSE_LAYERS + N_MOE_LAYERS))"
  --hidden-size "${NHIDDEN}"
  --ffn-hidden-size "${FFN_HIDDEN}"
  --num-attention-heads "${NHEADS}"
  --disable-bias-linear
  --swiglu
  --untie-embeddings-and-output-weights
  --position-embedding-type rope
  --no-position-embedding
  --normalization RMSNorm
  --qk-layernorm
  --multi-latent-attention
  --q-lora-rank 32
  --kv-lora-rank 512
  --qk-head-dim 192
  --v-head-dim 256
  --kv-channels 192
  --qk-pos-emb-head-dim 64
  --vocab-size 154880
  --rotary-base 1000000
  --enable-experimental

  # slime specific args
  --allgather-cp
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment