Skip to content

Instantly share code, notes, and snippets.

@yayoimizuha
Created February 2, 2026 06:43
Show Gist options
  • Select an option

  • Save yayoimizuha/23e912074021efa2be9bbe2c2476c637 to your computer and use it in GitHub Desktop.

Select an option

Save yayoimizuha/23e912074021efa2be9bbe2c2476c637 to your computer and use it in GitHub Desktop.
#!/bin/bash
# Environment for serving moonshotai/Kimi-K2.5 with SGLang from a Singularity
# image inside a SLURM job.

# Trace every command. NOTE: the trace also prints the server command line
# further down in this script, which carries the API key arguments.
set -x

export CONTAINER_IMAGE="sglang_dev-x86-cu13.sif"
export MODEL_NAME="moonshotai/Kimi-K2.5"
# "~" is not expanded inside double quotes, so build the path from $HOME
# instead of exporting a literal "~/models/..." string.
export HOST_MODEL_PATH="${HOME}/models/${MODEL_NAME}"
export CONTAINER_MODEL_PATH="/sgl-workspace/sglang/${MODEL_NAME}"

# Expand the allocation's node list once. The variable is quoted because a
# bracketed list such as "node[01-04]" would otherwise be treated as a glob.
node_hostnames=$(scontrol show hostnames "${SLURM_NODELIST}")
# First hostname of the allocation.
MASTER_ADDR=${node_hostnames%%$'\n'*}
# All hostnames joined with commas (command substitution already dropped the
# trailing newline, so there is no trailing comma).
NODES=${node_hostnames//$'\n'/,}

# NCCL / Gloo settings.
export NCCL_IB_HCA=mlx5_0,mlx5_3,mlx5_4,mlx5_5,mlx5_6,mlx5_7,mlx5_8,mlx5_9
export NCCL_DEBUG=WARN
export GLOO_DEBUG=WARN

# SGLang / PyTorch settings.
export SGLANG_JIT_DEEPGEMM_COMPILE_WORKERS=64
export TORCH_NCCL_HEARTBEAT_TIMEOUT_SEC=86400
export TORCH_CUDA_ARCH_LIST=90

# Record the node's network interfaces in the job log.
ip a

export SGLANG_DISABLE_CUDNN_CHECK=1
export SGLANG_TOOL_STRICT_LEVEL=2
# Create one private scratch directory per exported variable. The assignment
# is kept separate from `export` so that a mktemp failure is detected instead
# of silently exporting an empty path.
for scratch_var in TEMPORARY_DIR TRITON_HOME DEEPGEMM_TMP FLASHINFER_TMP NCCL_TMP FLASHINFER_JIT_DIR; do
  scratch_path=$(mktemp -d) || {
    printf 'mktemp -d failed for %s\n' "${scratch_var}" >&2
    exit 1
  }
  export "${scratch_var}=${scratch_path}"
done

#######################################
# Copy a persistent cache directory into a scratch directory.
# Arguments: $1 - persistent cache directory (source)
#            $2 - scratch directory (destination)
# Returns:   0 if the source does not exist (nothing to restore),
#            otherwise the exit status of rsync.
#######################################
restore_cache() {
  local store=$1 scratch=$2
  # On a first run there is no saved cache yet; skip instead of letting
  # rsync fail on a missing source directory.
  [[ -d "${store}" ]] || return 0
  rsync -ao -- "${store}/" "${scratch}"
}

# Pre-populate the scratch directories from the caches kept under $HOME/tmp.
# Each cache directory is named after the variable holding its scratch path.
# TEMPORARY_DIR has no persistent counterpart.
for cache_name in TRITON_HOME DEEPGEMM_TMP FLASHINFER_TMP NCCL_TMP FLASHINFER_JIT_DIR; do
  restore_cache "${HOME}/tmp/${cache_name}" "${!cache_name}"
done
# Server flags. Exported because the command run inside the container expands
# ${COMMAND_OPT} itself (unquoted there on purpose, so it splits into words).
export COMMAND_OPT="--tp-size 8 --tool-call-parser kimi_k2 --reasoning-parser kimi_k2 --max-running-requests 512 --cuda-graph-max-bs 512 --chunked-prefill-size 65536 --mem-fraction-static 0.8"

#######################################
# Start the SGLang server inside the Singularity image.
# Globals:   TEMPORARY_DIR, TRITON_HOME, DEEPGEMM_TMP, FLASHINFER_TMP,
#            NCCL_TMP, FLASHINFER_JIT_DIR, CONTAINER_IMAGE, HOME (read);
#            MODEL_NAME and COMMAND_OPT are read by the shell started in
#            the container.
# Returns:   exit status of `singularity exec`.
#######################################
launch_server() {
  # Built as an array so every bind spec stays a single argument even when a
  # path contains whitespace or glob characters.
  local -a bind_args=(
    -B "${TEMPORARY_DIR}:/tmp"
    -B "${TRITON_HOME}:${HOME}/.triton"
    -B "${DEEPGEMM_TMP}:${HOME}/.cache/deep_gemm"
    -B "${FLASHINFER_TMP}:${HOME}/.cache/flashinfer"
    # NOTE(review): /tmp is also the destination of the first bind above, so
    # only one of the two can be visible in the container. Kept as-is;
    # confirm which directory is meant to back /tmp.
    -B "${NCCL_TMP}:/tmp"
    -B "${FLASHINFER_JIT_DIR}:${FLASHINFER_JIT_DIR}"
  )

  # Single-quoted: the variables are expanded by the shell started inside
  # the container, not by this script.
  # NOTE(review): the "****" after --admin-api-key/--api-key are placeholders
  # for the real keys. Left unquoted they are glob patterns to that shell, and
  # real keys placed here show up in the `set -x` trace and in process lists.
  local server_cmd='${HOME}/.local/bin/uv run --no-sync sglang serve --model-path ${HOME}/models/${MODEL_NAME} ${COMMAND_OPT} --trust-remote-code --port 30000 --host 0.0.0.0 --admin-api-key **** --api-key **** --served-model-name ${MODEL_NAME}'

  singularity exec "${bind_args[@]}" --nv "${CONTAINER_IMAGE}" bash -c "${server_cmd}"
}

launch_server
#######################################
# Copy the scratch caches back under $HOME/tmp, then delete every scratch
# directory created for this run.
# Globals:   TRITON_HOME, DEEPGEMM_TMP, FLASHINFER_TMP, NCCL_TMP,
#            FLASHINFER_JIT_DIR, TEMPORARY_DIR, HOME (read)
# Returns:   0
#######################################
save_caches_and_cleanup() {
  local name scratch

  # Each cache is stored in a directory named after its variable.
  for name in TRITON_HOME DEEPGEMM_TMP FLASHINFER_TMP NCCL_TMP FLASHINFER_JIT_DIR; do
    scratch=${!name}
    # An unset or empty variable would turn "${scratch}/" into "/", i.e. a
    # copy of the whole filesystem; skip anything that is not a directory.
    if [[ -z "${scratch}" || ! -d "${scratch}" ]]; then
      continue
    fi
    # Create the destination first so the very first save does not fail on a
    # missing $HOME/tmp.
    mkdir -p -- "${HOME}/tmp/${name}" \
      && rsync -ao -- "${scratch}/" "${HOME}/tmp/${name}"
  done

  # Remove the scratch directories only after all copies were attempted.
  # TEMPORARY_DIR has no persistent copy but still has to be deleted.
  for name in TEMPORARY_DIR TRITON_HOME DEEPGEMM_TMP FLASHINFER_TMP NCCL_TMP FLASHINFER_JIT_DIR; do
    scratch=${!name}
    if [[ -n "${scratch}" ]]; then
      rm -rf -- "${scratch}"
    fi
  done
  return 0
}

save_caches_and_cleanup
set +x
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment