Created
February 2, 2026 06:43
-
-
Save yayoimizuha/23e912074021efa2be9bbe2c2476c637 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Start an SGLang server for MODEL_NAME inside a Singularity container.
#
# Steps:
#   1. Export the container/model settings and runtime environment variables.
#   2. Create per-run scratch directories with mktemp and pre-populate the
#      cache directories from the persistent copies under $HOME/tmp/<NAME>.
#   3. Run `sglang serve` in the container with the scratch directories
#      bind-mounted.
#   4. After the server exits, copy the caches back to $HOME/tmp/<NAME> and
#      delete the scratch directories.
#
# Reads SLURM_NODELIST (via scontrol) and HOME from the environment.
set -x

export CONTAINER_IMAGE="sglang_dev-x86-cu13.sif"
export MODEL_NAME="moonshotai/Kimi-K2.5"
# A tilde is not expanded inside double quotes, so use $HOME to get a real path.
# NOTE(review): HOST_MODEL_PATH and CONTAINER_MODEL_PATH are exported but not
# referenced anywhere else in this script (no bind mount uses them).
export HOST_MODEL_PATH="$HOME/models/$MODEL_NAME"
export CONTAINER_MODEL_PATH="/sgl-workspace/sglang/$MODEL_NAME"

# Expand the SLURM node list once. The list is quoted because a bracketed
# hostlist (e.g. node[01-04]) would otherwise be subject to pathname expansion.
# The ${var:+...} form passes no argument at all when SLURM_NODELIST is
# unset/empty, which is what the unquoted original did.
slurm_hostnames=$(scontrol show hostnames ${SLURM_NODELIST:+"$SLURM_NODELIST"})
# First host of the allocation, and all hosts joined with commas. Neither value
# is exported or used below; with `set -x` they appear in the job trace.
MASTER_ADDR=${slurm_hostnames%%$'\n'*}
NODES=${slurm_hostnames//$'\n'/,}

# Runtime settings inherited by the container process through the environment.
export NCCL_IB_HCA=mlx5_0,mlx5_3,mlx5_4,mlx5_5,mlx5_6,mlx5_7,mlx5_8,mlx5_9
export NCCL_DEBUG=WARN
export GLOO_DEBUG=WARN
export SGLANG_JIT_DEEPGEMM_COMPILE_WORKERS=64
export TORCH_NCCL_HEARTBEAT_TIMEOUT_SEC=86400
export TORCH_CUDA_ARCH_LIST=90

# Record the node's network interfaces in the job log.
ip a

export SGLANG_DISABLE_CUDNN_CHECK=1
export SGLANG_TOOL_STRICT_LEVEL=2

# Per-run scratch directories; each one is bind-mounted into the container.
export TEMPORARY_DIR=$(mktemp -d)
export TRITON_HOME=$(mktemp -d)
export DEEPGEMM_TMP=$(mktemp -d)
export FLASHINFER_TMP=$(mktemp -d)
export NCCL_TMP=$(mktemp -d)
export FLASHINFER_JIT_DIR=$(mktemp -d)

# Names of the scratch directories whose contents persist between runs under
# $HOME/tmp/<NAME>. TEMPORARY_DIR is intentionally absent: it is pure scratch.
cache_vars=(TRITON_HOME DEEPGEMM_TMP FLASHINFER_TMP NCCL_TMP FLASHINFER_JIT_DIR)

# Restore the persisted caches into the fresh scratch directories.
for cache_var in "${cache_vars[@]}"; do
  # Create the persistent directory so the first run (nothing saved yet) does
  # not make rsync fail on a missing source, and the save step has a parent.
  mkdir -p -- "$HOME/tmp/$cache_var"
  rsync -ao "$HOME/tmp/$cache_var/" "${!cache_var}"
done

# Server options. Exported as a single string and expanded unquoted by the
# shell inside the container so that it splits into separate arguments.
export COMMAND_OPT="--tp-size 8 --tool-call-parser kimi_k2 --reasoning-parser kimi_k2 --max-running-requests 512 --cuda-graph-max-bs 512 --chunked-prefill-size 65536 --mem-fraction-static 0.8"

# Bind mounts, kept in an array so every host:container pair stays one word.
# NOTE(review): TEMPORARY_DIR and NCCL_TMP are both bound to /tmp, as in the
# original command; confirm which of the two is meant to back /tmp.
bind_args=(
  -B "$TEMPORARY_DIR:/tmp"
  -B "$TRITON_HOME:$HOME/.triton"
  -B "$DEEPGEMM_TMP:$HOME/.cache/deep_gemm"
  -B "$FLASHINFER_TMP:$HOME/.cache/flashinfer"
  -B "$NCCL_TMP:/tmp"
  -B "$FLASHINFER_JIT_DIR:$FLASHINFER_JIT_DIR"
)

# The command string is single-quoted so ${HOME}, ${MODEL_NAME} and
# ${COMMAND_OPT} are expanded by the shell inside the container, not here.
# The "****" values are redacted placeholders for the real keys; they are
# quoted so the inner shell does not glob-expand them against the working
# directory. Prefer supplying real keys from the environment or a file rather
# than hardcoding them, since `set -x` echoes this command into the job log.
singularity exec "${bind_args[@]}" --nv "${CONTAINER_IMAGE}" bash -c '
  "${HOME}/.local/bin/uv" run --no-sync sglang serve \
    --model-path "${HOME}/models/${MODEL_NAME}" \
    ${COMMAND_OPT} \
    --trust-remote-code \
    --port 30000 \
    --host 0.0.0.0 \
    --admin-api-key "****" \
    --api-key "****" \
    --served-model-name "${MODEL_NAME}"
'

# Save the (possibly updated) caches back to their persistent location.
for cache_var in "${cache_vars[@]}"; do
  rsync -ao "${!cache_var}/" "$HOME/tmp/$cache_var"
done

# Remove every scratch directory created above, including TEMPORARY_DIR,
# which the original cleanup left behind on each run.
rm -rf -- "$TEMPORARY_DIR" "$TRITON_HOME" "$DEEPGEMM_TMP" "$FLASHINFER_TMP" "$NCCL_TMP" "$FLASHINFER_JIT_DIR"
set +x
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment