Skip to content

Instantly share code, notes, and snippets.

View yiliu30's full-sized avatar
๐ŸŒ
Working on site

Yi Liu yiliu30

๐ŸŒ
Working on site
View GitHub Profile

Concise API mapping

compressed-tensors

| torch.cuda | Replacement | Since |
| --- | --- | --- |
| torch.cuda.is_available() | torch.accelerator.is_available() | 2.6 |
| torch.cuda.device_count() | torch.accelerator.device_count() | 2.6 |
| torch.cuda.current_device() | torch.accelerator.current_device_index() | 2.6 |
model_path="/dataset/auto-round/qwen_moe/"
taskname=gsm8k
taskname=longbench_hotpotqa
timestamp=$(date +%Y%m%d_%H%M%S)
model_path="/storage/yiliu7/meta-llama/Llama-3.1-8B-Instruct"
output_log_file_name="${taskname}_${timestamp}"
MAX_MODEL_LEN=40960
max_length=${MAX_MODEL_LEN}
taskname=gsm8k
import os
from functools import wraps
# from vllm import envs
from loguru import logger
def with_thread_limits():
"""
Decorator to temporarily set OMP_NUM_THREADS and PyTorch threads,
and restore them after the function call.
## install uv on OS
curl -LsSf https://astral.sh/uv/install.sh | sh
## create new project
uv init myproj
## install packages
uv add django requests "pandas>=2.3"
## remove package
diff --git a/neural_compressor/torch/algorithms/fp8_quant/_quant_common/helper_modules.py b/neural_compressor/torch/algorithms/fp8_quant/_quant_common/helper_modules.py
index 69c03d8efb8..f3668018c43 100755
--- a/neural_compressor/torch/algorithms/fp8_quant/_quant_common/helper_modules.py
+++ b/neural_compressor/torch/algorithms/fp8_quant/_quant_common/helper_modules.py
@@ -930,6 +930,22 @@ class PatchedVllmMixtureOfExpertsOp(PatchedModuleBase):
router_weights,
permuted_weights=True,
activation="silu"):
+ enable_moe_chunk = hasattr(self.orig_mod, "enable_moe_chunk") and self.orig_mod.enable_moe_chunk
+ if not enable_moe_chunk:
model_path=inc-res/quantized_model_ds_mxfp8/
# VLLM_ENABLE_AR_EXT=1 \
# VLLM_AR_MXFP4_MODULAR_MOE=1 \
# VLLM_ENABLE_AR_EXT=1 \
# VLLM_MXFP4_PRE_UNPACK_TO_FP8=0 \
# VLLM_ENABLE_STATIC_MOE=0 \
# VLLM_MXFP4_PRE_UNPACK_WEIGHTS=0 \
# VLLM_USE_DEEP_GEMM=0 \
# VLLM_ENABLE_V1_MULTIPROCESSING=1 \
@yiliu30
yiliu30 / install-buildkit.sh
Created November 12, 2025 01:00 — forked from jniltinho/install-buildkit.sh
Enable Docker BuildKit on Linux distributions (Debian, Ubuntu, Fedora)
#!/bin/bash
#
# https://docs.docker.com/build/buildkit/
# https://github.com/docker/buildx/releases/
# https://github.com/docker/buildx
## docker builder prune --all
## docker buildx du --verbose
## For Ubuntu 24.04 try: sudo apt install docker-buildx
diff --git a/neural_compressor/torch/algorithms/fp8_quant/_quant_common/helper_modules.py b/neural_compressor/torch/algorithms/fp8_quant/_quant_common/helper_modules.py
index 69c03d8efb8..f3668018c43 100755
--- a/neural_compressor/torch/algorithms/fp8_quant/_quant_common/helper_modules.py
+++ b/neural_compressor/torch/algorithms/fp8_quant/_quant_common/helper_modules.py
@@ -930,6 +930,22 @@ class PatchedVllmMixtureOfExpertsOp(PatchedModuleBase):
router_weights,
permuted_weights=True,
activation="silu"):
+ enable_moe_chunk = hasattr(self.orig_mod, "enable_moe_chunk") and self.orig_mod.enable_moe_chunk
+ if enable_moe_chunk:
#!/bin/bash
# Check if a model name is passed as an argument, otherwise use the default model path
if [ -z "$1" ]; then
model_path="Meta-Llama-3-8B-Instruct-W4A16-G128-AutoRound"
else
model_path="$1"
fi
tp_size=1
model_name=$(basename ${model_path})