Skip to content

Instantly share code, notes, and snippets.

@pansapiens
Created April 16, 2026 11:22
Show Gist options
  • Select an option

  • Save pansapiens/c79b7f9197846e5a21b9f71040dda9de to your computer and use it in GitHub Desktop.

Select an option

Save pansapiens/c79b7f9197846e5a21b9f71040dda9de to your computer and use it in GitHub Desktop.
acestep.cpp in Docker
# Compose service for the acestep.cpp CUDA server (built from the
# Dockerfile in this directory). Indentation reconstructed to valid YAML.
services:
  acestep:
    image: acestep-cuda
    build:
      context: .
      dockerfile: Dockerfile
    ports:
      - "8085:8085"
    # Reserve all NVIDIA GPUs (requires the NVIDIA Container Toolkit).
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    # Host IPC namespace — presumably for shared-memory use by the
    # server; confirm against the upstream run instructions.
    ipc: host
    # Runtime configuration read by /app/docker-entrypoint.sh.
    environment:
      ACE_HOST: "0.0.0.0"
      ACE_PORT: "8085"
      ACE_MODELS: "./models"
      ACE_LORAS: "./loras"
      ACE_MAX_BATCH: "1"
    volumes:
      - ./loras:/app/loras
# Build like:
# docker buildx build -t acestep-cuda -f Dockerfile .
#
# Run like:
# docker run --gpus all -p 8085:8085 acestep-cuda
#
# With custom options:
# docker run --gpus all -p 8085:8085 \
# -e ACE_PORT=8085 \
# -e ACE_MAX_BATCH=2 \
# -v $(pwd)/loras:/app/loras \
# acestep-cuda
#
# Download all models (slower, larger image):
# docker buildx build -t acestep-cuda --build-arg MODELS_ARGS="--all" .
#
# Specific quantization:
# docker buildx build -t acestep-cuda --build-arg MODELS_ARGS="--quant Q5_K_M" .
# Base image versions; override with --build-arg to target other CUDA/Ubuntu combos.
ARG UBUNTU_VERSION=22.04
ARG CUDA_VERSION=12.6.3

# ─── Stage 1: Build ──────────────────────────────────────────────
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} AS build

# Toolchain plus Python (for the venv fallback of the HF CLI install below).
# ca-certificates is listed explicitly: with --no-install-recommends it is
# not guaranteed to be present, and the later HTTPS `git clone` requires it.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        git \
        python3 \
        python3-pip \
        python3-venv \
    && rm -rf /var/lib/apt/lists/*
# Install the Hugging Face CLI used for model downloads.
# The direct pip install fails on pips that either predate
# --break-system-packages or enforce PEP 668; in that case fall back to a
# dedicated venv. The venv ships two console scripts (`hf` and the older
# `huggingface-cli`) — expose BOTH on PATH so the download script works
# regardless of which name it invokes.
RUN pip3 install --no-cache-dir --break-system-packages "huggingface_hub[cli]" || \
    (python3 -m venv /opt/hf-venv && \
     /opt/hf-venv/bin/pip install --no-cache-dir "huggingface_hub[cli]" && \
     ln -sf /opt/hf-venv/bin/hf /usr/local/bin/hf && \
     ln -sf /opt/hf-venv/bin/huggingface-cli /usr/local/bin/huggingface-cli)
# Source repository and ref to build; override with --build-arg to pin a
# release tag or a specific commit.
ARG GIT_REPO=https://github.com/ServeurpersoCom/acestep.cpp
ARG GIT_REF=master
WORKDIR /src
# Full (non-shallow) clone so GIT_REF can be any branch, tag, or commit sha;
# submodules are re-synced after the checkout in case the ref pins different ones.
RUN git clone --recurse-submodules "${GIT_REPO}" . && git checkout "${GIT_REF}" \
&& git submodule update --init --recursive
# Build with CUDA
# The linker needs libcuda.so.1 at link time for CUDA Driver API symbols (cuMemCreate, etc.)
# Create a symlink from the stub library so the linker can resolve these symbols.
# (The real driver library is injected at runtime by the NVIDIA container runtime.)
RUN ln -sf /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/libcuda.so.1
# Configure and compile with the ggml CUDA backend enabled, pointing CMake
# at the toolkit's nvcc explicitly.
RUN cmake -B build \
-DGGML_CUDA=ON \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
&& cmake --build build --config Release -j "$(nproc)"
# Download models (default: Q8_0 essentials)
# MODELS_ARGS is deliberately unquoted so multiple flags word-split
# (e.g. --build-arg MODELS_ARGS="--all" or "--quant Q5_K_M").
ARG MODELS_ARGS=""
RUN bash models.sh ${MODELS_ARGS}
# Collect runtime artifacts
RUN mkdir -p /out/bin /out/lib /out/models /out/loras /out/webui && \
cp build/ace-server /out/bin/ && \
cp build/ace-synth /out/bin/ && \
cp build/ace-lm /out/bin/ && \
cp build/index.html.gz.hpp /out/webui/ && \
find build -maxdepth 1 -name "*.so*" -exec cp -P {} /out/lib/ \; && \
cp -r models/*.gguf /out/models/ 2>/dev/null || true
# ─── Stage 2: Runtime ────────────────────────────────────────────
FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} AS runtime

# OCI image metadata (single LABEL instruction, one layer).
LABEL org.opencontainers.image.source="https://github.com/ServeurpersoCom/acestep.cpp" \
      org.opencontainers.image.title="acestep.cpp CUDA" \
      org.opencontainers.image.description="ACE-Step music generation server with CUDA acceleration (GGML)" \
      org.opencontainers.image.usage="docker run --gpus all -p 8085:8085 acestep-cuda"

# Minimal runtime dependencies: the OpenMP runtime for the compiled
# binaries and a CA bundle for outbound TLS. Alphabetical order.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        libgomp1 \
    && rm -rf /var/lib/apt/lists/*
WORKDIR /app
# Copy runtime artifacts
# Binaries, shared libraries, and the web UI asset are all placed under
# /app/build — the entrypoint script runs build/ace-server and prepends
# /app/build to LD_LIBRARY_PATH, so the layout must match.
# NOTE(review): index.html.gz.hpp is a C++ header; presumably the server
# locates its UI asset at this path at runtime — confirm upstream.
COPY --from=build /out/lib/ /app/build/
COPY --from=build /out/bin/ /app/build/
COPY --from=build /out/webui/ /app/build/
COPY --from=build /out/models/ /app/models/
COPY --from=build /out/loras/ /app/loras/
# Create Docker-adapted start script (quoted heredoc: nothing expands at build time)
COPY <<'EOF' /app/docker-entrypoint.sh
#!/usr/bin/env bash
# Container entrypoint: validate the installation, print GPU and
# configuration details, then replace this shell with ace-server (PID 1).
set -euo pipefail
cd /app

# Every setting is overridable via ACE_* environment variables.
host="${ACE_HOST:-0.0.0.0}"
port="${ACE_PORT:-8085}"
models_dir="${ACE_MODELS:-./models}"
loras_dir="${ACE_LORAS:-./loras}"
max_batch="${ACE_MAX_BATCH:-1}"
build_dir="build"

# Shared objects sit next to the binaries under build/; keep any
# caller-provided LD_LIBRARY_PATH appended.
export LD_LIBRARY_PATH="/app/${build_dir}:/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

# Preflight: the server binary must be present and executable.
if [ ! -x "${build_dir}/ace-server" ]; then
    echo "Error: ace-server not found in ${build_dir}/" >&2
    exit 1
fi

# Preflight: at least one GGUF model must have been baked into the image.
gguf_count=$(find "$models_dir" -maxdepth 1 -name "*.gguf" 2>/dev/null | wc -l)
if [ "$gguf_count" -eq 0 ]; then
    echo "Error: No .gguf models in ${models_dir}/" >&2
    echo "Rebuild with models: docker buildx build --build-arg MODELS_ARGS=... ." >&2
    exit 1
fi

# Best-effort GPU report when the NVIDIA tooling is injected by the runtime.
if command -v nvidia-smi >/dev/null 2>&1; then
    echo "=== GPU ==="
    nvidia-smi --query-gpu=name,memory.total,memory.free --format=csv,noheader 2>/dev/null || true
    echo
fi

echo "=== ACE-Step server on ${host}:${port} ==="
echo " Models : ${models_dir}"
echo " LoRAs : ${loras_dir}"
echo " Batch : ${max_batch}"
echo

# exec so ace-server becomes PID 1 and receives docker-stop signals directly.
exec "${build_dir}/ace-server" \
    --host "$host" \
    --port "$port" \
    --models "$models_dir" \
    --loras "$loras_dir" \
    --max-batch "$max_batch"
EOF
# Make the entrypoint executable; ldconfig refreshes the dynamic-linker
# cache after the runtime packages were installed.
RUN chmod +x /app/docker-entrypoint.sh && \
    ldconfig

# Documentation only — publish with `-p 8085:8085` / compose `ports:`.
EXPOSE 8085

# Liveness probe for orchestrators: a plain TCP connect using bash's
# /dev/tcp pseudo-device, since the runtime image ships neither curl nor
# wget. Honors ACE_PORT overrides; generous start-period for model loading.
HEALTHCHECK --interval=30s --timeout=5s --start-period=60s --retries=3 \
    CMD bash -c 'exec 3<>"/dev/tcp/127.0.0.1/${ACE_PORT:-8085}"' || exit 1

# NOTE(review): the container runs as root; add a dedicated non-root USER
# once the server's runtime write paths (if any) are confirmed.
ENTRYPOINT ["/app/docker-entrypoint.sh"]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment