Created
April 16, 2026 11:22
-
-
Save pansapiens/c79b7f9197846e5a21b9f71040dda9de to your computer and use it in GitHub Desktop.
acestep.cpp in Docker
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# docker-compose service definition for the acestep.cpp CUDA server.
# Requires the NVIDIA Container Toolkit on the host for GPU passthrough.
services:
  acestep:
    image: acestep-cuda
    build:
      context: .
      dockerfile: Dockerfile
    ports:
      - "8085:8085"
    # Reserve all host NVIDIA GPUs for this service.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    # Share the host IPC namespace (larger shared-memory segments for CUDA).
    ipc: host
    # Defaults consumed by /app/docker-entrypoint.sh inside the image.
    environment:
      ACE_HOST: "0.0.0.0"
      ACE_PORT: "8085"
      ACE_MODELS: "./models"
      ACE_LORAS: "./loras"
      ACE_MAX_BATCH: "1"
    volumes:
      # Host-side LoRA directory mounted over the (empty) baked-in one.
      - ./loras:/app/loras
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# syntax=docker/dockerfile:1
# The syntax directive pins the BuildKit frontend; the COPY heredoc below
# requires dockerfile frontend >= 1.4.
#
# Build like:
#   docker buildx build -t acestep-cuda -f Dockerfile .
#
# Run like:
#   docker run --gpus all -p 8085:8085 acestep-cuda
#
# With custom options:
#   docker run --gpus all -p 8085:8085 \
#     -e ACE_PORT=8085 \
#     -e ACE_MAX_BATCH=2 \
#     -v $(pwd)/loras:/app/loras \
#     acestep-cuda
#
# Download all models (slower, larger image):
#   docker buildx build -t acestep-cuda --build-arg MODELS_ARGS="--all" .
#
# Specific quantization:
#   docker buildx build -t acestep-cuda --build-arg MODELS_ARGS="--quant Q5_K_M" .

# ARGs declared before FROM are only visible in FROM lines.
ARG UBUNTU_VERSION=22.04
ARG CUDA_VERSION=12.6.3

# ─── Stage 1: Build ────────────────────────────────────────────────
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} AS build

# Toolchain + Python for the model-download helper. DEBIAN_FRONTEND is set
# inline (not via ENV) so it never leaks into the image environment.
# ca-certificates guarantees the HTTPS git clone below works regardless of
# what the base image ships.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        git \
        python3 \
        python3-pip \
        python3-venv \
    && rm -rf /var/lib/apt/lists/*

# Install the HuggingFace CLI for model downloads.
# Newer pip (PEP 668 systems) needs --break-system-packages; on older pip the
# flag is unknown and the command fails, so fall back to a dedicated venv and
# expose `hf` on PATH via a symlink.
RUN pip3 install --no-cache-dir --break-system-packages "huggingface_hub[cli]" || \
    (python3 -m venv /opt/hf-venv && \
     /opt/hf-venv/bin/pip install --no-cache-dir "huggingface_hub[cli]" && \
     ln -sf /opt/hf-venv/bin/hf /usr/local/bin/hf)

ARG GIT_REPO=https://github.com/ServeurpersoCom/acestep.cpp
ARG GIT_REF=master

WORKDIR /src

# Fetch sources at the requested ref, then re-sync submodules in case the ref
# pins different submodule commits than the default branch.
RUN git clone --recurse-submodules "${GIT_REPO}" . && git checkout "${GIT_REF}" \
    && git submodule update --init --recursive

# Build with CUDA.
# The linker needs libcuda.so.1 at link time for CUDA Driver API symbols
# (cuMemCreate, etc.). The real driver library only exists on the host at run
# time, so point the expected soname at the stub shipped with the toolkit.
RUN ln -sf /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/libcuda.so.1

RUN cmake -B build \
        -DGGML_CUDA=ON \
        -DCMAKE_BUILD_TYPE=Release \
        -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
    && cmake --build build --config Release -j "$(nproc)"

# Download models (default: Q8_0 essentials). Declared after the compile so
# that changing MODELS_ARGS does not invalidate the build cache.
# ${MODELS_ARGS} is intentionally unquoted: it may carry multiple arguments.
ARG MODELS_ARGS=""
RUN bash models.sh ${MODELS_ARGS}

# Collect runtime artifacts into a staging tree that Stage 2 copies wholesale.
RUN mkdir -p /out/bin /out/lib /out/models /out/loras /out/webui && \
    cp build/ace-server /out/bin/ && \
    cp build/ace-synth /out/bin/ && \
    cp build/ace-lm /out/bin/ && \
    cp build/index.html.gz.hpp /out/webui/ && \
    find build -maxdepth 1 -name "*.so*" -exec cp -P {} /out/lib/ \; && \
    cp -r models/*.gguf /out/models/ 2>/dev/null || true

# ─── Stage 2: Runtime ──────────────────────────────────────────────
FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} AS runtime

LABEL org.opencontainers.image.source="https://github.com/ServeurpersoCom/acestep.cpp" \
      org.opencontainers.image.title="acestep.cpp CUDA" \
      org.opencontainers.image.description="ACE-Step music generation server with CUDA acceleration (GGML)" \
      org.opencontainers.image.usage="docker run --gpus all -p 8085:8085 acestep-cuda"

# libgomp1: OpenMP runtime needed by the GGML shared libraries.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ca-certificates \
        libgomp1 \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy runtime artifacts (binaries, shared libs, and the embedded web UI all
# live under /app/build, matching the layout the server expects).
COPY --from=build /out/lib/ /app/build/
COPY --from=build /out/bin/ /app/build/
COPY --from=build /out/webui/ /app/build/
COPY --from=build /out/models/ /app/models/
COPY --from=build /out/loras/ /app/loras/

# Docker-adapted start script (heredoc COPY — see the syntax directive at the
# top of this file).
COPY <<'EOF' /app/docker-entrypoint.sh
#!/usr/bin/env bash
set -euo pipefail
cd /app
HOST="${ACE_HOST:-0.0.0.0}"
PORT="${ACE_PORT:-8085}"
MODELS_DIR="${ACE_MODELS:-./models}"
LORAS_DIR="${ACE_LORAS:-./loras}"
MAX_BATCH="${ACE_MAX_BATCH:-1}"
BUILD_DIR="build"
export LD_LIBRARY_PATH="/app/${BUILD_DIR}:/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
# Preflight
if [ ! -x "${BUILD_DIR}/ace-server" ]; then
    echo "Error: ace-server not found in ${BUILD_DIR}/" >&2
    exit 1
fi
GGUF_COUNT=$(find "$MODELS_DIR" -maxdepth 1 -name "*.gguf" 2>/dev/null | wc -l)
if [ "$GGUF_COUNT" -eq 0 ]; then
    echo "Error: No .gguf models in ${MODELS_DIR}/" >&2
    echo "Rebuild with models: docker buildx build --build-arg MODELS_ARGS=... ." >&2
    exit 1
fi
# GPU info
if command -v nvidia-smi &>/dev/null; then
    echo "=== GPU ==="
    nvidia-smi --query-gpu=name,memory.total,memory.free --format=csv,noheader 2>/dev/null || true
    echo
fi
echo "=== ACE-Step server on ${HOST}:${PORT} ==="
echo "  Models : ${MODELS_DIR}"
echo "  LoRAs  : ${LORAS_DIR}"
echo "  Batch  : ${MAX_BATCH}"
echo
exec "${BUILD_DIR}/ace-server" \
    --host "$HOST" \
    --port "$PORT" \
    --models "$MODELS_DIR" \
    --loras "$LORAS_DIR" \
    --max-batch "$MAX_BATCH"
EOF

RUN chmod +x /app/docker-entrypoint.sh && \
    ldconfig

EXPOSE 8085

# Cheap TCP liveness probe via bash's /dev/tcp — no extra packages required.
# Generous start period because model loading can take a while on first boot.
HEALTHCHECK --interval=30s --timeout=5s --start-period=120s --retries=3 \
    CMD bash -c 'exec 3<>"/dev/tcp/127.0.0.1/${ACE_PORT:-8085}"' || exit 1

ENTRYPOINT ["/app/docker-entrypoint.sh"]
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.