Skip to content

Instantly share code, notes, and snippets.

@pansapiens
Created April 16, 2026 11:22
Show Gist options
  • Select an option

  • Save pansapiens/c79b7f9197846e5a21b9f71040dda9de to your computer and use it in GitHub Desktop.

Select an option

Save pansapiens/c79b7f9197846e5a21b9f71040dda9de to your computer and use it in GitHub Desktop.
acestep.cpp in Docker
# Compose service for the acestep.cpp CUDA server (built from the
# Dockerfile in this directory). Indentation reconstructed to valid YAML.
services:
  acestep:
    image: acestep-cuda
    build:
      context: .
      dockerfile: Dockerfile
    ports:
      - "8085:8085"
    # Reserve all NVIDIA GPUs (requires the NVIDIA Container Toolkit).
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    # Host IPC namespace — presumably for shared-memory use by the
    # server; confirm against the upstream run instructions.
    ipc: host
    # Runtime configuration read by /app/docker-entrypoint.sh.
    environment:
      ACE_HOST: "0.0.0.0"
      ACE_PORT: "8085"
      ACE_MODELS: "./models"
      ACE_LORAS: "./loras"
      ACE_MAX_BATCH: "1"
    volumes:
      - ./loras:/app/loras
# Build like:
# docker buildx build -t acestep-cuda -f Dockerfile .
#
# Run like:
# docker run --gpus all -p 8085:8085 acestep-cuda
#
# With custom options:
# docker run --gpus all -p 8085:8085 \
# -e ACE_PORT=8085 \
# -e ACE_MAX_BATCH=2 \
# -v $(pwd)/loras:/app/loras \
# acestep-cuda
#
# Download all models (slower, larger image):
# docker buildx build -t acestep-cuda --build-arg MODELS_ARGS="--all" .
#
# Specific quantization:
# docker buildx build -t acestep-cuda --build-arg MODELS_ARGS="--quant Q5_K_M" .
# Base image versions; override with --build-arg to target other CUDA/Ubuntu combos.
ARG UBUNTU_VERSION=22.04
ARG CUDA_VERSION=12.6.3

# ─── Stage 1: Build ──────────────────────────────────────────────
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} AS build

# Toolchain plus Python (for the venv fallback of the HF CLI install below).
# ca-certificates is listed explicitly: with --no-install-recommends it is
# not guaranteed to be present, and the later HTTPS `git clone` requires it.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        git \
        python3 \
        python3-pip \
        python3-venv \
    && rm -rf /var/lib/apt/lists/*
# Install the Hugging Face CLI used for model downloads.
# The direct pip install fails on pips that either predate
# --break-system-packages or enforce PEP 668; in that case fall back to a
# dedicated venv. The venv ships two console scripts (`hf` and the older
# `huggingface-cli`) — expose BOTH on PATH so the download script works
# regardless of which name it invokes.
RUN pip3 install --no-cache-dir --break-system-packages "huggingface_hub[cli]" || \
    (python3 -m venv /opt/hf-venv && \
     /opt/hf-venv/bin/pip install --no-cache-dir "huggingface_hub[cli]" && \
     ln -sf /opt/hf-venv/bin/hf /usr/local/bin/hf && \
     ln -sf /opt/hf-venv/bin/huggingface-cli /usr/local/bin/huggingface-cli)
# Source repository and ref to build; override with --build-arg to pin a
# release tag or a specific commit.
ARG GIT_REPO=https://github.com/ServeurpersoCom/acestep.cpp
ARG GIT_REF=master
WORKDIR /src
# Full (non-shallow) clone so GIT_REF can be any branch, tag, or commit sha;
# submodules are re-synced after the checkout in case the ref pins different ones.
RUN git clone --recurse-submodules "${GIT_REPO}" . && git checkout "${GIT_REF}" \
&& git submodule update --init --recursive
# Build with CUDA
# The linker needs libcuda.so.1 at link time for CUDA Driver API symbols (cuMemCreate, etc.)
# Create a symlink from the stub library so the linker can resolve these symbols.
# (The real driver library is injected at runtime by the NVIDIA container runtime.)
RUN ln -sf /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/libcuda.so.1
# Configure and compile with the ggml CUDA backend enabled, pointing CMake
# at the toolkit's nvcc explicitly.
RUN cmake -B build \
-DGGML_CUDA=ON \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
&& cmake --build build --config Release -j "$(nproc)"
# Download models (default: Q8_0 essentials)
# MODELS_ARGS is deliberately unquoted so multiple flags word-split
# (e.g. --build-arg MODELS_ARGS="--all" or "--quant Q5_K_M").
ARG MODELS_ARGS=""
RUN bash models.sh ${MODELS_ARGS}
# Collect runtime artifacts
RUN mkdir -p /out/bin /out/lib /out/models /out/loras /out/webui && \
cp build/ace-server /out/bin/ && \
cp build/ace-synth /out/bin/ && \
cp build/ace-lm /out/bin/ && \
cp build/index.html.gz.hpp /out/webui/ && \
find build -maxdepth 1 -name "*.so*" -exec cp -P {} /out/lib/ \; && \
cp -r models/*.gguf /out/models/ 2>/dev/null || true
# ─── Stage 2: Runtime ────────────────────────────────────────────
FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} AS runtime

# OCI image metadata (single LABEL instruction, one layer).
LABEL org.opencontainers.image.source="https://github.com/ServeurpersoCom/acestep.cpp" \
      org.opencontainers.image.title="acestep.cpp CUDA" \
      org.opencontainers.image.description="ACE-Step music generation server with CUDA acceleration (GGML)" \
      org.opencontainers.image.usage="docker run --gpus all -p 8085:8085 acestep-cuda"

# Minimal runtime dependencies: the OpenMP runtime for the compiled
# binaries and a CA bundle for outbound TLS. Alphabetical order.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        libgomp1 \
    && rm -rf /var/lib/apt/lists/*
WORKDIR /app
# Copy runtime artifacts
# Binaries, shared libraries, and the web UI asset are all placed under
# /app/build — the entrypoint script runs build/ace-server and prepends
# /app/build to LD_LIBRARY_PATH, so the layout must match.
# NOTE(review): index.html.gz.hpp is a C++ header; presumably the server
# locates its UI asset at this path at runtime — confirm upstream.
COPY --from=build /out/lib/ /app/build/
COPY --from=build /out/bin/ /app/build/
COPY --from=build /out/webui/ /app/build/
COPY --from=build /out/models/ /app/models/
COPY --from=build /out/loras/ /app/loras/
# Create Docker-adapted start script (quoted heredoc: nothing expands at build time)
COPY <<'EOF' /app/docker-entrypoint.sh
#!/usr/bin/env bash
# Container entrypoint: validate the installation, print GPU and
# configuration details, then replace this shell with ace-server (PID 1).
set -euo pipefail
cd /app

# Every setting is overridable via ACE_* environment variables.
host="${ACE_HOST:-0.0.0.0}"
port="${ACE_PORT:-8085}"
models_dir="${ACE_MODELS:-./models}"
loras_dir="${ACE_LORAS:-./loras}"
max_batch="${ACE_MAX_BATCH:-1}"
build_dir="build"

# Shared objects sit next to the binaries under build/; keep any
# caller-provided LD_LIBRARY_PATH appended.
export LD_LIBRARY_PATH="/app/${build_dir}:/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

# Preflight: the server binary must be present and executable.
if [ ! -x "${build_dir}/ace-server" ]; then
    echo "Error: ace-server not found in ${build_dir}/" >&2
    exit 1
fi

# Preflight: at least one GGUF model must have been baked into the image.
gguf_count=$(find "$models_dir" -maxdepth 1 -name "*.gguf" 2>/dev/null | wc -l)
if [ "$gguf_count" -eq 0 ]; then
    echo "Error: No .gguf models in ${models_dir}/" >&2
    echo "Rebuild with models: docker buildx build --build-arg MODELS_ARGS=... ." >&2
    exit 1
fi

# Best-effort GPU report when the NVIDIA tooling is injected by the runtime.
if command -v nvidia-smi >/dev/null 2>&1; then
    echo "=== GPU ==="
    nvidia-smi --query-gpu=name,memory.total,memory.free --format=csv,noheader 2>/dev/null || true
    echo
fi

echo "=== ACE-Step server on ${host}:${port} ==="
echo " Models : ${models_dir}"
echo " LoRAs : ${loras_dir}"
echo " Batch : ${max_batch}"
echo

# exec so ace-server becomes PID 1 and receives docker-stop signals directly.
exec "${build_dir}/ace-server" \
    --host "$host" \
    --port "$port" \
    --models "$models_dir" \
    --loras "$loras_dir" \
    --max-batch "$max_batch"
EOF
# Make the entrypoint executable; ldconfig refreshes the dynamic-linker
# cache after the runtime packages were installed.
RUN chmod +x /app/docker-entrypoint.sh && \
    ldconfig

# Documentation only — publish with `-p 8085:8085` / compose `ports:`.
EXPOSE 8085

# Liveness probe for orchestrators: a plain TCP connect using bash's
# /dev/tcp pseudo-device, since the runtime image ships neither curl nor
# wget. Honors ACE_PORT overrides; generous start-period for model loading.
HEALTHCHECK --interval=30s --timeout=5s --start-period=60s --retries=3 \
    CMD bash -c 'exec 3<>"/dev/tcp/127.0.0.1/${ACE_PORT:-8085}"' || exit 1

# NOTE(review): the container runs as root; add a dedicated non-root USER
# once the server's runtime write paths (if any) are confirmed.
ENTRYPOINT ["/app/docker-entrypoint.sh"]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment