# syntax=docker/dockerfile:1

# --- Build stage: compile llama-cpp-python with CUDA ---
FROM docker.io/pytorch/pytorch:2.11.0-cuda12.8-cudnn9-devel AS builder

RUN pip install --no-cache-dir --break-system-packages --upgrade pip setuptools wheel

# CMAKE_ARGS enables the CUDA (cuBLAS) backend for the native ggml build.
RUN CMAKE_ARGS="-DGGML_CUDA=on" pip install --no-cache-dir --break-system-packages \
        "llama-cpp-python>=0.3.0"

# --- Runtime stage ---
FROM docker.io/pytorch/pytorch:2.11.0-cuda12.8-cudnn9-runtime

# System dependencies for audio processing.
# update + install in one layer (avoids stale apt cache); list sorted for diffability.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ffmpeg \
        libsndfile1 \
    && rm -rf /var/lib/apt/lists/*

# Upgrade pip/setuptools for Python 3.12 compatibility
RUN pip install --no-cache-dir --break-system-packages --upgrade \
        pip setuptools wheel

# Install deps that don't conflict with the pre-installed torch stack.
# NOTE(review): floors (>=) are kept for compatibility, but exact pins (==)
# would make rebuilds reproducible — consider a constraints/lock file.
RUN pip install --no-cache-dir --break-system-packages \
        "fastapi>=0.115.0" \
        "protobuf>=5.0.0" \
        "python-multipart>=0.0.18" \
        "sentencepiece>=0.2.0" \
        "soundfile>=0.12.0" \
        "uvicorn[standard]>=0.34.0"

# Install transformers + accelerate (needed for device_map)
RUN pip install --no-cache-dir --break-system-packages --no-build-isolation \
        "accelerate>=1.0.0" \
        "transformers>=5.4.0"

# Install chatterbox-tts WITHOUT its dependencies (it would downgrade
# torch from 2.11 to 2.6 and pull gradio, librosa, etc.)
# Then install only the runtime deps chatterbox actually needs.
RUN pip install --no-cache-dir --break-system-packages --no-deps \
        "chatterbox-tts>=0.1.0"
RUN pip install --no-cache-dir --break-system-packages --no-build-isolation \
        "conformer>=0.3.2" \
        "diffusers>=0.29.0" \
        "diskcache>=5.6.0" \
        "einops>=0.8.0" \
        "librosa>=0.10.0" \
        "omegaconf>=2.3.0" \
        "resemble-perth>=1.0.0" \
        "s3tokenizer>=0.3.0" \
        "scipy>=1.17.0"

# Copy llama-cpp-python (package + its *.dist-info / native libs) from builder.
# NOTE(review): verify this path against the actual base image — conda-based
# pytorch/pytorch images usually install pip packages under
# /opt/conda/lib/pythonX.Y/site-packages, not /usr/local/.../dist-packages.
# TODO confirm with `docker run … python -c "import llama_cpp, sys; print(llama_cpp.__file__)"`.
COPY --from=builder /usr/local/lib/python3.12/dist-packages/llama_cpp /usr/local/lib/python3.12/dist-packages/llama_cpp
COPY --from=builder /usr/local/lib/python3.12/dist-packages/llama_cpp_python* /usr/local/lib/python3.12/dist-packages/

# Copy application code, owned by the unprivileged runtime user.
WORKDIR /app
RUN groupadd --system app && useradd --system --gid app --home /app --uid 10001 app
COPY --chown=app:app llmux/ /app/llmux/

# Drop root for runtime; port 8081 is unprivileged so the bind still works.
USER app

# Run the server
EXPOSE 8081
CMD ["uvicorn", "llmux.main:app", "--host", "0.0.0.0", "--port", "8081"]