From f2f73d204ca11d11da8dcf047cf7bf68d65f4c339e3fe2432f7945f2bf51d2f2 Mon Sep 17 00:00:00 2001
From: tlg
Date: Sun, 5 Apr 2026 15:46:34 +0200
Subject: [PATCH] fix: Dockerfile multi-stage build with working dependency
 resolution

- Multi-stage: devel image builds llama-cpp-python with CUDA, runtime
  image gets the compiled library via COPY
- chatterbox-tts installed --no-deps to prevent torch 2.6 downgrade
- librosa and diskcache added as explicit chatterbox/llama-cpp deps
- All imports verified with GPU passthrough

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 kischdle/llmux/Dockerfile | 43 +++++++++++++++++++++++++++++++++------
 1 file changed, 37 insertions(+), 6 deletions(-)

diff --git a/kischdle/llmux/Dockerfile b/kischdle/llmux/Dockerfile
index c754937..8fed20f 100644
--- a/kischdle/llmux/Dockerfile
+++ b/kischdle/llmux/Dockerfile
@@ -1,3 +1,11 @@
+# --- Build stage: compile llama-cpp-python with CUDA ---
+FROM docker.io/pytorch/pytorch:2.11.0-cuda12.8-cudnn9-devel AS builder
+
+RUN pip install --no-cache-dir --break-system-packages --upgrade pip setuptools wheel
+RUN CMAKE_ARGS="-DGGML_CUDA=on" pip install --no-cache-dir --break-system-packages \
+    "llama-cpp-python>=0.3.0"
+
+# --- Runtime stage ---
 FROM docker.io/pytorch/pytorch:2.11.0-cuda12.8-cudnn9-runtime
 
 # System dependencies for audio processing
@@ -6,20 +14,43 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     ffmpeg \
     && rm -rf /var/lib/apt/lists/*
 
-# Install Python dependencies (torch, numpy, pyyaml already in base image)
+# Upgrade pip/setuptools for Python 3.12 compatibility
+RUN pip install --no-cache-dir --break-system-packages --upgrade \
+    pip setuptools wheel
+
+# Install deps that don't conflict with pre-installed torch stack
 RUN pip install --no-cache-dir --break-system-packages \
     "fastapi>=0.115.0" \
     "uvicorn[standard]>=0.34.0" \
     "python-multipart>=0.0.18" \
-    "transformers>=5.4.0" \
-    "chatterbox-tts>=0.1.0" \
     "soundfile>=0.12.0" \
     "sentencepiece>=0.2.0" \
     "protobuf>=5.0.0"
 
-# llama-cpp-python needs separate CUDA build
-RUN CMAKE_ARGS="-DGGML_CUDA=on" pip install --no-cache-dir --break-system-packages \
-    "llama-cpp-python>=0.3.0"
+# Install transformers (doesn't touch torch)
+RUN pip install --no-cache-dir --break-system-packages --no-build-isolation \
+    "transformers>=5.4.0"
+
+# Install chatterbox-tts WITHOUT its dependencies (it would downgrade
+# torch from 2.11 to 2.6 and pull gradio, librosa, etc.)
+# Then install only the runtime deps chatterbox actually needs.
+RUN pip install --no-cache-dir --break-system-packages --no-deps \
+    "chatterbox-tts>=0.1.0"
+
+RUN pip install --no-cache-dir --break-system-packages --no-build-isolation \
+    "conformer>=0.3.2" \
+    "einops>=0.8.0" \
+    "omegaconf>=2.3.0" \
+    "scipy>=1.17.0" \
+    "diffusers>=0.29.0" \
+    "resemble-perth>=1.0.0" \
+    "s3tokenizer>=0.3.0" \
+    "librosa>=0.10.0" \
+    "diskcache>=5.6.0"
+
+# Copy llama-cpp-python from builder
+COPY --from=builder /usr/local/lib/python3.12/dist-packages/llama_cpp /usr/local/lib/python3.12/dist-packages/llama_cpp
+COPY --from=builder /usr/local/lib/python3.12/dist-packages/llama_cpp_python* /usr/local/lib/python3.12/dist-packages/
 
 # Copy application code
 COPY llmux/ /app/llmux/