feat: Dockerfile, model download script, and pod creation script
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
22
kischdle/llmux/Dockerfile
Normal file
22
kischdle/llmux/Dockerfile
Normal file
@@ -0,0 +1,22 @@
|
||||
# syntax=docker/dockerfile:1
FROM pytorch/pytorch:2.11.0-cuda12.8-cudnn9-runtime

# System dependencies for audio processing
RUN apt-get update && apt-get install -y --no-install-recommends \
        ffmpeg \
        libsndfile1 \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies
COPY requirements.txt /tmp/requirements.txt
RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt

# llama-cpp-python needs a CUDA-enabled build.
# FIX: the requirement specifier must be quoted — unquoted, the shell parses
# `llama-cpp-python>=0.3.0` as a redirection to a file named `=0.3.0`, and pip
# receives only the bare, unpinned `llama-cpp-python`.
# NOTE(review): the `-runtime` base image normally ships without nvcc/CUDA
# headers — confirm this source build actually succeeds here, or perform it in
# a `-devel` build stage and copy the result.
RUN CMAKE_ARGS="-DGGML_CUDA=on" pip install --no-cache-dir --force-reinstall "llama-cpp-python>=0.3.0"

# Copy application code
COPY llmux/ /app/llmux/
WORKDIR /app

# Run the server. EXPOSE is documentation only; the port is published at run
# time (see scripts/create_pod_llmux.sh).
EXPOSE 8081
CMD ["uvicorn", "llmux.main:app", "--host", "0.0.0.0", "--port", "8081"]
|
||||
81
kischdle/llmux/scripts/create_pod_llmux.sh
Executable file
81
kischdle/llmux/scripts/create_pod_llmux.sh
Executable file
@@ -0,0 +1,81 @@
|
||||
#!/bin/bash
# Create the llmux Podman pod and systemd service.
# Run as user llm: bash scripts/create_pod_llmux.sh
#
# Flow: validate models/config dirs -> build image if missing -> start a
# temporary pod and verify it is healthy -> generate systemd units from it ->
# replace the temporary pod with the systemd-managed one.

set -euo pipefail

POD_NAME="llmux_pod"
CTR_NAME="llmux_ctr"
IMAGE="localhost/llmux:latest"
PORT="127.0.0.1:8081:8081"   # loopback-only publish; not reachable externally
BIND_DIR="$HOME/.local/share/${POD_NAME}"
USER_SYSTEMD_DIR="$HOME/.config/systemd/user"

MODELS_DIR="${BIND_DIR}/models"
CONFIG_DIR="${BIND_DIR}/config"

# --- Preconditions -----------------------------------------------------------
if [ ! -d "$MODELS_DIR" ]; then
    echo "ERROR: Models directory not found: $MODELS_DIR"
    echo "Run download_models.sh first."
    exit 1
fi

if [ ! -f "$CONFIG_DIR/models.yaml" ]; then
    echo "ERROR: Config not found: $CONFIG_DIR/models.yaml"
    exit 1
fi

if [ ! -f "$CONFIG_DIR/api_keys.yaml" ]; then
    echo "ERROR: Config not found: $CONFIG_DIR/api_keys.yaml"
    exit 1
fi

mkdir -p "$USER_SYSTEMD_DIR"

# --- Image -------------------------------------------------------------------
if ! podman image exists "$IMAGE"; then
    echo "Building container image..."
    SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
    podman build -t llmux:latest -f "$SCRIPT_DIR/../Dockerfile" "$SCRIPT_DIR/.."
fi

# --- Recreate pod ------------------------------------------------------------
# `|| true` keeps `set -e` from aborting when the pod does not exist yet.
podman pod exists "$POD_NAME" && podman pod stop "$POD_NAME" 2>/dev/null || true
podman pod exists "$POD_NAME" && podman pod rm -f "$POD_NAME" 2>/dev/null || true

echo "Creating pod $POD_NAME..."
podman pod create --name "$POD_NAME" -p "$PORT"

echo "Creating container $CTR_NAME..."
podman run -d \
    --name "$CTR_NAME" \
    --pod "$POD_NAME" \
    --device nvidia.com/gpu=all \
    -v "${MODELS_DIR}:/models:ro" \
    -v "${CONFIG_DIR}:/config:ro" \
    -e LLMUX_CONFIG_DIR=/config \
    -e LLMUX_MODELS_DIR=/models \
    "$IMAGE"

# --- Health check ------------------------------------------------------------
# FIX: the loop previously fell through silently after 30 failed attempts and
# the script went on to enable systemd units for a broken service. Now the
# script fails fast with a pointer to the container logs.
echo "Waiting for llmux to start..."
healthy=0
for _ in $(seq 1 30); do
    if curl -sf http://127.0.0.1:8081/health > /dev/null 2>&1; then
        echo "llmux is healthy!"
        healthy=1
        break
    fi
    sleep 2
done
if [ "$healthy" -ne 1 ]; then
    echo "ERROR: llmux did not become healthy within 60s."
    echo "Inspect: podman logs $CTR_NAME"
    exit 1
fi

# --- systemd units -----------------------------------------------------------
echo "Generating systemd units..."
cd "$USER_SYSTEMD_DIR"
podman generate systemd --files --new --name "$POD_NAME"

# Remove the temporary pod; systemd now owns the lifecycle via --new units.
podman pod stop "$POD_NAME"
podman pod rm -f "$POD_NAME"

systemctl --user daemon-reload
systemctl --user enable --now "pod-${POD_NAME}.service"

echo ""
echo "=== llmux pod created and enabled ==="
echo "Service: systemctl --user status pod-${POD_NAME}.service"
echo "Health: curl http://127.0.0.1:8081/health"
echo "Logs: journalctl --user -u pod-${POD_NAME}.service -f"
|
||||
72
kischdle/llmux/scripts/download_models.sh
Executable file
72
kischdle/llmux/scripts/download_models.sh
Executable file
@@ -0,0 +1,72 @@
|
||||
#!/bin/bash
# Download all model weights for llmux.
# Run as user llm: bash scripts/download_models.sh
# Requires: pip install huggingface_hub
# Requires: HuggingFace token at ~/.cache/huggingface/token for gated models

set -euo pipefail

MODELS_DIR="${LLMUX_MODELS_DIR:-$HOME/.local/share/llmux_pod/models}"
mkdir -p "$MODELS_DIR"

echo "=== Downloading models to $MODELS_DIR ==="

# download_hf REPO
# Download a full repo snapshot into $MODELS_DIR, skipping repos whose cache
# entry already exists.
download_hf() {
    local repo="$1"
    # FIX: huggingface_hub names cache entries "models--{org}--{name}" — every
    # "/" in the repo id becomes a double dash. The previous single-dash
    # substitution ("models--org-name") never matched an existing entry, so
    # the skip check was dead and every run re-downloaded every model.
    local target="$MODELS_DIR/models--${repo//\//--}"
    if [ -d "$target" ]; then
        echo "SKIP: $repo (already downloaded)"
        return
    fi
    echo "Downloading: $repo"
    huggingface-cli download "$repo" --cache-dir "$MODELS_DIR"
}

# download_hf_files REPO FILE...
# Download only the named files from a repo (no skip check — the CLI itself
# reuses already-cached files).
download_hf_files() {
    local repo="$1"
    shift
    echo "Downloading specific files from: $repo"
    huggingface-cli download "$repo" "$@" --cache-dir "$MODELS_DIR"
}

# 1. Qwen3.5-9B-FP8
download_hf "lovedheart/Qwen3.5-9B-FP8"

# 2. Qwen3.5-9B-FP8-Uncensored (GGUF files only)
download_hf_files "HauhauCS/Qwen3.5-9B-Uncensored-HauhauCS-Aggressive" \
    "Qwen3.5-9B-Uncensored-HauhauCS-Aggressive-Q8_0.gguf" \
    "mmproj-Qwen3.5-9B-Uncensored-HauhauCS-Aggressive-BF16.gguf"

# 3. Qwen3.5-4B
download_hf "Qwen/Qwen3.5-4B"

# 4. gpt-oss-20b
download_hf "openai/gpt-oss-20b"

# 5. gpt-oss-20b-uncensored
download_hf "aoxo/gpt-oss-20b-uncensored"

# 6. cohere-transcribe (gated — requires accepted terms)
# Non-fatal on purpose: a missing gated model should not abort the other
# downloads under `set -e`.
echo "Downloading: CohereLabs/cohere-transcribe-03-2026 (gated)"
download_hf "CohereLabs/cohere-transcribe-03-2026" || \
    echo "WARNING: cohere-transcribe download failed. Have you accepted the terms at https://huggingface.co/CohereLabs/cohere-transcribe-03-2026 ?"

# 7. Chatterbox TTS
# The library fetches its own weights on first use; CUDA is masked so this
# runs on CPU and only downloads, never loads onto a GPU.
echo "Downloading: Chatterbox TTS weights (auto-downloaded by library)"
python3 -c "
from chatterbox.tts import ChatterboxTTS
import os
os.environ['CUDA_VISIBLE_DEVICES'] = ''
print('Downloading Chatterbox default...')
ChatterboxTTS.from_pretrained(device='cpu')
print('Downloading Chatterbox turbo...')
ChatterboxTTS.from_pretrained(device='cpu', variant='turbo')
print('Downloading Chatterbox multilingual...')
ChatterboxTTS.from_pretrained(device='cpu', variant='multilingual')
print('Chatterbox downloads complete.')
" || echo "WARNING: Chatterbox download failed. Check chatterbox-tts installation."

echo ""
echo "=== Download complete ==="
echo "Models directory: $MODELS_DIR"
du -sh "$MODELS_DIR"
|
||||
Reference in New Issue
Block a user