diff --git a/kischdle/llmux/Dockerfile b/kischdle/llmux/Dockerfile
new file mode 100644
index 0000000..08e1b80
--- /dev/null
+++ b/kischdle/llmux/Dockerfile
@@ -0,0 +1,22 @@
+FROM pytorch/pytorch:2.11.0-cuda12.8-cudnn9-devel
+
+# System dependencies for audio processing
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    libsndfile1 \
+    ffmpeg \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Python dependencies
+COPY requirements.txt /tmp/requirements.txt
+RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt
+
+# llama-cpp-python needs a CUDA source build; nvcc comes from the -devel base image
+RUN CMAKE_ARGS="-DGGML_CUDA=on" pip install --no-cache-dir --force-reinstall "llama-cpp-python>=0.3.0"
+
+# Copy application code
+COPY llmux/ /app/llmux/
+WORKDIR /app
+
+# Run the server
+EXPOSE 8081
+CMD ["uvicorn", "llmux.main:app", "--host", "0.0.0.0", "--port", "8081"]
diff --git a/kischdle/llmux/scripts/create_pod_llmux.sh b/kischdle/llmux/scripts/create_pod_llmux.sh
new file mode 100755
index 0000000..e4cfc90
--- /dev/null
+++ b/kischdle/llmux/scripts/create_pod_llmux.sh
@@ -0,0 +1,81 @@
+#!/bin/bash
+# Create the llmux Podman pod and systemd service.
+# Run as user llm: bash scripts/create_pod_llmux.sh
+
+set -euo pipefail
+
+POD_NAME="llmux_pod"
+CTR_NAME="llmux_ctr"
+IMAGE="localhost/llmux:latest"
+PORT="127.0.0.1:8081:8081"
+BIND_DIR="$HOME/.local/share/${POD_NAME}"
+USER_SYSTEMD_DIR="$HOME/.config/systemd/user"
+
+MODELS_DIR="${BIND_DIR}/models"
+CONFIG_DIR="${BIND_DIR}/config"
+
+if [ ! -d "$MODELS_DIR" ]; then
+    echo "ERROR: Models directory not found: $MODELS_DIR"
+    echo "Run download_models.sh first."
+    exit 1
+fi
+
+if [ ! -f "$CONFIG_DIR/models.yaml" ]; then
+    echo "ERROR: Config not found: $CONFIG_DIR/models.yaml"
+    exit 1
+fi
+
+if [ ! -f "$CONFIG_DIR/api_keys.yaml" ]; then
+    echo "ERROR: Config not found: $CONFIG_DIR/api_keys.yaml"
+    exit 1
+fi
+
+mkdir -p "$USER_SYSTEMD_DIR"
+
+if ! podman image exists "$IMAGE"; then
+    echo "Building container image..."
+    SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+    podman build -t llmux:latest -f "$SCRIPT_DIR/../Dockerfile" "$SCRIPT_DIR/.."
+fi
+
+podman pod exists "$POD_NAME" && podman pod stop "$POD_NAME" 2>/dev/null || true
+podman pod exists "$POD_NAME" && podman pod rm -f "$POD_NAME" 2>/dev/null || true
+
+echo "Creating pod $POD_NAME..."
+podman pod create --name "$POD_NAME" -p "$PORT"
+
+echo "Creating container $CTR_NAME..."
+podman run -d \
+    --name "$CTR_NAME" \
+    --pod "$POD_NAME" \
+    --device nvidia.com/gpu=all \
+    -v "${MODELS_DIR}:/models:ro" \
+    -v "${CONFIG_DIR}:/config:ro" \
+    -e LLMUX_CONFIG_DIR=/config \
+    -e LLMUX_MODELS_DIR=/models \
+    "$IMAGE"
+
+echo "Waiting for llmux to start..."
+for i in $(seq 1 30); do
+    if curl -sf http://127.0.0.1:8081/health > /dev/null 2>&1; then
+        echo "llmux is healthy!"
+        break
+    fi
+    sleep 2
+done
+
+echo "Generating systemd units..."
+cd "$USER_SYSTEMD_DIR"
+podman generate systemd --files --new --name "$POD_NAME"
+
+podman pod stop "$POD_NAME"
+podman pod rm -f "$POD_NAME"
+
+systemctl --user daemon-reload
+systemctl --user enable --now "pod-${POD_NAME}.service"
+
+echo ""
+echo "=== llmux pod created and enabled ==="
+echo "Service: systemctl --user status pod-${POD_NAME}.service"
+echo "Health: curl http://127.0.0.1:8081/health"
+echo "Logs: journalctl --user -u pod-${POD_NAME}.service -f"
diff --git a/kischdle/llmux/scripts/download_models.sh b/kischdle/llmux/scripts/download_models.sh
new file mode 100755
index 0000000..facefad
--- /dev/null
+++ b/kischdle/llmux/scripts/download_models.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+# Download all model weights for llmux.
+# Run as user llm: bash scripts/download_models.sh
+# Requires: pip install huggingface_hub
+# Requires: HuggingFace token at ~/.cache/huggingface/token for gated models
+
+set -euo pipefail
+
+MODELS_DIR="${LLMUX_MODELS_DIR:-$HOME/.local/share/llmux_pod/models}"
+mkdir -p "$MODELS_DIR"
+
+echo "=== Downloading models to $MODELS_DIR ==="
+
+download_hf() {
+    local repo="$1"
+    local target="$MODELS_DIR/models--${repo//\//--}"
+    if [ -d "$target" ]; then
+        echo "SKIP: $repo (already downloaded)"
+        return
+    fi
+    echo "Downloading: $repo"
+    huggingface-cli download "$repo" --cache-dir "$MODELS_DIR"
+}
+
+download_hf_files() {
+    local repo="$1"
+    shift
+    echo "Downloading specific files from: $repo"
+    huggingface-cli download "$repo" "$@" --cache-dir "$MODELS_DIR"
+}
+
+# 1. Qwen3.5-9B-FP8
+download_hf "lovedheart/Qwen3.5-9B-FP8"
+
+# 2. Qwen3.5-9B-FP8-Uncensored (GGUF files only)
+download_hf_files "HauhauCS/Qwen3.5-9B-Uncensored-HauhauCS-Aggressive" \
+    "Qwen3.5-9B-Uncensored-HauhauCS-Aggressive-Q8_0.gguf" \
+    "mmproj-Qwen3.5-9B-Uncensored-HauhauCS-Aggressive-BF16.gguf"
+
+# 3. Qwen3.5-4B
+download_hf "Qwen/Qwen3.5-4B"
+
+# 4. gpt-oss-20b
+download_hf "openai/gpt-oss-20b"
+
+# 5. gpt-oss-20b-uncensored
+download_hf "aoxo/gpt-oss-20b-uncensored"
+
+# 6. cohere-transcribe (gated — requires accepted terms)
+echo "Downloading: CohereLabs/cohere-transcribe-03-2026 (gated)"
+download_hf "CohereLabs/cohere-transcribe-03-2026" || \
+    echo "WARNING: cohere-transcribe download failed. Have you accepted the terms at https://huggingface.co/CohereLabs/cohere-transcribe-03-2026 ?"
+
+# 7. Chatterbox TTS
+echo "Downloading: Chatterbox TTS weights (auto-downloaded by library)"
+python3 -c "
+import os
+os.environ['CUDA_VISIBLE_DEVICES'] = ''  # must be set before torch import to keep downloads on CPU
+from chatterbox.tts import ChatterboxTTS
+print('Downloading Chatterbox default...')
+ChatterboxTTS.from_pretrained(device='cpu')
+print('Downloading Chatterbox turbo...')
+ChatterboxTTS.from_pretrained(device='cpu', variant='turbo')
+print('Downloading Chatterbox multilingual...')
+ChatterboxTTS.from_pretrained(device='cpu', variant='multilingual')
+print('Chatterbox downloads complete.')
+" || echo "WARNING: Chatterbox download failed. Check chatterbox-tts installation."
+
+echo ""
+echo "=== Download complete ==="
+echo "Models directory: $MODELS_DIR"
+du -sh "$MODELS_DIR"