feat: Dockerfile, model download script, and pod creation script
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
22
kischdle/llmux/Dockerfile
Normal file
22
kischdle/llmux/Dockerfile
Normal file
@@ -0,0 +1,22 @@
|
||||
# syntax=docker/dockerfile:1
FROM pytorch/pytorch:2.11.0-cuda12.8-cudnn9-runtime

# System dependencies for audio processing
RUN apt-get update && apt-get install -y --no-install-recommends \
        ffmpeg \
        libsndfile1 \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies
COPY requirements.txt /tmp/requirements.txt
RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt

# llama-cpp-python needs a CUDA-enabled build.
# FIX: the requirement specifier must be quoted — unquoted, the shell parses
# `llama-cpp-python>=0.3.0` as a redirection to a file named `=0.3.0`, and pip
# receives only the bare, unpinned `llama-cpp-python`.
# NOTE(review): the `-runtime` base image normally ships without nvcc/CUDA
# headers — confirm this source build actually succeeds here, or perform it in
# a `-devel` build stage and copy the result.
RUN CMAKE_ARGS="-DGGML_CUDA=on" pip install --no-cache-dir --force-reinstall "llama-cpp-python>=0.3.0"

# Copy application code
COPY llmux/ /app/llmux/
WORKDIR /app

# Run the server. EXPOSE is documentation only; the port is published at run
# time (see scripts/create_pod_llmux.sh).
EXPOSE 8081
CMD ["uvicorn", "llmux.main:app", "--host", "0.0.0.0", "--port", "8081"]
|
||||
81
kischdle/llmux/scripts/create_pod_llmux.sh
Executable file
81
kischdle/llmux/scripts/create_pod_llmux.sh
Executable file
@@ -0,0 +1,81 @@
|
||||
#!/bin/bash
# Create the llmux Podman pod and systemd service.
# Run as user llm: bash scripts/create_pod_llmux.sh
#
# Flow: validate models/config dirs -> build image if missing -> start a
# temporary pod and verify it is healthy -> generate systemd units from it ->
# replace the temporary pod with the systemd-managed one.

set -euo pipefail

POD_NAME="llmux_pod"
CTR_NAME="llmux_ctr"
IMAGE="localhost/llmux:latest"
PORT="127.0.0.1:8081:8081"   # loopback-only publish; not reachable externally
BIND_DIR="$HOME/.local/share/${POD_NAME}"
USER_SYSTEMD_DIR="$HOME/.config/systemd/user"

MODELS_DIR="${BIND_DIR}/models"
CONFIG_DIR="${BIND_DIR}/config"

# --- Preconditions -----------------------------------------------------------
if [ ! -d "$MODELS_DIR" ]; then
    echo "ERROR: Models directory not found: $MODELS_DIR"
    echo "Run download_models.sh first."
    exit 1
fi

if [ ! -f "$CONFIG_DIR/models.yaml" ]; then
    echo "ERROR: Config not found: $CONFIG_DIR/models.yaml"
    exit 1
fi

if [ ! -f "$CONFIG_DIR/api_keys.yaml" ]; then
    echo "ERROR: Config not found: $CONFIG_DIR/api_keys.yaml"
    exit 1
fi

mkdir -p "$USER_SYSTEMD_DIR"

# --- Image -------------------------------------------------------------------
if ! podman image exists "$IMAGE"; then
    echo "Building container image..."
    SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
    podman build -t llmux:latest -f "$SCRIPT_DIR/../Dockerfile" "$SCRIPT_DIR/.."
fi

# --- Recreate pod ------------------------------------------------------------
# `|| true` keeps `set -e` from aborting when the pod does not exist yet.
podman pod exists "$POD_NAME" && podman pod stop "$POD_NAME" 2>/dev/null || true
podman pod exists "$POD_NAME" && podman pod rm -f "$POD_NAME" 2>/dev/null || true

echo "Creating pod $POD_NAME..."
podman pod create --name "$POD_NAME" -p "$PORT"

echo "Creating container $CTR_NAME..."
podman run -d \
    --name "$CTR_NAME" \
    --pod "$POD_NAME" \
    --device nvidia.com/gpu=all \
    -v "${MODELS_DIR}:/models:ro" \
    -v "${CONFIG_DIR}:/config:ro" \
    -e LLMUX_CONFIG_DIR=/config \
    -e LLMUX_MODELS_DIR=/models \
    "$IMAGE"

# --- Health check ------------------------------------------------------------
# FIX: the loop previously fell through silently after 30 failed attempts and
# the script went on to enable systemd units for a broken service. Now the
# script fails fast with a pointer to the container logs.
echo "Waiting for llmux to start..."
healthy=0
for _ in $(seq 1 30); do
    if curl -sf http://127.0.0.1:8081/health > /dev/null 2>&1; then
        echo "llmux is healthy!"
        healthy=1
        break
    fi
    sleep 2
done
if [ "$healthy" -ne 1 ]; then
    echo "ERROR: llmux did not become healthy within 60s."
    echo "Inspect: podman logs $CTR_NAME"
    exit 1
fi

# --- systemd units -----------------------------------------------------------
echo "Generating systemd units..."
cd "$USER_SYSTEMD_DIR"
podman generate systemd --files --new --name "$POD_NAME"

# Remove the temporary pod; systemd now owns the lifecycle via --new units.
podman pod stop "$POD_NAME"
podman pod rm -f "$POD_NAME"

systemctl --user daemon-reload
systemctl --user enable --now "pod-${POD_NAME}.service"

echo ""
echo "=== llmux pod created and enabled ==="
echo "Service: systemctl --user status pod-${POD_NAME}.service"
echo "Health: curl http://127.0.0.1:8081/health"
echo "Logs: journalctl --user -u pod-${POD_NAME}.service -f"
|
||||
72
kischdle/llmux/scripts/download_models.sh
Executable file
72
kischdle/llmux/scripts/download_models.sh
Executable file
@@ -0,0 +1,72 @@
|
||||
#!/bin/bash
# Download all model weights for llmux.
# Run as user llm: bash scripts/download_models.sh
# Requires: pip install huggingface_hub
# Requires: HuggingFace token at ~/.cache/huggingface/token for gated models

set -euo pipefail

MODELS_DIR="${LLMUX_MODELS_DIR:-$HOME/.local/share/llmux_pod/models}"
mkdir -p "$MODELS_DIR"

echo "=== Downloading models to $MODELS_DIR ==="

# download_hf REPO
# Download a full repo snapshot into $MODELS_DIR, skipping repos whose cache
# entry already exists.
download_hf() {
    local repo="$1"
    # FIX: huggingface_hub names cache entries "models--{org}--{name}" — every
    # "/" in the repo id becomes a double dash. The previous single-dash
    # substitution ("models--org-name") never matched an existing entry, so
    # the skip check was dead and every run re-downloaded every model.
    local target="$MODELS_DIR/models--${repo//\//--}"
    if [ -d "$target" ]; then
        echo "SKIP: $repo (already downloaded)"
        return
    fi
    echo "Downloading: $repo"
    huggingface-cli download "$repo" --cache-dir "$MODELS_DIR"
}

# download_hf_files REPO FILE...
# Download only the named files from a repo (no skip check — the CLI itself
# reuses already-cached files).
download_hf_files() {
    local repo="$1"
    shift
    echo "Downloading specific files from: $repo"
    huggingface-cli download "$repo" "$@" --cache-dir "$MODELS_DIR"
}

# 1. Qwen3.5-9B-FP8
download_hf "lovedheart/Qwen3.5-9B-FP8"

# 2. Qwen3.5-9B-FP8-Uncensored (GGUF files only)
download_hf_files "HauhauCS/Qwen3.5-9B-Uncensored-HauhauCS-Aggressive" \
    "Qwen3.5-9B-Uncensored-HauhauCS-Aggressive-Q8_0.gguf" \
    "mmproj-Qwen3.5-9B-Uncensored-HauhauCS-Aggressive-BF16.gguf"

# 3. Qwen3.5-4B
download_hf "Qwen/Qwen3.5-4B"

# 4. gpt-oss-20b
download_hf "openai/gpt-oss-20b"

# 5. gpt-oss-20b-uncensored
download_hf "aoxo/gpt-oss-20b-uncensored"

# 6. cohere-transcribe (gated — requires accepted terms)
# Non-fatal on purpose: a missing gated model should not abort the other
# downloads under `set -e`.
echo "Downloading: CohereLabs/cohere-transcribe-03-2026 (gated)"
download_hf "CohereLabs/cohere-transcribe-03-2026" || \
    echo "WARNING: cohere-transcribe download failed. Have you accepted the terms at https://huggingface.co/CohereLabs/cohere-transcribe-03-2026 ?"

# 7. Chatterbox TTS
# The library fetches its own weights on first use; CUDA is masked so this
# runs on CPU and only downloads, never loads onto a GPU.
echo "Downloading: Chatterbox TTS weights (auto-downloaded by library)"
python3 -c "
from chatterbox.tts import ChatterboxTTS
import os
os.environ['CUDA_VISIBLE_DEVICES'] = ''
print('Downloading Chatterbox default...')
ChatterboxTTS.from_pretrained(device='cpu')
print('Downloading Chatterbox turbo...')
ChatterboxTTS.from_pretrained(device='cpu', variant='turbo')
print('Downloading Chatterbox multilingual...')
ChatterboxTTS.from_pretrained(device='cpu', variant='multilingual')
print('Chatterbox downloads complete.')
" || echo "WARNING: Chatterbox download failed. Check chatterbox-tts installation."

echo ""
echo "=== Download complete ==="
echo "Models directory: $MODELS_DIR"
du -sh "$MODELS_DIR"
|
||||
Reference in New Issue
Block a user