Files
DesTEngSsv006_swd/kischdle/llmux/tests/test_routes.py
tlg 3edc055299 fix: Open WebUI integration — Harmony stripping, VRAM eviction, concurrency lock
- Add harmony.py: strip GPT-OSS-20B analysis/thinking channel from both
  streaming and non-streaming responses (HarmonyStreamFilter + extract_final_text)
- Add per-model asyncio.Lock in llamacpp backend to prevent concurrent C++
  access that caused container segfaults (exit 139)
- Fix chat handler swap for streaming: move inside _stream_generate within
  lock scope (was broken by try/finally running before stream was consumed)
- Filter /v1/models to return only LLM models (hide ASR/TTS from chat dropdown)
- Correct Qwen3.5-4B estimated_vram_gb: 4 → 9 (actual allocation ~8GB)
- Add GPU memory verification after eviction with retry loop in vram_manager
- Add HF_TOKEN_PATH support in main.py for gated model access
- Add /v1/audio/models and /v1/audio/voices discovery endpoints (no auth)
- Add OOM error handling in both backends and chat route
- Add AUDIO_STT_SUPPORTED_CONTENT_TYPES for webm/wav/mp3/ogg
- Add performance test script (scripts/perf_test.py)
- Update tests to match current config (42 tests pass)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-08 21:50:39 +02:00

63 lines
1.6 KiB
Python

import pytest
from fastapi import FastAPI
from fastapi.testclient import TestClient
from llmux.config import ApiKey
from llmux.auth import create_api_key_dependency
from llmux.model_registry import ModelRegistry
from llmux.vram_manager import VRAMManager
from llmux.routes.models import create_models_router
# Static bearer token accepted by the test auth dependency; every
# authenticated request in this module sends it via `auth_headers`.
API_KEY = "sk-test-key"
@pytest.fixture
def registry():
    """Model registry built from the project's default configuration."""
    loaded = ModelRegistry.from_config()
    return loaded
@pytest.fixture
def vram_manager():
    """VRAM manager configured with a 16 GB budget."""
    manager = VRAMManager(total_vram_gb=16.0)
    return manager
@pytest.fixture
def app(registry, vram_manager):
    """FastAPI application wired with the models router and test-key auth.

    Note: `vram_manager` is requested but not passed to the router here —
    it is presumably needed for fixture ordering/side effects; confirm.
    """
    test_keys = [ApiKey(key=API_KEY, name="Test")]
    auth_dep = create_api_key_dependency(test_keys)
    application = FastAPI()
    router = create_models_router(registry, auth_dep)
    application.include_router(router)
    return application
@pytest.fixture
def client(app):
    """Synchronous HTTP client bound to the `app` fixture."""
    http = TestClient(app)
    return http
@pytest.fixture
def auth_headers():
    """Authorization header carrying the module-level test API key."""
    bearer = f"Bearer {API_KEY}"
    return {"Authorization": bearer}
def test_list_models_returns_only_llm(client, auth_headers):
    """/v1/models responds with an OpenAI-style list of the LLM models only."""
    response = client.get("/v1/models", headers=auth_headers)
    assert response.status_code == 200
    payload = response.json()
    assert payload["object"] == "list"
    # Exactly 12 LLM entries are expected; ASR/TTS models are filtered out
    # of this endpoint so they never appear in the chat model dropdown.
    assert len(payload["data"]) == 12
def test_list_models_contains_expected_names(client, auth_headers):
    """Known LLM ids are listed; audio (ASR/TTS) model ids are hidden."""
    response = client.get("/v1/models", headers=auth_headers)
    listed_ids = {entry["id"] for entry in response.json()["data"]}
    assert "Qwen3.5-9B-FP8-Thinking" in listed_ids
    assert "GPT-OSS-20B-High" in listed_ids
    # Audio models must not leak into the chat-model listing.
    assert "cohere-transcribe" not in listed_ids
    assert "Chatterbox-Multilingual" not in listed_ids
def test_list_models_requires_auth(client):
    """A request without an Authorization header is rejected with 401."""
    response = client.get("/v1/models")
    assert response.status_code == 401