Files
DesTEngSsv006_swd/kischdle/llmux/tests/test_config.py
tlg 3edc055299 fix: Open WebUI integration — Harmony stripping, VRAM eviction, concurrency lock
- Add harmony.py: strip GPT-OSS-20B analysis/thinking channel from both
  streaming and non-streaming responses (HarmonyStreamFilter + extract_final_text)
- Add per-model asyncio.Lock in llamacpp backend to prevent concurrent C++
  access that caused container segfaults (exit 139)
- Fix chat handler swap for streaming: move inside _stream_generate within
  lock scope (was broken by try/finally running before stream was consumed)
- Filter /v1/models to return only LLM models (hide ASR/TTS from chat dropdown)
- Correct Qwen3.5-4B estimated_vram_gb: 4 → 9 (actual allocation ~8GB)
- Add GPU memory verification after eviction with retry loop in vram_manager
- Add HF_TOKEN_PATH support in main.py for gated model access
- Add /v1/audio/models and /v1/audio/voices discovery endpoints (no auth)
- Add OOM error handling in both backends and chat route
- Add AUDIO_STT_SUPPORTED_CONTENT_TYPES for webm/wav/mp3/ogg
- Add performance test script (scripts/perf_test.py)
- Update tests to match current config (42 tests pass)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-08 21:50:39 +02:00

57 lines
1.9 KiB
Python

from llmux.config import load_models_config, load_api_keys, PhysicalModel, VirtualModel
def test_load_models_config_returns_physical_and_virtual():
    """Loader returns two dict registries: 8 physical and 15 virtual models."""
    physical_models, virtual_models = load_models_config()
    assert isinstance(physical_models, dict)
    assert isinstance(virtual_models, dict)
    assert len(physical_models) == 8
    assert len(virtual_models) == 15
def test_physical_model_has_required_fields():
    """The qwen3.5-9b-fp8 entry exposes every core PhysicalModel field."""
    models, _ = load_models_config()
    model = models["qwen3.5-9b-fp8"]
    assert model.type == "llm"
    assert model.backend == "llamacpp"
    assert model.model_id == "unsloth/Qwen3.5-9B-GGUF"
    assert model.estimated_vram_gb == 10
    # Explicit identity checks: these must be real booleans, not truthy values.
    assert model.supports_vision is False
    assert model.supports_tools is True
def test_physical_model_llamacpp_has_gguf_fields():
    """llamacpp-backed models carry GGUF weight and mmproj filenames."""
    models, _ = load_models_config()
    model = models["qwen3.5-9b-fp8-uncensored"]
    assert model.backend == "llamacpp"
    assert model.model_file == "Qwen3.5-9B-Uncensored-HauhauCS-Aggressive-Q8_0.gguf"
    assert model.mmproj_file == "mmproj-Qwen3.5-9B-Uncensored-HauhauCS-Aggressive-BF16.gguf"
def test_virtual_model_maps_to_physical():
    """A virtual model points at its physical backend and carries params."""
    _, virtual_models = load_models_config()
    vmodel = virtual_models["Qwen3.5-9B-FP8-Thinking"]
    assert vmodel.physical == "qwen3.5-9b-fp8"
    assert vmodel.params == {"enable_thinking": True}
def test_virtual_model_gpt_oss_has_system_prompt():
    """GPT-OSS reasoning variants inject a system-prompt prefix param."""
    _, virtual_models = load_models_config()
    vmodel = virtual_models["GPT-OSS-20B-Low"]
    assert vmodel.physical == "gpt-oss-20b"
    assert vmodel.params == {"system_prompt_prefix": "Reasoning: low"}
def test_virtual_model_without_params():
    """A bare virtual model defaults to an empty params dict."""
    _, virtual_models = load_models_config()
    vmodel = virtual_models["cohere-transcribe"]
    assert vmodel.physical == "cohere-transcribe"
    assert vmodel.params == {}
def test_load_api_keys():
    """Exactly three keys, all sk-llmux- prefixed, with the expected client names."""
    api_keys = load_api_keys()
    assert len(api_keys) == 3
    for entry in api_keys:
        assert entry.key.startswith("sk-llmux-")
    expected_names = {"Open WebUI", "Remote Whisper clients", "OpenCode"}
    assert {entry.name for entry in api_keys} == expected_names