From 61308703dc6059d664911566ebd5853e0e8c8e33c3186ce7a0fe7d5fc744154a Mon Sep 17 00:00:00 2001
From: tlg
Date: Mon, 6 Apr 2026 16:41:41 +0200
Subject: [PATCH] feat: replace gpt-oss-20b-uncensored with HauhauCS MXFP4 GGUF

aoxo model had no quantization (BF16, ~40GB OOM). HauhauCS model uses
MXFP4 GGUF format, loads at 11.9GB via llama-cpp backend. All three
reasoning levels (Low/Medium/High) work.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 kischdle/llmux/config/models.yaml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/kischdle/llmux/config/models.yaml b/kischdle/llmux/config/models.yaml
index 1b0db5a..b47c82a 100644
--- a/kischdle/llmux/config/models.yaml
+++ b/kischdle/llmux/config/models.yaml
@@ -36,8 +36,9 @@ physical_models:
 
   gpt-oss-20b-uncensored:
     type: llm
-    backend: transformers
-    model_id: "aoxo/gpt-oss-20b-uncensored"
+    backend: llamacpp
+    model_id: "HauhauCS/GPT-OSS-20B-Uncensored-HauhauCS-Aggressive"
+    model_file: "GPT-OSS-20B-Uncensored-HauhauCS-MXFP4-Aggressive.gguf"
     estimated_vram_gb: 13
     supports_vision: false
     supports_tools: true