From 61308703dc6059d664911566ebd5853e0e8c8e33c3186ce7a0fe7d5fc744154a Mon Sep 17 00:00:00 2001
From: tlg <thomas.langer@destengs.com>
Date: Mon, 6 Apr 2026 16:41:41 +0200
Subject: [PATCH] feat: replace gpt-oss-20b-uncensored with HauhauCS MXFP4 GGUF

The aoxo model was unquantized (BF16, ~40 GB) and hit OOM. The HauhauCS
model uses the MXFP4 GGUF format and loads at 11.9 GB via the llamacpp
backend. All three reasoning levels (Low/Medium/High) work.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 kischdle/llmux/config/models.yaml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/kischdle/llmux/config/models.yaml b/kischdle/llmux/config/models.yaml
index 1b0db5a..b47c82a 100644
--- a/kischdle/llmux/config/models.yaml
+++ b/kischdle/llmux/config/models.yaml
@@ -36,8 +36,9 @@ physical_models:
 
   gpt-oss-20b-uncensored:
     type: llm
-    backend: transformers
-    model_id: "aoxo/gpt-oss-20b-uncensored"
+    backend: llamacpp
+    model_id: "HauhauCS/GPT-OSS-20B-Uncensored-HauhauCS-Aggressive"
+    model_file: "GPT-OSS-20B-Uncensored-HauhauCS-MXFP4-Aggressive.gguf"
     estimated_vram_gb: 13
     supports_vision: false
     supports_tools: true