From 39ee9c3b92874e4cbe31d6781f67b03ea4523094 Mon Sep 17 00:00:00 2001
From: llm
Date: Fri, 28 Nov 2025 22:23:05 +0100
Subject: [PATCH] Tried fix for gpt-oss-20b but still OOM

---
 .local/share/pytorch_pod/python-apps/ai-model.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.local/share/pytorch_pod/python-apps/ai-model.py b/.local/share/pytorch_pod/python-apps/ai-model.py
index f5ce68e..3ea9b76 100755
--- a/.local/share/pytorch_pod/python-apps/ai-model.py
+++ b/.local/share/pytorch_pod/python-apps/ai-model.py
@@ -291,9 +291,11 @@ def _load_model_locked(model_id: str):
     else:
         # Standard Text Model (GPT-OSS)
         print(f"Loading {model_id} with AutoModelForCausalLM...")
+        # GPT-OSS-20B uses native MXFP4 quantization and needs "auto" dtype
+        use_dtype = "auto" if "gpt-oss-20b" in model_id else dtype
         model = AutoModelForCausalLM.from_pretrained(
             model_id,
-            torch_dtype=dtype,
+            torch_dtype=use_dtype,
             device_map=device_map,
             attn_implementation=attn_impl,
             trust_remote_code=True,
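
For reference, a minimal self-contained sketch of the same dtype-selection idea
outside the patch context. It is not part of the patched file: the helper name
load_text_model, the bf16 default, and the device_map/low_cpu_mem_usage settings
are assumptions, and it presumes the standard transformers
AutoModelForCausalLM.from_pretrained API.

    import torch
    from transformers import AutoModelForCausalLM

    def load_text_model(model_id: str, dtype=torch.bfloat16, device_map="auto"):
        # GPT-OSS-20B ships with MXFP4-quantized weights (per the patch comment),
        # so pass torch_dtype="auto" and let transformers honor the checkpoint's
        # own dtype/quantization config instead of forcing bf16.
        use_dtype = "auto" if "gpt-oss-20b" in model_id else dtype
        return AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=use_dtype,
            device_map=device_map,    # place/offload layers across available devices
            low_cpu_mem_usage=True,   # avoid materializing a full extra copy in host RAM
            trust_remote_code=True,
        )

    # Example (hypothetical usage; still subject to the same VRAM limits as the patch):
    # model = load_text_model("openai/gpt-oss-20b")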