Attempted fix for gpt-oss-20b (torch_dtype="auto"), but model loading still hits OOM

2025-11-28 22:23:05 +01:00
parent 9b3d4e40e2
commit 39ee9c3b92
1 changed file with 3 additions and 1 deletion
--- a/.local/share/pytorch_pod/python-apps/ai-model.py
+++ b/.local/share/pytorch_pod/python-apps/ai-model.py
@@ -291,9 +291,11 @@ def _load_model_locked(model_id: str):
        else:
            # Standard Text Model (GPT-OSS)
            print(f"Loading {model_id} with AutoModelForCausalLM...")
+            # GPT-OSS-20B uses native MXFP4 quantization and needs "auto" dtype
+            use_dtype = "auto" if "gpt-oss-20b" in model_id else dtype
            model = AutoModelForCausalLM.from_pretrained(
                model_id,
-                torch_dtype=dtype,
+                torch_dtype=use_dtype,
                device_map=device_map,
                attn_implementation=attn_impl,
                trust_remote_code=True,