From 39ee9c3b92874e4cbe31d6781f67b03ea4523094 Mon Sep 17 00:00:00 2001
From: llm
Date: Fri, 28 Nov 2025 22:23:05 +0100
Subject: [PATCH] Tried fix for gpt-oss-20b but still OOM

---
 .local/share/pytorch_pod/python-apps/ai-model.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.local/share/pytorch_pod/python-apps/ai-model.py b/.local/share/pytorch_pod/python-apps/ai-model.py
index f5ce68e..3ea9b76 100755
--- a/.local/share/pytorch_pod/python-apps/ai-model.py
+++ b/.local/share/pytorch_pod/python-apps/ai-model.py
@@ -291,9 +291,11 @@ def _load_model_locked(model_id: str):
     else:
         # Standard Text Model (GPT-OSS)
         print(f"Loading {model_id} with AutoModelForCausalLM...")
+        # GPT-OSS-20B uses native MXFP4 quantization and needs "auto" dtype
+        use_dtype = "auto" if "gpt-oss-20b" in model_id else dtype
         model = AutoModelForCausalLM.from_pretrained(
             model_id,
-            torch_dtype=dtype,
+            torch_dtype=use_dtype,
             device_map=device_map,
             attn_implementation=attn_impl,
             trust_remote_code=True,
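
For reference, a minimal self-contained sketch of the same dtype-selection idea
outside the patch context. It is not part of the patched file: the helper name
load_text_model, the bf16 default, and the device_map/low_cpu_mem_usage settings
are assumptions, and it presumes the standard transformers
AutoModelForCausalLM.from_pretrained API.

    import torch
    from transformers import AutoModelForCausalLM

    def load_text_model(model_id: str, dtype=torch.bfloat16, device_map="auto"):
        # GPT-OSS-20B ships with MXFP4-quantized weights (per the patch comment),
        # so pass torch_dtype="auto" and let transformers honor the checkpoint's
        # own dtype/quantization config instead of forcing bf16.
        use_dtype = "auto" if "gpt-oss-20b" in model_id else dtype
        return AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=use_dtype,
            device_map=device_map,    # place/offload layers across available devices
            low_cpu_mem_usage=True,   # avoid materializing a full extra copy in host RAM
            trust_remote_code=True,
        )

    # Example (hypothetical usage; still subject to the same VRAM limits as the patch):
    # model = load_text_model("openai/gpt-oss-20b")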