Tried a fix for gpt-oss-20b but still OOM

This commit is contained in:
llm
2025-11-28 22:23:05 +01:00
parent 9b3d4e40e2
commit 39ee9c3b92

View File

@@ -291,9 +291,11 @@ def _load_model_locked(model_id: str):
     else:
         # Standard Text Model (GPT-OSS)
         print(f"Loading {model_id} with AutoModelForCausalLM...")
+        # GPT-OSS-20B uses native MXFP4 quantization and needs "auto" dtype
+        use_dtype = "auto" if "gpt-oss-20b" in model_id else dtype
         model = AutoModelForCausalLM.from_pretrained(
             model_id,
-            torch_dtype=dtype,
+            torch_dtype=use_dtype,
             device_map=device_map,
             attn_implementation=attn_impl,
             trust_remote_code=True,