Tried a fix for gpt-oss-20b, but it still runs out of memory (OOM)
This commit is contained in:
@@ -291,9 +291,11 @@ def _load_model_locked(model_id: str):
|
||||
else:
|
||||
# Standard Text Model (GPT-OSS)
|
||||
print(f"Loading {model_id} with AutoModelForCausalLM...")
|
||||
+ # GPT-OSS-20B uses native MXFP4 quantization and needs "auto" dtype
|
||||
+ use_dtype = "auto" if "gpt-oss-20b" in model_id else dtype
|
||||
model = AutoModelForCausalLM.from_pretrained(
|
||||
model_id,
|
||||
- torch_dtype=dtype,
|
||||
+ torch_dtype=use_dtype,
|
||||
device_map=device_map,
|
||||
attn_implementation=attn_impl,
|
||||
trust_remote_code=True,
|
||||
|
||||
Reference in New Issue
Block a user