Tried a fix for gpt-oss-20b, but it still runs out of memory (OOM)
This commit is contained in:
@@ -291,9 +291,11 @@ def _load_model_locked(model_id: str):
|
|||||||
else:
|
else:
|
||||||
# Standard Text Model (GPT-OSS)
|
# Standard Text Model (GPT-OSS)
|
||||||
print(f"Loading {model_id} with AutoModelForCausalLM...")
|
print(f"Loading {model_id} with AutoModelForCausalLM...")
|
||||||
|
# GPT-OSS-20B uses native MXFP4 quantization and needs "auto" dtype
|
||||||
|
use_dtype = "auto" if "gpt-oss-20b" in model_id else dtype
|
||||||
model = AutoModelForCausalLM.from_pretrained(
|
model = AutoModelForCausalLM.from_pretrained(
|
||||||
model_id,
|
model_id,
|
||||||
torch_dtype=dtype,
|
torch_dtype=use_dtype,
|
||||||
device_map=device_map,
|
device_map=device_map,
|
||||||
attn_implementation=attn_impl,
|
attn_implementation=attn_impl,
|
||||||
trust_remote_code=True,
|
trust_remote_code=True,
|
||||||
|
|||||||
Reference in New Issue
Block a user