# DesTEngSsv006_swd/kischdle/llmux/config/models.yaml

# Registry of backing models, keyed by the internal name that entries under
# `virtual_models` point at through their `physical` field.
# estimated_vram_gb is presumably a size in gigabytes (per the key name) —
# TODO confirm how the consumer uses it.
physical_models:
  # --- LLMs -----------------------------------------------------------------

  # NOTE(review): the key says "fp8" but model_file is a Q8_0 GGUF — confirm
  # the naming is intentional.
  qwen3.5-9b-fp8:
    type: llm
    backend: llamacpp
    model_id: 'unsloth/Qwen3.5-9B-GGUF'
    model_file: 'Qwen3.5-9B-Q8_0.gguf'
    estimated_vram_gb: 10
    supports_vision: false
    supports_tools: true

  # The only entry that sets mmproj_file, and the only llamacpp entry with
  # supports_vision: true.
  # NOTE(review): estimated_vram_gb is 9 here versus 10 for the entry above,
  # although this one additionally lists an mmproj file — confirm the figures.
  qwen3.5-9b-fp8-uncensored:
    type: llm
    backend: llamacpp
    model_id: 'HauhauCS/Qwen3.5-9B-Uncensored-HauhauCS-Aggressive'
    model_file: 'Qwen3.5-9B-Uncensored-HauhauCS-Aggressive-Q8_0.gguf'
    mmproj_file: 'mmproj-Qwen3.5-9B-Uncensored-HauhauCS-Aggressive-BF16.gguf'
    estimated_vram_gb: 9
    supports_vision: true
    supports_tools: true

  # transformers-backend entries carry a model_id only (no model_file).
  qwen3.5-4b:
    type: llm
    backend: transformers
    model_id: 'Qwen/Qwen3.5-4B'
    estimated_vram_gb: 9
    supports_vision: true
    supports_tools: true

  gpt-oss-20b:
    type: llm
    backend: transformers
    model_id: 'openai/gpt-oss-20b'
    estimated_vram_gb: 13
    supports_vision: false
    supports_tools: true

  gpt-oss-20b-uncensored:
    type: llm
    backend: llamacpp
    model_id: 'HauhauCS/GPT-OSS-20B-Uncensored-HauhauCS-Aggressive'
    model_file: 'GPT-OSS-20B-Uncensored-HauhauCS-MXFP4-Aggressive.gguf'
    estimated_vram_gb: 13
    supports_vision: false
    supports_tools: true

  # --- ASR ------------------------------------------------------------------

  # The only type: asr entry; it sets default_language instead of the
  # supports_* flags used by the llm entries.
  cohere-transcribe:
    type: asr
    backend: transformers
    model_id: 'CohereLabs/cohere-transcribe-03-2026'
    estimated_vram_gb: 4
    default_language: 'en'

  # --- TTS ------------------------------------------------------------------

  # Two chatterbox-backend entries that differ only in `variant`.
  chatterbox-multilingual:
    type: tts
    backend: chatterbox
    variant: 'multilingual'
    estimated_vram_gb: 2

  chatterbox:
    type: tts
    backend: chatterbox
    variant: 'default'
    estimated_vram_gb: 2

# Named aliases over physical_models entries. Each alias names its backing
# model through `physical`; the optional `params` mapping carries per-alias
# settings. Presumably these keys are the names exposed to clients — TODO
# confirm against the consumer.
virtual_models:
  # Qwen aliases come in Thinking/Instruct pairs that share one physical model
  # and differ only in enable_thinking.
  Qwen3.5-9B-FP8-Thinking:
    physical: qwen3.5-9b-fp8
    params:
      enable_thinking: true

  Qwen3.5-9B-FP8-Instruct:
    physical: qwen3.5-9b-fp8
    params:
      enable_thinking: false

  Qwen3.5-9B-FP8-Uncensored-Thinking:
    physical: qwen3.5-9b-fp8-uncensored
    params:
      enable_thinking: true

  Qwen3.5-9B-FP8-Uncensored-Instruct:
    physical: qwen3.5-9b-fp8-uncensored
    params:
      enable_thinking: false

  Qwen3.5-4B-Thinking:
    physical: qwen3.5-4b
    params:
      enable_thinking: true

  Qwen3.5-4B-Instruct:
    physical: qwen3.5-4b
    params:
      enable_thinking: false

  # GPT-OSS aliases come in Low/Medium/High triples per physical model and
  # differ only in system_prompt_prefix. The value contains ": " and must stay
  # quoted. Presumably the prefix is prepended to the system prompt — TODO
  # confirm against the consumer.
  GPT-OSS-20B-Low:
    physical: gpt-oss-20b
    params:
      system_prompt_prefix: 'Reasoning: low'

  GPT-OSS-20B-Medium:
    physical: gpt-oss-20b
    params:
      system_prompt_prefix: 'Reasoning: medium'

  GPT-OSS-20B-High:
    physical: gpt-oss-20b
    params:
      system_prompt_prefix: 'Reasoning: high'

  GPT-OSS-20B-Uncensored-Low:
    physical: gpt-oss-20b-uncensored
    params:
      system_prompt_prefix: 'Reasoning: low'

  GPT-OSS-20B-Uncensored-Medium:
    physical: gpt-oss-20b-uncensored
    params:
      system_prompt_prefix: 'Reasoning: medium'

  GPT-OSS-20B-Uncensored-High:
    physical: gpt-oss-20b-uncensored
    params:
      system_prompt_prefix: 'Reasoning: high'

  # Aliases without params.
  # NOTE(review): cohere-transcribe is the only all-lowercase alias and is
  # spelled exactly like its physical key — confirm this is intended.
  cohere-transcribe:
    physical: cohere-transcribe

  Chatterbox-Multilingual:
    physical: chatterbox-multilingual

  Chatterbox:
    physical: chatterbox