fix: Chatterbox uses separate classes per variant, remove turbo
ChatterboxTTS and ChatterboxMultilingualTTS are separate classes, so each variant is now loaded through its own class. The Turbo variant does not exist in chatterbox-tts 0.1.7, so its branch is removed. The multilingual generate() requires a language_id parameter.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
import asyncio
|
||||
import gc
|
||||
import io
|
||||
import logging
|
||||
|
||||
@@ -24,25 +25,26 @@ class ChatterboxTTSBackend(BaseBackend):
|
||||
logger.info(f"Loading Chatterbox {variant} to {device}")
|
||||
|
||||
def _load():
|
||||
from chatterbox.tts import ChatterboxTTS
|
||||
if variant == "turbo":
|
||||
model = ChatterboxTTS.from_pretrained(device=device, variant="turbo")
|
||||
elif variant == "multilingual":
|
||||
model = ChatterboxTTS.from_pretrained(device=device, variant="multilingual")
|
||||
if variant == "multilingual":
|
||||
from chatterbox import ChatterboxMultilingualTTS
|
||||
return ChatterboxMultilingualTTS.from_pretrained(device=device)
|
||||
else:
|
||||
model = ChatterboxTTS.from_pretrained(device=device)
|
||||
return model
|
||||
from chatterbox.tts import ChatterboxTTS
|
||||
return ChatterboxTTS.from_pretrained(device=device)
|
||||
|
||||
loop = asyncio.get_event_loop()
|
||||
model = await loop.run_in_executor(None, _load)
|
||||
self._loaded[model_id] = {"model": model, "device": device}
|
||||
self._loaded[model_id] = {"model": model, "variant": variant, "device": device}
|
||||
|
||||
async def unload(self, model_id: str) -> None:
    """Drop a loaded Chatterbox model and release the memory it held.

    Does nothing when ``model_id`` has no entry in ``self._loaded``.

    Args:
        model_id: Key under which the model was registered in
            ``self._loaded``.
    """
    try:
        record = self._loaded.pop(model_id)
    except KeyError:
        # Nothing registered under this id, so there is nothing to free.
        return

    # Drop every reference this method holds before collecting, so the
    # collector and the CUDA cache flush below can reclaim the memory.
    del record["model"]
    del record
    gc.collect()
    torch.cuda.empty_cache()
    logger.info(f"Unloaded Chatterbox {model_id}")
|
||||
|
||||
async def generate(self, model_id, messages, params, stream=False, tools=None):
    """Reject chat generation, which this TTS backend does not support.

    All arguments are accepted but ignored.

    Raises:
        NotImplementedError: Always.
    """
    reason = "TTS backend does not support chat generation"
    raise NotImplementedError(reason)
|
||||
@@ -50,9 +52,15 @@ class ChatterboxTTSBackend(BaseBackend):
|
||||
async def synthesize(self, model_id: str, text: str, voice: str = "default") -> bytes:
|
||||
entry = self._loaded[model_id]
|
||||
model = entry["model"]
|
||||
variant = entry["variant"]
|
||||
|
||||
def _synthesize():
|
||||
wav = model.generate(text)
|
||||
if variant == "multilingual":
|
||||
# Default to English; voice param could encode language
|
||||
lang = "en" if voice == "default" else voice
|
||||
wav = model.generate(text, language_id=lang)
|
||||
else:
|
||||
wav = model.generate(text)
|
||||
buf = io.BytesIO()
|
||||
sf.write(buf, wav.cpu().numpy().squeeze(), samplerate=24000, format="WAV")
|
||||
buf.seek(0)
|
||||
|
||||
Reference in New Issue
Block a user