Fix: run the synchronous generator in an executor instead of iterating it with `async for`

This commit is contained in:
2026-01-11 18:32:06 -06:00
parent 4d11334f33
commit 4eab3ccc01

55
main.py
View File

@@ -166,7 +166,7 @@ def get_custom_voices() -> List[str]:
async def generate_speech(text: str, voice: str) -> bytes: async def generate_speech(text: str, voice: str) -> bytes:
""" """
Generate speech using Orpheus model (async version). Generate speech using Orpheus model (async wrapper).
Args: Args:
text: Text to convert (may include emotion tags) text: Text to convert (may include emotion tags)
@@ -191,31 +191,36 @@ async def generate_speech(text: str, voice: str) -> bytes:
print(f"{text}") print(f"{text}")
# Generate speech using Orpheus - async iteration! # Run synchronous generation in thread pool to not block event loop
import numpy as np def _generate_sync():
audio_chunks = [] import numpy as np
audio_chunks = []
syn_tokens = model.generate_speech( syn_tokens = model.generate_speech(
prompt=text, prompt=text,
voice=voice, voice=voice,
) )
# Async iteration over the generator # Sync iteration - generator yields audio chunks
async for audio_chunk in syn_tokens: for audio_chunk in syn_tokens:
audio_chunks.append(audio_chunk) audio_chunks.append(audio_chunk)
# Combine chunks into single audio # Combine chunks into single audio
audio_data = np.concatenate(audio_chunks) if len(audio_chunks) > 1 else audio_chunks[0] audio_data = np.concatenate(audio_chunks) if len(audio_chunks) > 1 else audio_chunks[0]
# Convert to WAV bytes # Convert to WAV bytes
buffer = io.BytesIO() buffer = io.BytesIO()
with wave.open(buffer, 'wb') as wf: with wave.open(buffer, 'wb') as wf:
wf.setnchannels(1) wf.setnchannels(1)
wf.setsampwidth(2) # 16-bit wf.setsampwidth(2) # 16-bit
wf.setframerate(SAMPLE_RATE) wf.setframerate(SAMPLE_RATE)
wf.writeframes(audio_data) wf.writeframes(audio_data)
return buffer.getvalue() return buffer.getvalue()
# Run in executor to avoid blocking
loop = asyncio.get_event_loop()
return await loop.run_in_executor(None, _generate_sync)
def save_audio_to_file(job_id: str, audio_bytes: bytes) -> str: def save_audio_to_file(job_id: str, audio_bytes: bytes) -> str:
@@ -528,15 +533,15 @@ async def stream_tts(request: TTSStreamRequest):
if voice not in BUILTIN_VOICES: if voice not in BUILTIN_VOICES:
voice = DEFAULT_VOICE voice = DEFAULT_VOICE
async def audio_generator(): def sync_audio_generator():
"""Generate audio chunks (async)""" """Generate audio chunks (sync generator)"""
try: try:
syn_tokens = model.generate_speech( syn_tokens = model.generate_speech(
prompt=request.text, prompt=request.text,
voice=voice, voice=voice,
) )
async for audio_chunk in syn_tokens: for audio_chunk in syn_tokens:
yield audio_chunk yield audio_chunk
except Exception as e: except Exception as e:
@@ -544,7 +549,7 @@ async def stream_tts(request: TTSStreamRequest):
raise raise
return StreamingResponse( return StreamingResponse(
audio_generator(), sync_audio_generator(),
media_type="audio/wav" media_type="audio/wav"
) )