Fix: run the sync generator in an executor instead of iterating it with `async for`

2026-01-11 18:32:06 -06:00
parent 4d11334f33
commit 4eab3ccc01
1 changed file with 34 additions and 29 deletions
--- a/main.py
+++ b/main.py
@@ -166,7 +166,7 @@ def get_custom_voices() -> List[str]:
 async def generate_speech(text: str, voice: str) -> bytes:
    """
-    Generate speech using Orpheus model (async version).
+    Generate speech using Orpheus model (async wrapper).
    Args:
        text: Text to convert (may include emotion tags)
@@ -191,7 +191,8 @@ async def generate_speech(text: str, voice: str) -> bytes:
    print(f"{text}")
-    # Generate speech using Orpheus - async iteration!
+    # Run synchronous generation in thread pool to not block event loop
    def _generate_sync():
        import numpy as np
        audio_chunks = []
@@ -200,8 +201,8 @@ async def generate_speech(text: str, voice: str) -> bytes:
            voice=voice,
        )
-    # Async iteration over the generator
+        # Sync iteration - generator yields audio chunks
-    async for audio_chunk in syn_tokens:
+        for audio_chunk in syn_tokens:
            audio_chunks.append(audio_chunk)
        # Combine chunks into single audio
@@ -217,6 +218,10 @@ async def generate_speech(text: str, voice: str) -> bytes:
        return buffer.getvalue()
    # Run in executor to avoid blocking
    loop = asyncio.get_event_loop()
    return await loop.run_in_executor(None, _generate_sync)
 def save_audio_to_file(job_id: str, audio_bytes: bytes) -> str:
    """Save audio bytes to WAV file."""
@@ -528,15 +533,15 @@ async def stream_tts(request: TTSStreamRequest):
    if voice not in BUILTIN_VOICES:
        voice = DEFAULT_VOICE
-    async def audio_generator():
+    def sync_audio_generator():
-        """Generate audio chunks (async)"""
+        """Generate audio chunks (sync generator)"""
        try:
            syn_tokens = model.generate_speech(
                prompt=request.text,
                voice=voice,
            )
-            async for audio_chunk in syn_tokens:
+            for audio_chunk in syn_tokens:
                yield audio_chunk
        except Exception as e:
@@ -544,7 +549,7 @@ async def stream_tts(request: TTSStreamRequest):
            raise
    return StreamingResponse(
-        audio_generator(),
+        sync_audio_generator(),
        media_type="audio/wav"
    )