Fix: run the synchronous generator in an executor instead of iterating it with `async for`
This commit is contained in:
63
main.py
63
main.py
@@ -166,7 +166,7 @@ def get_custom_voices() -> List[str]:
|
|||||||
|
|
||||||
async def generate_speech(text: str, voice: str) -> bytes:
|
async def generate_speech(text: str, voice: str) -> bytes:
|
||||||
"""
|
"""
|
||||||
Generate speech using Orpheus model (async version).
|
Generate speech using Orpheus model (async wrapper).
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
text: Text to convert (may include emotion tags)
|
text: Text to convert (may include emotion tags)
|
||||||
@@ -191,31 +191,36 @@ async def generate_speech(text: str, voice: str) -> bytes:
|
|||||||
|
|
||||||
print(f"{text}")
|
print(f"{text}")
|
||||||
|
|
||||||
# Generate speech using Orpheus - async iteration!
|
# Run synchronous generation in thread pool to not block event loop
|
||||||
import numpy as np
|
def _generate_sync():
|
||||||
audio_chunks = []
|
import numpy as np
|
||||||
|
audio_chunks = []
|
||||||
|
|
||||||
|
syn_tokens = model.generate_speech(
|
||||||
|
prompt=text,
|
||||||
|
voice=voice,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Sync iteration - generator yields audio chunks
|
||||||
|
for audio_chunk in syn_tokens:
|
||||||
|
audio_chunks.append(audio_chunk)
|
||||||
|
|
||||||
|
# Combine chunks into single audio
|
||||||
|
audio_data = np.concatenate(audio_chunks) if len(audio_chunks) > 1 else audio_chunks[0]
|
||||||
|
|
||||||
|
# Convert to WAV bytes
|
||||||
|
buffer = io.BytesIO()
|
||||||
|
with wave.open(buffer, 'wb') as wf:
|
||||||
|
wf.setnchannels(1)
|
||||||
|
wf.setsampwidth(2) # 16-bit
|
||||||
|
wf.setframerate(SAMPLE_RATE)
|
||||||
|
wf.writeframes(audio_data)
|
||||||
|
|
||||||
|
return buffer.getvalue()
|
||||||
|
|
||||||
syn_tokens = model.generate_speech(
|
# Run in executor to avoid blocking
|
||||||
prompt=text,
|
loop = asyncio.get_event_loop()
|
||||||
voice=voice,
|
return await loop.run_in_executor(None, _generate_sync)
|
||||||
)
|
|
||||||
|
|
||||||
# Async iteration over the generator
|
|
||||||
async for audio_chunk in syn_tokens:
|
|
||||||
audio_chunks.append(audio_chunk)
|
|
||||||
|
|
||||||
# Combine chunks into single audio
|
|
||||||
audio_data = np.concatenate(audio_chunks) if len(audio_chunks) > 1 else audio_chunks[0]
|
|
||||||
|
|
||||||
# Convert to WAV bytes
|
|
||||||
buffer = io.BytesIO()
|
|
||||||
with wave.open(buffer, 'wb') as wf:
|
|
||||||
wf.setnchannels(1)
|
|
||||||
wf.setsampwidth(2) # 16-bit
|
|
||||||
wf.setframerate(SAMPLE_RATE)
|
|
||||||
wf.writeframes(audio_data)
|
|
||||||
|
|
||||||
return buffer.getvalue()
|
|
||||||
|
|
||||||
|
|
||||||
def save_audio_to_file(job_id: str, audio_bytes: bytes) -> str:
|
def save_audio_to_file(job_id: str, audio_bytes: bytes) -> str:
|
||||||
@@ -528,15 +533,15 @@ async def stream_tts(request: TTSStreamRequest):
|
|||||||
if voice not in BUILTIN_VOICES:
|
if voice not in BUILTIN_VOICES:
|
||||||
voice = DEFAULT_VOICE
|
voice = DEFAULT_VOICE
|
||||||
|
|
||||||
async def audio_generator():
|
def sync_audio_generator():
|
||||||
"""Generate audio chunks (async)"""
|
"""Generate audio chunks (sync generator)"""
|
||||||
try:
|
try:
|
||||||
syn_tokens = model.generate_speech(
|
syn_tokens = model.generate_speech(
|
||||||
prompt=request.text,
|
prompt=request.text,
|
||||||
voice=voice,
|
voice=voice,
|
||||||
)
|
)
|
||||||
|
|
||||||
async for audio_chunk in syn_tokens:
|
for audio_chunk in syn_tokens:
|
||||||
yield audio_chunk
|
yield audio_chunk
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -544,7 +549,7 @@ async def stream_tts(request: TTSStreamRequest):
|
|||||||
raise
|
raise
|
||||||
|
|
||||||
return StreamingResponse(
|
return StreamingResponse(
|
||||||
audio_generator(),
|
sync_audio_generator(),
|
||||||
media_type="audio/wav"
|
media_type="audio/wav"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user