Fix async iteration over the vLLM generator - use `async for` instead of a synchronous `for`

This commit is contained in:
2026-01-11 18:18:37 -06:00
parent a164bed590
commit 4d11334f33

35
main.py
View File

@@ -164,9 +164,9 @@ def get_custom_voices() -> List[str]:
return voices return voices
def generate_speech(text: str, voice: str) -> bytes: async def generate_speech(text: str, voice: str) -> bytes:
""" """
Generate speech using Orpheus model. Generate speech using Orpheus model (async version).
Args: Args:
text: Text to convert (may include emotion tags) text: Text to convert (may include emotion tags)
@@ -189,8 +189,10 @@ def generate_speech(text: str, voice: str) -> bytes:
print(f"Unknown voice '{voice}', using default '{DEFAULT_VOICE}'") print(f"Unknown voice '{voice}', using default '{DEFAULT_VOICE}'")
voice = DEFAULT_VOICE voice = DEFAULT_VOICE
# Generate speech using Orpheus print(f"{text}")
# Note: text is passed as-is, emotion tags like <laugh> are handled by Orpheus
# Generate speech using Orpheus - async iteration!
import numpy as np
audio_chunks = [] audio_chunks = []
syn_tokens = model.generate_speech( syn_tokens = model.generate_speech(
@@ -198,12 +200,11 @@ def generate_speech(text: str, voice: str) -> bytes:
voice=voice, voice=voice,
) )
# Collect audio chunks # Async iteration over the generator
for audio_chunk in syn_tokens: async for audio_chunk in syn_tokens:
audio_chunks.append(audio_chunk) audio_chunks.append(audio_chunk)
# Combine chunks into single audio # Combine chunks into single audio
import numpy as np
audio_data = np.concatenate(audio_chunks) if len(audio_chunks) > 1 else audio_chunks[0] audio_data = np.concatenate(audio_chunks) if len(audio_chunks) > 1 else audio_chunks[0]
# Convert to WAV bytes # Convert to WAV bytes
@@ -225,8 +226,8 @@ def save_audio_to_file(job_id: str, audio_bytes: bytes) -> str:
return str(output_path) return str(output_path)
def generate_speech_background(job_id: str, text: str, voice: str): async def generate_speech_background(job_id: str, text: str, voice: str):
"""Background task for speech generation.""" """Background task for speech generation (async)."""
try: try:
jobs[job_id].status = JobStatus.PROCESSING jobs[job_id].status = JobStatus.PROCESSING
jobs[job_id].progress = 25 jobs[job_id].progress = 25
@@ -251,7 +252,7 @@ def generate_speech_background(job_id: str, text: str, voice: str):
save_jobs_to_disk() save_jobs_to_disk()
print(f"Generating audio for job {job_id}...") print(f"Generating audio for job {job_id}...")
audio_bytes = generate_speech(text, voice) audio_bytes = await generate_speech(text, voice)
# Save to file # Save to file
jobs[job_id].progress = 75 jobs[job_id].progress = 75
@@ -437,7 +438,7 @@ def list_voices():
@app.post("/tts/submit", response_model=JobResponse) @app.post("/tts/submit", response_model=JobResponse)
async def submit_tts_job(request: TTSRequest, background_tasks: BackgroundTasks): async def submit_tts_job(request: TTSRequest):
"""Submit a TTS job for processing.""" """Submit a TTS job for processing."""
job_id = str(uuid.uuid4()) job_id = str(uuid.uuid4())
@@ -453,11 +454,9 @@ async def submit_tts_job(request: TTSRequest, background_tasks: BackgroundTasks)
jobs[job_id] = job jobs[job_id] = job
save_jobs_to_disk() save_jobs_to_disk()
background_tasks.add_task( # Use asyncio.create_task for proper async execution
generate_speech_background, asyncio.create_task(
job_id, generate_speech_background(job_id, request.text, request.voice)
request.text,
request.voice
) )
print(f"Job {job_id} submitted: '{request.text[:50]}...' with voice '{request.voice}'") print(f"Job {job_id} submitted: '{request.text[:50]}...' with voice '{request.voice}'")
@@ -530,14 +529,14 @@ async def stream_tts(request: TTSStreamRequest):
voice = DEFAULT_VOICE voice = DEFAULT_VOICE
async def audio_generator(): async def audio_generator():
"""Generate audio chunks""" """Generate audio chunks (async)"""
try: try:
syn_tokens = model.generate_speech( syn_tokens = model.generate_speech(
prompt=request.text, prompt=request.text,
voice=voice, voice=voice,
) )
for audio_chunk in syn_tokens: async for audio_chunk in syn_tokens:
yield audio_chunk yield audio_chunk
except Exception as e: except Exception as e: