Fix async iteration over the vLLM generator: use `async for` instead of a synchronous `for` loop
This commit is contained in:
35
main.py
35
main.py
@@ -164,9 +164,9 @@ def get_custom_voices() -> List[str]:
|
|||||||
return voices
|
return voices
|
||||||
|
|
||||||
|
|
||||||
def generate_speech(text: str, voice: str) -> bytes:
|
async def generate_speech(text: str, voice: str) -> bytes:
|
||||||
"""
|
"""
|
||||||
Generate speech using Orpheus model.
|
Generate speech using Orpheus model (async version).
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
text: Text to convert (may include emotion tags)
|
text: Text to convert (may include emotion tags)
|
||||||
@@ -189,8 +189,10 @@ def generate_speech(text: str, voice: str) -> bytes:
|
|||||||
print(f"Unknown voice '{voice}', using default '{DEFAULT_VOICE}'")
|
print(f"Unknown voice '{voice}', using default '{DEFAULT_VOICE}'")
|
||||||
voice = DEFAULT_VOICE
|
voice = DEFAULT_VOICE
|
||||||
|
|
||||||
# Generate speech using Orpheus
|
print(f"{text}")
|
||||||
# Note: text is passed as-is, emotion tags like <laugh> are handled by Orpheus
|
|
||||||
|
# Generate speech using Orpheus - async iteration!
|
||||||
|
import numpy as np
|
||||||
audio_chunks = []
|
audio_chunks = []
|
||||||
|
|
||||||
syn_tokens = model.generate_speech(
|
syn_tokens = model.generate_speech(
|
||||||
@@ -198,12 +200,11 @@ def generate_speech(text: str, voice: str) -> bytes:
|
|||||||
voice=voice,
|
voice=voice,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Collect audio chunks
|
# Async iteration over the generator
|
||||||
for audio_chunk in syn_tokens:
|
async for audio_chunk in syn_tokens:
|
||||||
audio_chunks.append(audio_chunk)
|
audio_chunks.append(audio_chunk)
|
||||||
|
|
||||||
# Combine chunks into single audio
|
# Combine chunks into single audio
|
||||||
import numpy as np
|
|
||||||
audio_data = np.concatenate(audio_chunks) if len(audio_chunks) > 1 else audio_chunks[0]
|
audio_data = np.concatenate(audio_chunks) if len(audio_chunks) > 1 else audio_chunks[0]
|
||||||
|
|
||||||
# Convert to WAV bytes
|
# Convert to WAV bytes
|
||||||
@@ -225,8 +226,8 @@ def save_audio_to_file(job_id: str, audio_bytes: bytes) -> str:
|
|||||||
return str(output_path)
|
return str(output_path)
|
||||||
|
|
||||||
|
|
||||||
def generate_speech_background(job_id: str, text: str, voice: str):
|
async def generate_speech_background(job_id: str, text: str, voice: str):
|
||||||
"""Background task for speech generation."""
|
"""Background task for speech generation (async)."""
|
||||||
try:
|
try:
|
||||||
jobs[job_id].status = JobStatus.PROCESSING
|
jobs[job_id].status = JobStatus.PROCESSING
|
||||||
jobs[job_id].progress = 25
|
jobs[job_id].progress = 25
|
||||||
@@ -251,7 +252,7 @@ def generate_speech_background(job_id: str, text: str, voice: str):
|
|||||||
save_jobs_to_disk()
|
save_jobs_to_disk()
|
||||||
|
|
||||||
print(f"Generating audio for job {job_id}...")
|
print(f"Generating audio for job {job_id}...")
|
||||||
audio_bytes = generate_speech(text, voice)
|
audio_bytes = await generate_speech(text, voice)
|
||||||
|
|
||||||
# Save to file
|
# Save to file
|
||||||
jobs[job_id].progress = 75
|
jobs[job_id].progress = 75
|
||||||
@@ -437,7 +438,7 @@ def list_voices():
|
|||||||
|
|
||||||
|
|
||||||
@app.post("/tts/submit", response_model=JobResponse)
|
@app.post("/tts/submit", response_model=JobResponse)
|
||||||
async def submit_tts_job(request: TTSRequest, background_tasks: BackgroundTasks):
|
async def submit_tts_job(request: TTSRequest):
|
||||||
"""Submit a TTS job for processing."""
|
"""Submit a TTS job for processing."""
|
||||||
job_id = str(uuid.uuid4())
|
job_id = str(uuid.uuid4())
|
||||||
|
|
||||||
@@ -453,11 +454,9 @@ async def submit_tts_job(request: TTSRequest, background_tasks: BackgroundTasks)
|
|||||||
jobs[job_id] = job
|
jobs[job_id] = job
|
||||||
save_jobs_to_disk()
|
save_jobs_to_disk()
|
||||||
|
|
||||||
background_tasks.add_task(
|
# Use asyncio.create_task for proper async execution
|
||||||
generate_speech_background,
|
asyncio.create_task(
|
||||||
job_id,
|
generate_speech_background(job_id, request.text, request.voice)
|
||||||
request.text,
|
|
||||||
request.voice
|
|
||||||
)
|
)
|
||||||
|
|
||||||
print(f"Job {job_id} submitted: '{request.text[:50]}...' with voice '{request.voice}'")
|
print(f"Job {job_id} submitted: '{request.text[:50]}...' with voice '{request.voice}'")
|
||||||
@@ -530,14 +529,14 @@ async def stream_tts(request: TTSStreamRequest):
|
|||||||
voice = DEFAULT_VOICE
|
voice = DEFAULT_VOICE
|
||||||
|
|
||||||
async def audio_generator():
|
async def audio_generator():
|
||||||
"""Generate audio chunks"""
|
"""Generate audio chunks (async)"""
|
||||||
try:
|
try:
|
||||||
syn_tokens = model.generate_speech(
|
syn_tokens = model.generate_speech(
|
||||||
prompt=request.text,
|
prompt=request.text,
|
||||||
voice=voice,
|
voice=voice,
|
||||||
)
|
)
|
||||||
|
|
||||||
for audio_chunk in syn_tokens:
|
async for audio_chunk in syn_tokens:
|
||||||
yield audio_chunk
|
yield audio_chunk
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
Reference in New Issue
Block a user