Debug: add verbose logging to generate_speech_sync

This commit is contained in:
2026-01-11 18:44:07 -06:00
parent 4eab3ccc01
commit 0b88188907

67
main.py
View File

@@ -164,9 +164,9 @@ def get_custom_voices() -> List[str]:
return voices return voices
def generate_speech_sync(text: str, voice: str) -> bytes:
    """
    Generate speech using Orpheus model (synchronous).

    This function blocks the calling thread until the whole utterance has
    been generated; async callers that must stay responsive should run it
    in an executor.

    Args:
        text: Text to convert (may include emotion tags)
        voice: Voice name. Custom voices (a matching ``.wav`` in VOICES_DIR)
            and unknown names currently fall back to DEFAULT_VOICE.

    Returns:
        WAV audio bytes (mono, 16-bit, SAMPLE_RATE Hz)

    Raises:
        RuntimeError: If the model yields no audio chunks at all.
    """
    import numpy as np

    # Check if it's a custom voice (needs reference audio)
    custom_voice_path = VOICES_DIR / f"{voice}.wav"
    if custom_voice_path.exists():
        # TODO: Implement voice cloning with reference audio
        # For now, fall back to built-in voice
        print(f"Custom voice '{voice}' - voice cloning to be implemented")
        voice = DEFAULT_VOICE
    elif voice not in BUILTIN_VOICES:
        print(f"Unknown voice '{voice}', using default '{DEFAULT_VOICE}'")
        voice = DEFAULT_VOICE

    print(f"Generating: {text}")

    def _as_int16(chunk):
        """Normalize one generator chunk to a flat int16 sample array."""
        # NOTE(review): the chunk type yielded by model.generate_speech is not
        # visible here; raw byte chunks are assumed to already be 16-bit PCM
        # in native byte order - confirm against the model.
        if isinstance(chunk, (bytes, bytearray, memoryview)):
            return np.frombuffer(chunk, dtype=np.int16)
        samples = np.asarray(chunk)
        if samples.dtype == np.int16:
            return samples.ravel()
        if np.issubdtype(samples.dtype, np.floating):
            # Clip before scaling: without it, samples outside [-1, 1]
            # wrap around when cast to int16 and produce loud clicks.
            scaled = np.clip(samples, -1.0, 1.0) * 32767
            return scaled.astype(np.int16).ravel()
        return samples.astype(np.int16).ravel()

    # Call model directly - it returns a generator
    syn_tokens = model.generate_speech(
        prompt=text,
        voice=voice,
    )
    print(f"Got generator: {type(syn_tokens)}")

    # Iterate over generator
    audio_chunks = []
    for i, audio_chunk in enumerate(syn_tokens):
        print(f"Chunk {i}: {type(audio_chunk)}, shape: {getattr(audio_chunk, 'shape', 'N/A')}")
        audio_chunks.append(_as_int16(audio_chunk))

    print(f"Total chunks: {len(audio_chunks)}")

    # Fail with a clear message instead of an IndexError on audio_chunks[0].
    if not audio_chunks:
        raise RuntimeError(
            f"Model produced no audio for voice '{voice}' (text length {len(text)})"
        )

    # Combine chunks into single audio
    audio_data = np.concatenate(audio_chunks)

    # Convert to WAV bytes
    buffer = io.BytesIO()
    with wave.open(buffer, 'wb') as wf:
        wf.setnchannels(1)
        wf.setsampwidth(2)  # 16-bit
        wf.setframerate(SAMPLE_RATE)
        wf.writeframes(audio_data.tobytes())

    # Read the buffer only after the writer is closed so the header is final.
    wav_bytes = buffer.getvalue()
    print(f"Generated WAV: {len(wav_bytes)} bytes")
    return wav_bytes
def save_audio_to_file(job_id: str, audio_bytes: bytes) -> str: def save_audio_to_file(job_id: str, audio_bytes: bytes) -> str:
@@ -252,12 +257,12 @@ async def generate_speech_background(job_id: str, text: str, voice: str):
print(f"Job {job_id} completed from cache") print(f"Job {job_id} completed from cache")
return return
# Generate audio # Generate audio - call sync function directly (blocks but let's test if it works)
jobs[job_id].progress = 50 jobs[job_id].progress = 50
save_jobs_to_disk() save_jobs_to_disk()
print(f"Generating audio for job {job_id}...") print(f"Generating audio for job {job_id}...")
audio_bytes = await generate_speech(text, voice) audio_bytes = generate_speech_sync(text, voice)
# Save to file # Save to file
jobs[job_id].progress = 75 jobs[job_id].progress = 75