Debug: convert generate_speech to synchronous generate_speech_sync and add verbose logging

2026-01-11 18:44:07 -06:00
parent 4eab3ccc01
commit 0b88188907
1 changed file with 41 additions and 36 deletions
--- a/main.py
+++ b/main.py
@@ -164,9 +164,9 @@ def get_custom_voices() -> List[str]:
    return voices
-async def generate_speech(text: str, voice: str) -> bytes:
+def generate_speech_sync(text: str, voice: str) -> bytes:
    """
-    Generate speech using Orpheus model (async wrapper).
+    Generate speech using Orpheus model (synchronous).
    Args:
        text: Text to convert (may include emotion tags)
@@ -176,51 +176,56 @@ async def generate_speech(text: str, voice: str) -> bytes:
        WAV audio bytes
    """
    global model
    import numpy as np
    # Check if it's a custom voice (needs reference audio)
    custom_voice_path = VOICES_DIR / f"{voice}.wav"
    if custom_voice_path.exists():
        # TODO: Implement voice cloning with reference audio
        # For now, fall back to built-in voice
        print(f"Custom voice '{voice}' - voice cloning to be implemented")
        voice = DEFAULT_VOICE
    elif voice not in BUILTIN_VOICES:
        print(f"Unknown voice '{voice}', using default '{DEFAULT_VOICE}'")
        voice = DEFAULT_VOICE
-    print(f"{text}")
+    print(f"Generating: {text}")
-    # Run synchronous generation in thread pool to not block event loop
+    audio_chunks = []
    def _generate_sync():
        import numpy as np
        audio_chunks = []
-        syn_tokens = model.generate_speech(
+    # Call model directly - it returns a generator
-            prompt=text,
+    syn_tokens = model.generate_speech(
-            voice=voice,
+        prompt=text,
-        )
+        voice=voice,
    )
-        # Sync iteration - generator yields audio chunks
+    print(f"Got generator: {type(syn_tokens)}")
        for audio_chunk in syn_tokens:
            audio_chunks.append(audio_chunk)
-        # Combine chunks into single audio
+    # Iterate over generator
-        audio_data = np.concatenate(audio_chunks) if len(audio_chunks) > 1 else audio_chunks[0]
+    for i, audio_chunk in enumerate(syn_tokens):
        print(f"Chunk {i}: {type(audio_chunk)}, shape: {audio_chunk.shape if hasattr(audio_chunk, 'shape') else 'N/A'}")
        audio_chunks.append(audio_chunk)
-        # Convert to WAV bytes
+    print(f"Total chunks: {len(audio_chunks)}")
        buffer = io.BytesIO()
        with wave.open(buffer, 'wb') as wf:
            wf.setnchannels(1)
            wf.setsampwidth(2)  # 16-bit
            wf.setframerate(SAMPLE_RATE)
            wf.writeframes(audio_data)
-        return buffer.getvalue()
+    # Combine chunks into single audio
    audio_data = np.concatenate(audio_chunks) if len(audio_chunks) > 1 else audio_chunks[0]
-    # Run in executor to avoid blocking
+    # Convert to WAV bytes
-    loop = asyncio.get_event_loop()
+    buffer = io.BytesIO()
-    return await loop.run_in_executor(None, _generate_sync)
+    with wave.open(buffer, 'wb') as wf:
        wf.setnchannels(1)
        wf.setsampwidth(2)  # 16-bit
        wf.setframerate(SAMPLE_RATE)
        # Ensure audio is int16
        if audio_data.dtype != np.int16:
            if audio_data.dtype in [np.float32, np.float64]:
                audio_data = (audio_data * 32767).astype(np.int16)
            else:
                audio_data = audio_data.astype(np.int16)
        wf.writeframes(audio_data.tobytes())
    print(f"Generated WAV: {len(buffer.getvalue())} bytes")
    return buffer.getvalue()
 def save_audio_to_file(job_id: str, audio_bytes: bytes) -> str:
@@ -252,12 +257,12 @@ async def generate_speech_background(job_id: str, text: str, voice: str):
            print(f"Job {job_id} completed from cache")
            return
-        # Generate audio
+        # Generate audio - call the sync function directly (this blocks the event loop; temporary while testing whether generation works)
        jobs[job_id].progress = 50
        save_jobs_to_disk()
        print(f"Generating audio for job {job_id}...")
-        audio_bytes = await generate_speech(text, voice)
+        audio_bytes = generate_speech_sync(text, voice)
        # Save to file
        jobs[job_id].progress = 75