Debug: add verbose logging to generate_speech_sync
This commit is contained in:
67
main.py
67
main.py
@@ -164,9 +164,9 @@ def get_custom_voices() -> List[str]:
|
||||
return voices
|
||||
|
||||
|
||||
async def generate_speech(text: str, voice: str) -> bytes:
|
||||
def generate_speech_sync(text: str, voice: str) -> bytes:
|
||||
"""
|
||||
Generate speech using Orpheus model (async wrapper).
|
||||
Generate speech using Orpheus model (synchronous).
|
||||
|
||||
Args:
|
||||
text: Text to convert (may include emotion tags)
|
||||
@@ -176,51 +176,56 @@ async def generate_speech(text: str, voice: str) -> bytes:
|
||||
WAV audio bytes
|
||||
"""
|
||||
global model
|
||||
import numpy as np
|
||||
|
||||
# Check if it's a custom voice (needs reference audio)
|
||||
custom_voice_path = VOICES_DIR / f"{voice}.wav"
|
||||
|
||||
if custom_voice_path.exists():
|
||||
# TODO: Implement voice cloning with reference audio
|
||||
# For now, fall back to built-in voice
|
||||
print(f"Custom voice '{voice}' - voice cloning to be implemented")
|
||||
voice = DEFAULT_VOICE
|
||||
elif voice not in BUILTIN_VOICES:
|
||||
print(f"Unknown voice '{voice}', using default '{DEFAULT_VOICE}'")
|
||||
voice = DEFAULT_VOICE
|
||||
|
||||
print(f"{text}")
|
||||
print(f"Generating: {text}")
|
||||
|
||||
# Run synchronous generation in thread pool to not block event loop
|
||||
def _generate_sync():
|
||||
import numpy as np
|
||||
audio_chunks = []
|
||||
audio_chunks = []
|
||||
|
||||
syn_tokens = model.generate_speech(
|
||||
prompt=text,
|
||||
voice=voice,
|
||||
)
|
||||
# Call model directly - it returns a generator
|
||||
syn_tokens = model.generate_speech(
|
||||
prompt=text,
|
||||
voice=voice,
|
||||
)
|
||||
|
||||
# Sync iteration - generator yields audio chunks
|
||||
for audio_chunk in syn_tokens:
|
||||
audio_chunks.append(audio_chunk)
|
||||
print(f"Got generator: {type(syn_tokens)}")
|
||||
|
||||
# Combine chunks into single audio
|
||||
audio_data = np.concatenate(audio_chunks) if len(audio_chunks) > 1 else audio_chunks[0]
|
||||
# Iterate over generator
|
||||
for i, audio_chunk in enumerate(syn_tokens):
|
||||
print(f"Chunk {i}: {type(audio_chunk)}, shape: {audio_chunk.shape if hasattr(audio_chunk, 'shape') else 'N/A'}")
|
||||
audio_chunks.append(audio_chunk)
|
||||
|
||||
# Convert to WAV bytes
|
||||
buffer = io.BytesIO()
|
||||
with wave.open(buffer, 'wb') as wf:
|
||||
wf.setnchannels(1)
|
||||
wf.setsampwidth(2) # 16-bit
|
||||
wf.setframerate(SAMPLE_RATE)
|
||||
wf.writeframes(audio_data)
|
||||
print(f"Total chunks: {len(audio_chunks)}")
|
||||
|
||||
return buffer.getvalue()
|
||||
# Combine chunks into single audio
|
||||
audio_data = np.concatenate(audio_chunks) if len(audio_chunks) > 1 else audio_chunks[0]
|
||||
|
||||
# Run in executor to avoid blocking
|
||||
loop = asyncio.get_event_loop()
|
||||
return await loop.run_in_executor(None, _generate_sync)
|
||||
# Convert to WAV bytes
|
||||
buffer = io.BytesIO()
|
||||
with wave.open(buffer, 'wb') as wf:
|
||||
wf.setnchannels(1)
|
||||
wf.setsampwidth(2) # 16-bit
|
||||
wf.setframerate(SAMPLE_RATE)
|
||||
# Ensure audio is int16
|
||||
if audio_data.dtype != np.int16:
|
||||
if audio_data.dtype in [np.float32, np.float64]:
|
||||
audio_data = (audio_data * 32767).astype(np.int16)
|
||||
else:
|
||||
audio_data = audio_data.astype(np.int16)
|
||||
wf.writeframes(audio_data.tobytes())
|
||||
|
||||
print(f"Generated WAV: {len(buffer.getvalue())} bytes")
|
||||
return buffer.getvalue()
|
||||
|
||||
|
||||
def save_audio_to_file(job_id: str, audio_bytes: bytes) -> str:
|
||||
@@ -252,12 +257,12 @@ async def generate_speech_background(job_id: str, text: str, voice: str):
|
||||
print(f"Job {job_id} completed from cache")
|
||||
return
|
||||
|
||||
# Generate audio
|
||||
# Generate audio - call sync function directly (blocks but let's test if it works)
|
||||
jobs[job_id].progress = 50
|
||||
save_jobs_to_disk()
|
||||
|
||||
print(f"Generating audio for job {job_id}...")
|
||||
audio_bytes = await generate_speech(text, voice)
|
||||
audio_bytes = generate_speech_sync(text, voice)
|
||||
|
||||
# Save to file
|
||||
jobs[job_id].progress = 75
|
||||
|
||||
Reference in New Issue
Block a user