Fix audio assembly: SNAC decoder chunks are already raw int16 bytes, so join them directly instead of concatenating and converting them as NumPy arrays
This commit is contained in:
16
main.py
16
main.py
@@ -207,8 +207,12 @@ def generate_speech_sync(text: str, voice: str) -> bytes:
|
||||
|
||||
print(f"Total chunks: {len(audio_chunks)}")
|
||||
|
||||
# Combine chunks into single audio
|
||||
audio_data = np.concatenate(audio_chunks) if len(audio_chunks) > 1 else audio_chunks[0]
|
||||
# Chunks are raw int16 bytes from SNAC decoder - just concatenate
|
||||
if len(audio_chunks) == 0:
|
||||
raise ValueError("No audio chunks generated")
|
||||
|
||||
# Concatenate bytes directly
|
||||
audio_bytes_raw = b''.join(audio_chunks)
|
||||
|
||||
# Convert to WAV bytes
|
||||
buffer = io.BytesIO()
|
||||
@@ -216,13 +220,7 @@ def generate_speech_sync(text: str, voice: str) -> bytes:
|
||||
wf.setnchannels(1)
|
||||
wf.setsampwidth(2) # 16-bit
|
||||
wf.setframerate(SAMPLE_RATE)
|
||||
# Ensure audio is int16
|
||||
if audio_data.dtype != np.int16:
|
||||
if audio_data.dtype in [np.float32, np.float64]:
|
||||
audio_data = (audio_data * 32767).astype(np.int16)
|
||||
else:
|
||||
audio_data = audio_data.astype(np.int16)
|
||||
wf.writeframes(audio_data.tobytes())
|
||||
wf.writeframes(audio_bytes_raw)
|
||||
|
||||
print(f"Generated WAV: {len(buffer.getvalue())} bytes")
|
||||
return buffer.getvalue()
|
||||
|
||||
Reference in New Issue
Block a user