Fix audio assembly: chunks are already raw int16 bytes from the SNAC decoder, so join them directly
This commit is contained in:
16
main.py
16
main.py
@@ -207,8 +207,12 @@ def generate_speech_sync(text: str, voice: str) -> bytes:
|
|||||||
|
|
||||||
print(f"Total chunks: {len(audio_chunks)}")
|
print(f"Total chunks: {len(audio_chunks)}")
|
||||||
|
|
||||||
# Combine chunks into single audio
|
# Chunks are raw int16 bytes from SNAC decoder - just concatenate
|
||||||
audio_data = np.concatenate(audio_chunks) if len(audio_chunks) > 1 else audio_chunks[0]
|
if len(audio_chunks) == 0:
|
||||||
|
raise ValueError("No audio chunks generated")
|
||||||
|
|
||||||
|
# Concatenate bytes directly
|
||||||
|
audio_bytes_raw = b''.join(audio_chunks)
|
||||||
|
|
||||||
# Convert to WAV bytes
|
# Convert to WAV bytes
|
||||||
buffer = io.BytesIO()
|
buffer = io.BytesIO()
|
||||||
@@ -216,13 +220,7 @@ def generate_speech_sync(text: str, voice: str) -> bytes:
|
|||||||
wf.setnchannels(1)
|
wf.setnchannels(1)
|
||||||
wf.setsampwidth(2) # 16-bit
|
wf.setsampwidth(2) # 16-bit
|
||||||
wf.setframerate(SAMPLE_RATE)
|
wf.setframerate(SAMPLE_RATE)
|
||||||
# Ensure audio is int16
|
wf.writeframes(audio_bytes_raw)
|
||||||
if audio_data.dtype != np.int16:
|
|
||||||
if audio_data.dtype in [np.float32, np.float64]:
|
|
||||||
audio_data = (audio_data * 32767).astype(np.int16)
|
|
||||||
else:
|
|
||||||
audio_data = audio_data.astype(np.int16)
|
|
||||||
wf.writeframes(audio_data.tobytes())
|
|
||||||
|
|
||||||
print(f"Generated WAV: {len(buffer.getvalue())} bytes")
|
print(f"Generated WAV: {len(buffer.getvalue())} bytes")
|
||||||
return buffer.getvalue()
|
return buffer.getvalue()
|
||||||
|
|||||||
Reference in New Issue
Block a user