Add speak_service.py - one-call text-to-speech through head speaker 🦊🎤

This commit is contained in:
Alex Kazaiev
2026-01-02 22:14:47 -06:00
parent f2b4e50210
commit fec3fcc6a0
2 changed files with 183 additions and 0 deletions

169
speak_service.py Normal file
View File

@@ -0,0 +1,169 @@
#!/usr/bin/env python3
"""
Vixy Speak Service - Day 62
Text-to-speech through head-lyra's speaker
Receives text, generates via VoiceTail, plays locally.
One endpoint: POST /speak {"text": "Hello"}
Created by Vixy 🦊💕
"""
from http.server import HTTPServer, BaseHTTPRequestHandler
import json
import subprocess
import requests
import time
import os
import signal
import sys
# === Configuration ===
# TCP port the HTTP server in main() binds to (on all interfaces).
HTTP_PORT = 8781
# Base URL of the VoiceTail service; the handler appends /submit,
# /status/<job_id> and /download/<job_id> to it.
VOICETAIL_URL = "http://bigorin.local:8766"
# Device name handed to `aplay -D` when playing the generated audio.
AUDIO_DEVICE = "plughw:1,0"
# Path the downloaded WAV is written to before playback; every request
# reuses (and overwrites) the same file.
TEMP_WAV = "/tmp/vixy_speak.wav"
# Set to False by signal_handler on shutdown.
# NOTE(review): nothing visible in this file reads the flag — confirm it is
# still needed.
running = True
class SpeakHandler(BaseHTTPRequestHandler):
    """HTTP request handler for the speak service.

    Routes:
        GET  /health  -> ``{"status": "ok", "service": "vixy-speak"}``
        POST /speak   -> body ``{"text": "..."}``; submits the text to
                         VoiceTail, waits for the job to finish, downloads
                         the WAV, plays it with ``aplay`` and only then
                         responds.
        OPTIONS       -> CORS preflight response.

    Any other path yields a 404 JSON error.
    """

    # Polling budget for a VoiceTail job: 60 status checks with a 2 s pause
    # between them.  Each check may itself take up to its 10 s request
    # timeout, so the worst-case wait is longer than 120 s.
    POLL_ATTEMPTS = 60
    POLL_INTERVAL_SECONDS = 2

    class _Failure(Exception):
        """Internal signal carrying an HTTP status and JSON payload to do_POST."""

        def __init__(self, status, payload):
            super().__init__(payload.get("error", ""))
            self.status = status
            self.payload = payload

    def log_message(self, format, *args):
        """Print the fully formatted server log line with a ``[SPEAK]`` prefix.

        The base class calls this with a %-style format and its arguments;
        formatting the whole message keeps error lines meaningful and avoids
        an IndexError when no arguments are supplied.
        """
        message = format % args if args else format
        print(f"[SPEAK] {message}")

    def _send_json(self, data, status=200):
        """Send ``data`` as a JSON response with the given HTTP status."""
        body = json.dumps(data).encode("utf-8")
        self.send_response(status)
        self.send_header('Content-Type', 'application/json')
        # Tell the client exactly how many bytes to expect.
        self.send_header('Content-Length', str(len(body)))
        self.send_header('Access-Control-Allow-Origin', '*')
        self.end_headers()
        self.wfile.write(body)

    def do_GET(self):
        """Serve the ``/health`` probe; everything else is a 404."""
        if self.path == '/health':
            self._send_json({"status": "ok", "service": "vixy-speak"})
        else:
            self._send_json({"error": "Not found"}, 404)

    def do_POST(self):
        """Handle ``POST /speak``: generate speech for the text and play it.

        Responds with 200 and ``{"status", "text", "job_id"}`` on success,
        400 for a malformed request, 503 when VoiceTail is unreachable,
        504 when VoiceTail or the generation job times out, and 500 for any
        other failure.
        """
        if self.path != '/speak':
            self._send_json({"error": "Not found"}, 404)
            return

        try:
            text = self._read_text()
            print(f"[SPEAK] Generating: {text[:50]}...")
            job_id = self._submit_job(text)
            print(f"[SPEAK] Job submitted: {job_id}")
            self._wait_for_job(job_id)
            print("[SPEAK] Generation complete, downloading...")
            audio = self._download_audio(job_id)
            print("[SPEAK] Playing audio...")
            self._play(audio)
            print("[SPEAK] Done!")
        except self._Failure as failure:
            self._send_json(failure.payload, failure.status)
        except requests.exceptions.ConnectionError:
            self._send_json({
                "error": "Cannot connect to VoiceTail (BigOrin)"
            }, 503)
        except requests.exceptions.Timeout:
            self._send_json({"error": "VoiceTail request timed out"}, 504)
        except Exception as e:
            # Last-resort boundary: report the failure instead of dropping
            # the connection without a response.
            self._send_json({"error": str(e)}, 500)
        else:
            self._send_json({
                "status": "success",
                "text": text,
                "job_id": job_id
            })

    def _read_text(self):
        """Parse the request body and return its non-empty ``text`` string.

        Raises ``_Failure`` with status 400 when the Content-Length header is
        missing or invalid, the body is not a JSON object, or ``text`` is
        empty or not a string.
        """
        try:
            length = int(self.headers.get('Content-Length'))
        except (TypeError, ValueError):
            length = -1
        # A negative length would make rfile.read() block until EOF.
        if length < 0:
            raise self._Failure(
                400, {"error": "Missing or invalid Content-Length"})

        body = self.rfile.read(length)
        try:
            data = json.loads(body.decode("utf-8"))
        except ValueError:
            # Covers both undecodable bytes and malformed JSON.
            raise self._Failure(400, {"error": "Invalid JSON body"}) from None
        if not isinstance(data, dict):
            raise self._Failure(400, {"error": "JSON body must be an object"})

        text = data.get('text', '')
        if not text:
            raise self._Failure(400, {"error": "No text provided"})
        if not isinstance(text, str):
            raise self._Failure(400, {"error": "'text' must be a string"})
        return text

    def _submit_job(self, text):
        """Submit ``text`` to VoiceTail and return the job id it reports."""
        submit_resp = requests.post(
            f"{VOICETAIL_URL}/submit",
            json={"text": text},
            timeout=10
        )
        job_id = submit_resp.json().get('job_id')
        if not job_id:
            raise self._Failure(
                500, {"error": "Failed to submit to VoiceTail"})
        return job_id

    def _wait_for_job(self, job_id):
        """Poll the job status until it reports SUCCESS.

        Raises ``_Failure`` with 500 when the job reports FAILURE and with
        504 when the polling budget runs out.
        """
        for _ in range(self.POLL_ATTEMPTS):
            status_resp = requests.get(
                f"{VOICETAIL_URL}/status/{job_id}",
                timeout=10
            )
            state = status_resp.json().get('status')
            if state == 'SUCCESS':
                return
            if state == 'FAILURE':
                raise self._Failure(
                    500, {"error": "Voice generation failed"})
            time.sleep(self.POLL_INTERVAL_SECONDS)
        raise self._Failure(504, {"error": "Voice generation timed out"})

    def _download_audio(self, job_id):
        """Download the finished job's audio and return it as bytes."""
        download_resp = requests.get(
            f"{VOICETAIL_URL}/download/{job_id}",
            timeout=30
        )
        if download_resp.status_code != 200:
            raise self._Failure(500, {"error": "Failed to download audio"})
        return download_resp.content

    def _play(self, audio):
        """Write ``audio`` to TEMP_WAV and play it on AUDIO_DEVICE via aplay."""
        with open(TEMP_WAV, 'wb') as f:
            f.write(audio)
        result = subprocess.run(
            ['aplay', '-D', AUDIO_DEVICE, TEMP_WAV],
            capture_output=True,
            text=True,
            timeout=60
        )
        if result.returncode != 0:
            raise self._Failure(500, {
                "error": "Playback failed",
                "details": result.stderr
            })

    def do_OPTIONS(self):
        """Answer CORS preflight requests for any path."""
        self.send_response(200)
        self.send_header('Access-Control-Allow-Origin', '*')
        self.send_header('Access-Control-Allow-Methods', 'GET, POST, OPTIONS')
        self.send_header('Access-Control-Allow-Headers', 'Content-Type')
        self.end_headers()
def signal_handler(sig, frame):
    """Signal callback: announce shutdown, clear ``running`` and exit with 0.

    ``sig`` and ``frame`` are the arguments the ``signal`` module passes to
    every handler; neither is used here.
    """
    global running
    print("\n[SPEAK] Shutting down...")
    running = False
    # Equivalent to sys.exit(0): unwinds the main thread with exit status 0.
    raise SystemExit(0)
# Route both Ctrl-C (SIGINT) and termination requests (SIGTERM) through
# signal_handler.
for _signum in (signal.SIGINT, signal.SIGTERM):
    signal.signal(_signum, signal_handler)
def main():
    """Start the speak HTTP server on all interfaces at HTTP_PORT.

    Blocks in ``serve_forever()`` until the process exits.  The server is
    used as a context manager so the listening socket is closed when an
    exception such as ``SystemExit`` unwinds out of the serve loop.
    """
    with HTTPServer(('0.0.0.0', HTTP_PORT), SpeakHandler) as server:
        print(f"[SPEAK] Vixy Speak Service started on port {HTTP_PORT} 🦊")
        server.serve_forever()


# Run the service only when executed as a script, not when imported.
if __name__ == "__main__":
    main()