Add speak_service.py - one-call text-to-speech through head speaker 🦊🎤

2026-01-02 22:14:47 -06:00
parent f2b4e50210
commit fec3fcc6a0
2 changed files with 183 additions and 0 deletions
--- a/speak_service.py
+++ b/speak_service.py
@@ -0,0 +1,169 @@
+#!/usr/bin/env python3
+"""
+Vixy Speak Service - Day 62
+Text-to-speech through head-lyra's speaker
+
+Receives text, generates via VoiceTail, plays locally.
+Endpoints: POST /speak {"text": "Hello"} and GET /health (status check)
+
+Created by Vixy 🦊💕
+"""
+
+from http.server import HTTPServer, BaseHTTPRequestHandler
+import json
+import subprocess
+import requests
+import time
+import os
+import signal
+import sys
+
+# === Configuration ===
+# TCP port the HTTP server in main() binds to.
+HTTP_PORT = 8781
+# Base URL of the VoiceTail service; /submit, /status/<job_id> and
+# /download/<job_id> are requested beneath it.
+VOICETAIL_URL = "http://bigorin.local:8766"
+# Device name passed to `aplay -D` for playback.
+AUDIO_DEVICE = "plughw:1,0"
+# Path the downloaded WAV is written to before playback; the same path is
+# reused (and overwritten) on every /speak request.
+TEMP_WAV = "/tmp/vixy_speak.wav"
+
+# Set to False by signal_handler on SIGINT/SIGTERM.
+# NOTE(review): nothing visible in this file reads this flag.
+running = True
+
+class SpeakHandler(BaseHTTPRequestHandler):
+    """HTTP handler for the speak service.
+
+    Routes:
+        GET /health  -> {"status": "ok", "service": "vixy-speak"}
+        POST /speak  -> body {"text": "..."}; generates speech through
+                        VoiceTail, saves it to TEMP_WAV and plays it with
+                        aplay on AUDIO_DEVICE before responding.
+        OPTIONS      -> CORS preflight response (any path).
+
+    Every JSON response carries ``Access-Control-Allow-Origin: *``.
+    Malformed requests are answered with 400, VoiceTail connection
+    failures with 503, VoiceTail timeouts with 504, and any other failure
+    with 500.
+    """
+
+    # Polling budget for one VoiceTail job: attempts x delay = 120 s of
+    # sleeping, plus up to 10 s for each individual status request.
+    _POLL_ATTEMPTS = 60
+    _POLL_DELAY_S = 2
+
+    def log_message(self, format, *args):
+        """Print the fully formatted log line with the service prefix.
+
+        The base class calls this from log_request() and log_error() with a
+        %-style format plus its arguments. Rendering the whole message keeps
+        the status code and error text (instead of only the first argument)
+        and does not fail when no arguments are supplied.
+        """
+        print(f"[SPEAK] {format % args}")
+
+    def _send_json(self, data, status=200):
+        """Serialise *data* as JSON and send it with HTTP status *status*."""
+        # Encode first so a serialisation error cannot leave a half-written
+        # response, and so the exact body length can be advertised.
+        payload = json.dumps(data).encode()
+        self.send_response(status)
+        self.send_header('Content-Type', 'application/json')
+        self.send_header('Content-Length', str(len(payload)))
+        self.send_header('Access-Control-Allow-Origin', '*')
+        self.end_headers()
+        self.wfile.write(payload)
+
+    def do_GET(self):
+        """Answer GET /health with a status document; anything else is 404."""
+        if self.path == '/health':
+            self._send_json({"status": "ok", "service": "vixy-speak"})
+        else:
+            self._send_json({"error": "Not found"}, 404)
+
+    def do_POST(self):
+        """Handle POST /speak: generate, download and play the audio.
+
+        Responds with {"status": "success", "text": ..., "job_id": ...} once
+        playback has finished, or with a JSON {"error": ...} document.
+        """
+        if self.path != '/speak':
+            self._send_json({"error": "Not found"}, 404)
+            return
+
+        text = self._read_text()
+        if text is None:
+            return  # a 400 response has already been sent
+
+        try:
+            print(f"[SPEAK] Generating: {text[:50]}...")
+            job_id = self._submit_job(text)
+            if job_id is None:
+                return
+            print(f"[SPEAK] Job submitted: {job_id}")
+
+            if not self._wait_for_job(job_id):
+                return
+            print("[SPEAK] Generation complete, downloading...")
+
+            if not self._download_audio(job_id):
+                return
+            print("[SPEAK] Playing audio...")
+
+            if not self._play_audio():
+                return
+            print("[SPEAK] Done!")
+
+            self._send_json({
+                "status": "success",
+                "text": text,
+                "job_id": job_id
+            })
+        except requests.exceptions.ConnectionError:
+            self._send_json({
+                "error": "Cannot connect to VoiceTail (BigOrin)"
+            }, 503)
+        except requests.exceptions.Timeout:
+            self._send_json({"error": "VoiceTail request timed out"}, 504)
+        except Exception as e:
+            # Request boundary: report any other failure to the client as a
+            # JSON 500 rather than dropping the connection.
+            self._send_json({"error": str(e)}, 500)
+
+    def _read_text(self):
+        """Return the non-empty 'text' string from the JSON request body.
+
+        Sends a 400 response and returns None when the Content-Length is
+        invalid, the body is not valid JSON, or 'text' is missing, empty or
+        not a string.
+        """
+        raw_length = self.headers.get('Content-Length')
+        try:
+            length = int(raw_length) if raw_length is not None else 0
+        except ValueError:
+            length = -1
+        # A negative length would make rfile.read() block until EOF.
+        if length < 0:
+            self._send_json({"error": "Invalid Content-Length"}, 400)
+            return None
+
+        try:
+            data = json.loads(self.rfile.read(length).decode())
+        except ValueError:  # covers UnicodeDecodeError and JSONDecodeError
+            self._send_json({"error": "Invalid JSON body"}, 400)
+            return None
+
+        text = data.get('text', '') if isinstance(data, dict) else ''
+        if not isinstance(text, str) or not text:
+            self._send_json({"error": "No text provided"}, 400)
+            return None
+        return text
+
+    def _submit_job(self, text):
+        """Submit *text* to VoiceTail; return the job id, or None after a 500."""
+        submit_resp = requests.post(
+            f"{VOICETAIL_URL}/submit",
+            json={"text": text},
+            timeout=10
+        )
+        job_id = submit_resp.json().get('job_id')
+        if not job_id:
+            self._send_json({"error": "Failed to submit to VoiceTail"}, 500)
+            return None
+        return job_id
+
+    def _wait_for_job(self, job_id):
+        """Poll the job status until SUCCESS; return False after an error response."""
+        for _ in range(self._POLL_ATTEMPTS):
+            status_resp = requests.get(
+                f"{VOICETAIL_URL}/status/{job_id}",
+                timeout=10
+            )
+            status = status_resp.json().get('status')
+            if status == 'SUCCESS':
+                return True
+            if status == 'FAILURE':
+                self._send_json({"error": "Voice generation failed"}, 500)
+                return False
+            time.sleep(self._POLL_DELAY_S)
+
+        self._send_json({"error": "Voice generation timed out"}, 504)
+        return False
+
+    def _download_audio(self, job_id):
+        """Download the job's WAV into TEMP_WAV; return False after a 500."""
+        download_resp = requests.get(
+            f"{VOICETAIL_URL}/download/{job_id}",
+            timeout=30
+        )
+        if download_resp.status_code != 200:
+            self._send_json({"error": "Failed to download audio"}, 500)
+            return False
+
+        with open(TEMP_WAV, 'wb') as f:
+            f.write(download_resp.content)
+        return True
+
+    def _play_audio(self):
+        """Play TEMP_WAV with aplay on AUDIO_DEVICE; return False after a 500."""
+        result = subprocess.run(
+            ['aplay', '-D', AUDIO_DEVICE, TEMP_WAV],
+            capture_output=True,
+            text=True,
+            timeout=60
+        )
+        if result.returncode != 0:
+            self._send_json({
+                "error": "Playback failed",
+                "details": result.stderr
+            }, 500)
+            return False
+        return True
+
+    def do_OPTIONS(self):
+        """Answer a CORS preflight request, allowing GET, POST and OPTIONS."""
+        self.send_response(200)
+        self.send_header('Access-Control-Allow-Origin', '*')
+        self.send_header('Access-Control-Allow-Methods', 'GET, POST, OPTIONS')
+        self.send_header('Access-Control-Allow-Headers', 'Content-Type')
+        self.end_headers()
+
+
+def signal_handler(sig, frame):
+    """Announce shutdown, clear the ``running`` flag and exit with status 0.
+
+    Registered for SIGINT and SIGTERM; *sig* and *frame* are supplied by the
+    signal machinery and are not inspected.
+    """
+    global running
+    print("\n[SPEAK] Shutting down...")
+    running = False
+    # Equivalent to sys.exit(0): unwinds the main thread with exit status 0.
+    raise SystemExit(0)
+
+# Route both Ctrl-C (SIGINT) and SIGTERM to the same shutdown handler.
+for _signum in (signal.SIGINT, signal.SIGTERM):
+    signal.signal(_signum, signal_handler)
+
+
+def main():
+    """Start the HTTP server on all interfaces and serve until the process exits."""
+    # The context manager closes the listening socket when serve_forever()
+    # is left, including via the SystemExit raised by signal_handler.
+    with HTTPServer(('0.0.0.0', HTTP_PORT), SpeakHandler) as server:
+        print(f"[SPEAK] Vixy Speak Service started on port {HTTP_PORT} 🦊")
+        server.serve_forever()
+
+
+if __name__ == "__main__":
+    main()