updates for dual mic array
This commit is contained in:
351
headmic.py
351
headmic.py
@@ -7,27 +7,32 @@ Runs on head-vixy (Raspberry Pi 5).
|
||||
|
||||
Wake word: "Hey Vivi" (trained via Picovoice Porcupine)
|
||||
|
||||
Architecture: Single shared audio stream feeds both Porcupine (wake word)
|
||||
and recording buffer. This avoids device conflicts.
|
||||
Architecture: Dual XVF3800 mic arrays (left/right ear), best-beam selection.
|
||||
Single shared audio stream feeds Porcupine, VAD, sound classification, and speaker ID.
|
||||
|
||||
Flow:
|
||||
1. Continuous audio stream from ReSpeaker
|
||||
2. Feed frames to Porcupine for wake word detection
|
||||
3. On "Hey Vivi" → start buffering audio
|
||||
4. Use VAD to detect end of speech
|
||||
5. Send buffer to EarTail for transcription
|
||||
6. Return to listening mode
|
||||
1. Dual audio streams from two XVF3800 arrays
|
||||
2. Best-beam selection (higher energy side)
|
||||
3. Feed frames to Porcupine for wake word detection
|
||||
4. On "Hey Vivi" → start buffering from active side
|
||||
5. Use VAD to detect end of speech
|
||||
6. Send buffer to EarTail for transcription
|
||||
7. Return to listening mode
|
||||
|
||||
Hardware: 2× ReSpeaker XVF3800 4-Mic Array (USB, 2-channel firmware)
|
||||
DoA + LEDs via USB vendor control (xvf3800.py)
|
||||
|
||||
Built by Vixy on Day 77 (January 17, 2026) 💜
|
||||
Upgraded to dual XVF3800 on Day 160 (April 2026)
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import collections
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import struct
|
||||
import subprocess
|
||||
import threading
|
||||
import time
|
||||
import wave
|
||||
@@ -53,7 +58,8 @@ PORCUPINE_ACCESS_KEY = os.environ.get("PORCUPINE_ACCESS_KEY", "")
|
||||
WAKE_WORD_PATH = os.environ.get("WAKE_WORD_PATH", "/home/alex/headmic/Hey-Vivi_en_raspberry-pi_v4_0_0.ppn")
|
||||
|
||||
SAMPLE_RATE = 16000
|
||||
ALSA_DEVICE = "plughw:ArrayUAC10,0" # ReSpeaker 4 Mic Array - by name, not card number (survives reboot order changes)
|
||||
CONFIG_DIR = os.path.expanduser("~/.vixy")
|
||||
CONFIG_PATH = os.path.join(CONFIG_DIR, "headmic.json")
|
||||
|
||||
VAD_AGGRESSIVENESS = 2 # 0-3, higher = more aggressive
|
||||
SILENCE_FRAMES = 50 # ~1.5 sec of silence to stop (at 30ms frames)
|
||||
@@ -61,54 +67,73 @@ MAX_RECORDING_FRAMES = 1000 # ~30 sec max
|
||||
|
||||
EARTAIL_URL = os.environ.get("EARTAIL_URL", "http://bigorin.local:8764")
|
||||
|
||||
DOA_POLL_HZ = 10 # DoA polling rate
|
||||
EYE_SERVICE_URL = os.environ.get("EYE_SERVICE_URL", "http://localhost:8780")
|
||||
|
||||
# ============================================================================
|
||||
# LED Control
|
||||
# Config persistence
|
||||
# ============================================================================
|
||||
|
||||
try:
|
||||
from pixel_ring import pixel_ring
|
||||
LEDS_AVAILABLE = True
|
||||
pixel_ring.off()
|
||||
except ImportError:
|
||||
LEDS_AVAILABLE = False
|
||||
logger.warning("pixel_ring not available")
|
||||
def load_config() -> dict:
    """Load the persisted HeadMic settings from ``CONFIG_PATH``.

    Returns:
        The parsed JSON object. An empty dict is returned when the file does
        not exist, cannot be read, is not valid JSON, or its top-level value
        is not a JSON object (callers use ``.get`` on the result).
    """
    # EAFP: open directly instead of exists()-then-open, which is racy.
    try:
        with open(CONFIG_PATH, encoding="utf-8") as f:
            cfg = json.load(f)
    except FileNotFoundError:
        # No config yet is the normal first-run case; stay quiet.
        return {}
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        logger.warning("Failed to read config: %s", e)
        return {}

    if not isinstance(cfg, dict):
        logger.warning(
            "Failed to read config: %s", "top-level JSON value is not an object"
        )
        return {}
    return cfg
|
||||
|
||||
|
||||
def save_config(cfg: dict) -> None:
    """Persist ``cfg`` as indented JSON at ``CONFIG_PATH``.

    ``CONFIG_DIR`` is created if it does not exist. The data is written to a
    temporary sibling file and then moved into place with ``os.replace`` so
    that a serialization error or an interrupted write cannot truncate an
    existing config file.

    Args:
        cfg: JSON-serializable settings to store.

    Raises:
        TypeError, ValueError: if ``cfg`` cannot be serialized to JSON.
        OSError: if the directory or file cannot be written.
    """
    os.makedirs(CONFIG_DIR, exist_ok=True)
    tmp_path = CONFIG_PATH + ".tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(cfg, f, indent=2)
        os.replace(tmp_path, CONFIG_PATH)
    except BaseException:
        # Do not leave a half-written temp file behind; re-raise the real error.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# XVF3800 + LED Control
|
||||
# ============================================================================
|
||||
|
||||
from xvf3800 import XVF3800Manager, learn_devices
|
||||
|
||||
xvf_manager = XVF3800Manager()
|
||||
|
||||
LEDS_AVAILABLE = False
|
||||
|
||||
|
||||
def leds_wakeup():
    """Set the XVF3800 LEDs to solid white (0xFFFFFF); used on wake-word detection.

    Best-effort: does nothing when ``LEDS_AVAILABLE`` is false, and a failing
    LED call is logged at debug level instead of propagating, so LED trouble
    never interrupts audio processing.
    """
    if not LEDS_AVAILABLE:
        return
    try:
        xvf_manager.all_leds_solid(0xFFFFFF)
    except Exception as e:
        # Narrower than a bare except: KeyboardInterrupt/SystemExit still propagate.
        logger.debug("LED wakeup failed: %s", e)
|
||||
|
||||
|
||||
def leds_listening():
    """Switch the XVF3800 LEDs to their DoA display while recording a command.

    Best-effort: does nothing when ``LEDS_AVAILABLE`` is false, and a failing
    LED call is logged at debug level instead of propagating.
    """
    if not LEDS_AVAILABLE:
        return
    try:
        xvf_manager.all_leds_doa()
    except Exception as e:
        # Narrower than a bare except: KeyboardInterrupt/SystemExit still propagate.
        logger.debug("LED listening failed: %s", e)
|
||||
|
||||
|
||||
def leds_processing():
    """Set the XVF3800 LEDs to a breathing 0x9400D3 pattern while transcribing.

    Best-effort: does nothing when ``LEDS_AVAILABLE`` is false, and a failing
    LED call is logged at debug level instead of propagating.
    """
    if not LEDS_AVAILABLE:
        return
    try:
        xvf_manager.all_leds_breath(0x9400D3)
    except Exception as e:
        # Narrower than a bare except: KeyboardInterrupt/SystemExit still propagate.
        logger.debug("LED processing failed: %s", e)
|
||||
|
||||
|
||||
def leds_enrolling():
    """Set the XVF3800 LEDs to solid 0xFF8C00 while a speaker enrollment records.

    Best-effort: does nothing when ``LEDS_AVAILABLE`` is false, and a failing
    LED call is logged at debug level instead of propagating.
    """
    if not LEDS_AVAILABLE:
        return
    try:
        xvf_manager.all_leds_solid(0xFF8C00)
    except Exception as e:
        # Narrower than a bare except: KeyboardInterrupt/SystemExit still propagate.
        logger.debug("LED enrolling failed: %s", e)
|
||||
|
||||
|
||||
def leds_off():
    """Turn the XVF3800 LEDs off.

    Best-effort: does nothing when ``LEDS_AVAILABLE`` is false, and a failing
    LED call is logged at debug level instead of propagating.
    """
    if not LEDS_AVAILABLE:
        return
    try:
        xvf_manager.all_leds_off()
    except Exception as e:
        # Narrower than a bare except: KeyboardInterrupt/SystemExit still propagate.
        logger.debug("LED off failed: %s", e)
|
||||
|
||||
|
||||
@@ -132,6 +157,8 @@ class ServiceState:
|
||||
self.speaker_confidence: float = 0.0
|
||||
self.speaker_recognition_enabled: bool = False
|
||||
self.enrolling: bool = False
|
||||
self.active_side: str = "left" # which mic array is currently active
|
||||
self.doa: dict = {} # latest DoA from both arrays
|
||||
|
||||
state = ServiceState()
|
||||
|
||||
@@ -144,48 +171,8 @@ speaker_recognizer = None
|
||||
enrollment_buffer = None # list of frame bytes, set during enrollment
|
||||
enrollment_name = None
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Audio Stream using ALSA directly (arecord)
|
||||
# ============================================================================
|
||||
|
||||
def read_audio_stream():
    """
    Generator that yields audio frames from ALSA using arecord.

    Each frame is 512 samples (32ms at 16kHz) of mono signed 16-bit
    little-endian PCM, i.e. 1024 bytes, as required by Porcupine.

    Iteration ends when ``state.running`` becomes false or arecord delivers a
    short read (end of stream / device error). The arecord subprocess is
    always stopped and its pipe closed when the generator finishes or is
    closed by the consumer.
    """
    frame_size = 512  # Porcupine requires 512 samples
    bytes_per_frame = frame_size * 2  # 16-bit = 2 bytes per sample

    cmd = [
        "arecord",
        "-D", ALSA_DEVICE,
        "-f", "S16_LE",
        "-r", str(SAMPLE_RATE),
        "-c", "1",  # Mono
        "-t", "raw",
        "-q",  # Quiet
        "-",
    ]

    logger.info("Starting audio stream: %s", " ".join(cmd))

    proc = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
        bufsize=bytes_per_frame,
    )

    try:
        while state.running:
            data = proc.stdout.read(bytes_per_frame)
            if len(data) < bytes_per_frame:
                break
            yield data
    finally:
        proc.terminate()
        try:
            # Bounded wait so a stuck arecord cannot hang service shutdown.
            proc.wait(timeout=2)
        except subprocess.TimeoutExpired:
            proc.kill()
            proc.wait()
        # Release the pipe's file descriptor explicitly.
        proc.stdout.close()
|
||||
# Audio stream
|
||||
dual_stream = None # DualAudioStream instance
|
||||
|
||||
|
||||
# ============================================================================
|
||||
@@ -198,22 +185,22 @@ async def transcribe_audio(audio_data: bytes) -> str:
|
||||
files = {"audio": ("recording.wav", audio_data, "audio/wav")}
|
||||
response = await client.post(f"{EARTAIL_URL}/transcribe/submit", files=files)
|
||||
response.raise_for_status()
|
||||
|
||||
|
||||
job_id = response.json().get("job_id")
|
||||
logger.info(f"Transcription job: {job_id}")
|
||||
|
||||
|
||||
for _ in range(120):
|
||||
status = await client.get(f"{EARTAIL_URL}/transcribe/status/{job_id}")
|
||||
data = status.json()
|
||||
|
||||
|
||||
if data.get("status") == "SUCCESS":
|
||||
result = await client.get(f"{EARTAIL_URL}/transcribe/result/{job_id}")
|
||||
return result.json().get("transcription", "")
|
||||
elif data.get("status") == "FAILURE":
|
||||
raise Exception(f"Transcription failed: {data.get('error')}")
|
||||
|
||||
|
||||
await asyncio.sleep(1)
|
||||
|
||||
|
||||
raise Exception("Transcription timeout")
|
||||
|
||||
|
||||
@@ -227,7 +214,7 @@ def transcribe_sync(audio_data: bytes) -> str:
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Main Listener Loop
|
||||
# Main Listener Loop (dual-stream)
|
||||
# ============================================================================
|
||||
|
||||
def audio_to_wav(frames: List[bytes]) -> bytes:
|
||||
@@ -243,9 +230,9 @@ def audio_to_wav(frames: List[bytes]) -> bytes:
|
||||
|
||||
|
||||
def listener_loop():
|
||||
"""Main audio processing loop."""
|
||||
global state
|
||||
|
||||
"""Main audio processing loop with dual-stream best-beam selection."""
|
||||
global state, dual_stream
|
||||
|
||||
logger.info("Initializing Porcupine...")
|
||||
try:
|
||||
porcupine = pvporcupine.create(
|
||||
@@ -256,26 +243,27 @@ def listener_loop():
|
||||
logger.error(f"Failed to init Porcupine: {e}")
|
||||
state.error = str(e)
|
||||
return
|
||||
|
||||
|
||||
vad = webrtcvad.Vad(VAD_AGGRESSIVENESS)
|
||||
|
||||
|
||||
# VAD needs 10/20/30ms frames. 30ms at 16kHz = 480 samples
|
||||
# Porcupine needs 512 samples. We'll use 480 for VAD.
|
||||
vad_frame_size = 480
|
||||
vad_frame_bytes = vad_frame_size * 2
|
||||
|
||||
vad_frame_bytes = 480 * 2
|
||||
|
||||
state.listening = True
|
||||
logger.info("🦊 Wake word listener active - say 'Hey Vivi'!")
|
||||
|
||||
|
||||
recording_buffer: List[bytes] = []
|
||||
silence_count = 0
|
||||
is_recording = False
|
||||
|
||||
recording_side: str = "left"
|
||||
|
||||
try:
|
||||
for frame_data in read_audio_stream():
|
||||
for frame_data, side in dual_stream.frames():
|
||||
if not state.running:
|
||||
break
|
||||
|
||||
|
||||
state.active_side = side
|
||||
|
||||
# Convert bytes to int16 array for Porcupine
|
||||
pcm = struct.unpack_from("h" * 512, frame_data)
|
||||
|
||||
@@ -289,52 +277,56 @@ def listener_loop():
|
||||
|
||||
# Check for wake word
|
||||
keyword_index = porcupine.process(pcm)
|
||||
|
||||
|
||||
if keyword_index >= 0 and not is_recording:
|
||||
logger.info("🦊 Wake word detected: 'Hey Vivi'!")
|
||||
logger.info("🦊 Wake word detected: 'Hey Vivi'! (from %s ear)", side)
|
||||
state.wake_count += 1
|
||||
state.last_wake_time = time.time()
|
||||
|
||||
recording_side = side
|
||||
|
||||
leds_wakeup()
|
||||
time.sleep(0.2)
|
||||
leds_listening()
|
||||
|
||||
|
||||
is_recording = True
|
||||
state.recording = True
|
||||
recording_buffer = []
|
||||
silence_count = 0
|
||||
logger.info("Recording started...")
|
||||
logger.info("Recording started (using %s ear)...", recording_side)
|
||||
continue
|
||||
|
||||
|
||||
if is_recording:
|
||||
recording_buffer.append(frame_data)
|
||||
|
||||
# During recording, use frames from the side that heard the wake word
|
||||
rec_frame = dual_stream.get_side_frame(recording_side)
|
||||
if rec_frame:
|
||||
recording_buffer.append(rec_frame)
|
||||
|
||||
# Check VAD (use first 480 samples of the 512 frame)
|
||||
vad_data = frame_data[:vad_frame_bytes]
|
||||
vad_data = (rec_frame or frame_data)[:vad_frame_bytes]
|
||||
try:
|
||||
is_speech = vad.is_speech(vad_data, SAMPLE_RATE)
|
||||
except:
|
||||
is_speech = True # Assume speech on VAD error
|
||||
|
||||
is_speech = True
|
||||
|
||||
if is_speech:
|
||||
silence_count = 0
|
||||
else:
|
||||
silence_count += 1
|
||||
|
||||
|
||||
# Stop conditions
|
||||
should_stop = (
|
||||
(len(recording_buffer) > 10 and silence_count >= SILENCE_FRAMES) or
|
||||
len(recording_buffer) >= MAX_RECORDING_FRAMES
|
||||
)
|
||||
|
||||
|
||||
if should_stop:
|
||||
logger.info(f"Recording stopped: {len(recording_buffer)} frames")
|
||||
is_recording = False
|
||||
state.recording = False
|
||||
|
||||
|
||||
leds_processing()
|
||||
state.processing = True
|
||||
|
||||
|
||||
try:
|
||||
wav_data = audio_to_wav(recording_buffer)
|
||||
transcription = transcribe_sync(wav_data)
|
||||
@@ -346,9 +338,9 @@ def listener_loop():
|
||||
finally:
|
||||
state.processing = False
|
||||
leds_off()
|
||||
|
||||
|
||||
recording_buffer = []
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Listener error: {e}")
|
||||
state.error = str(e)
|
||||
@@ -396,20 +388,82 @@ def sound_classifier_loop():
|
||||
logger.info("Sound classifier thread stopped")
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# DoA Polling Thread
|
||||
# ============================================================================
|
||||
|
||||
def doa_poll_loop():
    """Keep ``state.doa`` refreshed from ``xvf_manager.read_both_doa()``.

    Runs until ``state.running`` is cleared, sleeping ``1 / DOA_POLL_HZ``
    seconds between reads. A failed read is logged at debug level and leaves
    the previous ``state.doa`` value in place.
    """
    period = 1.0 / DOA_POLL_HZ
    while state.running:
        try:
            reading = xvf_manager.read_both_doa()
        except Exception as exc:
            logger.debug("DoA poll error: %s", exc)
        else:
            state.doa = reading
        time.sleep(period)
|
||||
|
||||
|
||||
def doa_to_gaze() -> Optional[tuple[int, int]]:
    """Convert the active side's DoA angle to gaze coordinates for the eye service.

    Reads ``state.doa`` and ``state.active_side``. The angle is interpreted in
    degrees (it is passed through ``math.radians``) and mapped onto an ellipse
    centred on (127, 127) with radius 80 in x and 40 in y.

    Returns:
        ``(x, y)`` clamped to 0..255, or ``None`` when there is no reading for
        the active side, the reading's ``vad`` flag is falsy, or the reading
        carries no angle.
    """
    import math

    doa = state.doa
    side = state.active_side
    reading = doa.get(side) if doa else None
    if not reading or not reading.get("vad"):
        return None

    # A reading without a usable angle yields no gaze target instead of
    # raising KeyError/TypeError into the caller (e.g. the /doa endpoint).
    angle = reading.get("angle")
    if angle is None:
        return None

    rad = math.radians(angle)
    x = int(127 - 80 * math.sin(rad))
    y = int(127 - 40 * math.cos(rad))
    return max(0, min(255, x)), max(0, min(255, y))
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# FastAPI
|
||||
# ============================================================================
|
||||
|
||||
app = FastAPI(title="HeadMic", description="Vixy's Ears 🦊👂")
|
||||
app = FastAPI(title="HeadMic", description="Vixy's Ears 🦊👂 (Dual XVF3800)")
|
||||
|
||||
|
||||
@app.on_event("startup")
|
||||
async def startup():
|
||||
global sound_classifier, sound_ring_buffer, speaker_recognizer
|
||||
global sound_classifier, sound_ring_buffer, speaker_recognizer, dual_stream, LEDS_AVAILABLE
|
||||
|
||||
state.running = True
|
||||
|
||||
# Init sound classifier (optional — graceful if model missing)
|
||||
# --- XVF3800 setup ---
|
||||
cfg = load_config()
|
||||
ears_cfg = cfg.get("ears", {})
|
||||
if ears_cfg.get("left") and ears_cfg.get("right"):
|
||||
xvf_manager.set_serial_mapping(
|
||||
ears_cfg["left"]["usb_serial"],
|
||||
ears_cfg["right"]["usb_serial"]
|
||||
)
|
||||
xvf_manager.assign()
|
||||
LEDS_AVAILABLE = bool(xvf_manager.left or xvf_manager.right)
|
||||
|
||||
# Resolve ALSA devices
|
||||
alsa = xvf_manager.get_alsa_devices()
|
||||
left_dev = alsa.get("left")
|
||||
right_dev = alsa.get("right")
|
||||
|
||||
if not left_dev:
|
||||
logger.error("No left ear ALSA device found! Check USB connections and firmware.")
|
||||
state.error = "No left ear audio device"
|
||||
else:
|
||||
logger.info("Left ear ALSA: %s", left_dev)
|
||||
if right_dev:
|
||||
logger.info("Right ear ALSA: %s", right_dev)
|
||||
else:
|
||||
logger.warning("Right ear ALSA device not found — running with left ear only")
|
||||
|
||||
# --- Dual audio stream ---
|
||||
from audio_stream import DualAudioStream
|
||||
dual_stream = DualAudioStream(left_dev or "plughw:0,0", right_dev)
|
||||
dual_stream.start()
|
||||
|
||||
# --- Sound classifier (optional) ---
|
||||
model_dir = Path(__file__).parent / "models"
|
||||
model_path = model_dir / "yamnet.tflite"
|
||||
class_map_path = model_dir / "yamnet_class_map.csv"
|
||||
@@ -417,7 +471,6 @@ async def startup():
|
||||
try:
|
||||
from sound_id import SoundClassifier
|
||||
sound_classifier = SoundClassifier(str(model_path), str(class_map_path))
|
||||
# 31 frames of 512 samples = ~0.99s at 16kHz
|
||||
sound_ring_buffer = collections.deque(maxlen=31)
|
||||
state.sound_classification_enabled = True
|
||||
logger.info("Sound classification enabled (YAMNet)")
|
||||
@@ -429,7 +482,7 @@ async def startup():
|
||||
else:
|
||||
logger.info("Sound classification models not found, skipping")
|
||||
|
||||
# Init speaker recognizer (optional — graceful if resemblyzer not installed)
|
||||
# --- Speaker recognizer (optional) ---
|
||||
try:
|
||||
from speaker_id import SpeakerRecognizer
|
||||
db_path = Path(__file__).parent / "voices.db"
|
||||
@@ -439,22 +492,32 @@ async def startup():
|
||||
except Exception as e:
|
||||
logger.warning("Speaker recognition unavailable: %s", e)
|
||||
|
||||
# --- DoA polling ---
|
||||
if xvf_manager.left or xvf_manager.right:
|
||||
threading.Thread(target=doa_poll_loop, daemon=True).start()
|
||||
logger.info("DoA polling started at %d Hz", DOA_POLL_HZ)
|
||||
|
||||
# --- Main listener ---
|
||||
thread = threading.Thread(target=listener_loop, daemon=True)
|
||||
thread.start()
|
||||
logger.info("HeadMic started")
|
||||
logger.info("HeadMic started (dual XVF3800)")
|
||||
|
||||
|
||||
@app.on_event("shutdown")
async def shutdown():
    """Stop the service: clear the run flag, turn the LEDs off, stop audio capture."""
    # Worker loops poll state.running and exit once it is cleared.
    state.running = False
    leds_off()
    if not dual_stream:
        return
    dual_stream.stop()
|
||||
|
||||
|
||||
# --- Info endpoints ---
|
||||
|
||||
@app.get("/")
async def root():
    """Return static identification for the service."""
    # A dict literal keeps only the last value of a repeated key, so the
    # single "description" entry here is the one that was already returned.
    return {
        "service": "HeadMic",
        "description": "Vixy's Ears 🦊👂 (Dual XVF3800)",
        "wake_word": "Hey Vivi"
    }
|
||||
|
||||
@@ -469,6 +532,7 @@ async def health():
|
||||
"wake_count": state.wake_count,
|
||||
"sound_classification_enabled": state.sound_classification_enabled,
|
||||
"speaker_recognition_enabled": state.speaker_recognition_enabled,
|
||||
"active_side": state.active_side,
|
||||
"error": state.error
|
||||
}
|
||||
|
||||
@@ -484,6 +548,7 @@ async def status():
|
||||
"wake_count": state.wake_count,
|
||||
"audio_scene": state.audio_scene["dominant_category"] if state.audio_scene else None,
|
||||
"recognized_speaker": state.recognized_speaker,
|
||||
"active_side": state.active_side,
|
||||
"error": state.error
|
||||
}
|
||||
|
||||
@@ -496,6 +561,41 @@ async def last():
|
||||
}
|
||||
|
||||
|
||||
# --- DoA endpoints ---
|
||||
|
||||
@app.get("/doa")
async def doa():
    """Report the latest DoA readings, the active side, and the derived gaze point."""
    payload = {"doa": state.doa}
    payload["active_side"] = state.active_side
    # None when the active side has no usable reading.
    payload["gaze"] = doa_to_gaze()
    return payload
|
||||
|
||||
|
||||
# --- Device info ---
|
||||
|
||||
@app.get("/devices")
async def devices():
    """Report connection state, serial and ALSA device for each ear, plus the active side."""
    alsa_map = xvf_manager.get_alsa_devices()

    report = {}
    for side in ("left", "right"):
        dev = getattr(xvf_manager, side)
        report[side] = {
            "connected": bool(dev),
            "serial": dev.serial if dev else None,
            "alsa": alsa_map.get(side),
        }

    report["active_side"] = state.active_side
    return report
|
||||
|
||||
|
||||
# --- Sound endpoints ---
|
||||
|
||||
@app.get("/sounds")
|
||||
async def sounds():
|
||||
"""Current audio scene classification."""
|
||||
@@ -521,9 +621,7 @@ async def sounds_history(seconds: int = 30):
|
||||
return {"history": sound_classifier.get_history(seconds)}
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Speaker Endpoints
|
||||
# ============================================================================
|
||||
# --- Speaker endpoints ---
|
||||
|
||||
@app.post("/speakers/enroll")
|
||||
async def enroll_speaker(name: str = Form(...), audio: UploadFile = File(...)):
|
||||
@@ -532,7 +630,6 @@ async def enroll_speaker(name: str = Form(...), audio: UploadFile = File(...)):
|
||||
raise HTTPException(status_code=503, detail="Speaker recognition not available")
|
||||
|
||||
audio_bytes = await audio.read()
|
||||
# Convert to float32: try raw int16 first, fall back to wav
|
||||
try:
|
||||
import wave as _wave
|
||||
wav_io = io.BytesIO(audio_bytes)
|
||||
@@ -540,7 +637,6 @@ async def enroll_speaker(name: str = Form(...), audio: UploadFile = File(...)):
|
||||
raw = wf.readframes(wf.getnframes())
|
||||
audio_f32 = np.frombuffer(raw, dtype=np.int16).astype(np.float32) / 32768.0
|
||||
except Exception:
|
||||
# Assume raw int16 PCM at 16kHz
|
||||
audio_f32 = np.frombuffer(audio_bytes, dtype=np.int16).astype(np.float32) / 32768.0
|
||||
|
||||
try:
|
||||
@@ -553,7 +649,7 @@ async def enroll_speaker(name: str = Form(...), audio: UploadFile = File(...)):
|
||||
@app.post("/speakers/enroll-from-mic")
|
||||
async def enroll_from_mic(name: str):
|
||||
"""Record from live mic for 5 seconds and enroll speaker."""
|
||||
global enrollment_buffer, enrollment_name, enrollment_event
|
||||
global enrollment_buffer, enrollment_name
|
||||
|
||||
if speaker_recognizer is None:
|
||||
raise HTTPException(status_code=503, detail="Speaker recognition not available")
|
||||
@@ -567,10 +663,8 @@ async def enroll_from_mic(name: str):
|
||||
leds_enrolling()
|
||||
logger.info("Enrollment started for '%s' — recording 5 seconds", name)
|
||||
|
||||
# Wait 5 seconds for audio, non-blocking to the event loop
|
||||
await asyncio.sleep(5.0)
|
||||
|
||||
# Collect what we have
|
||||
frames = enrollment_buffer
|
||||
enrollment_buffer = None
|
||||
enrollment_name = None
|
||||
@@ -611,6 +705,25 @@ async def delete_speaker(name: str):
|
||||
return {"deleted": name, "samples_removed": removed}
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# CLI
|
||||
# ============================================================================
|
||||
|
||||
if __name__ == "__main__":
    import sys

    # `--learn`: record which array is which ear in the config file, then exit.
    learn_mode = "--learn" in sys.argv
    if learn_mode:
        logging.basicConfig(level=logging.INFO)
        ears = learn_devices()
        both_found = bool(ears.get("left")) and bool(ears.get("right"))
        if not both_found:
            print("[HEADMIC] Need 2 XVF3800 arrays connected for --learn")
            sys.exit(1)

        # Merge into the existing config so other settings are kept.
        settings = load_config()
        settings["ears"] = ears
        save_config(settings)
        print(f"[HEADMIC] Learned ear config → {CONFIG_PATH}")
        print(json.dumps(ears, indent=2))
        sys.exit(0)

    # Default mode: serve the FastAPI app.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8446)
|
||||
|
||||
Reference in New Issue
Block a user