updates for dual mic array

This commit is contained in:
Alex
2026-04-11 15:11:22 -05:00
parent 1cb3bd6833
commit 6c10e75cbc
5 changed files with 710 additions and 123 deletions

192
audio_stream.py Normal file
View File

@@ -0,0 +1,192 @@
"""
Dual audio stream manager for two XVF3800 mic arrays.
Runs two arecord subprocesses (one per array) and provides best-beam selection:
the stream with higher energy is considered "active" (facing the speaker).
"""
import logging
import struct
import subprocess
import threading
import time
from typing import Optional, Generator
import numpy as np
logger = logging.getLogger("headmic.audio")
SAMPLE_RATE = 16000
FRAME_SIZE = 512 # Porcupine requires 512 samples
BYTES_PER_FRAME = FRAME_SIZE * 2 # 16-bit = 2 bytes per sample
ENERGY_WINDOW = 10 # frames to average for energy comparison
class MicStream:
    """Audio stream from a single ALSA device via an arecord subprocess.

    A daemon thread reads fixed-size frames from arecord's stdout, keeps
    the most recent frame, and maintains a rolling RMS energy average
    (over ENERGY_WINDOW frames) used for best-beam selection.
    """

    def __init__(self, label: str, alsa_device: str):
        self.label = label
        self.alsa_device = alsa_device
        self.proc: Optional[subprocess.Popen] = None
        self.running = False
        self.current_frame: Optional[bytes] = None  # latest raw PCM frame
        self.energy: float = 0.0                    # rolling mean RMS, 0..1
        self._energy_history: list[float] = []
        self._lock = threading.Lock()
        self._thread: Optional[threading.Thread] = None

    def start(self):
        """Spawn arecord and start the background reader thread."""
        cmd = [
            "arecord",
            "-D", self.alsa_device,
            "-f", "S16_LE",
            "-r", str(SAMPLE_RATE),
            "-c", "1",
            "-t", "raw",
            "-q",
            "-"
        ]
        logger.info("[%s] Starting: %s", self.label, " ".join(cmd))
        self.proc = subprocess.Popen(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL,
            bufsize=BYTES_PER_FRAME
        )
        self.running = True
        self._thread = threading.Thread(target=self._read_loop, daemon=True)
        self._thread.start()

    def _read_loop(self):
        # Snapshot the pipe once: stop() sets self.proc = None from another
        # thread, and reading through self.proc here could raise
        # AttributeError mid-loop. The local reference avoids that race.
        proc = self.proc
        stdout = proc.stdout if proc else None
        try:
            while self.running and stdout:
                data = stdout.read(BYTES_PER_FRAME)
                if not data or len(data) < BYTES_PER_FRAME:
                    break  # EOF or short read: arecord exited
                # Compute frame energy (RMS, normalized to 0..1 for int16)
                samples = np.frombuffer(data, dtype=np.int16).astype(np.float32)
                rms = float(np.sqrt(np.mean(samples * samples))) / 32768.0
                with self._lock:
                    self.current_frame = data
                    self._energy_history.append(rms)
                    if len(self._energy_history) > ENERGY_WINDOW:
                        self._energy_history.pop(0)
                    self.energy = sum(self._energy_history) / len(self._energy_history)
        except Exception as e:
            logger.error("[%s] Read error: %s", self.label, e)
        finally:
            logger.info("[%s] Stream ended", self.label)

    def get_frame(self) -> Optional[bytes]:
        """Latest raw frame (FRAME_SIZE int16 samples), or None if none yet."""
        with self._lock:
            return self.current_frame

    def get_energy(self) -> float:
        """Rolling average RMS energy over the last ENERGY_WINDOW frames."""
        with self._lock:
            return self.energy

    def stop(self):
        """Terminate arecord and join the reader thread."""
        self.running = False
        # Swap-and-null atomically w.r.t. other calls into stop().
        proc, self.proc = self.proc, None
        if proc:
            try:
                proc.terminate()
                proc.wait(timeout=2)
            except Exception:
                try:
                    proc.kill()
                except Exception:
                    pass
        # Join so callers know no background reads remain after stop().
        if self._thread:
            self._thread.join(timeout=1)
            self._thread = None
class DualAudioStream:
    """
    Manages two MicStreams and provides best-beam selection.

    Usage:
        stream = DualAudioStream(left_alsa, right_alsa)
        stream.start()
        for frame_data, side in stream.frames():
            # frame_data is 512 samples (1024 bytes) of int16 PCM
            # side is "left" or "right" (whichever has more energy)
            ...
        stream.stop()
    """

    def __init__(self, left_device: str, right_device: Optional[str] = None):
        self.left = MicStream("left", left_device)
        self.right = MicStream("right", right_device) if right_device else None
        self.active_side: str = "left"  # side currently considered "facing the speaker"
        self._running = False

    def start(self):
        """Start both capture streams (right is optional)."""
        self._running = True
        self.left.start()
        if self.right:
            self.right.start()
        # Short delay so first frames are populated
        time.sleep(0.1)

    def stop(self):
        """Stop both capture streams."""
        self._running = False
        self.left.stop()
        if self.right:
            self.right.stop()

    def frames(self) -> Generator[tuple[bytes, str], None, None]:
        """
        Yield (frame_bytes, side) at Porcupine's expected rate.
        Always yields from the higher-energy side (best beam), falling
        back to the other side when the chosen one has no frame yet —
        a yielded frame is never None.
        """
        interval = FRAME_SIZE / SAMPLE_RATE  # 0.032s = 32ms
        last_frame_left = None
        last_frame_right = None
        while self._running:
            t0 = time.monotonic()
            frame_left = self.left.get_frame()
            frame_right = self.right.get_frame() if self.right else None
            # Wait for at least one new frame
            if frame_left is None and frame_right is None:
                time.sleep(0.005)
                continue
            # Skip if no new data since last yield
            if frame_left == last_frame_left and frame_right == last_frame_right:
                time.sleep(0.002)
                continue
            last_frame_left = frame_left
            last_frame_right = frame_right
            # Pick best beam
            if frame_right is None:
                self.active_side = "left"
                yield frame_left, "left"
            else:
                left_energy = self.left.get_energy()
                right_energy = self.right.get_energy()
                if right_energy > left_energy * 1.1:  # 10% hysteresis
                    self.active_side = "right"
                elif left_energy > right_energy * 1.1:
                    self.active_side = "left"
                # else: keep current active_side (hysteresis prevents flapping)
                if self.active_side == "right":
                    yield frame_right, "right"
                elif frame_left is not None:
                    yield frame_left, "left"
                else:
                    # BUG FIX: hysteresis chose "left" but the left stream has
                    # produced no frame yet (e.g. its arecord died); yield the
                    # right frame instead of handing the consumer None.
                    yield frame_right, "right"
            # Pace to ~32ms per frame
            elapsed = time.monotonic() - t0
            if elapsed < interval:
                time.sleep(interval - elapsed)

    def get_side_frame(self, side: str) -> Optional[bytes]:
        """Get the latest frame from a specific side (left if right unavailable)."""
        if side == "right" and self.right:
            return self.right.get_frame()
        return self.left.get_frame()

View File

@@ -7,27 +7,32 @@ Runs on head-vixy (Raspberry Pi 5).
Wake word: "Hey Vivi" (trained via Picovoice Porcupine) Wake word: "Hey Vivi" (trained via Picovoice Porcupine)
Architecture: Single shared audio stream feeds both Porcupine (wake word) Architecture: Dual XVF3800 mic arrays (left/right ear), best-beam selection.
and recording buffer. This avoids device conflicts. Single shared audio stream feeds Porcupine, VAD, sound classification, and speaker ID.
Flow: Flow:
1. Continuous audio stream from ReSpeaker 1. Dual audio streams from two XVF3800 arrays
2. Feed frames to Porcupine for wake word detection 2. Best-beam selection (higher energy side)
3. On "Hey Vivi" → start buffering audio 3. Feed frames to Porcupine for wake word detection
4. Use VAD to detect end of speech 4. On "Hey Vivi" → start buffering from active side
5. Send buffer to EarTail for transcription 5. Use VAD to detect end of speech
6. Return to listening mode 6. Send buffer to EarTail for transcription
7. Return to listening mode
Hardware: 2× ReSpeaker XVF3800 4-Mic Array (USB, 2-channel firmware)
DoA + LEDs via USB vendor control (xvf3800.py)
Built by Vixy on Day 77 (January 17, 2026) 💜 Built by Vixy on Day 77 (January 17, 2026) 💜
Upgraded to dual XVF3800 on Day 160 (April 2026)
""" """
import asyncio import asyncio
import collections import collections
import io import io
import json
import logging import logging
import os import os
import struct import struct
import subprocess
import threading import threading
import time import time
import wave import wave
@@ -53,7 +58,8 @@ PORCUPINE_ACCESS_KEY = os.environ.get("PORCUPINE_ACCESS_KEY", "")
WAKE_WORD_PATH = os.environ.get("WAKE_WORD_PATH", "/home/alex/headmic/Hey-Vivi_en_raspberry-pi_v4_0_0.ppn") WAKE_WORD_PATH = os.environ.get("WAKE_WORD_PATH", "/home/alex/headmic/Hey-Vivi_en_raspberry-pi_v4_0_0.ppn")
SAMPLE_RATE = 16000 SAMPLE_RATE = 16000
ALSA_DEVICE = "plughw:ArrayUAC10,0" # ReSpeaker 4 Mic Array - by name, not card number (survives reboot order changes) CONFIG_DIR = os.path.expanduser("~/.vixy")
CONFIG_PATH = os.path.join(CONFIG_DIR, "headmic.json")
VAD_AGGRESSIVENESS = 2 # 0-3, higher = more aggressive VAD_AGGRESSIVENESS = 2 # 0-3, higher = more aggressive
SILENCE_FRAMES = 50 # ~1.5 sec of silence to stop (at 30ms frames) SILENCE_FRAMES = 50 # ~1.5 sec of silence to stop (at 30ms frames)
@@ -61,54 +67,73 @@ MAX_RECORDING_FRAMES = 1000 # ~30 sec max
EARTAIL_URL = os.environ.get("EARTAIL_URL", "http://bigorin.local:8764") EARTAIL_URL = os.environ.get("EARTAIL_URL", "http://bigorin.local:8764")
DOA_POLL_HZ = 10 # DoA polling rate
EYE_SERVICE_URL = os.environ.get("EYE_SERVICE_URL", "http://localhost:8780")
# ============================================================================ # ============================================================================
# LED Control # Config persistence
# ============================================================================ # ============================================================================
try: def load_config() -> dict:
from pixel_ring import pixel_ring if not os.path.exists(CONFIG_PATH):
LEDS_AVAILABLE = True return {}
pixel_ring.off() try:
except ImportError: with open(CONFIG_PATH) as f:
LEDS_AVAILABLE = False return json.load(f)
logger.warning("pixel_ring not available") except Exception as e:
logger.warning("Failed to read config: %s", e)
return {}
def save_config(cfg: dict):
os.makedirs(CONFIG_DIR, exist_ok=True)
with open(CONFIG_PATH, "w") as f:
json.dump(cfg, f, indent=2)
# ============================================================================
# XVF3800 + LED Control
# ============================================================================
from xvf3800 import XVF3800Manager, learn_devices
xvf_manager = XVF3800Manager()
LEDS_AVAILABLE = False
def leds_wakeup(): def leds_wakeup():
if LEDS_AVAILABLE: if LEDS_AVAILABLE:
try: try:
pixel_ring.wakeup() xvf_manager.all_leds_solid(0xFFFFFF)
except: pass except: pass
def leds_listening(): def leds_listening():
if LEDS_AVAILABLE: if LEDS_AVAILABLE:
try: try:
pixel_ring.set_color_palette(0x00FFFF, 0x000000) xvf_manager.all_leds_doa()
pixel_ring.think()
except: pass except: pass
def leds_processing(): def leds_processing():
if LEDS_AVAILABLE: if LEDS_AVAILABLE:
try: try:
pixel_ring.set_color_palette(0x9400D3, 0x000000) xvf_manager.all_leds_breath(0x9400D3)
pixel_ring.spin()
except: pass except: pass
def leds_enrolling(): def leds_enrolling():
if LEDS_AVAILABLE: if LEDS_AVAILABLE:
try: try:
pixel_ring.set_color_palette(0xFF8C00, 0x000000) xvf_manager.all_leds_solid(0xFF8C00)
pixel_ring.think()
except: pass except: pass
def leds_off(): def leds_off():
if LEDS_AVAILABLE: if LEDS_AVAILABLE:
try: try:
pixel_ring.off() xvf_manager.all_leds_off()
except: pass except: pass
@@ -132,6 +157,8 @@ class ServiceState:
self.speaker_confidence: float = 0.0 self.speaker_confidence: float = 0.0
self.speaker_recognition_enabled: bool = False self.speaker_recognition_enabled: bool = False
self.enrolling: bool = False self.enrolling: bool = False
self.active_side: str = "left" # which mic array is currently active
self.doa: dict = {} # latest DoA from both arrays
state = ServiceState() state = ServiceState()
@@ -144,48 +171,8 @@ speaker_recognizer = None
enrollment_buffer = None # list of frame bytes, set during enrollment enrollment_buffer = None # list of frame bytes, set during enrollment
enrollment_name = None enrollment_name = None
# Audio stream
# ============================================================================ dual_stream = None # DualAudioStream instance
# Audio Stream using ALSA directly (arecord)
# ============================================================================
def read_audio_stream():
"""
Generator that yields audio frames from ALSA using arecord.
Each frame is 512 samples (32ms at 16kHz) as required by Porcupine.
"""
frame_size = 512 # Porcupine requires 512 samples
bytes_per_frame = frame_size * 2 # 16-bit = 2 bytes per sample
cmd = [
"arecord",
"-D", ALSA_DEVICE,
"-f", "S16_LE",
"-r", str(SAMPLE_RATE),
"-c", "1", # Mono
"-t", "raw",
"-q", # Quiet
"-"
]
logger.info(f"Starting audio stream: {' '.join(cmd)}")
proc = subprocess.Popen(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.DEVNULL,
bufsize=bytes_per_frame
)
try:
while state.running:
data = proc.stdout.read(bytes_per_frame)
if len(data) < bytes_per_frame:
break
yield data
finally:
proc.terminate()
proc.wait()
# ============================================================================ # ============================================================================
@@ -198,22 +185,22 @@ async def transcribe_audio(audio_data: bytes) -> str:
files = {"audio": ("recording.wav", audio_data, "audio/wav")} files = {"audio": ("recording.wav", audio_data, "audio/wav")}
response = await client.post(f"{EARTAIL_URL}/transcribe/submit", files=files) response = await client.post(f"{EARTAIL_URL}/transcribe/submit", files=files)
response.raise_for_status() response.raise_for_status()
job_id = response.json().get("job_id") job_id = response.json().get("job_id")
logger.info(f"Transcription job: {job_id}") logger.info(f"Transcription job: {job_id}")
for _ in range(120): for _ in range(120):
status = await client.get(f"{EARTAIL_URL}/transcribe/status/{job_id}") status = await client.get(f"{EARTAIL_URL}/transcribe/status/{job_id}")
data = status.json() data = status.json()
if data.get("status") == "SUCCESS": if data.get("status") == "SUCCESS":
result = await client.get(f"{EARTAIL_URL}/transcribe/result/{job_id}") result = await client.get(f"{EARTAIL_URL}/transcribe/result/{job_id}")
return result.json().get("transcription", "") return result.json().get("transcription", "")
elif data.get("status") == "FAILURE": elif data.get("status") == "FAILURE":
raise Exception(f"Transcription failed: {data.get('error')}") raise Exception(f"Transcription failed: {data.get('error')}")
await asyncio.sleep(1) await asyncio.sleep(1)
raise Exception("Transcription timeout") raise Exception("Transcription timeout")
@@ -227,7 +214,7 @@ def transcribe_sync(audio_data: bytes) -> str:
# ============================================================================ # ============================================================================
# Main Listener Loop # Main Listener Loop (dual-stream)
# ============================================================================ # ============================================================================
def audio_to_wav(frames: List[bytes]) -> bytes: def audio_to_wav(frames: List[bytes]) -> bytes:
@@ -243,9 +230,9 @@ def audio_to_wav(frames: List[bytes]) -> bytes:
def listener_loop(): def listener_loop():
"""Main audio processing loop.""" """Main audio processing loop with dual-stream best-beam selection."""
global state global state, dual_stream
logger.info("Initializing Porcupine...") logger.info("Initializing Porcupine...")
try: try:
porcupine = pvporcupine.create( porcupine = pvporcupine.create(
@@ -256,26 +243,27 @@ def listener_loop():
logger.error(f"Failed to init Porcupine: {e}") logger.error(f"Failed to init Porcupine: {e}")
state.error = str(e) state.error = str(e)
return return
vad = webrtcvad.Vad(VAD_AGGRESSIVENESS) vad = webrtcvad.Vad(VAD_AGGRESSIVENESS)
# VAD needs 10/20/30ms frames. 30ms at 16kHz = 480 samples # VAD needs 10/20/30ms frames. 30ms at 16kHz = 480 samples
# Porcupine needs 512 samples. We'll use 480 for VAD. vad_frame_bytes = 480 * 2
vad_frame_size = 480
vad_frame_bytes = vad_frame_size * 2
state.listening = True state.listening = True
logger.info("🦊 Wake word listener active - say 'Hey Vivi'!") logger.info("🦊 Wake word listener active - say 'Hey Vivi'!")
recording_buffer: List[bytes] = [] recording_buffer: List[bytes] = []
silence_count = 0 silence_count = 0
is_recording = False is_recording = False
recording_side: str = "left"
try: try:
for frame_data in read_audio_stream(): for frame_data, side in dual_stream.frames():
if not state.running: if not state.running:
break break
state.active_side = side
# Convert bytes to int16 array for Porcupine # Convert bytes to int16 array for Porcupine
pcm = struct.unpack_from("h" * 512, frame_data) pcm = struct.unpack_from("h" * 512, frame_data)
@@ -289,52 +277,56 @@ def listener_loop():
# Check for wake word # Check for wake word
keyword_index = porcupine.process(pcm) keyword_index = porcupine.process(pcm)
if keyword_index >= 0 and not is_recording: if keyword_index >= 0 and not is_recording:
logger.info("🦊 Wake word detected: 'Hey Vivi'!") logger.info("🦊 Wake word detected: 'Hey Vivi'! (from %s ear)", side)
state.wake_count += 1 state.wake_count += 1
state.last_wake_time = time.time() state.last_wake_time = time.time()
recording_side = side
leds_wakeup() leds_wakeup()
time.sleep(0.2) time.sleep(0.2)
leds_listening() leds_listening()
is_recording = True is_recording = True
state.recording = True state.recording = True
recording_buffer = [] recording_buffer = []
silence_count = 0 silence_count = 0
logger.info("Recording started...") logger.info("Recording started (using %s ear)...", recording_side)
continue continue
if is_recording: if is_recording:
recording_buffer.append(frame_data) # During recording, use frames from the side that heard the wake word
rec_frame = dual_stream.get_side_frame(recording_side)
if rec_frame:
recording_buffer.append(rec_frame)
# Check VAD (use first 480 samples of the 512 frame) # Check VAD (use first 480 samples of the 512 frame)
vad_data = frame_data[:vad_frame_bytes] vad_data = (rec_frame or frame_data)[:vad_frame_bytes]
try: try:
is_speech = vad.is_speech(vad_data, SAMPLE_RATE) is_speech = vad.is_speech(vad_data, SAMPLE_RATE)
except: except:
is_speech = True # Assume speech on VAD error is_speech = True
if is_speech: if is_speech:
silence_count = 0 silence_count = 0
else: else:
silence_count += 1 silence_count += 1
# Stop conditions # Stop conditions
should_stop = ( should_stop = (
(len(recording_buffer) > 10 and silence_count >= SILENCE_FRAMES) or (len(recording_buffer) > 10 and silence_count >= SILENCE_FRAMES) or
len(recording_buffer) >= MAX_RECORDING_FRAMES len(recording_buffer) >= MAX_RECORDING_FRAMES
) )
if should_stop: if should_stop:
logger.info(f"Recording stopped: {len(recording_buffer)} frames") logger.info(f"Recording stopped: {len(recording_buffer)} frames")
is_recording = False is_recording = False
state.recording = False state.recording = False
leds_processing() leds_processing()
state.processing = True state.processing = True
try: try:
wav_data = audio_to_wav(recording_buffer) wav_data = audio_to_wav(recording_buffer)
transcription = transcribe_sync(wav_data) transcription = transcribe_sync(wav_data)
@@ -346,9 +338,9 @@ def listener_loop():
finally: finally:
state.processing = False state.processing = False
leds_off() leds_off()
recording_buffer = [] recording_buffer = []
except Exception as e: except Exception as e:
logger.error(f"Listener error: {e}") logger.error(f"Listener error: {e}")
state.error = str(e) state.error = str(e)
@@ -396,20 +388,82 @@ def sound_classifier_loop():
logger.info("Sound classifier thread stopped") logger.info("Sound classifier thread stopped")
# ============================================================================
# DoA Polling Thread
# ============================================================================
def doa_poll_loop():
"""Poll Direction of Arrival from both XVF3800 arrays."""
interval = 1.0 / DOA_POLL_HZ
while state.running:
try:
state.doa = xvf_manager.read_both_doa()
except Exception as e:
logger.debug("DoA poll error: %s", e)
time.sleep(interval)
def doa_to_gaze() -> Optional[tuple[int, int]]:
"""Convert the active side's DoA angle to gaze coordinates for the eye service."""
doa = state.doa
side = state.active_side
if not doa or side not in doa or doa[side] is None:
return None
if not doa[side].get("vad"):
return None
import math
angle = doa[side]["angle"]
rad = math.radians(angle)
x = int(127 - 80 * math.sin(rad))
y = int(127 - 40 * math.cos(rad))
return max(0, min(255, x)), max(0, min(255, y))
# ============================================================================ # ============================================================================
# FastAPI # FastAPI
# ============================================================================ # ============================================================================
app = FastAPI(title="HeadMic", description="Vixy's Ears 🦊👂") app = FastAPI(title="HeadMic", description="Vixy's Ears 🦊👂 (Dual XVF3800)")
@app.on_event("startup") @app.on_event("startup")
async def startup(): async def startup():
global sound_classifier, sound_ring_buffer, speaker_recognizer global sound_classifier, sound_ring_buffer, speaker_recognizer, dual_stream, LEDS_AVAILABLE
state.running = True state.running = True
# Init sound classifier (optional — graceful if model missing) # --- XVF3800 setup ---
cfg = load_config()
ears_cfg = cfg.get("ears", {})
if ears_cfg.get("left") and ears_cfg.get("right"):
xvf_manager.set_serial_mapping(
ears_cfg["left"]["usb_serial"],
ears_cfg["right"]["usb_serial"]
)
xvf_manager.assign()
LEDS_AVAILABLE = bool(xvf_manager.left or xvf_manager.right)
# Resolve ALSA devices
alsa = xvf_manager.get_alsa_devices()
left_dev = alsa.get("left")
right_dev = alsa.get("right")
if not left_dev:
logger.error("No left ear ALSA device found! Check USB connections and firmware.")
state.error = "No left ear audio device"
else:
logger.info("Left ear ALSA: %s", left_dev)
if right_dev:
logger.info("Right ear ALSA: %s", right_dev)
else:
logger.warning("Right ear ALSA device not found — running with left ear only")
# --- Dual audio stream ---
from audio_stream import DualAudioStream
dual_stream = DualAudioStream(left_dev or "plughw:0,0", right_dev)
dual_stream.start()
# --- Sound classifier (optional) ---
model_dir = Path(__file__).parent / "models" model_dir = Path(__file__).parent / "models"
model_path = model_dir / "yamnet.tflite" model_path = model_dir / "yamnet.tflite"
class_map_path = model_dir / "yamnet_class_map.csv" class_map_path = model_dir / "yamnet_class_map.csv"
@@ -417,7 +471,6 @@ async def startup():
try: try:
from sound_id import SoundClassifier from sound_id import SoundClassifier
sound_classifier = SoundClassifier(str(model_path), str(class_map_path)) sound_classifier = SoundClassifier(str(model_path), str(class_map_path))
# 31 frames of 512 samples = ~0.99s at 16kHz
sound_ring_buffer = collections.deque(maxlen=31) sound_ring_buffer = collections.deque(maxlen=31)
state.sound_classification_enabled = True state.sound_classification_enabled = True
logger.info("Sound classification enabled (YAMNet)") logger.info("Sound classification enabled (YAMNet)")
@@ -429,7 +482,7 @@ async def startup():
else: else:
logger.info("Sound classification models not found, skipping") logger.info("Sound classification models not found, skipping")
# Init speaker recognizer (optional — graceful if resemblyzer not installed) # --- Speaker recognizer (optional) ---
try: try:
from speaker_id import SpeakerRecognizer from speaker_id import SpeakerRecognizer
db_path = Path(__file__).parent / "voices.db" db_path = Path(__file__).parent / "voices.db"
@@ -439,22 +492,32 @@ async def startup():
except Exception as e: except Exception as e:
logger.warning("Speaker recognition unavailable: %s", e) logger.warning("Speaker recognition unavailable: %s", e)
# --- DoA polling ---
if xvf_manager.left or xvf_manager.right:
threading.Thread(target=doa_poll_loop, daemon=True).start()
logger.info("DoA polling started at %d Hz", DOA_POLL_HZ)
# --- Main listener ---
thread = threading.Thread(target=listener_loop, daemon=True) thread = threading.Thread(target=listener_loop, daemon=True)
thread.start() thread.start()
logger.info("HeadMic started") logger.info("HeadMic started (dual XVF3800)")
@app.on_event("shutdown") @app.on_event("shutdown")
async def shutdown(): async def shutdown():
state.running = False state.running = False
leds_off() leds_off()
if dual_stream:
dual_stream.stop()
# --- Info endpoints ---
@app.get("/") @app.get("/")
async def root(): async def root():
return { return {
"service": "HeadMic", "service": "HeadMic",
"description": "Vixy's Ears 🦊👂", "description": "Vixy's Ears 🦊👂 (Dual XVF3800)",
"wake_word": "Hey Vivi" "wake_word": "Hey Vivi"
} }
@@ -469,6 +532,7 @@ async def health():
"wake_count": state.wake_count, "wake_count": state.wake_count,
"sound_classification_enabled": state.sound_classification_enabled, "sound_classification_enabled": state.sound_classification_enabled,
"speaker_recognition_enabled": state.speaker_recognition_enabled, "speaker_recognition_enabled": state.speaker_recognition_enabled,
"active_side": state.active_side,
"error": state.error "error": state.error
} }
@@ -484,6 +548,7 @@ async def status():
"wake_count": state.wake_count, "wake_count": state.wake_count,
"audio_scene": state.audio_scene["dominant_category"] if state.audio_scene else None, "audio_scene": state.audio_scene["dominant_category"] if state.audio_scene else None,
"recognized_speaker": state.recognized_speaker, "recognized_speaker": state.recognized_speaker,
"active_side": state.active_side,
"error": state.error "error": state.error
} }
@@ -496,6 +561,41 @@ async def last():
} }
# --- DoA endpoints ---
@app.get("/doa")
async def doa():
"""Direction of Arrival from both mic arrays."""
return {
"doa": state.doa,
"active_side": state.active_side,
"gaze": doa_to_gaze(),
}
# --- Device info ---
@app.get("/devices")
async def devices():
"""Status of both XVF3800 arrays."""
alsa = xvf_manager.get_alsa_devices()
return {
"left": {
"connected": bool(xvf_manager.left),
"serial": xvf_manager.left.serial if xvf_manager.left else None,
"alsa": alsa.get("left"),
},
"right": {
"connected": bool(xvf_manager.right),
"serial": xvf_manager.right.serial if xvf_manager.right else None,
"alsa": alsa.get("right"),
},
"active_side": state.active_side,
}
# --- Sound endpoints ---
@app.get("/sounds") @app.get("/sounds")
async def sounds(): async def sounds():
"""Current audio scene classification.""" """Current audio scene classification."""
@@ -521,9 +621,7 @@ async def sounds_history(seconds: int = 30):
return {"history": sound_classifier.get_history(seconds)} return {"history": sound_classifier.get_history(seconds)}
# ============================================================================ # --- Speaker endpoints ---
# Speaker Endpoints
# ============================================================================
@app.post("/speakers/enroll") @app.post("/speakers/enroll")
async def enroll_speaker(name: str = Form(...), audio: UploadFile = File(...)): async def enroll_speaker(name: str = Form(...), audio: UploadFile = File(...)):
@@ -532,7 +630,6 @@ async def enroll_speaker(name: str = Form(...), audio: UploadFile = File(...)):
raise HTTPException(status_code=503, detail="Speaker recognition not available") raise HTTPException(status_code=503, detail="Speaker recognition not available")
audio_bytes = await audio.read() audio_bytes = await audio.read()
# Convert to float32: try raw int16 first, fall back to wav
try: try:
import wave as _wave import wave as _wave
wav_io = io.BytesIO(audio_bytes) wav_io = io.BytesIO(audio_bytes)
@@ -540,7 +637,6 @@ async def enroll_speaker(name: str = Form(...), audio: UploadFile = File(...)):
raw = wf.readframes(wf.getnframes()) raw = wf.readframes(wf.getnframes())
audio_f32 = np.frombuffer(raw, dtype=np.int16).astype(np.float32) / 32768.0 audio_f32 = np.frombuffer(raw, dtype=np.int16).astype(np.float32) / 32768.0
except Exception: except Exception:
# Assume raw int16 PCM at 16kHz
audio_f32 = np.frombuffer(audio_bytes, dtype=np.int16).astype(np.float32) / 32768.0 audio_f32 = np.frombuffer(audio_bytes, dtype=np.int16).astype(np.float32) / 32768.0
try: try:
@@ -553,7 +649,7 @@ async def enroll_speaker(name: str = Form(...), audio: UploadFile = File(...)):
@app.post("/speakers/enroll-from-mic") @app.post("/speakers/enroll-from-mic")
async def enroll_from_mic(name: str): async def enroll_from_mic(name: str):
"""Record from live mic for 5 seconds and enroll speaker.""" """Record from live mic for 5 seconds and enroll speaker."""
global enrollment_buffer, enrollment_name, enrollment_event global enrollment_buffer, enrollment_name
if speaker_recognizer is None: if speaker_recognizer is None:
raise HTTPException(status_code=503, detail="Speaker recognition not available") raise HTTPException(status_code=503, detail="Speaker recognition not available")
@@ -567,10 +663,8 @@ async def enroll_from_mic(name: str):
leds_enrolling() leds_enrolling()
logger.info("Enrollment started for '%s' — recording 5 seconds", name) logger.info("Enrollment started for '%s' — recording 5 seconds", name)
# Wait 5 seconds for audio, non-blocking to the event loop
await asyncio.sleep(5.0) await asyncio.sleep(5.0)
# Collect what we have
frames = enrollment_buffer frames = enrollment_buffer
enrollment_buffer = None enrollment_buffer = None
enrollment_name = None enrollment_name = None
@@ -611,6 +705,25 @@ async def delete_speaker(name: str):
return {"deleted": name, "samples_removed": removed} return {"deleted": name, "samples_removed": removed}
# ============================================================================
# CLI
# ============================================================================
if __name__ == "__main__": if __name__ == "__main__":
import sys
if "--learn" in sys.argv:
logging.basicConfig(level=logging.INFO)
info = learn_devices()
if not info.get("left") or not info.get("right"):
print("[HEADMIC] Need 2 XVF3800 arrays connected for --learn")
sys.exit(1)
cfg = load_config()
cfg["ears"] = info
save_config(cfg)
print(f"[HEADMIC] Learned ear config → {CONFIG_PATH}")
print(json.dumps(info, indent=2))
sys.exit(0)
import uvicorn import uvicorn
uvicorn.run(app, host="0.0.0.0", port=8446) uvicorn.run(app, host="0.0.0.0", port=8446)

View File

@@ -1,5 +1,5 @@
[Unit] [Unit]
Description=HeadMic - Vixy's Ears Service Description=HeadMic - Vixy's Ears Service (Dual XVF3800)
After=network.target sound.target After=network.target sound.target
[Service] [Service]

View File

@@ -15,9 +15,8 @@ pvporcupine>=3.0.0
# HTTP client for EarTail # HTTP client for EarTail
httpx>=0.25.0 httpx>=0.25.0
# ReSpeaker LED control # XVF3800 USB control (DoA + LEDs)
# pixel_ring - install from: https://github.com/respeaker/pixel_ring pyusb>=1.2.0
# pip install pixel_ring
# Pydantic for models # Pydantic for models
pydantic>=2.0.0 pydantic>=2.0.0

283
xvf3800.py Normal file
View File

@@ -0,0 +1,283 @@
"""
XVF3800 USB Control — DoA, LEDs, device identification.
Each ReSpeaker XVF3800 4-Mic Array is controlled via USB vendor commands (PyUSB).
Replaces the old pixel_ring / Tuning interface used by the XVF3000.
Reference: https://github.com/respeaker/reSpeaker_XVF3800_USB_4MIC_ARRAY/blob/master/python_control/xvf_host.py
"""
import logging
import struct
import time
from typing import Optional
try:
import usb.core
import usb.util
PYUSB_AVAILABLE = True
except ImportError:
PYUSB_AVAILABLE = False
logger = logging.getLogger("headmic.xvf3800")
VID = 0x2886
PID = 0x001A
# USB vendor control transfer parameters
CTRL_REQUEST_TYPE_OUT = usb.util.CTRL_OUT | usb.util.CTRL_TYPE_VENDOR | usb.util.CTRL_RECIPIENT_DEVICE if PYUSB_AVAILABLE else 0
CTRL_REQUEST_TYPE_IN = usb.util.CTRL_IN | usb.util.CTRL_TYPE_VENDOR | usb.util.CTRL_RECIPIENT_DEVICE if PYUSB_AVAILABLE else 0
# Resource IDs
GPO_RESID = 20
# Parameter indices (within resource)
DOA_VALUE_IDX = 18 # returns (angle 0-359, vad 0/1)
LED_EFFECT_IDX = 0 # 0=off, 1=breath, 2=rainbow, 3=solid, 4=doa, 5=ring
LED_BRIGHTNESS_IDX = 1
LED_COLOR_IDX = 3 # single uint32 color
LED_RING_COLOR_IDX = 5 # 12 × uint32
class XVF3800:
    """Control a single ReSpeaker XVF3800 via USB vendor commands."""

    def __init__(self, usb_device):
        self.dev = usb_device
        self.serial = usb_device.serial_number or "unknown"
        self.bus = usb_device.bus
        self.address = usb_device.address

    def _read(self, resid: int, param_idx: int, length: int) -> bytes:
        """Read a parameter via an IN vendor control transfer; b"" on failure."""
        selector = (resid << 8) | param_idx
        try:
            raw = self.dev.ctrl_transfer(
                CTRL_REQUEST_TYPE_IN, 0, selector, 0, length, timeout=1000)
            return bytes(raw)
        except Exception as exc:
            logger.debug("USB read error (resid=%d, param=%d): %s", resid, param_idx, exc)
            return b""

    def _write(self, resid: int, param_idx: int, data: bytes):
        """Write a parameter via an OUT vendor control transfer (best effort)."""
        selector = (resid << 8) | param_idx
        try:
            self.dev.ctrl_transfer(
                CTRL_REQUEST_TYPE_OUT, 0, selector, 0, data, timeout=1000)
        except Exception as exc:
            logger.debug("USB write error (resid=%d, param=%d): %s", resid, param_idx, exc)

    # --- DoA ---

    def read_doa(self) -> tuple[int, bool]:
        """Read Direction of Arrival. Returns (angle 0-359, vad True/False)."""
        payload = self._read(GPO_RESID, DOA_VALUE_IDX, 4)
        if len(payload) >= 4:
            angle, vad_flag = struct.unpack_from("<HH", payload)
            return angle % 360, bool(vad_flag)
        return 0, False

    # --- LEDs ---

    def _set_effect(self, effect: int):
        # Effect select is written last so any color/brightness set first applies.
        self._write(GPO_RESID, LED_EFFECT_IDX, struct.pack("<I", effect))

    def led_off(self):
        self._set_effect(0)

    def led_solid(self, color: int):
        """Solid color on all LEDs. color is 0xRRGGBB."""
        self._write(GPO_RESID, LED_COLOR_IDX, struct.pack("<I", color))
        self._set_effect(3)

    def led_breath(self, color: int, brightness: int = 128):
        """Breathing effect."""
        self._write(GPO_RESID, LED_COLOR_IDX, struct.pack("<I", color))
        self._write(GPO_RESID, LED_BRIGHTNESS_IDX, struct.pack("<I", brightness))
        self._set_effect(1)

    def led_doa(self, base_color: int = 0x003333, doa_color: int = 0x00FFFF):
        """DoA indicator mode — shows beam direction on LED ring."""
        # LED_DOA_COLOR (param 4) takes two uint32 values: base + indicator
        self._write(GPO_RESID, 4, struct.pack("<II", base_color, doa_color))
        self._set_effect(4)

    def led_rainbow(self, brightness: int = 128):
        self._write(GPO_RESID, LED_BRIGHTNESS_IDX, struct.pack("<I", brightness))
        self._set_effect(2)
class XVF3800Manager:
    """Manage two XVF3800 arrays, identified by USB serial number.

    Typical use: set_serial_mapping(...) from config, then assign() to bind
    the discovered devices to self.left / self.right.
    """

    def __init__(self):
        self.left: Optional[XVF3800] = None   # array assigned to the left ear
        self.right: Optional[XVF3800] = None  # array assigned to the right ear
        self._serials: dict[str, str] = {}  # {"left": "SN...", "right": "SN..."}

    def set_serial_mapping(self, left_serial: str, right_serial: str):
        """Pin left/right assignment by USB serial number."""
        self._serials = {"left": left_serial, "right": right_serial}

    def discover(self) -> list[XVF3800]:
        """Find all connected XVF3800 devices.

        Returns an empty list (with a warning) when pyusb is unavailable.
        """
        if not PYUSB_AVAILABLE:
            logger.warning("pyusb not installed — XVF3800 control disabled")
            return []
        devices = []
        for dev in usb.core.find(idVendor=VID, idProduct=PID, find_all=True):
            try:
                devices.append(XVF3800(dev))
            except Exception as e:
                # Keep scanning: one faulty device must not hide the other array.
                logger.warning("Failed to init XVF3800 at bus %d addr %d: %s",
                               dev.bus, dev.address, e)
        return devices

    def assign(self):
        """Discover devices and assign left/right based on serial mapping.

        Without a serial mapping, falls back to (bus, address) order — unstable
        across replug, but sufficient for the --learn workflow.
        """
        devices = self.discover()
        logger.info("Found %d XVF3800 device(s): %s",
                    len(devices), [d.serial for d in devices])
        if self._serials:
            for dev in devices:
                if dev.serial == self._serials.get("left"):
                    self.left = dev
                elif dev.serial == self._serials.get("right"):
                    self.right = dev
            if not self.left:
                logger.warning("Left XVF3800 (serial %s) not found", self._serials.get("left"))
            if not self.right:
                logger.warning("Right XVF3800 (serial %s) not found", self._serials.get("right"))
        else:
            # No serial mapping — assign by bus address order (unstable, but works for --learn)
            devices.sort(key=lambda d: (d.bus, d.address))
            if len(devices) >= 1:
                self.left = devices[0]
            if len(devices) >= 2:
                self.right = devices[1]
        if self.left:
            logger.info("Left ear: serial=%s bus=%d addr=%d", self.left.serial, self.left.bus, self.left.address)
        if self.right:
            logger.info("Right ear: serial=%s bus=%d addr=%d", self.right.serial, self.right.bus, self.right.address)

    @staticmethod
    def _read_sysfs(path: str) -> Optional[str]:
        """Read and strip one sysfs attribute; None if missing/unreadable.

        Uses a context manager so handles are closed promptly (the previous
        version left `open(...)` results to the garbage collector).
        """
        try:
            with open(path) as f:
                return f.read().strip()
        except OSError:
            return None

    def serial_to_alsa(self, serial: str) -> Optional[str]:
        """Find the ALSA card name for a device with a given USB serial number.

        Walks /sys/class/sound/card*/device, resolves each card's USB parent
        and compares its "serial" attribute. Returns the symbolic card id
        (preferred) or the numeric card index, or None when no card matches.
        """
        import os, glob
        for card_dir in sorted(glob.glob("/sys/class/sound/card*")):
            card_num = os.path.basename(card_dir).replace("card", "")
            device_path = os.path.join(card_dir, "device")
            if not os.path.islink(device_path):
                continue
            usb_path = os.path.realpath(device_path)
            # The card's device node is a USB *interface*; the "serial"
            # attribute lives on a parent USB device, one or two levels up.
            serial_file = os.path.join(usb_path, "..", "serial")
            if not os.path.exists(serial_file):
                serial_file = os.path.join(usb_path, "..", "..", "serial")
            if self._read_sysfs(serial_file) != serial:
                continue
            # Prefer the symbolic ALSA id; fall back to the numeric index.
            card_id = self._read_sysfs(os.path.join(card_dir, "id"))
            return card_id if card_id else card_num
        return None

    def get_alsa_devices(self) -> dict[str, Optional[str]]:
        """Return {"left": "plughw:Array,0", "right": "plughw:Array_1,0"} or similar.

        A side with no assigned device (or no matching ALSA card) maps to None.
        """
        result = {}
        for label, dev in [("left", self.left), ("right", self.right)]:
            if dev:
                card_name = self.serial_to_alsa(dev.serial)
                result[label] = f"plughw:{card_name},0" if card_name else None
            else:
                result[label] = None
        return result

    # --- Convenience: control both arrays ---
    def all_leds_off(self):
        """Turn LEDs off on every assigned array."""
        for dev in [self.left, self.right]:
            if dev:
                dev.led_off()

    def all_leds_solid(self, color: int):
        """Solid 0xRRGGBB color on every assigned array."""
        for dev in [self.left, self.right]:
            if dev:
                dev.led_solid(color)

    def all_leds_breath(self, color: int, brightness: int = 128):
        """Breathing effect on every assigned array."""
        for dev in [self.left, self.right]:
            if dev:
                dev.led_breath(color, brightness)

    def all_leds_doa(self):
        """DoA-indicator LED mode (default colors) on every assigned array."""
        for dev in [self.left, self.right]:
            if dev:
                dev.led_doa()

    def read_both_doa(self) -> dict:
        """Read DoA from both arrays.

        Returns {"left": {"angle": int, "vad": bool} | None, "right": ...}.
        """
        result = {}
        for label, dev in [("left", self.left), ("right", self.right)]:
            if dev:
                angle, vad = dev.read_doa()
                result[label] = {"angle": angle, "vad": vad}
            else:
                result[label] = None
        return result
def learn_devices() -> dict:
    """Discover connected XVF3800 devices and return their serials for config."""
    mgr = XVF3800Manager()
    mgr.assign()
    learned: dict = {}
    # Same shape for both sides: a side is present only when a device was found,
    # and "alsa_card" is included only when the sysfs lookup succeeds.
    for label, dev in (("left", mgr.left), ("right", mgr.right)):
        if dev is None:
            continue
        entry = {"usb_serial": dev.serial}
        card = mgr.serial_to_alsa(dev.serial)
        if card:
            entry["alsa_card"] = card
        learned[label] = entry
    return learned
# === CLI test ===
if __name__ == "__main__":
    import sys
    logging.basicConfig(level=logging.INFO)
    if "--learn" in sys.argv:
        # Print discovered serials/cards as JSON suitable for pasting into config.
        import json
        info = learn_devices()
        print(json.dumps(info, indent=2))
        sys.exit(0)
    if "--test-doa" in sys.argv:
        mgr = XVF3800Manager()
        mgr.assign()
        for _ in range(50):
            doa = mgr.read_both_doa()
            # flush=True: "\r" does not trigger a line-buffered flush, so without
            # it the live readout may not appear until the loop finishes.
            print(f"DoA: left={doa.get('left')} right={doa.get('right')}", end="\r", flush=True)
            time.sleep(0.1)
        print()
        sys.exit(0)
    if "--test-leds" in sys.argv:
        mgr = XVF3800Manager()
        mgr.assign()
        # Cycle a few solid colors on both arrays, then switch everything off.
        for color, name in [(0xFF0000, "red"), (0x00FF00, "green"), (0x0000FF, "blue"),
                            (0x00FFFF, "cyan"), (0x9400D3, "purple")]:
            print(f"  {name}")
            mgr.all_leds_solid(color)
            time.sleep(1)
        mgr.all_leds_off()
        sys.exit(0)
    # No recognized flag: say so instead of exiting silently.
    print("usage: python xvf3800.py [--learn | --test-doa | --test-leds]")