Add multi-speaker tracking with beam steering (#5)

multi_speaker.py: Tracks up to 2 speakers simultaneously. When 2 distinct
DoA angles are detected (30°+ apart) for >1s, locks the XVF3800's fixed
beams onto each speaker. Releases back to auto mode when only 1 speaker
remains (3s timeout). Manages beam gating so only the speaking beam is active.

xvf3800.py: Added beam steering commands — enable_fixed_beams(),
set_beam_azimuths(), enable_beam_gating(), read_all_beams().
Manager gets steer_beams() and release_beams() convenience methods.

headmic.py: Wire multi-speaker tracker into DoA loop. New endpoint:
GET /speakers/tracked — current speaker positions, beam mode, lock state.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Alex
2026-04-12 21:37:49 -05:00
parent 02d3ac3816
commit 38d21ef53c
3 changed files with 280 additions and 1 deletion

View File

@@ -161,6 +161,7 @@ class ServiceState:
self.doa: dict = {} # latest DoA from both arrays
self.spatial: Optional[dict] = None # triangulated position + gaze
self.last_anomaly: Optional[dict] = None # last spatial anomaly detected
self.multi_speaker: Optional[dict] = None # multi-speaker tracking state
state = ServiceState()
@@ -177,6 +178,9 @@ binaural_recorder = None
# Spatial scene
spatial_scene = None
# Multi-speaker tracker
multi_speaker = None
enrollment_name = None
# Audio stream
@@ -439,6 +443,10 @@ def doa_track_loop():
try:
state.doa = xvf_manager.read_both_doa()
# Multi-speaker tracking (beam steering)
if multi_speaker:
state.multi_speaker = multi_speaker.update(state.doa)
if spatial_tracker and dual_stream:
left_energy = dual_stream.left.get_energy() if dual_stream.left else 0.0
right_energy = dual_stream.right.get_energy() if dual_stream.right else 0.0
@@ -496,7 +504,7 @@ app = FastAPI(title="HeadMic", description="Vixy's Ears 🦊👂 (Dual XVF3800)"
@app.on_event("startup")
async def startup():
global sound_classifier, sound_ring_buffer, speaker_recognizer, dual_stream, LEDS_AVAILABLE, spatial_tracker, binaural_recorder, spatial_scene
global sound_classifier, sound_ring_buffer, speaker_recognizer, dual_stream, LEDS_AVAILABLE, spatial_tracker, binaural_recorder, spatial_scene, multi_speaker
state.running = True
@@ -577,6 +585,12 @@ async def startup():
spatial_scene = SpatialScene()
spatial_scene.start()
# --- Multi-speaker tracking ---
if xvf_manager.left or xvf_manager.right:
from multi_speaker import MultiSpeakerTracker
multi_speaker = MultiSpeakerTracker(xvf_manager)
logger.info("Multi-speaker tracking enabled (2 beams per array)")
# --- Binaural recording ---
if os.environ.get("BINAURAL_RECORD", "").lower() in ("1", "true", "yes"):
from binaural_recorder import BinauralRecorder
@@ -596,6 +610,8 @@ async def startup():
async def shutdown():
state.running = False
leds_off()
if multi_speaker and xvf_manager:
xvf_manager.release_beams()
if spatial_scene:
spatial_scene.stop()
if binaural_recorder:
@@ -666,6 +682,16 @@ async def doa():
}
# --- Multi-speaker ---
@app.get("/speakers/tracked")
async def tracked_speakers():
    """Report the latest multi-speaker tracking snapshot.

    Returns the value currently stored in ``state.multi_speaker``. When that
    value is unset or empty, a placeholder payload is returned instead: no
    speakers, ``beam_mode`` of ``"auto"``, and zeroed counters.
    """
    snapshot = state.multi_speaker
    if snapshot:
        return snapshot
    # Nothing recorded yet: answer with the idle placeholder payload.
    return {"speakers": [], "beam_mode": "auto", "active_count": 0, "total_tracked": 0}
# --- Spatial scene ---
@app.get("/scene")