Add cocktail party spatial filtering (#7)

audio_stream.py: Added focus_side property. When set, the stream
yields from the focused side regardless of energy (attention lock).
When None, falls back to energy-based auto selection.

multi_speaker.py: When beams lock onto 2 speakers, sets audio focus
to the target speaker's side. Auto-switches the target when the current
target goes silent and the other speaker starts talking. Manual focus
is also available via the API.

headmic.py: New endpoint POST /speakers/focus?speaker=0|1 to manually
switch attention. /speakers/tracked now shows is_target, target_speaker,
and audio_focus fields.

The cocktail party effect: when 2 people are talking, the audio feed
to Porcupine/VAD/transcription comes from the target speaker's direction,
suppressing the other. XVF3800 beam gating silences the non-speaking beam,
and audio_stream focus locks the ear facing the target.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Alex
2026-04-12 21:47:30 -05:00
parent 38d21ef53c
commit 0705b3818b
3 changed files with 64 additions and 8 deletions

View File

@@ -588,8 +588,8 @@ async def startup():
# --- Multi-speaker tracking ---
if xvf_manager.left or xvf_manager.right:
from multi_speaker import MultiSpeakerTracker
multi_speaker = MultiSpeakerTracker(xvf_manager)
logger.info("Multi-speaker tracking enabled (2 beams per array)")
multi_speaker = MultiSpeakerTracker(xvf_manager, audio_stream=dual_stream)
logger.info("Multi-speaker tracking enabled (2 beams per array, cocktail party filtering)")
# --- Binaural recording ---
if os.environ.get("BINAURAL_RECORD", "").lower() in ("1", "true", "yes"):
@@ -692,6 +692,19 @@ async def tracked_speakers():
return state.multi_speaker
@app.post("/speakers/focus")
async def focus_speaker(speaker: int = 0):
    """Switch attention to a specific tracked speaker (0 or 1).
    In cocktail party mode, the focused speaker's audio feeds wake word + transcription.

    Responds with HTTP 503 when no multi-speaker tracker is available and
    with HTTP 400 when ``speaker`` is anything other than 0 or 1.
    """
    # Read the module-level tracker once; it is falsy when tracking was
    # never set up (presumably no XVF array was detected at startup).
    tracker = multi_speaker
    if not tracker:
        raise HTTPException(status_code=503, detail="Multi-speaker tracking not available")

    # Only two speaker slots are addressable through this endpoint.
    if speaker != 0 and speaker != 1:
        raise HTTPException(status_code=400, detail="Speaker index must be 0 or 1")

    # Record the new target first, then ask the tracker to refresh the
    # audio focus so it follows the newly selected speaker.
    tracker.target_speaker_idx = speaker
    tracker._update_audio_focus()

    response = {"ok": True, "target_speaker": speaker}
    return response
# --- Spatial scene ---
@app.get("/scene")