Add cocktail party spatial filtering (#7)
audio_stream.py: Added focus_side property. When set, the stream yields from the focused side regardless of energy (attention lock). When None, falls back to energy-based auto selection.

multi_speaker.py: When beams lock onto 2 speakers, sets audio focus to the target speaker's side. Auto-switches target when the current target goes silent and the other starts talking. Manual focus via API.

headmic.py: New endpoint POST /speakers/focus?speaker=0|1 to manually switch attention. /speakers/tracked now shows is_target, target_speaker, and audio_focus fields.

The cocktail party effect: when 2 people are talking, the audio feed to Porcupine/VAD/transcription comes from the target speaker's direction, suppressing the other. XVF3800 beam gating silences the non-speaking beam, and audio_stream focus locks the ear facing the target.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
17
headmic.py
17
headmic.py
@@ -588,8 +588,8 @@ async def startup():
|
||||
# --- Multi-speaker tracking ---
|
||||
if xvf_manager.left or xvf_manager.right:
|
||||
from multi_speaker import MultiSpeakerTracker
|
||||
multi_speaker = MultiSpeakerTracker(xvf_manager)
|
||||
logger.info("Multi-speaker tracking enabled (2 beams per array)")
|
||||
multi_speaker = MultiSpeakerTracker(xvf_manager, audio_stream=dual_stream)
|
||||
logger.info("Multi-speaker tracking enabled (2 beams per array, cocktail party filtering)")
|
||||
|
||||
# --- Binaural recording ---
|
||||
if os.environ.get("BINAURAL_RECORD", "").lower() in ("1", "true", "yes"):
|
||||
@@ -692,6 +692,19 @@ async def tracked_speakers():
|
||||
return state.multi_speaker
|
||||
|
||||
|
||||
@app.post("/speakers/focus")
async def focus_speaker(speaker: int = 0):
    """Switch attention to a specific tracked speaker (0 or 1).

    In cocktail party mode, the focused speaker's audio feeds wake word + transcription.

    The request is rejected with HTTP 503 when no multi-speaker tracker is
    available, and with HTTP 400 when ``speaker`` is anything other than 0 or 1.
    On success the tracker's ``target_speaker_idx`` is set to ``speaker`` and
    its audio focus is refreshed before the chosen index is echoed back.
    """
    tracker = multi_speaker

    # Availability is checked before the argument, so a missing tracker
    # always reports 503 even if the index is also invalid.
    if not tracker:
        raise HTTPException(
            status_code=503,
            detail="Multi-speaker tracking not available",
        )

    if speaker != 0 and speaker != 1:
        raise HTTPException(
            status_code=400,
            detail="Speaker index must be 0 or 1",
        )

    # Record the new target first, then have the tracker re-apply its audio
    # focus so the change takes effect immediately.
    tracker.target_speaker_idx = speaker
    tracker._update_audio_focus()

    return {"ok": True, "target_speaker": speaker}
|
||||
|
||||
|
||||
# --- Spatial scene ---
|
||||
|
||||
@app.get("/scene")
|
||||
|
||||
Reference in New Issue
Block a user