spatial_scene.py: Builds a persistent map of where each sound category
usually comes from (30° angle bins, circular mean). Detects anomalies
when a sound appears from an unusual direction (90°+ deviation).
Scene map persists to ~/.vixy/scene_map.json across restarts.
headmic.py: Feeds each classified sound, together with its spatial position, into the scene tracker.
New endpoints:
/scene — learned scene summary + last anomaly
/scene/events — recent events with what+where+when
/scene/heatmap — per-category angular distribution (for visualization)
Example: after running for a day, /scene might show:
{"speech": {"usual_angle": 15.0, "observations": 847},
"music": {"usual_angle": 270.0, "observations": 312}}
If speech then arrives from 270° (where music usually is), it is reported as a spatial anomaly.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
54
headmic.py
54
headmic.py
@@ -160,6 +160,7 @@ class ServiceState:
|
||||
self.active_side: str = "left" # which mic array is currently active
|
||||
self.doa: dict = {} # latest DoA from both arrays
|
||||
self.spatial: Optional[dict] = None # triangulated position + gaze
|
||||
self.last_anomaly: Optional[dict] = None # last spatial anomaly detected
|
||||
|
||||
state = ServiceState()
|
||||
|
||||
@@ -173,6 +174,9 @@ enrollment_buffer = None # list of frame bytes, set during enrollment
|
||||
|
||||
# Binaural recorder
|
||||
binaural_recorder = None
|
||||
|
||||
# Spatial scene
|
||||
spatial_scene = None
|
||||
enrollment_name = None
|
||||
|
||||
# Audio stream
|
||||
@@ -385,6 +389,18 @@ def sound_classifier_loop():
|
||||
audio_f32 = result.pop("audio_float32", None)
|
||||
state.audio_scene = result
|
||||
|
||||
# Spatial scene: log classified sound with its position
|
||||
if spatial_scene and state.spatial and result.get("category"):
|
||||
top = result.get("top_classes", [{}])[0] if result.get("top_classes") else {}
|
||||
anomaly = spatial_scene.observe(
|
||||
category=result["category"],
|
||||
top_class=top.get("name", result["category"]),
|
||||
score=top.get("score", 0),
|
||||
spatial=state.spatial,
|
||||
)
|
||||
if anomaly:
|
||||
state.last_anomaly = anomaly
|
||||
|
||||
# Speaker identification: run when speech detected
|
||||
if speaker_recognizer and result["category"] == "speech" and audio_f32 is not None:
|
||||
try:
|
||||
@@ -466,7 +482,7 @@ app = FastAPI(title="HeadMic", description="Vixy's Ears 🦊👂 (Dual XVF3800)"
|
||||
|
||||
@app.on_event("startup")
|
||||
async def startup():
|
||||
global sound_classifier, sound_ring_buffer, speaker_recognizer, dual_stream, LEDS_AVAILABLE, spatial_tracker, binaural_recorder
|
||||
global sound_classifier, sound_ring_buffer, speaker_recognizer, dual_stream, LEDS_AVAILABLE, spatial_tracker, binaural_recorder, spatial_scene
|
||||
|
||||
state.running = True
|
||||
|
||||
@@ -542,6 +558,11 @@ async def startup():
|
||||
logger.info("Spatial tracking started (%d Hz, %.0fmm baseline, pushing gaze to %s)",
|
||||
DOA_POLL_HZ, array_sep, EYE_SERVICE_URL)
|
||||
|
||||
# --- Spatial scene mapping ---
|
||||
from spatial_scene import SpatialScene
|
||||
spatial_scene = SpatialScene()
|
||||
spatial_scene.start()
|
||||
|
||||
# --- Binaural recording ---
|
||||
if os.environ.get("BINAURAL_RECORD", "").lower() in ("1", "true", "yes"):
|
||||
from binaural_recorder import BinauralRecorder
|
||||
@@ -561,6 +582,8 @@ async def startup():
|
||||
async def shutdown():
|
||||
state.running = False
|
||||
leds_off()
|
||||
if spatial_scene:
|
||||
spatial_scene.stop()
|
||||
if binaural_recorder:
|
||||
binaural_recorder.stop()
|
||||
if dual_stream:
|
||||
@@ -629,6 +652,35 @@ async def doa():
|
||||
}
|
||||
|
||||
|
||||
# --- Spatial scene ---
|
||||
|
||||
@app.get("/scene")
async def scene():
    """Learned spatial audio scene — where each sound type usually comes from."""
    # The tracker global stays falsy until startup has created it; in that
    # case answer with an empty payload that has the same shape.
    if spatial_scene:
        summary = spatial_scene.get_scene_summary()
        return {"scene": summary, "last_anomaly": state.last_anomaly}
    return {"scene": {}, "last_anomaly": None}
|
||||
|
||||
|
||||
@app.get("/scene/events")
async def scene_events(seconds: int = 30, category: str = None):
    """Recent sound events with spatial information."""
    # No tracker means nothing has been recorded: report an empty list.
    # Otherwise forward both query parameters to the tracker unchanged.
    events = spatial_scene.get_recent_events(seconds, category) if spatial_scene else []
    return {"events": events}
|
||||
|
||||
|
||||
@app.get("/scene/heatmap")
async def scene_heatmap():
    """Observation counts per angle bin per category — for visualization."""
    # Fall back to an empty mapping while the tracker has not been created.
    heatmap = spatial_scene.get_spatial_heatmap() if spatial_scene else {}
    return {"heatmap": heatmap}
|
||||
|
||||
|
||||
# --- Binaural recording ---
|
||||
|
||||
@app.get("/recording")
|
||||
|
||||
Reference in New Issue
Block a user