Add binaural triangulation + smooth gaze tracking

spatial.py: Triangulates sound source position from two DoA angles using
ray intersection. Exponential smoothing prevents jitter. Gaze drifts back
to center after 2s of silence. Converts position (mm) to gaze (0-255).

headmic.py: Replaces simple doa_poll_loop with doa_track_loop that runs
the spatial tracker and pushes gaze to the eye service when the position
changes. Rate-limited to 10 pushes/sec with minimum delta threshold.

/doa endpoint now returns triangulated position + gaze coordinates.
Array separation (175mm) stored in config, overridable.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Alex
2026-04-12 15:12:28 -05:00
parent c41e5bcafa
commit e0a4af031f
2 changed files with 264 additions and 23 deletions

View File

@@ -159,6 +159,7 @@ class ServiceState:
self.enrolling: bool = False
self.active_side: str = "left" # which mic array is currently active
self.doa: dict = {} # latest DoA from both arrays
self.spatial: Optional[dict] = None # triangulated position + gaze
state = ServiceState()
@@ -390,34 +391,58 @@ def sound_classifier_loop():
# ============================================================================
# DoA Polling Thread
# Spatial Tracking + Gaze (DoA → triangulation → eye service)
# ============================================================================
def doa_poll_loop():
"""Poll Direction of Arrival from both XVF3800 arrays."""
from spatial import SpatialTracker

# Tunables are defined first so the state below can be initialised from them
# (referencing GAZE_CENTER before its assignment raises NameError at import).
GAZE_CENTER = 127  # initial "last pushed" gaze value on both axes
GAZE_PUSH_MIN_DELTA = 3  # don't push gaze unless it moved by at least this much
GAZE_PUSH_INTERVAL = 0.1  # max 10 gaze pushes/sec to eye service

# Tracker consulted by doa_track_loop(); while it is None the loop only polls DoA.
# NOTE(review): startup() assigns a SpatialTracker to this name — confirm it
# declares `global spatial_tracker`, otherwise this module-level value stays None.
spatial_tracker: Optional[SpatialTracker] = None

# Last (x, y) gaze handed to _push_gaze(); rebound by doa_track_loop().
_last_gaze_push: tuple[int, int] = (GAZE_CENTER, GAZE_CENTER)
def doa_track_loop():
    """Poll DoA, triangulate, smooth, push gaze to eye service.

    Runs until ``state.running`` becomes false, sleeping ``1 / DOA_POLL_HZ``
    seconds between cycles. Each cycle:

    1. stores ``xvf_manager.read_both_doa()`` in ``state.doa``;
    2. if a ``spatial_tracker`` is set and its ``update()`` returns a truthy
       result, stores that result in ``state.spatial``;
    3. forwards the result's ``gaze_x`` / ``gaze_y`` through ``_push_gaze``
       when either axis moved by at least ``GAZE_PUSH_MIN_DELTA`` since the
       last push and at least ``GAZE_PUSH_INTERVAL`` seconds have elapsed.

    Any exception raised inside a cycle is logged once at debug level and the
    loop carries on with the next cycle.
    """
    global _last_gaze_push
    interval = 1.0 / DOA_POLL_HZ
    last_push_time = 0.0
    while state.running:
        try:
            state.doa = xvf_manager.read_both_doa()
            result = spatial_tracker.update(state.doa) if spatial_tracker else None
            if result:
                state.spatial = result
                gx, gy = result["gaze_x"], result["gaze_y"]
                # Push to eye service if changed enough and not too frequent
                moved = (
                    abs(gx - _last_gaze_push[0]) >= GAZE_PUSH_MIN_DELTA
                    or abs(gy - _last_gaze_push[1]) >= GAZE_PUSH_MIN_DELTA
                )
                now = time.monotonic()
                if moved and now - last_push_time >= GAZE_PUSH_INTERVAL:
                    _push_gaze(gx, gy)
                    _last_gaze_push = (gx, gy)
                    last_push_time = now
        except Exception as e:
            # Single log line per failure; the older "DoA poll error" message
            # duplicated this one and has been dropped.
            logger.debug("DoA/spatial error: %s", e)
        time.sleep(interval)
def doa_to_gaze() -> Optional[tuple[int, int]]:
    """Convert the active side's DoA angle to gaze coordinates for the eye service.

    Reads ``state.doa`` and ``state.active_side``.

    Returns:
        ``(x, y)`` with each component clamped to 0-255, or ``None`` when
        there is no reading for the active side, the reading's ``"vad"``
        flag is falsy, or the reading has no ``"angle"``.
    """
    import math

    doa = state.doa
    side = state.active_side
    reading = doa.get(side) if doa else None
    if reading is None or not reading.get("vad"):
        return None

    angle = reading.get("angle")
    if angle is None:
        # A reading without an angle cannot be mapped; treat it like "no data"
        # instead of raising KeyError.
        return None

    rad = math.radians(angle)  # angle is converted from degrees
    # round() rather than int(): int() truncates toward zero, so float error
    # such as 126.99999999999999 (angle = 180) would land on 126, not 127.
    x = round(127 - 80 * math.sin(rad))
    y = round(127 - 40 * math.cos(rad))
    return max(0, min(255, x)), max(0, min(255, y))
def _push_gaze(x: int, y: int):
    """Best-effort POST of a gaze coordinate to the eye service.

    Sends ``{"x": x, "y": y}`` as JSON to ``{EYE_SERVICE_URL}/gaze`` with a
    0.5 s httpx timeout. Every failure (including httpx being unavailable)
    is swallowed on purpose, so callers never see an exception.
    """
    try:
        import httpx

        endpoint = f"{EYE_SERVICE_URL}/gaze"
        payload = {"x": x, "y": y}
        httpx.post(endpoint, json=payload, timeout=0.5)
    except Exception:
        # eye service may be down, don't spam logs
        return
# ============================================================================
@@ -497,10 +522,13 @@ async def startup():
except Exception as e:
logger.warning("Speaker recognition unavailable: %s", e)
# --- DoA polling ---
# --- Spatial tracking (DoA → triangulation → gaze) ---
if xvf_manager.left or xvf_manager.right:
threading.Thread(target=doa_poll_loop, daemon=True).start()
logger.info("DoA polling started at %d Hz", DOA_POLL_HZ)
array_sep = cfg.get("array_separation_mm", 175.0)
spatial_tracker = SpatialTracker(array_separation_mm=array_sep)
threading.Thread(target=doa_track_loop, daemon=True).start()
logger.info("Spatial tracking started (%d Hz, %.0fmm baseline, pushing gaze to %s)",
DOA_POLL_HZ, array_sep, EYE_SERVICE_URL)
# --- Main listener ---
thread = threading.Thread(target=listener_loop, daemon=True)
@@ -570,11 +598,11 @@ async def last():
@app.get("/doa")
async def doa():
    """Direction of Arrival from both mic arrays + triangulated position.

    Response keys:
        doa: latest ``state.doa`` reading.
        active_side: ``state.active_side``.
        gaze: result of ``doa_to_gaze()`` (``None`` when it has nothing to report).
        spatial: ``state.spatial``, the last result stored by the tracking loop.
    """
    payload = {
        "doa": state.doa,
        "active_side": state.active_side,
        "gaze": doa_to_gaze(),
        "spatial": state.spatial,
    }
    return payload