Fix VAD — use non-NaN processed_doa as the speech indicator

The auto-select beam always returns an angle (even for noise), so
VAD was always true. The processed_doa (index 0) is NaN when no
speech is present and a real angle when speech is detected.
Now the angle comes from the auto-select beam, and VAD is true only when processed_doa is not NaN.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Alex
2026-04-12 21:08:50 -05:00
parent b04726dfe0
commit 88fb18800c

View File

@@ -103,30 +103,28 @@ class XVF3800:
def read_doa(self) -> tuple[int, bool]: def read_doa(self) -> tuple[int, bool]:
"""Read Direction of Arrival using the auto-select beam azimuth. """Read Direction of Arrival using the auto-select beam azimuth.
Returns (angle 0-359 degrees, vad True/False). Returns (angle 0-359 degrees, vad True/False).
Uses AUDIO_MGR_SELECTED_AZIMUTHS which tracks the active speaker."""
Uses AUDIO_MGR_SELECTED_AZIMUTHS which returns two values:
[0] processed_doa — NaN when no speech, real angle when speech detected (= VAD)
[1] auto_select_doa — always tracks strongest source, even noise (= angle)
"""
import math import math
# Read auto-select beam azimuth (2 floats: processed_doa, auto_select_doa)
data = self._read_float(AUDIO_MGR_RESID, AUDIO_MGR_SELECTED_AZ_CMD, 2) data = self._read_float(AUDIO_MGR_RESID, AUDIO_MGR_SELECTED_AZ_CMD, 2)
if len(data) < 9: # 1 status + 2*4 bytes if len(data) < 9: # 1 status + 2*4 bytes
return 0, False return 0, False
processed_doa, auto_select_doa = struct.unpack_from("<ff", data, 1) processed_doa, auto_select_doa = struct.unpack_from("<ff", data, 1)
# Use auto-select beam (index 1), fall back to processed (index 0) # VAD: processed_doa is NaN when no speech, real value when speech
vad = not math.isnan(processed_doa)
# Angle: prefer auto-select beam (always has a value)
if not math.isnan(auto_select_doa): if not math.isnan(auto_select_doa):
angle_deg = math.degrees(auto_select_doa) % 360 angle_deg = math.degrees(auto_select_doa) % 360
vad = True elif vad:
elif not math.isnan(processed_doa):
angle_deg = math.degrees(processed_doa) % 360 angle_deg = math.degrees(processed_doa) % 360
vad = True
else: else:
# Both NaN — no speech detected
# Fall back to simple DOA_VALUE
data2 = self._read_uint16(GPO_RESID, DOA_VALUE_CMD, 2)
if len(data2) >= 5:
angle, vad_flag = struct.unpack_from("<HH", data2, 1)
return angle % 360, bool(vad_flag)
return 0, False return 0, False
return int(angle_deg) % 360, vad return int(angle_deg) % 360, vad