Fix VAD — use processed_doa NaN as speech indicator
The auto-select beam always returns an angle (even for noise), so VAD was always true. The processed_doa (index 0) is NaN when no speech is present and a real angle when speech is detected.

Now: angle from auto-select beam, VAD from processed_doa being non-NaN.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
22
xvf3800.py
22
xvf3800.py
@@ -103,30 +103,28 @@ class XVF3800:
|
|||||||
def read_doa(self) -> tuple[int, bool]:
|
def read_doa(self) -> tuple[int, bool]:
|
||||||
"""Read Direction of Arrival using the auto-select beam azimuth.
|
"""Read Direction of Arrival using the auto-select beam azimuth.
|
||||||
Returns (angle 0-359 degrees, vad True/False).
|
Returns (angle 0-359 degrees, vad True/False).
|
||||||
Uses AUDIO_MGR_SELECTED_AZIMUTHS which tracks the active speaker."""
|
|
||||||
|
Uses AUDIO_MGR_SELECTED_AZIMUTHS which returns two values:
|
||||||
|
[0] processed_doa — NaN when no speech, real angle when speech detected (= VAD)
|
||||||
|
[1] auto_select_doa — always tracks strongest source, even noise (= angle)
|
||||||
|
"""
|
||||||
import math
|
import math
|
||||||
|
|
||||||
# Read auto-select beam azimuth (2 floats: processed_doa, auto_select_doa)
|
|
||||||
data = self._read_float(AUDIO_MGR_RESID, AUDIO_MGR_SELECTED_AZ_CMD, 2)
|
data = self._read_float(AUDIO_MGR_RESID, AUDIO_MGR_SELECTED_AZ_CMD, 2)
|
||||||
if len(data) < 9: # 1 status + 2*4 bytes
|
if len(data) < 9: # 1 status + 2*4 bytes
|
||||||
return 0, False
|
return 0, False
|
||||||
|
|
||||||
processed_doa, auto_select_doa = struct.unpack_from("<ff", data, 1)
|
processed_doa, auto_select_doa = struct.unpack_from("<ff", data, 1)
|
||||||
|
|
||||||
# Use auto-select beam (index 1), fall back to processed (index 0)
|
# VAD: processed_doa is NaN when no speech, real value when speech
|
||||||
|
vad = not math.isnan(processed_doa)
|
||||||
|
|
||||||
|
# Angle: prefer auto-select beam (always has a value)
|
||||||
if not math.isnan(auto_select_doa):
|
if not math.isnan(auto_select_doa):
|
||||||
angle_deg = math.degrees(auto_select_doa) % 360
|
angle_deg = math.degrees(auto_select_doa) % 360
|
||||||
vad = True
|
elif vad:
|
||||||
elif not math.isnan(processed_doa):
|
|
||||||
angle_deg = math.degrees(processed_doa) % 360
|
angle_deg = math.degrees(processed_doa) % 360
|
||||||
vad = True
|
|
||||||
else:
|
else:
|
||||||
# Both NaN — no speech detected
|
|
||||||
# Fall back to simple DOA_VALUE
|
|
||||||
data2 = self._read_uint16(GPO_RESID, DOA_VALUE_CMD, 2)
|
|
||||||
if len(data2) >= 5:
|
|
||||||
angle, vad_flag = struct.unpack_from("<HH", data2, 1)
|
|
||||||
return angle % 360, bool(vad_flag)
|
|
||||||
return 0, False
|
return 0, False
|
||||||
|
|
||||||
return int(angle_deg) % 360, vad
|
return int(angle_deg) % 360, vad
|
||||||
|
|||||||
Reference in New Issue
Block a user