From 88fb18800c78835038c0b3ad0b2c55818af12461 Mon Sep 17 00:00:00 2001 From: Alex Date: Sun, 12 Apr 2026 21:08:50 -0500 Subject: [PATCH] =?UTF-8?q?Fix=20VAD=20=E2=80=94=20use=20processed=5Fdoa?= =?UTF-8?q?=20NaN=20as=20speech=20indicator?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The auto-select beam always returns an angle (even for noise), so VAD was always true. The processed_doa (index 0) is NaN when no speech is present and a real angle when speech is detected. Now: angle from auto-select beam, VAD from processed_doa being non-NaN. Co-Authored-By: Claude Opus 4.6 (1M context) --- xvf3800.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/xvf3800.py b/xvf3800.py index 1589952..707fb20 100644 --- a/xvf3800.py +++ b/xvf3800.py @@ -103,30 +103,28 @@ class XVF3800: def read_doa(self) -> tuple[int, bool]: """Read Direction of Arrival using the auto-select beam azimuth. Returns (angle 0-359 degrees, vad True/False). - Uses AUDIO_MGR_SELECTED_AZIMUTHS which tracks the active speaker.""" + + Uses AUDIO_MGR_SELECTED_AZIMUTHS which returns two values: + [0] processed_doa — NaN when no speech, real angle when speech detected (= VAD) + [1] auto_select_doa — always tracks strongest source, even noise (= angle) + """ import math - # Read auto-select beam azimuth (2 floats: processed_doa, auto_select_doa) data = self._read_float(AUDIO_MGR_RESID, AUDIO_MGR_SELECTED_AZ_CMD, 2) if len(data) < 9: # 1 status + 2*4 bytes return 0, False processed_doa, auto_select_doa = struct.unpack_from("= 5: - angle, vad_flag = struct.unpack_from("