updated features
This commit is contained in:
169
ear_mcp.py
169
ear_mcp.py
@@ -21,6 +21,7 @@ mcp = FastMCP("Ear STT Transcriber")
|
||||
|
||||
# Configuration from environment
# Base URL of the Eartail STT transcription service; override via EARTAIL_BASE_URL.
EARTAIL_BASE_URL = os.getenv("EARTAIL_BASE_URL", "http://bigorin.local:8764")
# Base URL of the HeadMic sound-classification / speaker-ID service; override via HEADMIC_BASE_URL.
HEADMIC_BASE_URL = os.getenv("HEADMIC_BASE_URL", "http://head-vixy.local:8446")
DEFAULT_POLL_INTERVAL = 2  # seconds
DEFAULT_TIMEOUT = 300  # seconds (5 minutes - transcription is usually fast)
||||
@@ -178,5 +179,173 @@ async def ear_health() -> dict:
|
||||
return {"status": "error", "error": str(e)}
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# HeadMic - Sound Classification & Speaker Identification
|
||||
# ============================================================================
|
||||
|
||||
@mcp.tool()
async def ear_sounds() -> dict:
    """
    Get the current audio scene from HeadMic's sound classifier.

    Reports what the microphone is hearing right now: speech, music, alert,
    animal, household sounds, environment, or silence. If someone is talking,
    speaker identification is included as well.

    Returns:
        Dictionary with category, top_classes, dominant_category,
        recognized_speaker, speaker_confidence
    """
    async with httpx.AsyncClient(timeout=10.0) as http:
        try:
            resp = await http.get(f"{HEADMIC_BASE_URL}/sounds")
            resp.raise_for_status()
            return resp.json()
        except httpx.HTTPStatusError as err:
            # 503 means the classifier feature is simply not running.
            if err.response.status_code != 503:
                raise
            return {"error": "Sound classification not available on HeadMic"}
        except Exception as err:
            return {"error": f"HeadMic unavailable: {err}"}
||||
@mcp.tool()
async def ear_speakers() -> dict:
    """
    List all enrolled speakers for voice identification.

    Returns:
        Dictionary with speaker names and their enrollment sample counts
    """
    async with httpx.AsyncClient(timeout=10.0) as http:
        try:
            resp = await http.get(f"{HEADMIC_BASE_URL}/speakers")
            resp.raise_for_status()
            return resp.json()
        except httpx.HTTPStatusError as err:
            # 503 means the speaker-recognition feature is simply not running.
            if err.response.status_code != 503:
                raise
            return {"error": "Speaker recognition not available on HeadMic"}
        except Exception as err:
            return {"error": f"HeadMic unavailable: {err}"}
||||
@mcp.tool()
async def ear_enroll_speaker(name: str) -> dict:
    """
    Enroll a speaker by recording 5 seconds from the live microphone.

    The person should speak naturally during the recording. The ReSpeaker LEDs
    will pulse orange while recording. Multiple enrollments of the same person
    improve recognition accuracy.

    Args:
        name: Name to associate with the voice (e.g. "Alex")

    Returns:
        Enrollment result with duration and updated speaker list
    """
    import sys  # local import keeps this fix self-contained

    async with httpx.AsyncClient(timeout=15.0) as client:
        try:
            # Progress messages go to stderr: an MCP server run over stdio uses
            # stdout for the JSON-RPC stream, so printing there would corrupt
            # the protocol.
            print(f"🎙️ Recording 5 seconds for '{name}'...", file=sys.stderr)
            response = await client.post(
                f"{HEADMIC_BASE_URL}/speakers/enroll-from-mic",
                params={"name": name},
            )
            response.raise_for_status()
            result = response.json()
            print(f"✓ Enrolled '{name}' ({result.get('seconds', '?')}s)", file=sys.stderr)
            return result
        except httpx.HTTPStatusError as e:
            if e.response.status_code == 503:
                return {"error": "Speaker recognition not available on HeadMic"}
            if e.response.status_code == 409:
                return {"error": "Enrollment already in progress"}
            raise
        except Exception as e:
            return {"error": f"HeadMic unavailable: {e}"}
||||
@mcp.tool()
async def ear_enroll_speaker_from_file(name: str, audio_path: str) -> dict:
    """
    Enroll a speaker from an audio file.

    Args:
        name: Name to associate with the voice (e.g. "Alex")
        audio_path: Path to audio file with the person speaking

    Returns:
        Enrollment result with updated speaker list

    Raises:
        FileNotFoundError: If audio_path does not exist.
    """
    import sys  # local import keeps this fix self-contained

    path = Path(audio_path).expanduser()
    if not path.exists():
        raise FileNotFoundError(f"Audio file not found: {audio_path}")

    async with httpx.AsyncClient(timeout=15.0) as client:
        try:
            with open(path, "rb") as f:
                response = await client.post(
                    f"{HEADMIC_BASE_URL}/speakers/enroll",
                    data={"name": name},
                    files={"audio": (path.name, f, "audio/wav")},
                )
            response.raise_for_status()
            result = response.json()
            # Status messages go to stderr: an MCP server run over stdio uses
            # stdout for the JSON-RPC stream, so printing there would corrupt
            # the protocol.
            print(f"✓ Enrolled '{name}' from {path.name}", file=sys.stderr)
            return result
        except httpx.HTTPStatusError as e:
            if e.response.status_code == 503:
                return {"error": "Speaker recognition not available on HeadMic"}
            raise
        except Exception as e:
            return {"error": f"HeadMic unavailable: {e}"}
||||
@mcp.tool()
async def ear_delete_speaker(name: str) -> dict:
    """
    Remove an enrolled speaker and all their voice samples.

    Args:
        name: Name of the speaker to remove

    Returns:
        Deletion result with number of samples removed
    """
    import sys  # local import keeps this fix self-contained
    from urllib.parse import quote

    async with httpx.AsyncClient(timeout=10.0) as client:
        try:
            # URL-encode the name so spaces, slashes, or other reserved
            # characters cannot break or redirect the request path.
            response = await client.delete(
                f"{HEADMIC_BASE_URL}/speakers/{quote(name, safe='')}"
            )
            response.raise_for_status()
            result = response.json()
            # Status messages go to stderr: an MCP server run over stdio uses
            # stdout for the JSON-RPC stream, so printing there would corrupt
            # the protocol.
            print(
                f"✓ Deleted speaker '{name}' ({result.get('samples_removed', 0)} samples)",
                file=sys.stderr,
            )
            return result
        except httpx.HTTPStatusError as e:
            if e.response.status_code == 503:
                return {"error": "Speaker recognition not available on HeadMic"}
            if e.response.status_code == 404:
                return {"error": f"Speaker '{name}' not found"}
            raise
        except Exception as e:
            return {"error": f"HeadMic unavailable: {e}"}
||||
@mcp.tool()
async def ear_headmic_status() -> dict:
    """
    Get full HeadMic status: wake word, audio scene, and speaker identification.

    Returns:
        Dictionary with listening state, audio scene, recognized speaker, etc.
    """
    async with httpx.AsyncClient(timeout=10.0) as http:
        try:
            resp = await http.get(f"{HEADMIC_BASE_URL}/status")
            resp.raise_for_status()
        except Exception as err:
            # Any failure (connect error, timeout, bad status) maps to one
            # uniform error payload.
            return {"error": f"HeadMic unavailable: {err}"}
        return resp.json()
||||
# Start the MCP server when this module is executed directly.
if __name__ == "__main__":
    mcp.run()
||||
Reference in New Issue
Block a user