Files
vixy-vision/mcp/vision_mcp.py
Vixy a17c09cac1 Initial commit: vixy-vision distributed sensing system
🦊 Eyes and ears for the fox

Components:
- server/: Camera server for Raspberry Pi (from camera-server)
- mcp/: Vision MCP client for Claude Desktop (from vision-mcp)
- analysis/: Placeholder for motion/audio detection
- shared/: Common schemas and interfaces

Features:
- Setup script with systemd service creation
- HTTPS + API key authentication
- HTTP and RTSP camera support

Built under a blanket on Day 45 💕
2025-12-16 15:26:26 -06:00

437 lines
14 KiB
Python

#!/usr/bin/env python3
"""
Vision MCP Server
Model Context Protocol server for interacting with multiple camera-server instances
and RTSP streams.
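Tools:
- vision_get_cams() - Get list of configured cameras with their online/offline status
- vision_snap(cam_id) - Get snapshot from a camera (HTTP API or RTSP)
- vision_get_info() - Get a summary of the configured cameras and the config file location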
"""
import json
import logging
from pathlib import Path
from typing import List, Dict, Any, Union
from io import BytesIO
import httpx
import cv2
import numpy as np
from PIL import Image
from fastmcp import FastMCP
from fastmcp.utilities.types import Image as MCPImage
# Configuration
# Camera definitions are read from a JSON file in the user's home directory.
CONFIG_FILE = Path.home().joinpath(".vision_setup.json")
LOG_FILE = Path("/tmp", "vision_mcp.log")
REQUEST_TIMEOUT = 5.0  # seconds, passed to the httpx client used for camera requests
RTSP_TIMEOUT = 10.0  # seconds allowed for opening an RTSP stream
# Setup logging: every record goes to LOG_FILE and is echoed to the console
# stream (logging.StreamHandler() without arguments writes to stderr).
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
_log_handlers = [logging.FileHandler(LOG_FILE), logging.StreamHandler()]
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT, handlers=_log_handlers)
logger = logging.getLogger(__name__)

# Initialize MCP server; the @mcp.tool() decorators below register on this instance
mcp = FastMCP("Vision Camera System")
def _validate_camera_entry(cam: Any) -> None:
    """Validate one entry of the 'cameras' array; raise ValueError if it is malformed."""
    if not isinstance(cam, dict):
        raise ValueError(
            f"Camera config entries must be objects, got {type(cam).__name__}"
        )

    # All cameras need 'id'; 'type' is optional
    if 'id' not in cam:
        raise ValueError("Camera config missing 'id' field")

    cam_type = cam.get('type', 'http')  # Default to http for backward compatibility

    if cam_type == 'http':
        # HTTP cameras need url and api_key
        missing = [field for field in ('url', 'api_key') if field not in cam]
        if missing:
            raise ValueError(
                f"HTTP camera '{cam['id']}' missing required fields: {missing}"
            )
    elif cam_type == 'rtsp':
        # RTSP cameras need rtsp_url
        if 'rtsp_url' not in cam:
            raise ValueError(
                f"RTSP camera '{cam['id']}' missing required field: rtsp_url"
            )
    else:
        raise ValueError(
            f"Camera '{cam['id']}' has invalid type: {cam_type}. "
            f"Must be 'http' or 'rtsp'"
        )


def load_camera_config() -> Dict[str, Any]:
    """
    Load and validate the camera configuration from CONFIG_FILE.

    The file must be a JSON object with a 'cameras' array. Each camera needs an
    'id'; HTTP cameras (the default type) also need 'url' and 'api_key', RTSP
    cameras need 'rtsp_url'.

    Returns:
        The parsed configuration dictionary.

    Raises:
        FileNotFoundError: If the config file doesn't exist
        ValueError: If the file is not valid JSON or fails validation
    """
    if not CONFIG_FILE.exists():
        raise FileNotFoundError(
            f"Camera config file not found: {CONFIG_FILE}\n"
            f"Create {CONFIG_FILE} with camera configurations."
        )

    try:
        # JSON is UTF-8 by specification; don't depend on the platform default encoding
        with open(CONFIG_FILE, 'r', encoding='utf-8') as f:
            config = json.load(f)
    except json.JSONDecodeError as e:
        # Chain the original error so the parse position stays in the traceback
        raise ValueError(f"Invalid JSON in config file: {e}") from e

    # Reject structurally wrong documents with ValueError (which callers catch)
    # rather than letting a TypeError/AttributeError escape later.
    if not isinstance(config, dict) or 'cameras' not in config:
        raise ValueError("Config file must contain 'cameras' array")
    cameras = config['cameras']
    if not isinstance(cameras, list):
        raise ValueError("Config file must contain 'cameras' array")

    # Validate each camera config
    for cam in cameras:
        _validate_camera_entry(cam)

    logger.info(f"Loaded {len(cameras)} camera(s) from config")
    return config
def get_camera_by_id(cam_id: str) -> Dict[str, Any]:
    """
    Get camera configuration by ID

    IDs are compared by their string form, so a camera whose 'id' was written
    as a JSON number (e.g. 1) can still be addressed as "1".

    Args:
        cam_id: Camera ID string

    Returns:
        Camera configuration dict

    Raises:
        ValueError: If camera ID not found
        FileNotFoundError: If the config file doesn't exist (from load_camera_config)
    """
    cameras = load_camera_config()['cameras']
    wanted = str(cam_id)

    for cam in cameras:
        if str(cam['id']) == wanted:
            return cam

    # str() each id: joining raw ids would raise TypeError for non-string ids
    available_ids = ', '.join(str(cam['id']) for cam in cameras)
    raise ValueError(
        f"Camera '{cam_id}' not found in config.\n"
        f"Available cameras: {available_ids}"
    )
def capture_rtsp_snapshot(rtsp_url: str, timeout: float = RTSP_TIMEOUT) -> bytes:
    """
    Capture a single frame from an RTSP stream

    Args:
        rtsp_url: RTSP stream URL (e.g., rtsp://192.168.1.239/live)
        timeout: Connection timeout in seconds

    Returns:
        JPEG image bytes

    Raises:
        RuntimeError: If unable to connect or capture frame
    """
    # NOTE(review): rtsp_url is logged verbatim; if URLs embed credentials
    # (rtsp://user:pass@host/...) they end up in the log file - confirm acceptable.
    logger.info(f"Attempting to capture from RTSP: {rtsp_url}")

    # CAP_PROP_OPEN_TIMEOUT_MSEC is an open-time property: it must be supplied
    # to the constructor. Calling set() after construction is too late because
    # the constructor has already attempted (and waited for) the connection.
    cap = cv2.VideoCapture(
        rtsp_url,
        cv2.CAP_ANY,
        [cv2.CAP_PROP_OPEN_TIMEOUT_MSEC, int(timeout * 1000)],
    )

    try:
        # Check if stream opened successfully
        if not cap.isOpened():
            raise RuntimeError(f"Failed to open RTSP stream: {rtsp_url}")

        # Read a frame
        ret, frame = cap.read()
        if not ret or frame is None:
            raise RuntimeError(f"Failed to read frame from RTSP stream: {rtsp_url}")

        # Convert BGR (OpenCV) to RGB (PIL)
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Encode as JPEG in memory via PIL
        buffer = BytesIO()
        Image.fromarray(frame_rgb).save(buffer, format='JPEG', quality=90)
        jpeg_bytes = buffer.getvalue()

        logger.info(f"✓ Captured RTSP snapshot ({len(jpeg_bytes)} bytes)")
        return jpeg_bytes
    finally:
        # Always release the capture, including on the error paths above
        cap.release()
def _rtsp_stream_is_reachable(rtsp_url: str, timeout_ms: int = 3000) -> bool:
    """Return True if the RTSP stream at *rtsp_url* can be opened within *timeout_ms*."""
    # CAP_PROP_OPEN_TIMEOUT_MSEC is an open-time property, so it is passed to the
    # constructor; set() after construction would not bound the connection attempt.
    cap = cv2.VideoCapture(
        rtsp_url,
        cv2.CAP_ANY,
        [cv2.CAP_PROP_OPEN_TIMEOUT_MSEC, timeout_ms],
    )
    try:
        return cap.isOpened()
    finally:
        # Release even if isOpened() raises
        cap.release()


@mcp.tool()
async def vision_get_cams() -> List[Dict[str, str]]:
    """
    Get list of all configured cameras with their online/offline status.

    HTTP cameras are checked through their /health endpoint; RTSP cameras are
    checked by briefly opening the stream.

    Returns:
        List of camera info dictionaries:
        [
            {
                "id": "basement",
                "type": "http",      # or "rtsp"
                "status": "online"   # or "offline"
            },
            ...
        ]
        If the configuration cannot be loaded, a single-element list
        [{"error": "<message>"}] is returned instead.

    Examples:
        vision_get_cams()
    """
    import asyncio  # local import: only needed to off-load the blocking RTSP probe

    try:
        config = load_camera_config()
        cameras = []

        # NOTE(review): verify=False disables TLS certificate verification -
        # presumably because the camera servers use self-signed certificates; confirm.
        async with httpx.AsyncClient(timeout=REQUEST_TIMEOUT, verify=False) as client:
            for cam in config['cameras']:
                cam_type = cam.get('type', 'http')
                cam_info = {
                    "id": cam['id'],
                    "type": cam_type,
                    "status": "unknown",
                }

                # Check status based on camera type
                try:
                    if cam_type == 'http':
                        # Check HTTP health endpoint
                        health_url = f"{cam['url'].rstrip('/')}/health"
                        logger.debug(f"Checking HTTP health: {health_url}")
                        response = await client.get(health_url)

                        if response.status_code == 200:
                            cam_info['status'] = 'online'
                            logger.info(f"Camera '{cam['id']}' is online")
                        else:
                            cam_info['status'] = 'offline'
                            logger.warning(f"Camera '{cam['id']}' returned status {response.status_code}")

                    elif cam_type == 'rtsp':
                        # Try to briefly connect to RTSP stream (3 second timeout).
                        # OpenCV blocks while connecting, so run the probe in a
                        # worker thread to keep the event loop responsive.
                        rtsp_url = cam['rtsp_url']
                        logger.debug(f"Checking RTSP stream: {rtsp_url}")
                        reachable = await asyncio.to_thread(
                            _rtsp_stream_is_reachable, rtsp_url, 3000
                        )

                        if reachable:
                            cam_info['status'] = 'online'
                            logger.info(f"RTSP camera '{cam['id']}' is online")
                        else:
                            cam_info['status'] = 'offline'
                            logger.warning(f"RTSP camera '{cam['id']}' connection failed")

                except httpx.TimeoutException:
                    cam_info['status'] = 'offline'
                    logger.warning(f"Camera '{cam['id']}' timed out")
                except httpx.ConnectError:
                    cam_info['status'] = 'offline'
                    logger.warning(f"Camera '{cam['id']}' connection failed")
                except Exception as e:
                    # Best effort: one broken camera must not hide the others
                    cam_info['status'] = 'offline'
                    logger.error(f"Camera '{cam['id']}' error: {e}")

                cameras.append(cam_info)

        online_count = sum(1 for c in cameras if c['status'] == 'online')
        logger.info(f"Found {len(cameras)} camera(s), {online_count} online")
        return cameras

    except (FileNotFoundError, ValueError) as e:
        # Missing or invalid configuration file
        logger.error(f"Config error: {e}")
        return [{"error": str(e)}]
    except Exception as e:
        logger.error(f"Unexpected error: {e}")
        return [{"error": f"Unexpected error: {str(e)}"}]
def _image_format_from_content_type(content_type: str) -> str:
    """Map a Content-Type header to an image format name, defaulting to 'jpeg'."""
    media_type = content_type.split(';', 1)[0].strip().lower()
    if not media_type.startswith('image/'):
        # Keep the historical best-effort behaviour: warn and assume JPEG
        logger.warning(f"Unexpected content type: {content_type}")
        return 'jpeg'
    subtype = media_type[len('image/'):]
    # 'image/jpg' is a common non-standard spelling of 'image/jpeg'
    if subtype in ('', 'jpg'):
        return 'jpeg'
    return subtype


@mcp.tool()
async def vision_snap(cam_id: str) -> Union[MCPImage, str]:
    """
    Get a snapshot from a camera.

    HTTP cameras are queried through their /snapshot endpoint (authenticated
    with the configured API key); RTSP cameras have a single frame grabbed
    from the stream. The image is returned for inline display.

    Args:
        cam_id: Camera ID from config file (e.g., "basement")

    Returns:
        MCPImage object for inline display, or error message string

    Examples:
        vision_snap("basement")
    """
    import asyncio  # local import: only needed to off-load the blocking RTSP capture

    try:
        # Get camera config
        cam = get_camera_by_id(cam_id)
        cam_type = cam.get('type', 'http')

        # Handle based on camera type
        if cam_type == 'http':
            # HTTP API camera
            # NOTE(review): verify=False disables TLS certificate verification -
            # presumably because the camera servers use self-signed certificates; confirm.
            async with httpx.AsyncClient(timeout=REQUEST_TIMEOUT, verify=False) as client:
                snapshot_url = f"{cam['url'].rstrip('/')}/snapshot"
                headers = {"X-API-Key": cam['api_key']}

                logger.info(f"Requesting HTTP snapshot from '{cam_id}' at {snapshot_url}")

                try:
                    response = await client.get(snapshot_url, headers=headers)
                except httpx.TimeoutException:
                    error_msg = f"❌ Camera '{cam_id}' timed out after {REQUEST_TIMEOUT}s"
                    logger.error(error_msg)
                    return error_msg
                except httpx.ConnectError as e:
                    error_msg = f"❌ Cannot connect to camera '{cam_id}' at {cam['url']}: {str(e)}"
                    logger.error(error_msg)
                    return error_msg

                if response.status_code == 200:
                    # Label the image with the format the server declared instead
                    # of always claiming JPEG (falls back to JPEG when unknown).
                    content_type = response.headers.get('content-type', '')
                    image_format = _image_format_from_content_type(content_type)

                    # Get image bytes
                    image_bytes = response.content
                    logger.info(f"✓ Snapshot received from '{cam_id}' ({len(image_bytes)} bytes)")

                    # Return as MCPImage (directly, not in dict)
                    return MCPImage(data=image_bytes, format=image_format)

                if response.status_code == 403:
                    error_msg = f"❌ Authentication failed for camera '{cam_id}'. Check API key in config."
                elif response.status_code == 503:
                    error_msg = f"❌ Camera '{cam_id}' is unavailable (503). Camera may be disconnected."
                else:
                    error_msg = f"❌ Camera '{cam_id}' returned status {response.status_code}: {response.text[:100]}"
                logger.error(error_msg)
                return error_msg

        elif cam_type == 'rtsp':
            # RTSP stream camera
            rtsp_url = cam['rtsp_url']
            logger.info(f"Capturing RTSP snapshot from '{cam_id}' at {rtsp_url}")

            try:
                # OpenCV blocks while connecting and reading, so run the capture
                # in a worker thread to keep the event loop responsive.
                image_bytes = await asyncio.to_thread(capture_rtsp_snapshot, rtsp_url)
                logger.info(f"✓ RTSP snapshot captured from '{cam_id}' ({len(image_bytes)} bytes)")

                # capture_rtsp_snapshot always encodes JPEG
                return MCPImage(data=image_bytes, format="jpeg")
            except RuntimeError as e:
                error_msg = f"❌ Failed to capture from RTSP camera '{cam_id}': {str(e)}"
                logger.error(error_msg)
                return error_msg

        else:
            error_msg = f"❌ Unknown camera type '{cam_type}' for camera '{cam_id}'"
            logger.error(error_msg)
            return error_msg

    except ValueError as e:
        # Camera ID not found (or invalid config)
        logger.error(f"Camera lookup error: {e}")
        return str(e)
    except FileNotFoundError as e:
        # Config file not found
        logger.error(f"Config error: {e}")
        return str(e)
    except Exception as e:
        error_msg = f"❌ Unexpected error getting snapshot from '{cam_id}': {str(e)}"
        logger.exception(error_msg)
        return error_msg
@mcp.tool()
def vision_get_info() -> str:
    """
    Describe the Vision camera system configuration.

    Lists the config file location, the number of configured cameras and,
    for each camera, its ID, type and URL.

    Returns:
        Formatted multi-line string with system info, or the error text when
        the configuration cannot be loaded
    """
    # camera type -> (label shown in the report, config key holding the URL)
    url_fields = {
        'http': ('HTTP', 'url'),
        'rtsp': ('RTSP', 'rtsp_url'),
    }

    try:
        cameras = load_camera_config()['cameras']

        report = ["Vision Camera System", ""]
        report.append(f"Config file: {CONFIG_FILE}")
        report.append(f"Cameras configured: {len(cameras)}")
        report.append("")

        for cam in cameras:
            kind = cam.get('type', 'http')
            if kind not in url_fields:
                continue
            label, url_key = url_fields[kind]
            report.append(f"{cam['id']} ({label}): {cam[url_key]}")

        report.extend([
            "",
            "Use vision_get_cams() to check camera status",
            "Use vision_snap(cam_id) to get a snapshot",
        ])
        return "\n".join(report)

    except (FileNotFoundError, ValueError) as err:
        # Missing or invalid config: surface the loader's message unchanged
        return str(err)
    except Exception as err:
        return f"❌ Unexpected error: {str(err)}"
if __name__ == "__main__":
    # Entry point when executed as a script rather than imported:
    # run the MCP server (uses stdio transport by default)
    mcp.run()