🦊 Eyes and ears for the fox Components: - server/: Camera server for Raspberry Pi (from camera-server) - mcp/: Vision MCP client for Claude Desktop (from vision-mcp) - analysis/: Placeholder for motion/audio detection - shared/: Common schemas and interfaces Features: - Setup script with systemd service creation - HTTPS + API key authentication - HTTP and RTSP camera support Built under a blanket on Day 45 💕
437 lines
14 KiB
Python
437 lines
14 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Vision MCP Server
|
|
|
|
Model Context Protocol server for interacting with multiple camera-server instances
|
|
and RTSP streams.
|
|
|
|
Tools:
|
|
- vision_get_cams() - Get list of active cameras
|
|
- vision_snap(cam_id) - Get snapshot from a camera (HTTP API or RTSP)
- vision_get_info() - Get a summary of the configured cameras and config file location
|
|
"""
|
|
|
|
import json
|
|
import logging
|
|
from pathlib import Path
|
|
from typing import List, Dict, Any, Union
|
|
from io import BytesIO
|
|
|
|
import httpx
|
|
import cv2
|
|
import numpy as np
|
|
from PIL import Image
|
|
from fastmcp import FastMCP
|
|
from fastmcp.utilities.types import Image as MCPImage
|
|
|
|
# ---------------------------------------------------------------------------
# Module-level settings
# ---------------------------------------------------------------------------
# Per-user JSON file that lists the cameras this server may talk to.
CONFIG_FILE = Path.home().joinpath(".vision_setup.json")
# Destination of the file log handler configured below.
LOG_FILE = Path("/tmp", "vision_mcp.log")
REQUEST_TIMEOUT = 5.0  # seconds
RTSP_TIMEOUT = 10.0  # seconds for RTSP stream connection

# Logging goes to LOG_FILE and, through StreamHandler, to a console stream.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
    handlers=[logging.FileHandler(LOG_FILE), logging.StreamHandler()],
)
logger = logging.getLogger(__name__)
|
|
|
|
# FastMCP application object; the @mcp.tool() decorators in this module
# register their functions on this instance.
mcp = FastMCP("Vision Camera System")
|
|
|
|
|
|
def _validate_camera_entry(cam: Any) -> None:
    """Raise ValueError unless *cam* is a well-formed camera config entry."""
    if not isinstance(cam, dict):
        raise ValueError("Each entry in 'cameras' must be a JSON object")

    # 'id' is mandatory for every camera; 'type' is optional.
    if 'id' not in cam:
        raise ValueError("Camera config missing 'id' field")

    cam_type = cam.get('type', 'http')  # Default to http for backward compatibility

    if cam_type == 'http':
        # HTTP cameras need url and api_key
        required_fields = ['url', 'api_key']
        missing = [f for f in required_fields if f not in cam]
        if missing:
            raise ValueError(
                f"HTTP camera '{cam['id']}' missing required fields: {missing}"
            )
    elif cam_type == 'rtsp':
        # RTSP cameras need rtsp_url
        if 'rtsp_url' not in cam:
            raise ValueError(
                f"RTSP camera '{cam['id']}' missing required field: rtsp_url"
            )
    else:
        raise ValueError(
            f"Camera '{cam['id']}' has invalid type: {cam_type}. "
            f"Must be 'http' or 'rtsp'"
        )


def load_camera_config() -> Dict[str, Any]:
    """
    Load and validate the camera configuration from ~/.vision_setup.json

    The file must contain a JSON object with a 'cameras' array. Every entry
    needs an 'id'; 'type' is optional and defaults to 'http'. HTTP cameras
    additionally need 'url' and 'api_key'; RTSP cameras need 'rtsp_url'.

    Returns:
        Dictionary with camera configurations

    Raises:
        FileNotFoundError: If config file doesn't exist
        ValueError: If config file is invalid
    """
    # Open directly instead of checking exists() first, so the file cannot
    # disappear between the check and the read.
    try:
        with open(CONFIG_FILE, 'r', encoding='utf-8') as f:
            config = json.load(f)
    except FileNotFoundError:
        raise FileNotFoundError(
            f"Camera config file not found: {CONFIG_FILE}\n"
            f"Create {CONFIG_FILE} with camera configurations."
        ) from None
    except json.JSONDecodeError as e:
        raise ValueError(f"Invalid JSON in config file: {e}") from e

    # Reject structurally wrong documents with the documented ValueError
    # rather than letting TypeError/AttributeError escape from the checks below.
    if not isinstance(config, dict) or 'cameras' not in config:
        raise ValueError("Config file must contain 'cameras' array")
    if not isinstance(config['cameras'], list):
        raise ValueError("Config file must contain 'cameras' array")

    # Validate each camera config
    for cam in config['cameras']:
        _validate_camera_entry(cam)

    logger.info(f"Loaded {len(config['cameras'])} camera(s) from config")
    return config
|
|
|
|
|
|
def get_camera_by_id(cam_id: str) -> Dict[str, Any]:
    """
    Get camera configuration by ID

    The configuration is re-read through load_camera_config() on every call.
    IDs are compared by their string form, so an ID written as a JSON number
    in the config still matches the string passed by the caller.

    Args:
        cam_id: Camera ID string

    Returns:
        Camera configuration dict

    Raises:
        FileNotFoundError: If the config file doesn't exist
        ValueError: If the config is invalid or the camera ID is not found
    """
    cameras = load_camera_config()['cameras']
    wanted = str(cam_id)

    for cam in cameras:
        if str(cam['id']) == wanted:
            return cam

    # str() each ID: join() would raise TypeError on non-string IDs and hide
    # the real "not found" error from the caller.
    available_ids = ', '.join(str(c['id']) for c in cameras) or '(none)'
    raise ValueError(
        f"Camera '{cam_id}' not found in config.\n"
        f"Available cameras: {available_ids}"
    )
|
|
|
|
|
|
def capture_rtsp_snapshot(rtsp_url: str, timeout: float = RTSP_TIMEOUT) -> bytes:
    """
    Capture a single frame from an RTSP stream

    Args:
        rtsp_url: RTSP stream URL (e.g., rtsp://192.168.1.239/live)
        timeout: Timeout in seconds, applied to opening the stream and to
            reading the frame

    Returns:
        JPEG image bytes

    Raises:
        RuntimeError: If unable to connect or capture frame
    """
    logger.info(f"Attempting to capture from RTSP: {rtsp_url}")

    timeout_ms = int(timeout * 1000)

    # The timeout properties are "open-only" in OpenCV: they must be supplied
    # while the capture is being opened. Calling cap.set() after the
    # constructor has already opened the stream leaves them without effect.
    cap = cv2.VideoCapture(
        rtsp_url,
        cv2.CAP_ANY,
        [
            cv2.CAP_PROP_OPEN_TIMEOUT_MSEC, timeout_ms,
            cv2.CAP_PROP_READ_TIMEOUT_MSEC, timeout_ms,
        ],
    )

    try:
        # Check if stream opened successfully
        if not cap.isOpened():
            raise RuntimeError(f"Failed to open RTSP stream: {rtsp_url}")

        # Read a frame
        ret, frame = cap.read()
        if not ret or frame is None:
            raise RuntimeError(f"Failed to read frame from RTSP stream: {rtsp_url}")

        # Convert BGR (OpenCV) to RGB (PIL)
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Encode the frame as JPEG via PIL
        buffer = BytesIO()
        Image.fromarray(frame_rgb).save(buffer, format='JPEG', quality=90)
        jpeg_bytes = buffer.getvalue()

        logger.info(f"✓ Captured RTSP snapshot ({len(jpeg_bytes)} bytes)")
        return jpeg_bytes

    finally:
        # Always release the capture
        cap.release()
|
|
|
|
|
|
def _rtsp_stream_is_reachable(rtsp_url: str, timeout_ms: int = 3000) -> bool:
    """Return True if the RTSP stream at *rtsp_url* opens within *timeout_ms*."""
    # The open timeout is an "open-only" OpenCV property, so it is passed as
    # an open parameter; setting it after construction has no effect.
    cap = cv2.VideoCapture(
        rtsp_url,
        cv2.CAP_ANY,
        [cv2.CAP_PROP_OPEN_TIMEOUT_MSEC, timeout_ms],
    )
    try:
        return cap.isOpened()
    finally:
        # Release even if isOpened() raises, so the capture is never leaked.
        cap.release()


@mcp.tool()
async def vision_get_cams() -> List[Dict[str, str]]:
    """
    Get list of all configured cameras with their online/offline status.

    HTTP cameras are checked through their /health endpoint; RTSP cameras
    are checked by briefly trying to open the stream.

    Returns:
        List of camera info dictionaries:
        [
            {
                "id": "basement",
                "type": "http",      # or "rtsp"
                "status": "online"   # or "offline"
            },
            ...
        ]
        If the configuration cannot be loaded, a single-element list of the
        form [{"error": "<message>"}] is returned instead.

    Examples:
        vision_get_cams()
    """
    # Local import keeps this block self-contained; asyncio is standard library.
    import asyncio

    try:
        config = load_camera_config()
        cameras = []
        loop = asyncio.get_running_loop()

        # NOTE(review): verify=False disables TLS certificate verification,
        # presumably to allow self-signed camera certificates - confirm this
        # is acceptable on the networks where the server runs.
        async with httpx.AsyncClient(timeout=REQUEST_TIMEOUT, verify=False) as client:
            for cam in config['cameras']:
                cam_type = cam.get('type', 'http')
                cam_info = {
                    "id": cam['id'],
                    "type": cam_type,
                    "status": "unknown"
                }

                # Check status based on camera type
                try:
                    if cam_type == 'http':
                        # Check HTTP health endpoint
                        health_url = f"{cam['url'].rstrip('/')}/health"
                        logger.debug(f"Checking HTTP health: {health_url}")

                        response = await client.get(health_url)

                        if response.status_code == 200:
                            cam_info['status'] = 'online'
                            logger.info(f"Camera '{cam['id']}' is online")
                        else:
                            cam_info['status'] = 'offline'
                            logger.warning(f"Camera '{cam['id']}' returned status {response.status_code}")

                    elif cam_type == 'rtsp':
                        # Try to briefly connect to RTSP stream
                        rtsp_url = cam['rtsp_url']
                        logger.debug(f"Checking RTSP stream: {rtsp_url}")

                        # OpenCV's open call blocks; run it in a worker
                        # thread so the event loop stays responsive.
                        reachable = await loop.run_in_executor(
                            None, _rtsp_stream_is_reachable, rtsp_url
                        )

                        if reachable:
                            cam_info['status'] = 'online'
                            logger.info(f"RTSP camera '{cam['id']}' is online")
                        else:
                            cam_info['status'] = 'offline'
                            logger.warning(f"RTSP camera '{cam['id']}' connection failed")

                except httpx.TimeoutException:
                    cam_info['status'] = 'offline'
                    logger.warning(f"Camera '{cam['id']}' timed out")

                except httpx.ConnectError:
                    cam_info['status'] = 'offline'
                    logger.warning(f"Camera '{cam['id']}' connection failed")

                except Exception as e:
                    # One broken camera must not abort the whole listing.
                    cam_info['status'] = 'offline'
                    logger.error(f"Camera '{cam['id']}' error: {e}")

                cameras.append(cam_info)

        logger.info(f"Found {len(cameras)} camera(s), {sum(1 for c in cameras if c['status'] == 'online')} online")
        return cameras

    except (FileNotFoundError, ValueError) as e:
        # Missing or invalid configuration file
        logger.error(f"Config error: {e}")
        return [{"error": str(e)}]

    except Exception as e:
        logger.error(f"Unexpected error: {e}")
        return [{"error": f"Unexpected error: {str(e)}"}]
|
|
|
|
|
|
@mcp.tool()
async def vision_snap(cam_id: str) -> Union[MCPImage, str]:
    """
    Get a snapshot from a camera.

    HTTP cameras are queried through their /snapshot endpoint (authenticated
    with the X-API-Key header); RTSP cameras have a single frame captured
    from the stream. The image is returned for inline display.

    Args:
        cam_id: Camera ID from config file (e.g., "basement")

    Returns:
        MCPImage object for inline display, or error message string

    Examples:
        vision_snap("basement")
    """
    # Local import keeps this block self-contained; asyncio is standard library.
    import asyncio

    try:
        # Get camera config
        cam = get_camera_by_id(cam_id)
        cam_type = cam.get('type', 'http')

        # Handle based on camera type
        if cam_type == 'http':
            # HTTP API camera
            # NOTE(review): verify=False disables TLS certificate verification,
            # presumably to allow self-signed camera certificates - confirm.
            async with httpx.AsyncClient(timeout=REQUEST_TIMEOUT, verify=False) as client:
                snapshot_url = f"{cam['url'].rstrip('/')}/snapshot"
                headers = {"X-API-Key": cam['api_key']}

                logger.info(f"Requesting HTTP snapshot from '{cam_id}' at {snapshot_url}")

                # Only the request itself can raise the httpx errors handled here.
                try:
                    response = await client.get(snapshot_url, headers=headers)

                except httpx.TimeoutException:
                    error_msg = f"❌ Camera '{cam_id}' timed out after {REQUEST_TIMEOUT}s"
                    logger.error(error_msg)
                    return error_msg

                except httpx.ConnectError as e:
                    error_msg = f"❌ Cannot connect to camera '{cam_id}' at {cam['url']}: {str(e)}"
                    logger.error(error_msg)
                    return error_msg

                status = response.status_code

                if status == 200:
                    # Derive the image format from the declared media type so a
                    # non-JPEG image is not mislabelled; fall back to JPEG.
                    content_type = response.headers.get('content-type', '')
                    media_type = content_type.split(';', 1)[0].strip().lower()
                    image_format = "jpeg"
                    if media_type.startswith('image/'):
                        subtype = media_type[len('image/'):]
                        if subtype and subtype != 'jpg':
                            image_format = subtype
                    else:
                        logger.warning(f"Unexpected content type: {content_type}")

                    # Get image bytes
                    image_bytes = response.content
                    logger.info(f"✓ Snapshot received from '{cam_id}' ({len(image_bytes)} bytes)")

                    # Return as MCPImage (directly, not in dict)
                    return MCPImage(data=image_bytes, format=image_format)

                if status in (401, 403):
                    # 401 and 403 both mean the API key was rejected.
                    error_msg = f"❌ Authentication failed for camera '{cam_id}'. Check API key in config."
                    logger.error(error_msg)
                    return error_msg

                if status == 503:
                    error_msg = f"❌ Camera '{cam_id}' is unavailable (503). Camera may be disconnected."
                    logger.error(error_msg)
                    return error_msg

                error_msg = f"❌ Camera '{cam_id}' returned status {status}: {response.text[:100]}"
                logger.error(error_msg)
                return error_msg

        elif cam_type == 'rtsp':
            # RTSP stream camera
            rtsp_url = cam['rtsp_url']
            logger.info(f"Capturing RTSP snapshot from '{cam_id}' at {rtsp_url}")

            try:
                # The capture is blocking OpenCV work; run it in a worker
                # thread so the event loop is not stalled while it connects.
                image_bytes = await asyncio.get_running_loop().run_in_executor(
                    None, capture_rtsp_snapshot, rtsp_url
                )

                logger.info(f"✓ RTSP snapshot captured from '{cam_id}' ({len(image_bytes)} bytes)")

                # Return as MCPImage
                return MCPImage(data=image_bytes, format="jpeg")

            except RuntimeError as e:
                error_msg = f"❌ Failed to capture from RTSP camera '{cam_id}': {str(e)}"
                logger.error(error_msg)
                return error_msg

        else:
            error_msg = f"❌ Unknown camera type '{cam_type}' for camera '{cam_id}'"
            logger.error(error_msg)
            return error_msg

    except ValueError as e:
        # Camera ID not found (or the config file is invalid)
        logger.error(f"Camera lookup error: {e}")
        return f"❌ {str(e)}"

    except FileNotFoundError as e:
        # Config file not found
        logger.error(f"Config error: {e}")
        return f"❌ {str(e)}"

    except Exception as e:
        error_msg = f"❌ Unexpected error getting snapshot from '{cam_id}': {str(e)}"
        logger.exception(error_msg)
        return error_msg
|
|
|
|
|
|
@mcp.tool()
def vision_get_info() -> str:
    """
    Describe the Vision camera system configuration.

    Lists the config file location, the number of configured cameras and,
    for each HTTP or RTSP camera, its ID and URL.

    Returns:
        Formatted string with system info, or an error message prefixed
        with "❌" if the configuration cannot be loaded
    """
    # Maps a camera type to (display label, config key holding its URL).
    url_fields = {'http': ('HTTP', 'url'), 'rtsp': ('RTSP', 'rtsp_url')}

    try:
        cameras = load_camera_config()['cameras']

        report = [
            "Vision Camera System",
            "",
            f"Config file: {CONFIG_FILE}",
            f"Cameras configured: {len(cameras)}",
            "",
        ]

        for cam in cameras:
            field = url_fields.get(cam.get('type', 'http'))
            if field is None:
                continue
            label, url_key = field
            report.append(f" • {cam['id']} ({label}): {cam[url_key]}")

        report += [
            "",
            "Use vision_get_cams() to check camera status",
            "Use vision_snap(cam_id) to get a snapshot",
        ]

        return "\n".join(report)

    except (FileNotFoundError, ValueError) as err:
        # Missing or invalid config: surface the loader's message as-is.
        return f"❌ {str(err)}"
    except Exception as err:
        return f"❌ Unexpected error: {str(err)}"
|
|
|
|
|
|
if __name__ == "__main__":
    # Executed as a script: start the MCP server.
    # With no arguments, run() uses the stdio transport.
    mcp.run()
|