Add TFLite object detection to reduce false positives

Motion detection now optionally runs MobileNet V2 SSD (COCO, quantized)
on frames that trigger motion, identifying objects like people, cats, and
cars. Events without detected objects are suppressed by default. Snapshots
include bounding box annotations. New MCP tool vision_get_detections()
enables label-based queries.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Alex
2026-02-08 17:04:10 -06:00
parent 68c7e9772f
commit e1171e8ff8
11 changed files with 687 additions and 50 deletions

210
server/detector.py Normal file
View File

@@ -0,0 +1,210 @@
#!/usr/bin/env python3
"""
Object Detection Module
Lightweight object detection using TensorFlow Lite with MobileNet V2 SSD.
Designed to run on Raspberry Pi 4/5 with minimal overhead.
The model is lazy-loaded on first detect() call to avoid startup delay.
"""
import cv2
import logging
import numpy as np
from dataclasses import dataclass
from pathlib import Path
from typing import Optional
logger = logging.getLogger(__name__)
@dataclass
class Detection:
    """A single detected object returned by ObjectDetector.detect()."""
    # Human-readable class name (e.g. "person"); ObjectDetector falls back
    # to "class_<id>" when the label map is missing or the id is unknown.
    label: str
    # Model score in [0, 1]; only values >= the detector's
    # confidence_threshold are emitted.
    confidence: float
    # (x_min, y_min, x_max, y_max), each normalized to [0, 1] relative to
    # the input frame (clipped to that range by the detector).
    bbox: tuple[float, float, float, float]
    # Raw integer class id from the model's output tensor.
    class_id: int
class ObjectDetector:
    """
    Object detection using TFLite MobileNet V2 SSD.

    The TFLite interpreter is lazy-loaded on the first detect() call, so
    constructing this object is cheap and tflite-runtime is only required
    once detection is actually used. Designed to be called from the motion
    detection thread after motion is confirmed.
    """

    def __init__(
        self,
        model_path: str,
        labels_path: str,
        confidence_threshold: float = 0.5,
    ):
        """
        Args:
            model_path: Path to the .tflite model file.
            labels_path: Path to a newline-separated label map
                (line i = label for class id i).
            confidence_threshold: Minimum score in [0, 1] for a detection
                to be returned.
        """
        self.model_path = Path(model_path)
        self.labels_path = Path(labels_path)
        self.confidence_threshold = confidence_threshold
        # All interpreter state stays None/empty until _load_model() runs.
        self._interpreter = None
        self._input_details = None
        self._output_details = None
        self._labels: list[str] = []
        self._input_height = 0
        self._input_width = 0

    def _load_model(self):
        """Load the TFLite interpreter and label map (called once, lazily).

        Raises:
            ImportError: If tflite-runtime is not installed.
            FileNotFoundError: If the model file does not exist.
        """
        try:
            import tflite_runtime.interpreter as tflite
        except ImportError:
            raise ImportError(
                "tflite-runtime not installed. "
                "Install with: pip install tflite-runtime"
            )

        if not self.model_path.exists():
            raise FileNotFoundError(
                f"Model file not found: {self.model_path}\n"
                f"Run download_model.sh to download the model."
            )

        # A missing label map is non-fatal: detections fall back to
        # synthetic "class_<id>" labels.
        if self.labels_path.exists():
            self._labels = self.labels_path.read_text().strip().splitlines()
        else:
            logger.warning(f"Labels file not found: {self.labels_path}")
            self._labels = []

        # Try the XNNPACK delegate for ARM CPU acceleration; fall back to
        # the default CPU kernels if the shared library is unavailable.
        delegates = []
        try:
            delegates = [tflite.load_delegate('libXNNPACK.so')]
            logger.info("XNNPACK delegate loaded")
        except (ValueError, OSError):
            logger.info("XNNPACK delegate not available, using default CPU")

        self._interpreter = tflite.Interpreter(
            model_path=str(self.model_path),
            experimental_delegates=delegates if delegates else None,
        )
        self._interpreter.allocate_tensors()
        self._input_details = self._interpreter.get_input_details()
        self._output_details = self._interpreter.get_output_details()

        # Input tensor layout is [1, height, width, channels].
        input_shape = self._input_details[0]['shape']
        self._input_height = input_shape[1]
        self._input_width = input_shape[2]
        logger.info(
            f"Object detection model loaded: {self.model_path.name} "
            f"(input: {self._input_width}x{self._input_height}, "
            f"{len(self._labels)} classes)"
        )

    def detect(self, frame: np.ndarray) -> list["Detection"]:
        """
        Run object detection on a frame.

        Args:
            frame: BGR numpy array from OpenCV (any resolution; resized to
                the model's native input size).

        Returns:
            Detections with score >= confidence_threshold. bbox values are
            normalized (x_min, y_min, x_max, y_max), clipped to [0, 1].
        """
        # Lazy load the interpreter on first use.
        if self._interpreter is None:
            self._load_model()

        # Preprocess: the model expects RGB at its native input resolution.
        input_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        input_frame = cv2.resize(input_frame, (self._input_width, self._input_height))
        input_data = np.expand_dims(input_frame, axis=0)

        # Match the model's input dtype: quantized models take raw uint8,
        # float models take values scaled to [0, 1].
        # NOTE(review): some float SSD exports expect [-1, 1] normalization
        # instead of [0, 1] — confirm against the specific model in use.
        input_dtype = self._input_details[0]['dtype']
        if input_dtype == np.uint8:
            input_data = input_data.astype(np.uint8)
        elif input_dtype == np.float32:
            input_data = (input_data / 255.0).astype(np.float32)

        # Run inference.
        self._interpreter.set_tensor(self._input_details[0]['index'], input_data)
        self._interpreter.invoke()

        # SSD MobileNet post-processed output layout:
        #   [0] bounding boxes: [1, N, 4] (y_min, x_min, y_max, x_max) normalized
        #   [1] class IDs:      [1, N]
        #   [2] scores:         [1, N]
        #   [3] number of valid detections: [1]
        boxes = self._interpreter.get_tensor(self._output_details[0]['index'])[0]
        class_ids = self._interpreter.get_tensor(self._output_details[1]['index'])[0]
        scores = self._interpreter.get_tensor(self._output_details[2]['index'])[0]
        num_detections = int(self._interpreter.get_tensor(self._output_details[3]['index'])[0])
        # Defensive clamp: never trust num_detections beyond the actual
        # length of the output tensors.
        num_detections = min(num_detections, len(boxes), len(class_ids), len(scores))

        # Filter by confidence and convert to Detection objects.
        detections = []
        for i in range(num_detections):
            score = float(scores[i])
            if score < self.confidence_threshold:
                continue
            class_id = int(class_ids[i])
            # Guard both ends: a negative id would otherwise silently index
            # from the end of the label list.
            if 0 <= class_id < len(self._labels):
                label = self._labels[class_id]
            else:
                label = f"class_{class_id}"
            # Reorder the model's (y_min, x_min, y_max, x_max) to
            # (x_min, y_min, x_max, y_max) and clip into [0, 1].
            y_min, x_min, y_max, x_max = boxes[i]
            bbox = (
                float(np.clip(x_min, 0, 1)),
                float(np.clip(y_min, 0, 1)),
                float(np.clip(x_max, 0, 1)),
                float(np.clip(y_max, 0, 1)),
            )
            detections.append(Detection(
                label=label,
                confidence=score,
                bbox=bbox,
                class_id=class_id,
            ))
        return detections
def annotate_frame(frame: np.ndarray, detections: list["Detection"]) -> np.ndarray:
    """
    Draw bounding boxes and labels on a frame.

    Args:
        frame: BGR numpy array (copied, not modified in place)
        detections: List of Detection objects with normalized bboxes

    Returns:
        Annotated copy of the frame
    """
    annotated = frame.copy()
    h, w = annotated.shape[:2]
    for det in detections:
        # Denormalize bbox from [0, 1] to pixel coordinates.
        x1 = int(det.bbox[0] * w)
        y1 = int(det.bbox[1] * h)
        x2 = int(det.bbox[2] * w)
        y2 = int(det.bbox[3] * h)
        # Green box with label (BGR color order).
        color = (0, 255, 0)
        cv2.rectangle(annotated, (x1, y1), (x2, y2), color, 2)

        label_text = f"{det.label} {det.confidence:.0%}"
        font_scale = 0.6
        thickness = 1
        (tw, th), _ = cv2.getTextSize(label_text, cv2.FONT_HERSHEY_SIMPLEX, font_scale, thickness)
        # Fix: when the box touches the top edge, y1 - th - 8 goes negative
        # and the label would be drawn off-screen. Clamp the label so its
        # background stays inside the frame (below the box's top edge).
        label_bottom = max(y1, th + 8)
        # Filled background behind the text for readability.
        cv2.rectangle(annotated, (x1, label_bottom - th - 8), (x1 + tw + 4, label_bottom), color, -1)
        cv2.putText(annotated, label_text, (x1 + 2, label_bottom - 4),
                    cv2.FONT_HERSHEY_SIMPLEX, font_scale, (0, 0, 0), thickness)
    return annotated

55
server/download_model.sh Executable file
View File

@@ -0,0 +1,55 @@
#!/bin/bash
# Download MobileNet V2 SSD (COCO, INT8 quantized) for TFLite
#
# Model: ssd_mobilenet_v2_coco_quant_postprocess.tflite (~6MB)
# Source: TensorFlow Model Zoo
# Classes: 80 COCO object classes
#
# This script is idempotent - skips download if model exists.

# -e: abort on error, -u: unset variables are errors,
# -o pipefail: a failure anywhere in a pipeline fails the pipeline.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
MODEL_DIR="${SCRIPT_DIR}/models"
MODEL_FILE="${MODEL_DIR}/ssd_mobilenet_v2_coco_quant_postprocess.tflite"
ZIP_URL="https://storage.googleapis.com/download.tensorflow.org/models/tflite/coco_ssd_mobilenet_v2_quantized_300x300_uint8_20200430.zip"
ZIP_FILE="${MODEL_DIR}/model.zip"

mkdir -p "${MODEL_DIR}"

# Idempotency check: nothing to do if the model is already in place.
if [ -f "${MODEL_FILE}" ]; then
    echo "Model already exists: ${MODEL_FILE}"
    echo "Size: $(du -h "${MODEL_FILE}" | cut -f1)"
    exit 0
fi

echo "Downloading MobileNet V2 SSD (quantized)..."
# --fail: without it, curl happily saves an HTML error page on HTTP 4xx/5xx
# with exit code 0, so 'set -e' would not catch the failed download.
curl --fail -L -o "${ZIP_FILE}" "${ZIP_URL}"

echo "Extracting model..."
unzip -o "${ZIP_FILE}" -d "${MODEL_DIR}"

# Clean up zip and extra files
rm -f "${ZIP_FILE}"
rm -f "${MODEL_DIR}/labelmap.txt"  # We use our own coco_labels.txt

if [ -f "${MODEL_FILE}" ]; then
    echo "Model downloaded: ${MODEL_FILE}"
    echo "Size: $(du -h "${MODEL_FILE}" | cut -f1)"
else
    # The zip might extract with a slightly different path
    FOUND=$(find "${MODEL_DIR}" -name "*.tflite" -type f | head -1)
    if [ -n "${FOUND}" ] && [ "${FOUND}" != "${MODEL_FILE}" ]; then
        mv "${FOUND}" "${MODEL_FILE}"
        echo "Model downloaded: ${MODEL_FILE}"
        echo "Size: $(du -h "${MODEL_FILE}" | cut -f1)"
    else
        echo "ERROR: Model file not found after extraction"
        exit 1
    fi
fi

# Clean up any extracted subdirectories
find "${MODEL_DIR}" -mindepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
echo "Done!"

View File

@@ -46,6 +46,20 @@ MOTION_COOLDOWN=5.0
# Seconds between frame checks
MOTION_INTERVAL=0.5
# ============ Object Detection ============
# Enable AI object detection (requires tflite-runtime and model download)
# When enabled, motion triggers object detection to identify what moved.
# Events with no detected objects are suppressed (configurable).
DETECTION_ENABLED=false
# Minimum confidence for a detection (0.0-1.0, lower = more detections)
DETECTION_CONFIDENCE=0.5
# Suppress events where motion is detected but no objects found
# Set to false to keep reporting all motion events
DETECTION_SUPPRESS_EMPTY=true
# ============ Event Collector ============
# URL to POST motion events to (collector on Mac mini)

View File

@@ -41,6 +41,13 @@ COLLECTOR_URL = os.getenv("COLLECTOR_URL", "")
COLLECTOR_API_KEY = os.getenv("COLLECTOR_API_KEY", "")
CAMERA_ID = os.getenv("CAMERA_ID", "camera")
# Object detection config
DETECTION_ENABLED = os.getenv("DETECTION_ENABLED", "false").lower() == "true"
DETECTION_MODEL_PATH = os.getenv("DETECTION_MODEL_PATH", "models/ssd_mobilenet_v2_coco_quant_postprocess.tflite")
DETECTION_LABELS_PATH = os.getenv("DETECTION_LABELS_PATH", "models/coco_labels.txt")
DETECTION_CONFIDENCE = float(os.getenv("DETECTION_CONFIDENCE", "0.5"))
DETECTION_SUPPRESS_EMPTY = os.getenv("DETECTION_SUPPRESS_EMPTY", "true").lower() == "true"
if not API_KEY:
raise ValueError("API_KEY not set in .env file")
@@ -141,6 +148,11 @@ if MOTION_ENABLED:
min_area_percent=MOTION_MIN_AREA,
cooldown_seconds=MOTION_COOLDOWN,
check_interval=MOTION_INTERVAL,
detection_enabled=DETECTION_ENABLED,
detection_model_path=DETECTION_MODEL_PATH,
detection_labels_path=DETECTION_LABELS_PATH,
detection_confidence=DETECTION_CONFIDENCE,
detection_suppress_empty=DETECTION_SUPPRESS_EMPTY,
)
@@ -219,9 +231,16 @@ def enable_motion(api_key: str = Security(verify_api_key)):
motion_detector = MotionDetector(
camera_id=CAMERA_ID,
collector_url=COLLECTOR_URL if COLLECTOR_URL else None,
collector_api_key=COLLECTOR_API_KEY if COLLECTOR_API_KEY else None,
threshold=MOTION_THRESHOLD,
min_area_percent=MOTION_MIN_AREA,
cooldown_seconds=MOTION_COOLDOWN,
check_interval=MOTION_INTERVAL,
detection_enabled=DETECTION_ENABLED,
detection_model_path=DETECTION_MODEL_PATH,
detection_labels_path=DETECTION_LABELS_PATH,
detection_confidence=DETECTION_CONFIDENCE,
detection_suppress_empty=DETECTION_SUPPRESS_EMPTY,
)
motion_detector.start(camera_manager.get_raw_frame)

View File

@@ -0,0 +1,81 @@
???
person
bicycle
car
motorcycle
airplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
couch
potted plant
bed
dining table
toilet
tv
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush

View File

@@ -2,8 +2,12 @@
"""
Motion Detection Module
Simple frame-differencing motion detection with event reporting.
Frame-differencing motion detection with optional object detection.
Runs as background thread, POSTs events to collector on Mac mini.
When object detection is enabled, motion acts as a pre-filter:
motion triggers -> object detection confirms -> event reported.
If no objects are found, the event can be suppressed (configurable).
"""
import os
@@ -15,7 +19,7 @@ import httpx
import base64
from datetime import datetime
from typing import Optional, Callable
from dataclasses import dataclass, asdict
from dataclasses import dataclass, asdict, field
from pathlib import Path
logger = logging.getLogger(__name__)
@@ -23,23 +27,25 @@ logger = logging.getLogger(__name__)
@dataclass
class MotionEvent:
"""Motion detection event"""
"""Motion/detection event"""
timestamp: str
camera_id: str
event_type: str = "motion"
confidence: float = 0.0
region: str = "full" # Could be "left", "right", "center" etc.
area_percent: float = 0.0 # % of frame with motion
detections: Optional[list] = None # List of detection dicts when objects found
class MotionDetector:
"""
Background motion detection with event reporting.
Uses frame differencing to detect motion and reports
events to a collector endpoint.
Background motion detection with optional object detection.
Uses frame differencing to detect motion. When object detection is
enabled, runs inference on motion frames to identify objects and
suppress false positives.
"""
def __init__(
self,
camera_id: str,
@@ -49,6 +55,12 @@ class MotionDetector:
min_area_percent: float = 0.5, # Minimum % of frame to trigger
cooldown_seconds: float = 5.0, # Seconds between events
check_interval: float = 0.5, # Seconds between frame checks
# Object detection
detection_enabled: bool = False,
detection_model_path: Optional[str] = None,
detection_labels_path: Optional[str] = None,
detection_confidence: float = 0.5,
detection_suppress_empty: bool = True,
):
self.camera_id = camera_id
self.collector_url = collector_url
@@ -57,42 +69,58 @@ class MotionDetector:
self.min_area_percent = min_area_percent
self.cooldown_seconds = cooldown_seconds
self.check_interval = check_interval
self.detection_suppress_empty = detection_suppress_empty
self._previous_frame: Optional[any] = None
self._last_event_time: float = 0
self._running = False
self._thread: Optional[threading.Thread] = None
self._get_frame: Optional[Callable] = None
# Object detector (lazy import to avoid requiring tflite when disabled)
self._detector = None
if detection_enabled and detection_model_path:
try:
from detector import ObjectDetector
self._detector = ObjectDetector(
model_path=detection_model_path,
labels_path=detection_labels_path or "",
confidence_threshold=detection_confidence,
)
logger.info(f"Object detection enabled (model: {detection_model_path})")
except ImportError as e:
logger.error(f"Object detection unavailable: {e}")
# Stats
self.events_detected = 0
self.events_reported = 0
self.events_suppressed = 0
self.last_event: Optional[MotionEvent] = None
def start(self, get_frame_func: Callable):
    """
    Start motion detection in background thread.

    Idempotent: a second call while already running only logs a warning.

    Args:
        get_frame_func: Function that returns current frame as numpy array
            (BGR), or None when no frame is available.
    """
    if self._running:
        logger.warning("Motion detector already running")
        return
    self._get_frame = get_frame_func
    self._running = True
    # Daemon thread: a stuck detection loop never blocks process exit.
    self._thread = threading.Thread(target=self._detection_loop, daemon=True)
    self._thread.start()
    logger.info(f"Motion detection started (threshold={self.threshold}, cooldown={self.cooldown_seconds}s)")
def stop(self):
    """Stop motion detection and wait briefly for the worker thread.

    Clears the running flag so the loop exits on its next iteration, then
    joins with a bounded timeout (the loop sleeps up to check_interval per
    pass, so 2s is normally enough — the thread is a daemon either way).
    """
    self._running = False
    if self._thread:
        self._thread.join(timeout=2.0)
    logger.info("Motion detection stopped")
def _detection_loop(self):
"""Main detection loop - runs in background thread"""
while self._running:
@@ -100,91 +128,137 @@ class MotionDetector:
self._check_for_motion()
except Exception as e:
logger.error(f"Motion detection error: {e}")
time.sleep(self.check_interval)
def _check_for_motion(self):
    """Check current frame for motion.

    Diffs the current frame against the previous one and calls
    _handle_motion() when the changed area (as % of the frame) reaches
    min_area_percent. The very first call only seeds the baseline frame.
    """
    if not self._get_frame:
        return
    # Get current frame
    frame = self._get_frame()
    if frame is None:
        return
    # Grayscale + heavy blur suppress sensor noise before differencing.
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (21, 21), 0)
    # Need previous frame to compare
    if self._previous_frame is None:
        self._previous_frame = gray
        return
    # Compute per-pixel difference and binarize at self.threshold.
    frame_delta = cv2.absdiff(self._previous_frame, gray)
    thresh = cv2.threshold(frame_delta, self.threshold, 255, cv2.THRESH_BINARY)[1]
    # Dilate to fill gaps between nearby changed pixels.
    thresh = cv2.dilate(thresh, None, iterations=2)
    # Calculate motion area percentage (0-100).
    motion_pixels = cv2.countNonZero(thresh)
    total_pixels = thresh.shape[0] * thresh.shape[1]
    area_percent = (motion_pixels / total_pixels) * 100
    # Update previous frame (note: the post-motion frame becomes the new
    # baseline, so sustained motion keeps re-triggering each interval).
    self._previous_frame = gray
    # Check if motion exceeds threshold
    if area_percent >= self.min_area_percent:
        self._handle_motion(frame, area_percent)
def _handle_motion(self, frame, area_percent: float):
"""Handle detected motion"""
"""Handle detected motion, optionally running object detection"""
now = time.time()
# Check cooldown
if now - self._last_event_time < self.cooldown_seconds:
return
self._last_event_time = now
self.events_detected += 1
# Run object detection if enabled
detections_list = []
detections_dicts = None
snapshot_frame = frame
if self._detector:
try:
detections_list = self._detector.detect(frame)
except Exception as e:
logger.error(f"Object detection error: {e}")
if detections_list:
detections_dicts = [{
"label": d.label,
"confidence": round(d.confidence, 3),
"bbox": [round(x, 4) for x in d.bbox],
} for d in detections_list]
# Draw bounding boxes on snapshot
try:
from detector import annotate_frame
snapshot_frame = annotate_frame(frame, detections_list)
except Exception as e:
logger.warning(f"Failed to annotate frame: {e}")
elif self.detection_suppress_empty:
self.events_suppressed += 1
logger.debug(
f"Motion ({area_percent:.1f}%) but no objects detected - suppressed "
f"({self.events_suppressed} total)"
)
return
# Create event
if detections_list:
top_confidence = max(d.confidence for d in detections_list)
event_type = "object"
else:
top_confidence = min(area_percent / 10.0, 1.0)
event_type = "motion"
event = MotionEvent(
timestamp=datetime.utcnow().isoformat() + "Z",
camera_id=self.camera_id,
confidence=min(area_percent / 10.0, 1.0), # Normalize to 0-1
event_type=event_type,
confidence=round(top_confidence, 3),
area_percent=round(area_percent, 2),
detections=detections_dicts,
)
self.last_event = event
logger.info(f"Motion detected: {area_percent:.1f}% of frame (confidence: {event.confidence:.2f})")
if detections_list:
labels = ", ".join(f"{d.label}({d.confidence:.0%})" for d in detections_list)
logger.info(f"Objects detected: {labels} (motion: {area_percent:.1f}%)")
else:
logger.info(f"Motion detected: {area_percent:.1f}% of frame (confidence: {event.confidence:.2f})")
# Report to collector
if self.collector_url:
self._report_event(event, frame)
self._report_event(event, snapshot_frame)
def _report_event(self, event: MotionEvent, frame):
"""POST event to collector endpoint"""
try:
# Encode frame as JPEG
_, buffer = cv2.imencode('.jpg', frame, [cv2.IMWRITE_JPEG_QUALITY, 85])
snapshot_b64 = base64.b64encode(buffer.tobytes()).decode('utf-8')
# Build payload
payload = {
"event": asdict(event),
"snapshot": snapshot_b64,
}
# POST to collector
headers = {"Content-Type": "application/json"}
if self.collector_api_key:
headers["X-API-Key"] = self.collector_api_key
# Use sync client (we're in a thread)
with httpx.Client(timeout=5.0, verify=False) as client:
response = client.post(
@@ -192,22 +266,24 @@ class MotionDetector:
json=payload,
headers=headers,
)
if response.status_code == 200:
self.events_reported += 1
logger.info(f"Event reported to collector ({self.events_reported} total)")
else:
logger.warning(f"Collector returned {response.status_code}: {response.text[:100]}")
except Exception as e:
logger.error(f"Failed to report event: {e}")
def get_stats(self) -> dict:
"""Get detection statistics"""
return {
"running": self._running,
"events_detected": self.events_detected,
"events_reported": self.events_reported,
"events_suppressed": self.events_suppressed,
"detection_enabled": self._detector is not None,
"last_event": asdict(self.last_event) if self.last_event else None,
"config": {
"threshold": self.threshold,

View File

@@ -12,3 +12,6 @@ python-dotenv>=1.0.0
# HTTP client (for posting events to collector)
httpx>=0.24.0
# Object detection (installed separately by setup.sh)
# tflite-runtime>=2.14.0

View File

@@ -70,10 +70,16 @@ mkdir -p "${INSTALL_DIR}/ssl"
# Copy files
cp "${SCRIPT_DIR}/main.py" "${INSTALL_DIR}/"
cp "${SCRIPT_DIR}/motion.py" "${INSTALL_DIR}/"
cp "${SCRIPT_DIR}/detector.py" "${INSTALL_DIR}/"
cp "${SCRIPT_DIR}/requirements.txt" "${INSTALL_DIR}/"
cp "${SCRIPT_DIR}/generate_cert.sh" "${INSTALL_DIR}/"
cp "${SCRIPT_DIR}/download_model.sh" "${INSTALL_DIR}/"
cp "${SCRIPT_DIR}/.env" "${INSTALL_DIR}/"
# Copy model files
mkdir -p "${INSTALL_DIR}/models"
cp "${SCRIPT_DIR}/models/coco_labels.txt" "${INSTALL_DIR}/models/"
# Create virtual environment
echo_info "Creating Python virtual environment..."
cd "${INSTALL_DIR}"
@@ -85,6 +91,19 @@ echo_info "Installing Python dependencies..."
pip install --upgrade pip
pip install -r requirements.txt
# Install TFLite runtime for object detection
echo_info "Installing TFLite runtime for object detection..."
pip install tflite-runtime 2>/dev/null || echo_warn "tflite-runtime not available for this platform (object detection will be disabled)"
# Download object detection model (if not already present)
if [ -f "${INSTALL_DIR}/models/ssd_mobilenet_v2_coco_quant_postprocess.tflite" ]; then
echo_info "Object detection model already present"
else
echo_info "Downloading object detection model..."
chmod +x "${INSTALL_DIR}/download_model.sh"
cd "${INSTALL_DIR}" && ./download_model.sh
fi
# Generate SSL certificates if not present
if [ ! -f ssl/cert.pem ]; then
echo_info "Generating SSL certificates..."