From e1171e8ff808404ceead02c06a3d118ac89b2567 Mon Sep 17 00:00:00 2001
From: Alex <akazaev@proton.me>
Date: Sun, 8 Feb 2026 17:04:10 -0600
Subject: [PATCH] Add TFLite object detection to reduce false positives

Motion detection now optionally runs MobileNet V2 SSD (COCO, quantized)
on frames that trigger motion, identifying objects like people, cats, and
cars. Detection is off by default (DETECTION_ENABLED); when it is enabled,
events without detected objects are suppressed unless
DETECTION_SUPPRESS_EMPTY is set to false. Snapshots include bounding box
annotations. New MCP tool vision_get_detections() enables label-based
queries.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .gitignore                    |   3 +
 collector/collector.py        |  40 ++++++-
 mcp/vision_mcp.py             | 133 ++++++++++++++++++++-
 server/detector.py            | 210 ++++++++++++++++++++++++++++++++++
 server/download_model.sh      |  55 +++++++++
 server/env.example            |  14 +++
 server/main.py                |  19 +++
 server/models/coco_labels.txt |  81 +++++++++++++
 server/motion.py              | 160 +++++++++++++++++++-------
 server/requirements.txt       |   3 +
 server/setup.sh               |  19 +++
 11 files changed, 687 insertions(+), 50 deletions(-)
 create mode 100644 server/detector.py
 create mode 100755 server/download_model.sh
 create mode 100644 server/models/coco_labels.txt

diff --git a/.gitignore b/.gitignore
index eb14b10..2cf6e14 100644
--- a/.gitignore
+++ b/.gitignore
@@ -31,3 +31,6 @@ Thumbs.db
 .pytest_cache/
 .coverage
 htmlcov/
+
+# ML model binaries (downloaded during setup)
+*.tflite
diff --git a/collector/collector.py b/collector/collector.py
index 31f95c8..4ba6173 100644
--- a/collector/collector.py
+++ b/collector/collector.py
@@ -13,6 +13,7 @@ Runs as a service on Mac mini, listens for POSTs from Pis.
 """
 
 import os
+import json
 import sqlite3
 import base64
 import logging
@@ -20,7 +21,7 @@ import threading
 import time
 from datetime import datetime, timedelta
 from pathlib import Path
-from typing import Optional
+from typing import Optional, List
 from contextlib import contextmanager
 
 from fastapi import FastAPI, HTTPException, Header
@@ -87,6 +88,16 @@ def init_db():
     logger.info(f"Database initialized: {DB_PATH}")
 
 
+def migrate_db():
+    """Add the 'detections' column to the events table when missing (idempotent)."""
+    with get_db() as conn:
+        existing = {info[1] for info in conn.execute("PRAGMA table_info(events)")}
+        if "detections" not in existing:  # info[1] is the column name
+            conn.execute("ALTER TABLE events ADD COLUMN detections TEXT")
+            conn.commit()
+            logger.info("Migration: added 'detections' column to events table")
+
+
 @contextmanager
 def get_db():
     """Database connection context manager"""
@@ -171,6 +182,12 @@ def stop_cleanup_thread():
 
 # === Models ===
 
+class DetectionItem(BaseModel):
+    label: str  # detected object class name, e.g. "person"
+    confidence: float  # detection score reported by the camera
+    bbox: List[float]  # presumably [x_min, y_min, x_max, y_max] normalized 0-1 — confirm with sender
+
+
 class EventData(BaseModel):
     timestamp: str
     camera_id: str
@@ -178,6 +195,7 @@ class EventData(BaseModel):
     confidence: float = 0.0
     region: str = "full"
     area_percent: float = 0.0
+    detections: Optional[List[DetectionItem]] = None
 
 
 class IncomingEvent(BaseModel):
@@ -190,6 +208,7 @@ class IncomingEvent(BaseModel):
 @app.on_event("startup")
 def startup():
     init_db()
+    migrate_db()
     start_cleanup_thread()
     logger.info(f"🦊 Event collector started on port {PORT}")
     logger.info(f"   Data directory: {DATA_DIR}")
@@ -257,14 +276,19 @@ def receive_event(
         except Exception as e:
             logger.error(f"Failed to save snapshot: {e}")
     
+    # Serialize detections to JSON if present
+    detections_json = None
+    if event.detections:
+        detections_json = json.dumps([d.model_dump() for d in event.detections])
+
     # Store in database
     try:
         with get_db() as conn:
             conn.execute("""
-                INSERT INTO events 
-                (event_id, timestamp, camera_id, event_type, confidence, 
-                 area_percent, snapshot_path, created_at)
-                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+                INSERT INTO events
+                (event_id, timestamp, camera_id, event_type, confidence,
+                 area_percent, snapshot_path, detections, created_at)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
             """, (
                 event_id,
                 event.timestamp,
@@ -273,6 +297,7 @@ def receive_event(
                 event.confidence,
                 event.area_percent,
                 snapshot_path,
+                detections_json,
                 now.isoformat() + "Z"
             ))
             conn.commit()
@@ -357,10 +382,15 @@ def get_stats():
             FROM events GROUP BY event_type
         """).fetchall()
         
+        with_detections = conn.execute(
+            "SELECT COUNT(*) FROM events WHERE detections IS NOT NULL"
+        ).fetchone()[0]
+
         return {
             "total_events": total,
             "annotated": annotated,
             "unannotated": total - annotated,
+            "with_detections": with_detections,
             "by_camera": {row[0]: row[1] for row in by_camera},
             "by_type": {row[0]: row[1] for row in by_type},
             "data_dir": str(DATA_DIR),
diff --git a/mcp/vision_mcp.py b/mcp/vision_mcp.py
index f844bac..44d186b 100644
--- a/mcp/vision_mcp.py
+++ b/mcp/vision_mcp.py
@@ -518,7 +518,7 @@ def vision_get_events(
         
         events = []
         for row in rows:
-            events.append({
+            event_dict = {
                 "id": row["id"],
                 "event_id": row["event_id"],
                 "timestamp": row["timestamp"],
@@ -529,7 +529,14 @@ def vision_get_events(
                 "snapshot_path": row["snapshot_path"],
                 "annotation": row["annotation"],
                 "tags": row["tags"],
-            })
+            }
+            # Include detections if present
+            try:
+                det_raw = row["detections"]
+                event_dict["detections"] = json.loads(det_raw) if det_raw else None
+            except (KeyError, json.JSONDecodeError, TypeError):
+                event_dict["detections"] = None
+            events.append(event_dict)
         
         logger.info(f"Retrieved {len(events)} events")
         return events
@@ -541,6 +548,99 @@ def vision_get_events(
         conn.close()
 
 
+@mcp.tool()
+def vision_get_detections(
+    label: str = None,
+    camera_id: str = None,
+    since: str = None,
+    min_confidence: float = 0.0,
+    limit: int = 20
+) -> List[Dict[str, Any]]:
+    """
+    Query events that contain specific object detections.
+
+    Filters events to only those where the AI detected objects
+    (person, cat, dog, car, etc.). More targeted than raw motion events.
+
+    Args:
+        label: Filter by detected object type (e.g., "person", "cat", "dog")
+        camera_id: Filter by camera
+        since: ISO timestamp - only events at or after this time
+        min_confidence: Minimum detection confidence (0.0-1.0)
+        limit: Maximum events to return (default 20)
+
+    Returns:
+        Events with their matching detections, or [{"error": ...}] on failure
+
+    Examples:
+        vision_get_detections(label="cat")
+        vision_get_detections(label="person", camera_id="basement")
+        vision_get_detections(min_confidence=0.8)
+    """
+    conn = get_events_db()
+    if not conn:
+        return [{"error": f"Events database not found: {EVENTS_DB}"}]
+
+    try:
+        query = "SELECT * FROM events WHERE detections IS NOT NULL"
+        params = []
+
+        if since:
+            query += " AND timestamp >= ?"
+            params.append(since)
+
+        if camera_id:
+            query += " AND camera_id = ?"
+            params.append(camera_id)
+
+        # Over-fetch: label/confidence filtering happens in Python, not in SQL
+        query += " ORDER BY timestamp DESC LIMIT ?"
+        params.append(limit * 5)
+
+        rows = conn.execute(query, params).fetchall()
+
+        events = []
+        for row in rows:
+            try:
+                dets = json.loads(row["detections"])
+            except (json.JSONDecodeError, TypeError):
+                continue
+            if not isinstance(dets, list):
+                continue  # malformed payload: skip this row, keep the query alive
+
+            # Keep only well-formed detections that satisfy both filters
+            matching = [
+                d for d in dets
+                if isinstance(d, dict)
+                and (not label or d.get("label") == label)
+                and (d.get("confidence") or 0) >= min_confidence
+            ]
+            if not matching and (label or min_confidence > 0):
+                continue
+
+            events.append({
+                "event_id": row["event_id"],
+                "timestamp": row["timestamp"],
+                "camera_id": row["camera_id"],
+                "confidence": row["confidence"],
+                "annotation": row["annotation"],
+                "tags": row["tags"],
+                "detections": matching,
+            })
+
+            if len(events) >= limit:
+                break
+
+        logger.info(f"Retrieved {len(events)} detection events")
+        return events
+
+    except Exception as e:
+        logger.error(f"Error querying detections: {e}")
+        return [{"error": str(e)}]
+    finally:
+        conn.close()
+
+
 @mcp.tool()
 def vision_get_event_snapshot(event_id: str) -> Union[MCPImage, str]:
     """
@@ -702,9 +802,36 @@ def vision_event_stats() -> Dict[str, Any]:
             WHERE timestamp >= datetime('now', '-1 day')
         """).fetchone()[0]
         
+        # Detection stats
+        try:
+            with_detections = conn.execute(
+                "SELECT COUNT(*) FROM events WHERE detections IS NOT NULL"
+            ).fetchone()[0]
+            stats["with_detections"] = with_detections
+
+            if with_detections > 0:
+                det_rows = conn.execute(
+                    "SELECT detections FROM events WHERE detections IS NOT NULL"
+                ).fetchall()
+                label_counts = {}
+                for det_row in det_rows:
+                    try:
+                        dets = json.loads(det_row[0])
+                        for d in dets:
+                            lbl = d.get("label", "unknown")
+                            label_counts[lbl] = label_counts.get(lbl, 0) + 1
+                    except (json.JSONDecodeError, TypeError):
+                        pass
+                if label_counts:
+                    stats["detected_objects"] = dict(
+                        sorted(label_counts.items(), key=lambda x: -x[1])
+                    )
+        except Exception:
+            pass  # Column may not exist on older databases
+
         # Most recent event
         row = conn.execute("""
-            SELECT event_id, timestamp, camera_id 
+            SELECT event_id, timestamp, camera_id
             FROM events ORDER BY timestamp DESC LIMIT 1
         """).fetchone()
         if row:
diff --git a/server/detector.py b/server/detector.py
new file mode 100644
index 0000000..f2d5210
--- /dev/null
+++ b/server/detector.py
@@ -0,0 +1,210 @@
+#!/usr/bin/env python3
+"""
+Object Detection Module
+
+Lightweight object detection using TensorFlow Lite with MobileNet V2 SSD.
+Designed to run on Raspberry Pi 4/5 with minimal overhead.
+
+The model is lazy-loaded on first detect() call to avoid startup delay.
+"""
+
+import cv2
+import logging
+import numpy as np
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Optional
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class Detection:
+    """A single detected object"""
+    label: str  # name from the label file, or "class_<id>" when the id has no entry
+    confidence: float  # model score for this detection
+    bbox: tuple  # (x_min, y_min, x_max, y_max) normalized 0-1
+    class_id: int  # class index as reported by the model
+
+
+class ObjectDetector:
+    """
+    Object detection using TFLite MobileNet V2 SSD.
+
+    Lazy-loads the model on first detect() call. Designed to be called
+    from the motion detection thread after motion is confirmed.
+    """
+
+    def __init__(
+        self,
+        model_path: str,  # path to the .tflite model file
+        labels_path: str,  # path to the label file, one class name per line
+        confidence_threshold: float = 0.5,  # detections scoring below this are dropped
+    ):
+        self.model_path = Path(model_path)
+        self.labels_path = Path(labels_path)
+        self.confidence_threshold = confidence_threshold
+
+        self._interpreter = None  # stays None until _load_model() succeeds
+        self._input_details = None
+        self._output_details = None
+        self._labels: list[str] = []
+        self._input_height = 0
+        self._input_width = 0
+
+    def _load_model(self):
+        """Load the TFLite model and label map; raises ImportError / FileNotFoundError."""
+        try:
+            import tflite_runtime.interpreter as tflite
+        except ImportError as err:
+            raise ImportError(
+                "tflite-runtime not installed. "
+                "Install with: pip install tflite-runtime"
+            ) from err
+
+        if not self.model_path.exists():
+            raise FileNotFoundError(
+                f"Model file not found: {self.model_path}\n"
+                f"Run download_model.sh to download the model."
+            )
+
+        # Load labels; is_file() because an empty path is Path("."), which exists()
+        if self.labels_path.is_file():
+            self._labels = self.labels_path.read_text(encoding="utf-8").strip().splitlines()
+        else:
+            logger.warning(f"Labels file not found: {self.labels_path}")
+            self._labels = []
+
+        # Try XNNPACK delegate for ARM acceleration
+        delegates = []
+        try:
+            delegates = [tflite.load_delegate('libXNNPACK.so')]
+            logger.info("XNNPACK delegate loaded")
+        except (ValueError, OSError):
+            logger.info("XNNPACK delegate not available, using default CPU")
+
+        # Build into a local so a failure below leaves self._interpreter as None
+        interpreter = tflite.Interpreter(
+            model_path=str(self.model_path),
+            experimental_delegates=delegates if delegates else None,
+        )
+        interpreter.allocate_tensors()
+
+        self._input_details = interpreter.get_input_details()
+        self._output_details = interpreter.get_output_details()
+
+        # Get expected input size (shape is read as [batch, height, width, ...])
+        input_shape = self._input_details[0]['shape']
+        self._input_height = input_shape[1]
+        self._input_width = input_shape[2]
+        self._interpreter = interpreter  # publish last: detect() retries if loading failed
+        logger.info(
+            f"Object detection model loaded: {self.model_path.name} "
+            f"(input: {self._input_width}x{self._input_height}, "
+            f"{len(self._labels)} classes)"
+        )
+
+    def detect(self, frame: np.ndarray) -> list[Detection]:
+        """
+        Run object detection on a frame.
+
+        Args:
+            frame: BGR numpy array from OpenCV
+
+        Returns:
+            List of Detection objects above confidence threshold
+        """
+        # Lazy load (the first call may raise ImportError / FileNotFoundError)
+        if self._interpreter is None:
+            self._load_model()
+
+        # Preprocess: resize and convert BGR to RGB
+        input_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        input_frame = cv2.resize(input_frame, (self._input_width, self._input_height))
+        input_data = np.expand_dims(input_frame, axis=0)
+
+        # Ensure correct dtype
+        input_dtype = self._input_details[0]['dtype']
+        if input_dtype == np.uint8:
+            input_data = input_data.astype(np.uint8)
+        elif input_dtype == np.float32:  # NOTE(review): assumes a 0-1 input range — confirm
+            input_data = (input_data / 255.0).astype(np.float32)
+
+        # Run inference
+        self._interpreter.set_tensor(self._input_details[0]['index'], input_data)
+        self._interpreter.invoke()
+
+        # Parse outputs (assumes the SSD MobileNet post-processed tensor order):
+        # [0] bounding boxes: [1, N, 4] (y_min, x_min, y_max, x_max) normalized
+        # [1] class IDs: [1, N]
+        # [2] scores: [1, N]
+        # [3] number of detections: [1]
+        boxes = self._interpreter.get_tensor(self._output_details[0]['index'])[0]
+        class_ids = self._interpreter.get_tensor(self._output_details[1]['index'])[0]
+        scores = self._interpreter.get_tensor(self._output_details[2]['index'])[0]
+        num_detections = int(self._interpreter.get_tensor(self._output_details[3]['index'])[0])
+
+        # Filter by confidence; never index past the arrays the model returned
+        detections = []
+        for i in range(min(num_detections, len(scores))):
+            score = float(scores[i])
+            if score < self.confidence_threshold:
+                continue
+            # NOTE(review): assumes class ids index the label file directly — confirm for this model
+            class_id = int(class_ids[i])
+            label = self._labels[class_id] if 0 <= class_id < len(self._labels) else f"class_{class_id}"
+
+            # Convert from (y_min, x_min, y_max, x_max) to (x_min, y_min, x_max, y_max)
+            y_min, x_min, y_max, x_max = boxes[i]
+            bbox = (
+                float(np.clip(x_min, 0, 1)),
+                float(np.clip(y_min, 0, 1)),
+                float(np.clip(x_max, 0, 1)),
+                float(np.clip(y_max, 0, 1)),
+            )
+
+            detections.append(Detection(
+                label=label,
+                confidence=score,
+                bbox=bbox,
+                class_id=class_id,
+            ))
+
+        return detections
+
+
+def annotate_frame(frame: np.ndarray, detections: list[Detection]) -> np.ndarray:
+    """
+    Draw bounding boxes and labels on a copy of a frame.
+
+    Args:
+        frame: BGR numpy array (will be copied, not modified in place)
+        detections: List of Detection objects with bboxes normalized to 0-1
+
+    Returns:
+        Annotated copy of the frame
+    """
+    annotated = frame.copy()
+    h, w = annotated.shape[:2]
+    color = (0, 255, 0)  # green box and label background (BGR)
+    font, font_scale, thickness = cv2.FONT_HERSHEY_SIMPLEX, 0.6, 1
+
+    for det in detections:
+        # Scale the normalized bbox to pixel coordinates
+        x1, x2 = int(det.bbox[0] * w), int(det.bbox[2] * w)
+        y1, y2 = int(det.bbox[1] * h), int(det.bbox[3] * h)
+        cv2.rectangle(annotated, (x1, y1), (x2, y2), color, 2)
+
+        label_text = f"{det.label} {det.confidence:.0%}"
+        (tw, th), _ = cv2.getTextSize(label_text, font, font_scale, thickness)
+
+        # The label normally sits above the box; when the box touches the top
+        # edge there is no room there, so keep the label inside the frame.
+        label_bottom = max(y1, th + 8)
+        # Filled background first, then dark text on top of it
+        cv2.rectangle(annotated, (x1, label_bottom - th - 8),
+                      (x1 + tw + 4, label_bottom), color, -1)
+        cv2.putText(annotated, label_text, (x1 + 2, label_bottom - 4),
+                    font, font_scale, (0, 0, 0), thickness)
+
+    return annotated
diff --git a/server/download_model.sh b/server/download_model.sh
new file mode 100755
index 0000000..fb1e651
--- /dev/null
+++ b/server/download_model.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+# Download MobileNet V2 SSD (COCO, INT8 quantized) for TFLite
+#
+# Model: ssd_mobilenet_v2_coco_quant_postprocess.tflite (~6MB)
+# Source: TensorFlow Model Zoo
+# Classes: 80 COCO object classes
+#
+# This script is idempotent - skips download if model exists.
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+MODEL_DIR="${SCRIPT_DIR}/models"
+MODEL_FILE="${MODEL_DIR}/ssd_mobilenet_v2_coco_quant_postprocess.tflite"
+ZIP_URL="https://storage.googleapis.com/download.tensorflow.org/models/tflite/coco_ssd_mobilenet_v2_quantized_300x300_uint8_20200430.zip"
+ZIP_FILE="${MODEL_DIR}/model.zip"
+
+mkdir -p "${MODEL_DIR}"
+
+if [ -f "${MODEL_FILE}" ]; then
+    echo "Model already exists: ${MODEL_FILE}"
+    echo "Size: $(du -h "${MODEL_FILE}" | cut -f1)"
+    exit 0
+fi
+
+echo "Downloading MobileNet V2 SSD (quantized)..."
+curl -fL -o "${ZIP_FILE}" "${ZIP_URL}"  # -f: abort on HTTP errors instead of saving the error page
+
+echo "Extracting model..."
+unzip -o "${ZIP_FILE}" -d "${MODEL_DIR}"
+
+# Clean up zip and extra files
+rm -f "${ZIP_FILE}"
+rm -f "${MODEL_DIR}/labelmap.txt"  # We use our own coco_labels.txt
+
+if [ -f "${MODEL_FILE}" ]; then
+    echo "Model downloaded: ${MODEL_FILE}"
+    echo "Size: $(du -h "${MODEL_FILE}" | cut -f1)"
+else
+    # The archive may name the model differently: take the first .tflite found
+    FOUND=$(find "${MODEL_DIR}" -name "*.tflite" -type f | head -1)
+    if [ -n "${FOUND}" ] && [ "${FOUND}" != "${MODEL_FILE}" ]; then
+        mv "${FOUND}" "${MODEL_FILE}"
+        echo "Model downloaded: ${MODEL_FILE}"
+        echo "Size: $(du -h "${MODEL_FILE}" | cut -f1)"
+    else
+        echo "ERROR: Model file not found after extraction"
+        exit 1
+    fi
+fi
+
+# Clean up any extracted subdirectories
+find "${MODEL_DIR}" -mindepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
+
+echo "Done!"
diff --git a/server/env.example b/server/env.example
index 57d7437..27ed369 100644
--- a/server/env.example
+++ b/server/env.example
@@ -46,6 +46,20 @@ MOTION_COOLDOWN=5.0
 # Seconds between frame checks
 MOTION_INTERVAL=0.5
 
+# ============ Object Detection ============
+
+# Enable AI object detection (requires tflite-runtime and model download)
+# When enabled, motion triggers object detection to identify what moved.
+# Events with no detected objects are suppressed (configurable).
+DETECTION_ENABLED=false
+
+# Minimum confidence for a detection (0.0-1.0, lower = more detections)
+DETECTION_CONFIDENCE=0.5
+
+# Suppress events where motion is detected but no objects found
+# Set to false to keep reporting all motion events
+DETECTION_SUPPRESS_EMPTY=true
+
 # ============ Event Collector ============
 
 # URL to POST motion events to (collector on Mac mini)
diff --git a/server/main.py b/server/main.py
index 9c4034e..0a83639 100644
--- a/server/main.py
+++ b/server/main.py
@@ -41,6 +41,13 @@ COLLECTOR_URL = os.getenv("COLLECTOR_URL", "")
 COLLECTOR_API_KEY = os.getenv("COLLECTOR_API_KEY", "")
 CAMERA_ID = os.getenv("CAMERA_ID", "camera")
 
+# Object detection config
+DETECTION_ENABLED = os.getenv("DETECTION_ENABLED", "false").lower() == "true"
+DETECTION_MODEL_PATH = os.getenv("DETECTION_MODEL_PATH", "models/ssd_mobilenet_v2_coco_quant_postprocess.tflite")
+DETECTION_LABELS_PATH = os.getenv("DETECTION_LABELS_PATH", "models/coco_labels.txt")
+DETECTION_CONFIDENCE = float(os.getenv("DETECTION_CONFIDENCE", "0.5"))
+DETECTION_SUPPRESS_EMPTY = os.getenv("DETECTION_SUPPRESS_EMPTY", "true").lower() == "true"
+
 if not API_KEY:
     raise ValueError("API_KEY not set in .env file")
 
@@ -141,6 +148,11 @@ if MOTION_ENABLED:
         min_area_percent=MOTION_MIN_AREA,
         cooldown_seconds=MOTION_COOLDOWN,
         check_interval=MOTION_INTERVAL,
+        detection_enabled=DETECTION_ENABLED,
+        detection_model_path=DETECTION_MODEL_PATH,
+        detection_labels_path=DETECTION_LABELS_PATH,
+        detection_confidence=DETECTION_CONFIDENCE,
+        detection_suppress_empty=DETECTION_SUPPRESS_EMPTY,
     )
 
 
@@ -219,9 +231,16 @@ def enable_motion(api_key: str = Security(verify_api_key)):
         motion_detector = MotionDetector(
             camera_id=CAMERA_ID,
             collector_url=COLLECTOR_URL if COLLECTOR_URL else None,
+            collector_api_key=COLLECTOR_API_KEY if COLLECTOR_API_KEY else None,
             threshold=MOTION_THRESHOLD,
             min_area_percent=MOTION_MIN_AREA,
             cooldown_seconds=MOTION_COOLDOWN,
+            check_interval=MOTION_INTERVAL,
+            detection_enabled=DETECTION_ENABLED,
+            detection_model_path=DETECTION_MODEL_PATH,
+            detection_labels_path=DETECTION_LABELS_PATH,
+            detection_confidence=DETECTION_CONFIDENCE,
+            detection_suppress_empty=DETECTION_SUPPRESS_EMPTY,
         )
 
     motion_detector.start(camera_manager.get_raw_frame)
diff --git a/server/models/coco_labels.txt b/server/models/coco_labels.txt
new file mode 100644
index 0000000..49ea52b
--- /dev/null
+++ b/server/models/coco_labels.txt
@@ -0,0 +1,81 @@
+???
+person
+bicycle
+car
+motorcycle
+airplane
+bus
+train
+truck
+boat
+traffic light
+fire hydrant
+stop sign
+parking meter
+bench
+bird
+cat
+dog
+horse
+sheep
+cow
+elephant
+bear
+zebra
+giraffe
+backpack
+umbrella
+handbag
+tie
+suitcase
+frisbee
+skis
+snowboard
+sports ball
+kite
+baseball bat
+baseball glove
+skateboard
+surfboard
+tennis racket
+bottle
+wine glass
+cup
+fork
+knife
+spoon
+bowl
+banana
+apple
+sandwich
+orange
+broccoli
+carrot
+hot dog
+pizza
+donut
+cake
+chair
+couch
+potted plant
+bed
+dining table
+toilet
+tv
+laptop
+mouse
+remote
+keyboard
+cell phone
+microwave
+oven
+toaster
+sink
+refrigerator
+book
+clock
+vase
+scissors
+teddy bear
+hair drier
+toothbrush
diff --git a/server/motion.py b/server/motion.py
index e0d7cf1..e97f770 100644
--- a/server/motion.py
+++ b/server/motion.py
@@ -2,8 +2,12 @@
 """
 Motion Detection Module
 
-Simple frame-differencing motion detection with event reporting.
+Frame-differencing motion detection with optional object detection.
 Runs as background thread, POSTs events to collector on Mac mini.
+
+When object detection is enabled, motion acts as a pre-filter:
+motion triggers -> object detection confirms -> event reported.
+If no objects are found, the event can be suppressed (configurable).
 """
 
 import os
@@ -15,7 +19,7 @@ import httpx
 import base64
 from datetime import datetime
 from typing import Optional, Callable
-from dataclasses import dataclass, asdict
+from dataclasses import dataclass, asdict, field
 from pathlib import Path
 
 logger = logging.getLogger(__name__)
@@ -23,23 +27,25 @@ logger = logging.getLogger(__name__)
 
 @dataclass
 class MotionEvent:
-    """Motion detection event"""
+    """Motion/detection event"""
     timestamp: str
     camera_id: str
     event_type: str = "motion"
     confidence: float = 0.0
     region: str = "full"  # Could be "left", "right", "center" etc.
     area_percent: float = 0.0  # % of frame with motion
+    detections: Optional[list] = None  # List of detection dicts when objects found
 
 
 class MotionDetector:
     """
-    Background motion detection with event reporting.
-    
-    Uses frame differencing to detect motion and reports
-    events to a collector endpoint.
+    Background motion detection with optional object detection.
+
+    Uses frame differencing to detect motion. When object detection is
+    enabled, runs inference on motion frames to identify objects and
+    suppress false positives.
     """
-    
+
     def __init__(
         self,
         camera_id: str,
@@ -49,6 +55,12 @@ class MotionDetector:
         min_area_percent: float = 0.5, # Minimum % of frame to trigger
         cooldown_seconds: float = 5.0, # Seconds between events
         check_interval: float = 0.5,   # Seconds between frame checks
+        # Object detection
+        detection_enabled: bool = False,
+        detection_model_path: Optional[str] = None,
+        detection_labels_path: Optional[str] = None,
+        detection_confidence: float = 0.5,
+        detection_suppress_empty: bool = True,
     ):
         self.camera_id = camera_id
         self.collector_url = collector_url
@@ -57,42 +69,58 @@ class MotionDetector:
         self.min_area_percent = min_area_percent
         self.cooldown_seconds = cooldown_seconds
         self.check_interval = check_interval
-        
+        self.detection_suppress_empty = detection_suppress_empty
+
         self._previous_frame: Optional[any] = None
         self._last_event_time: float = 0
         self._running = False
         self._thread: Optional[threading.Thread] = None
         self._get_frame: Optional[Callable] = None
-        
+
+        # Object detector (lazy import to avoid requiring tflite when disabled)
+        self._detector = None
+        if detection_enabled and detection_model_path:
+            try:
+                from detector import ObjectDetector
+                self._detector = ObjectDetector(
+                    model_path=detection_model_path,
+                    labels_path=detection_labels_path or "",
+                    confidence_threshold=detection_confidence,
+                )
+                logger.info(f"Object detection enabled (model: {detection_model_path})")
+            except ImportError as e:
+                logger.error(f"Object detection unavailable: {e}")
+
         # Stats
         self.events_detected = 0
         self.events_reported = 0
+        self.events_suppressed = 0
         self.last_event: Optional[MotionEvent] = None
-    
+
     def start(self, get_frame_func: Callable):
         """
         Start motion detection in background thread.
-        
+
         Args:
             get_frame_func: Function that returns current frame as numpy array
         """
         if self._running:
             logger.warning("Motion detector already running")
             return
-        
+
         self._get_frame = get_frame_func
         self._running = True
         self._thread = threading.Thread(target=self._detection_loop, daemon=True)
         self._thread.start()
         logger.info(f"Motion detection started (threshold={self.threshold}, cooldown={self.cooldown_seconds}s)")
-    
+
     def stop(self):
         """Stop motion detection"""
         self._running = False
         if self._thread:
             self._thread.join(timeout=2.0)
         logger.info("Motion detection stopped")
-    
+
     def _detection_loop(self):
         """Main detection loop - runs in background thread"""
         while self._running:
@@ -100,91 +128,137 @@ class MotionDetector:
                 self._check_for_motion()
             except Exception as e:
                 logger.error(f"Motion detection error: {e}")
-            
+
             time.sleep(self.check_interval)
-    
+
     def _check_for_motion(self):
         """Check current frame for motion"""
         if not self._get_frame:
             return
-        
+
         # Get current frame
         frame = self._get_frame()
         if frame is None:
             return
-        
+
         # Convert to grayscale for comparison
         gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
         gray = cv2.GaussianBlur(gray, (21, 21), 0)
-        
+
         # Need previous frame to compare
         if self._previous_frame is None:
             self._previous_frame = gray
             return
-        
+
         # Compute difference
         frame_delta = cv2.absdiff(self._previous_frame, gray)
         thresh = cv2.threshold(frame_delta, self.threshold, 255, cv2.THRESH_BINARY)[1]
-        
+
         # Dilate to fill gaps
         thresh = cv2.dilate(thresh, None, iterations=2)
-        
+
         # Calculate motion area percentage
         motion_pixels = cv2.countNonZero(thresh)
         total_pixels = thresh.shape[0] * thresh.shape[1]
         area_percent = (motion_pixels / total_pixels) * 100
-        
+
         # Update previous frame
         self._previous_frame = gray
-        
+
         # Check if motion exceeds threshold
         if area_percent >= self.min_area_percent:
             self._handle_motion(frame, area_percent)
-    
+
     def _handle_motion(self, frame, area_percent: float):
-        """Handle detected motion"""
+        """Handle detected motion, optionally running object detection"""
         now = time.time()
-        
+
         # Check cooldown
         if now - self._last_event_time < self.cooldown_seconds:
             return
-        
+
         self._last_event_time = now
         self.events_detected += 1
-        
+
+        # Run object detection if enabled
+        detections_list = []
+        detections_dicts = None
+        snapshot_frame = frame
+        detection_failed = False  # True when inference itself raised
+        if self._detector:
+            try:
+                detections_list = self._detector.detect(frame)
+            except Exception as e:  # fail open: fall back to a plain motion event
+                detection_failed = True
+                logger.error(f"Object detection error: {e}")
+            if detections_list:
+                detections_dicts = [{
+                    "label": d.label,
+                    "confidence": round(d.confidence, 3),
+                    "bbox": [round(x, 4) for x in d.bbox],
+                } for d in detections_list]
+
+                # Draw bounding boxes on snapshot
+                try:
+                    from detector import annotate_frame
+                    snapshot_frame = annotate_frame(frame, detections_list)
+                except Exception as e:
+                    logger.warning(f"Failed to annotate frame: {e}")
+            # Only a successful but empty detection counts as "nothing there"
+            elif not detection_failed and self.detection_suppress_empty:
+                self.events_suppressed += 1
+                logger.debug(
+                    f"Motion ({area_percent:.1f}%) but no objects detected - suppressed "
+                    f"({self.events_suppressed} total)"
+                )
+                return
+
         # Create event
+        if detections_list:
+            top_confidence = max(d.confidence for d in detections_list)
+            event_type = "object"
+        else:
+            top_confidence = min(area_percent / 10.0, 1.0)
+            event_type = "motion"
+
         event = MotionEvent(
             timestamp=datetime.utcnow().isoformat() + "Z",
             camera_id=self.camera_id,
-            confidence=min(area_percent / 10.0, 1.0),  # Normalize to 0-1
+            event_type=event_type,
+            confidence=round(top_confidence, 3),
             area_percent=round(area_percent, 2),
+            detections=detections_dicts,
         )
         self.last_event = event
-        
-        logger.info(f"Motion detected: {area_percent:.1f}% of frame (confidence: {event.confidence:.2f})")
-        
+
+        if detections_list:
+            labels = ", ".join(f"{d.label}({d.confidence:.0%})" for d in detections_list)
+            logger.info(f"Objects detected: {labels} (motion: {area_percent:.1f}%)")
+        else:
+            logger.info(f"Motion detected: {area_percent:.1f}% of frame (confidence: {event.confidence:.2f})")
+
         # Report to collector
         if self.collector_url:
-            self._report_event(event, frame)
-    
+            self._report_event(event, snapshot_frame)
+
     def _report_event(self, event: MotionEvent, frame):
         """POST event to collector endpoint"""
         try:
             # Encode frame as JPEG
             _, buffer = cv2.imencode('.jpg', frame, [cv2.IMWRITE_JPEG_QUALITY, 85])
             snapshot_b64 = base64.b64encode(buffer.tobytes()).decode('utf-8')
-            
+
             # Build payload
             payload = {
                 "event": asdict(event),
                 "snapshot": snapshot_b64,
             }
-            
+
             # POST to collector
             headers = {"Content-Type": "application/json"}
             if self.collector_api_key:
                 headers["X-API-Key"] = self.collector_api_key
-            
+
             # Use sync client (we're in a thread)
             with httpx.Client(timeout=5.0, verify=False) as client:
                 response = client.post(
@@ -192,22 +266,24 @@ class MotionDetector:
                     json=payload,
                     headers=headers,
                 )
-                
+
                 if response.status_code == 200:
                     self.events_reported += 1
                     logger.info(f"Event reported to collector ({self.events_reported} total)")
                 else:
                     logger.warning(f"Collector returned {response.status_code}: {response.text[:100]}")
-        
+
         except Exception as e:
             logger.error(f"Failed to report event: {e}")
-    
+
     def get_stats(self) -> dict:
         """Get detection statistics"""
         return {
             "running": self._running,
             "events_detected": self.events_detected,
             "events_reported": self.events_reported,
+            "events_suppressed": self.events_suppressed,
+            "detection_enabled": self._detector is not None,
             "last_event": asdict(self.last_event) if self.last_event else None,
             "config": {
                 "threshold": self.threshold,
diff --git a/server/requirements.txt b/server/requirements.txt
index 8767eb8..278a80b 100644
--- a/server/requirements.txt
+++ b/server/requirements.txt
@@ -12,3 +12,6 @@ python-dotenv>=1.0.0
 
 # HTTP client (for posting events to collector)
 httpx>=0.24.0
+
+# Object detection (installed separately by setup.sh)
+# tflite-runtime>=2.14.0
diff --git a/server/setup.sh b/server/setup.sh
index d1f5efb..d3d04f7 100644
--- a/server/setup.sh
+++ b/server/setup.sh
@@ -70,10 +70,16 @@ mkdir -p "${INSTALL_DIR}/ssl"
 # Copy files
 cp "${SCRIPT_DIR}/main.py" "${INSTALL_DIR}/"
 cp "${SCRIPT_DIR}/motion.py" "${INSTALL_DIR}/"
+cp "${SCRIPT_DIR}/detector.py" "${INSTALL_DIR}/"
 cp "${SCRIPT_DIR}/requirements.txt" "${INSTALL_DIR}/"
 cp "${SCRIPT_DIR}/generate_cert.sh" "${INSTALL_DIR}/"
+cp "${SCRIPT_DIR}/download_model.sh" "${INSTALL_DIR}/"
 cp "${SCRIPT_DIR}/.env" "${INSTALL_DIR}/"
 
+# Copy model label map (the .tflite model itself is downloaded below)
+mkdir -p "${INSTALL_DIR}/models"
+cp "${SCRIPT_DIR}/models/coco_labels.txt" "${INSTALL_DIR}/models/"
+
 # Create virtual environment
 echo_info "Creating Python virtual environment..."
 cd "${INSTALL_DIR}"
@@ -85,6 +91,19 @@ echo_info "Installing Python dependencies..."
 pip install --upgrade pip
 pip install -r requirements.txt
 
+# Install TFLite runtime (best-effort; pip's own error output is left visible)
+echo_info "Installing TFLite runtime for object detection..."
+pip install tflite-runtime || echo_warn "tflite-runtime could not be installed (object detection will be disabled)"
+
+# Download object detection model if missing (best-effort, like the runtime)
+if [ -f "${INSTALL_DIR}/models/ssd_mobilenet_v2_coco_quant_postprocess.tflite" ]; then
+    echo_info "Object detection model already present"
+else
+    echo_info "Downloading object detection model..."
+    chmod +x "${INSTALL_DIR}/download_model.sh"
+    (cd "${INSTALL_DIR}" && ./download_model.sh) || echo_warn "Model download failed (object detection will be disabled)"
+fi
+
 # Generate SSL certificates if not present
 if [ ! -f ssl/cert.pem ]; then
     echo_info "Generating SSL certificates..."