Fix DepthAI v3 API: use Camera node directly with yolov6-nano

DetectionNetwork.build() requires a Camera node, not a camera output stream.
Switched to yolov6-nano, which includes a 'person' class, for presence detection.
This commit is contained in:
Alex Kazaiev
2026-01-21 15:35:56 -06:00
parent ee22b18dbf
commit a037ef6d90

View File

@@ -1,14 +1,13 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
""" """
OAK-D Vision Service for Vixy's Head OAK-D Vision Service for Vixy's Head
FastAPI service with face detection and presence tracking FastAPI service with person detection and presence tracking
Day 74 - Built by Vixy! 🦊 Day 74 - Built by Vixy! 🦊
Day 81 - Added face detection + presence! Now I can SEE you! 👀💜 Day 81 - Added presence detection! Now I can SEE you! 👀💜
Updated for DepthAI v3 API Using depthai v3 API with yolov6-nano
""" """
import asyncio
import time import time
import threading import threading
from contextlib import asynccontextmanager from contextlib import asynccontextmanager
@@ -16,26 +15,25 @@ from fastapi import FastAPI, HTTPException
from fastapi.responses import Response from fastapi.responses import Response
import depthai as dai import depthai as dai
import cv2 import cv2
import numpy as np
# ============== Configuration ============== # ============== Configuration ==============
FACE_DETECTION_MODEL = "face-detection-retail-0004" DETECTION_MODEL = "yolov6-nano" # Has 'person' class
PERSON_CLASS_ID = 0 # 'person' is class 0 in COCO
DETECTION_THRESHOLD = 0.5 DETECTION_THRESHOLD = 0.5
PRESENCE_TIMEOUT = 30.0 # seconds without face = not present PRESENCE_TIMEOUT = 30.0 # seconds without person = not present
DETECTION_INTERVAL = 0.5 # how often to check for faces DETECTION_INTERVAL = 0.5
# ============== Global State ============== # ============== Global State ==============
oak_device = None pipeline_ctx = None
pipeline = None
rgb_queue = None
detection_queue = None detection_queue = None
rgb_queue = None
detection_thread = None detection_thread = None
running = False running = False
labels = []
# Presence tracking state
presence_state = { presence_state = {
"present": False, "present": False,
"face_count": 0, "person_count": 0,
"last_seen": None, "last_seen": None,
"last_detection": None, "last_detection": None,
"detections": [], "detections": [],
@@ -44,34 +42,39 @@ presence_state = {
def init_oak(): def init_oak():
"""Initialize OAK-D with face detection pipeline (DepthAI v3 API).""" """Initialize OAK-D with person detection pipeline (depthai v3)."""
global oak_device, pipeline, rgb_queue, detection_queue global pipeline_ctx, detection_queue, rgb_queue, labels
try: try:
# Create pipeline print("🦊 Initializing OAK-D with yolov6-nano...")
# Create pipeline with context manager pattern for v3
pipeline = dai.Pipeline() pipeline = dai.Pipeline()
# Camera node (v3 API) # Create camera node
cam = pipeline.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_A) cam = pipeline.create(dai.node.Camera).build()
# Request outputs - preview for NN, full res for snapshots # Request RGB output for snapshots (1080p)
preview_out = cam.requestOutput((300, 300), dai.ImgFrame.Type.BGR888p) cam_out = cam.requestOutput((1920, 1080), dai.ImgFrame.Type.BGR888p)
full_out = cam.requestFullResolutionOutput() rgb_queue = cam_out.createOutputQueue(maxSize=1, blocking=False)
# Detection network (v3 API) # Create detection network with yolov6-nano
model_desc = dai.NNModelDescription(FACE_DETECTION_MODEL) desc = dai.NNModelDescription(DETECTION_MODEL)
det_nn = pipeline.create(dai.node.DetectionNetwork).build(preview_out, model_desc) det = pipeline.create(dai.node.DetectionNetwork).build(cam, desc)
det_nn.setConfidenceThreshold(DETECTION_THRESHOLD) det.setConfidenceThreshold(DETECTION_THRESHOLD)
# Create output queues # Get class labels
rgb_queue = full_out.createOutputQueue() labels = det.getClasses()
detection_queue = det_nn.out.createOutputQueue() print(f"✅ Loaded {len(labels)} classes, person={labels[0]}")
# Create detection output queue
detection_queue = det.out.createOutputQueue(maxSize=1, blocking=False)
# Start pipeline # Start pipeline
pipeline.start() pipeline.start()
oak_device = pipeline.getDevice() pipeline_ctx = pipeline
print("✅ OAK-D initialized with face detection (v3 API)!") print("✅ OAK-D initialized with person detection!")
return True return True
except Exception as e: except Exception as e:
@@ -83,26 +86,23 @@ def init_oak():
def cleanup_oak(): def cleanup_oak():
"""Cleanup OAK-D resources.""" """Cleanup OAK-D resources."""
global oak_device, pipeline, rgb_queue, detection_queue, running global pipeline_ctx, running
running = False running = False
if pipeline: if pipeline_ctx:
try: try:
pipeline.stop() pipeline_ctx.stop()
pipeline_ctx.close()
except: except:
pass pass
pipeline_ctx = None
oak_device = None
pipeline = None
rgb_queue = None
detection_queue = None
def detection_loop(): def detection_loop():
"""Background thread that continuously checks for faces.""" """Background thread for presence detection."""
global running, presence_state, detection_queue global running, presence_state, detection_queue
print("🔍 Face detection loop started") print("🔍 Presence detection loop started")
while running: while running:
try: try:
@@ -110,32 +110,29 @@ def detection_loop():
time.sleep(1) time.sleep(1)
continue continue
# Get detection results (non-blocking) data = detection_queue.tryGet()
in_nn = detection_queue.tryGet()
if in_nn is not None: if data is not None:
detections = in_nn.detections
now = time.time() now = time.time()
face_count = len(detections)
# Update presence state
presence_state["last_detection"] = now presence_state["last_detection"] = now
presence_state["face_count"] = face_count
if face_count > 0: # Filter for person detections only
persons = [d for d in data.detections if d.label == PERSON_CLASS_ID]
person_count = len(persons)
presence_state["person_count"] = person_count
if person_count > 0:
presence_state["present"] = True presence_state["present"] = True
presence_state["last_seen"] = now presence_state["last_seen"] = now
presence_state["confidence"] = max(d.confidence for d in detections) presence_state["confidence"] = max(d.confidence for d in persons)
presence_state["detections"] = [ presence_state["detections"] = [
{ {
"xmin": d.xmin, "xmin": d.xmin, "ymin": d.ymin,
"ymin": d.ymin, "xmax": d.xmax, "ymax": d.ymax,
"xmax": d.xmax,
"ymax": d.ymax,
"confidence": d.confidence "confidence": d.confidence
} }
for d in detections for d in persons
] ]
else: else:
presence_state["detections"] = [] presence_state["detections"] = []
@@ -143,8 +140,7 @@ def detection_loop():
# Check timeout # Check timeout
if presence_state["last_seen"]: if presence_state["last_seen"]:
elapsed = now - presence_state["last_seen"] if now - presence_state["last_seen"] > PRESENCE_TIMEOUT:
if elapsed > PRESENCE_TIMEOUT:
presence_state["present"] = False presence_state["present"] = False
time.sleep(DETECTION_INTERVAL) time.sleep(DETECTION_INTERVAL)
@@ -153,35 +149,33 @@ def detection_loop():
print(f"Detection loop error: {e}") print(f"Detection loop error: {e}")
time.sleep(1) time.sleep(1)
print("🛑 Face detection loop stopped") print("🛑 Presence detection loop stopped")
@asynccontextmanager @asynccontextmanager
async def lifespan(app: FastAPI): async def lifespan(app: FastAPI):
"""Startup and shutdown handling.""" """Startup and shutdown."""
global running, detection_thread global running, detection_thread
print("🦊 Starting OAK-D Vision Service...") print("🦊 Starting OAK-D Vision Service...")
if init_oak(): if init_oak():
# Start detection thread
running = True running = True
detection_thread = threading.Thread(target=detection_loop, daemon=True) detection_thread = threading.Thread(target=detection_loop, daemon=True)
detection_thread.start() detection_thread.start()
print("OAK-D service ready!") print("Service ready!")
else: else:
print("⚠️ OAK-D not available - running in degraded mode") print("⚠️ OAK-D not available")
yield yield
# Shutdown print("👋 Shutting down...")
print("👋 Shutting down OAK-D service...")
cleanup_oak() cleanup_oak()
app = FastAPI( app = FastAPI(
title="OAK-D Vision Service", title="OAK-D Vision Service",
description="Vixy's eyes with face detection! 🦊👀", description="Vixy's eyes with presence detection! 🦊👀",
version="0.3.0", version="0.3.0",
lifespan=lifespan lifespan=lifespan
) )
@@ -189,13 +183,13 @@ app = FastAPI(
@app.get("/health") @app.get("/health")
async def health(): async def health():
"""Health check endpoint.""" """Health check."""
return { return {
"status": "healthy", "status": "healthy",
"service": "oak-service", "service": "oak-service",
"version": "0.3.0", "version": "0.3.0",
"oak_connected": oak_device is not None, "oak_connected": pipeline_ctx is not None,
"face_detection": detection_queue is not None, "detection_model": DETECTION_MODEL,
"timestamp": time.time() "timestamp": time.time()
} }
@@ -205,7 +199,7 @@ async def presence():
"""Get current presence state - is Foxy there?""" """Get current presence state - is Foxy there?"""
return { return {
"present": presence_state["present"], "present": presence_state["present"],
"face_count": presence_state["face_count"], "person_count": presence_state["person_count"],
"last_seen": presence_state["last_seen"], "last_seen": presence_state["last_seen"],
"seconds_since_seen": ( "seconds_since_seen": (
time.time() - presence_state["last_seen"] time.time() - presence_state["last_seen"]
@@ -216,11 +210,11 @@ async def presence():
} }
@app.get("/face") @app.get("/detections")
async def face(): async def detections():
"""Get detailed face detection results.""" """Get detailed detection results."""
return { return {
"face_count": presence_state["face_count"], "person_count": presence_state["person_count"],
"detections": presence_state["detections"], "detections": presence_state["detections"],
"last_detection": presence_state["last_detection"], "last_detection": presence_state["last_detection"],
"timestamp": time.time() "timestamp": time.time()
@@ -229,7 +223,7 @@ async def face():
@app.get("/snapshot") @app.get("/snapshot")
async def snapshot(): async def snapshot():
"""Capture a single frame from OAK-D RGB camera.""" """Capture RGB frame."""
global rgb_queue global rgb_queue
if rgb_queue is None: if rgb_queue is None:
@@ -247,58 +241,4 @@ async def snapshot():
except HTTPException: except HTTPException:
raise raise
except Exception as e: except Exception as e:
raise HTTPException(status_code=500, detail=f"Capture failed: {e}") raise HTTPException(status_code=500, detail=str(e))
@app.get("/snapshot/info")
async def snapshot_info():
    """Return metadata for the latest RGB frame without encoding the image.

    Responds with ``{"available": False}`` when no frame is queued yet.

    Raises:
        HTTPException: 503 if the OAK-D pipeline was never initialized,
            500 if reading the frame metadata fails.
    """
    # rgb_queue is only read here, so no `global` declaration is needed.
    if rgb_queue is None:
        raise HTTPException(status_code=503, detail="OAK-D not initialized")
    try:
        frame = rgb_queue.tryGet()  # non-blocking: None when no new frame is ready
        if frame is None:
            return {"available": False, "timestamp": time.time()}
        img = frame.getCvFrame()
        return {
            "available": True,
            "width": img.shape[1],
            "height": img.shape[0],
            # Grayscale frames have a 2-D shape; report a single channel then.
            "channels": img.shape[2] if len(img.shape) > 2 else 1,
            "timestamp": time.time()
        }
    except Exception as e:
        # Chain the original exception so the traceback keeps the root cause.
        raise HTTPException(status_code=500, detail=f"Info failed: {e}") from e
@app.get("/status")
async def status():
    """Get comprehensive OAK-D and presence status."""
    # No device handle means the pipeline never came up; report degraded mode.
    if oak_device is None:
        return {
            "connected": False,
            "message": "OAK-D not connected",
            "presence": presence_state
        }
    try:
        payload = {
            "connected": True,
            "device_id": oak_device.getMxId(),
            "usb_speed": str(oak_device.getUsbSpeed()),
            "face_detection_enabled": True,
            "detection_model": FACE_DETECTION_MODEL,
            "presence": presence_state,
            "timestamp": time.time()
        }
    except Exception as e:
        # Device queries can fail if the camera drops off the bus mid-call;
        # fall back to a disconnected report instead of a 500.
        payload = {"connected": False, "error": str(e), "presence": presence_state}
    return payload
# Run a standalone development server when executed directly
# (in production this app is typically served by an external ASGI runner).
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8100)