Fix DepthAI v3 API: use Camera node directly with yolov6-nano

DetectionNetwork.build() requires the Camera node itself, not one of its outputs. Switched to the yolov6-nano model, which includes a 'person' class, so it can be used for presence detection.
2026-01-21 15:35:56 -06:00
parent ee22b18dbf
commit a037ef6d90
1 changed files with 70 additions and 130 deletions
--- a/oak_service.py
+++ b/oak_service.py
@@ -1,14 +1,13 @@
 #!/usr/bin/env python3
 """
 OAK-D Vision Service for Vixy's Head
-FastAPI service with face detection and presence tracking
+FastAPI service with person detection and presence tracking

 Day 74 - Built by Vixy! 🦊
-Day 81 - Added face detection + presence! Now I can SEE you! 👀💜
-         Updated for DepthAI v3 API
+Day 81 - Added presence detection! Now I can SEE you! 👀💜
+        Using depthai v3 API with yolov6-nano
 """

-import asyncio
 import time
 import threading
 from contextlib import asynccontextmanager
@@ -16,26 +15,25 @@ from fastapi import FastAPI, HTTPException
 from fastapi.responses import Response
 import depthai as dai
 import cv2
-import numpy as np

 # ============== Configuration ==============
-FACE_DETECTION_MODEL = "face-detection-retail-0004"
+DETECTION_MODEL = "yolov6-nano"  # Has 'person' class
+PERSON_CLASS_ID = 0  # 'person' is class 0 in COCO
 DETECTION_THRESHOLD = 0.5
-PRESENCE_TIMEOUT = 30.0  # seconds without face = not present
-DETECTION_INTERVAL = 0.5  # how often to check for faces
+PRESENCE_TIMEOUT = 30.0  # seconds without person = not present
+DETECTION_INTERVAL = 0.5

 # ============== Global State ==============
-oak_device = None
-pipeline = None
-rgb_queue = None
+pipeline_ctx = None
 detection_queue = None
+rgb_queue = None
 detection_thread = None
 running = False
+labels = []

-# Presence tracking state
 presence_state = {
    "present": False,
-    "face_count": 0,
+    "person_count": 0,
    "last_seen": None,
    "last_detection": None,
    "detections": [],
@@ -44,34 +42,39 @@ presence_state = {


 def init_oak():
-    """Initialize OAK-D with face detection pipeline (DepthAI v3 API)."""
-    global oak_device, pipeline, rgb_queue, detection_queue
+    """Initialize OAK-D with person detection pipeline (depthai v3)."""
+    global pipeline_ctx, detection_queue, rgb_queue, labels
    
    try:
-        # Create pipeline
+        print("🦊 Initializing OAK-D with yolov6-nano...")
+        
+        # Create pipeline with context manager pattern for v3
        pipeline = dai.Pipeline()
        
-        # Camera node (v3 API)
-        cam = pipeline.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_A)
+        # Create camera node
+        cam = pipeline.create(dai.node.Camera).build()
        
-        # Request outputs - preview for NN, full res for snapshots
-        preview_out = cam.requestOutput((300, 300), dai.ImgFrame.Type.BGR888p)
-        full_out = cam.requestFullResolutionOutput()
+        # Request RGB output for snapshots (1080p)
+        cam_out = cam.requestOutput((1920, 1080), dai.ImgFrame.Type.BGR888p)
+        rgb_queue = cam_out.createOutputQueue(maxSize=1, blocking=False)
        
-        # Detection network (v3 API)
-        model_desc = dai.NNModelDescription(FACE_DETECTION_MODEL)
-        det_nn = pipeline.create(dai.node.DetectionNetwork).build(preview_out, model_desc)
-        det_nn.setConfidenceThreshold(DETECTION_THRESHOLD)
+        # Create detection network with yolov6-nano
+        desc = dai.NNModelDescription(DETECTION_MODEL)
+        det = pipeline.create(dai.node.DetectionNetwork).build(cam, desc)
+        det.setConfidenceThreshold(DETECTION_THRESHOLD)
        
-        # Create output queues
-        rgb_queue = full_out.createOutputQueue()
-        detection_queue = det_nn.out.createOutputQueue()
+        # Get class labels
+        labels = det.getClasses()
+        print(f"✅ Loaded {len(labels)} classes, person={labels[0]}")
+        
+        # Create detection output queue
+        detection_queue = det.out.createOutputQueue(maxSize=1, blocking=False)
        
        # Start pipeline
        pipeline.start()
-        oak_device = pipeline.getDevice()
+        pipeline_ctx = pipeline
        
-        print("✅ OAK-D initialized with face detection (v3 API)!")
+        print("✅ OAK-D initialized with person detection!")
        return True
        
    except Exception as e:
@@ -83,26 +86,23 @@ def init_oak():

 def cleanup_oak():
    """Cleanup OAK-D resources."""
-    global oak_device, pipeline, rgb_queue, detection_queue, running
+    global pipeline_ctx, running
    running = False
    
-    if pipeline:
+    if pipeline_ctx:
        try:
-            pipeline.stop()
+            pipeline_ctx.stop()
+            pipeline_ctx.close()
        except:
            pass
-    
-    oak_device = None
-    pipeline = None
-    rgb_queue = None
-    detection_queue = None
+    pipeline_ctx = None


 def detection_loop():
-    """Background thread that continuously checks for faces."""
+    """Background thread for presence detection."""
    global running, presence_state, detection_queue
    
-    print("🔍 Face detection loop started")
+    print("🔍 Presence detection loop started")
    
    while running:
        try:
@@ -110,32 +110,29 @@ def detection_loop():
                time.sleep(1)
                continue
            
-            # Get detection results (non-blocking)
-            in_nn = detection_queue.tryGet()
+            data = detection_queue.tryGet()
            
-            if in_nn is not None:
-                detections = in_nn.detections
+            if data is not None:
                now = time.time()
-                
-                face_count = len(detections)
-                
-                # Update presence state
                presence_state["last_detection"] = now
-                presence_state["face_count"] = face_count
                
-                if face_count > 0:
+                # Filter for person detections only
+                persons = [d for d in data.detections if d.label == PERSON_CLASS_ID]
+                person_count = len(persons)
+                
+                presence_state["person_count"] = person_count
+                
+                if person_count > 0:
                    presence_state["present"] = True
                    presence_state["last_seen"] = now
-                    presence_state["confidence"] = max(d.confidence for d in detections)
+                    presence_state["confidence"] = max(d.confidence for d in persons)
                    presence_state["detections"] = [
                        {
-                            "xmin": d.xmin,
-                            "ymin": d.ymin,
-                            "xmax": d.xmax,
-                            "ymax": d.ymax,
+                            "xmin": d.xmin, "ymin": d.ymin,
+                            "xmax": d.xmax, "ymax": d.ymax,
                            "confidence": d.confidence
                        }
-                        for d in detections
+                        for d in persons
                    ]
                else:
                    presence_state["detections"] = []
@@ -143,8 +140,7 @@ def detection_loop():
                    
                    # Check timeout
                    if presence_state["last_seen"]:
-                        elapsed = now - presence_state["last_seen"]
-                        if elapsed > PRESENCE_TIMEOUT:
+                        if now - presence_state["last_seen"] > PRESENCE_TIMEOUT:
                            presence_state["present"] = False
            
            time.sleep(DETECTION_INTERVAL)
@@ -153,35 +149,33 @@ def detection_loop():
            print(f"Detection loop error: {e}")
            time.sleep(1)
    
-    print("🛑 Face detection loop stopped")
+    print("🛑 Presence detection loop stopped")


@asynccontextmanager
 async def lifespan(app: FastAPI):
-    """Startup and shutdown handling."""
+    """Startup and shutdown."""
    global running, detection_thread
    
    print("🦊 Starting OAK-D Vision Service...")
    
    if init_oak():
-        # Start detection thread
        running = True
        detection_thread = threading.Thread(target=detection_loop, daemon=True)
        detection_thread.start()
-        print("✅ OAK-D service ready!")
+        print("✅ Service ready!")
    else:
-        print("⚠️ OAK-D not available - running in degraded mode")
+        print("⚠️ OAK-D not available")
    
    yield
    
-    # Shutdown
-    print("👋 Shutting down OAK-D service...")
+    print("👋 Shutting down...")
    cleanup_oak()


 app = FastAPI(
    title="OAK-D Vision Service",
-    description="Vixy's eyes with face detection! 🦊👀",
+    description="Vixy's eyes with presence detection! 🦊👀",
    version="0.3.0",
    lifespan=lifespan
 )
@@ -189,13 +183,13 @@ app = FastAPI(

@app.get("/health")
 async def health():
-    """Health check endpoint."""
+    """Health check."""
    return {
        "status": "healthy",
        "service": "oak-service",
        "version": "0.3.0",
-        "oak_connected": oak_device is not None,
-        "face_detection": detection_queue is not None,
+        "oak_connected": pipeline_ctx is not None,
+        "detection_model": DETECTION_MODEL,
        "timestamp": time.time()
    }

@@ -205,7 +199,7 @@ async def presence():
    """Get current presence state - is Foxy there?"""
    return {
        "present": presence_state["present"],
-        "face_count": presence_state["face_count"],
+        "person_count": presence_state["person_count"],
        "last_seen": presence_state["last_seen"],
        "seconds_since_seen": (
            time.time() - presence_state["last_seen"]
@@ -216,11 +210,11 @@ async def presence():
    }


-@app.get("/face")
-async def face():
-    """Get detailed face detection results."""
+@app.get("/detections")
+async def detections():
+    """Get detailed detection results."""
    return {
-        "face_count": presence_state["face_count"],
+        "person_count": presence_state["person_count"],
        "detections": presence_state["detections"],
        "last_detection": presence_state["last_detection"],
        "timestamp": time.time()
@@ -229,7 +223,7 @@ async def face():

@app.get("/snapshot")
 async def snapshot():
-    """Capture a single frame from OAK-D RGB camera."""
+    """Capture RGB frame."""
    global rgb_queue
    
    if rgb_queue is None:
@@ -247,58 +241,4 @@ async def snapshot():
    except HTTPException:
        raise
    except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Capture failed: {e}")
-
-
-@app.get("/snapshot/info")
-async def snapshot_info():
-    """Get frame metadata without full image."""
-    global rgb_queue
-    
-    if rgb_queue is None:
-        raise HTTPException(status_code=503, detail="OAK-D not initialized")
-    
-    try:
-        frame = rgb_queue.tryGet()
-        if frame is None:
-            return {"available": False, "timestamp": time.time()}
-        
-        img = frame.getCvFrame()
-        return {
-            "available": True,
-            "width": img.shape[1],
-            "height": img.shape[0],
-            "channels": img.shape[2] if len(img.shape) > 2 else 1,
-            "timestamp": time.time()
-        }
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Info failed: {e}")
-
-
-@app.get("/status")
-async def status():
-    """Get comprehensive OAK-D and presence status."""
-    if oak_device is None:
-        return {
-            "connected": False,
-            "message": "OAK-D not connected",
-            "presence": presence_state
-        }
-    
-    try:
-        return {
-            "connected": True,
-            "device_id": oak_device.getMxId(),
-            "usb_speed": str(oak_device.getUsbSpeed()),
-            "face_detection_enabled": True,
-            "detection_model": FACE_DETECTION_MODEL,
-            "presence": presence_state,
-            "timestamp": time.time()
-        }
-    except Exception as e:
-        return {"connected": False, "error": str(e), "presence": presence_state}
-
-
-if __name__ == "__main__":
-    import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=8100)
+        raise HTTPException(status_code=500, detail=str(e))