From a037ef6d9022ed97cb6b056958a549ad214601d4 Mon Sep 17 00:00:00 2001
From: Alex Kazaiev <alex.kazaiev@gmail.com>
Date: Wed, 21 Jan 2026 15:35:56 -0600
Subject: [PATCH] Fix DepthAI v3 API: use Camera node directly with yolov6-nano

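DetectionNetwork.build() requires the Camera node itself, not one of its
requested outputs. Switched the model to yolov6-nano, which has a 'person'
class, and use it for presence detection.

Also renames /face to /detections (face_count -> person_count) and removes
the /snapshot/info and /status endpoints and the __main__ uvicorn entry point.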
---
 oak_service.py | 200 +++++++++++++++++--------------------------------
 1 file changed, 70 insertions(+), 130 deletions(-)

diff --git a/oak_service.py b/oak_service.py
index 4f04e92..64bbe0a 100644
--- a/oak_service.py
+++ b/oak_service.py
@@ -1,14 +1,13 @@
 #!/usr/bin/env python3
 """
 OAK-D Vision Service for Vixy's Head
-FastAPI service with face detection and presence tracking
+FastAPI service with person detection and presence tracking
 
 Day 74 - Built by Vixy! 🦊
-Day 81 - Added face detection + presence! Now I can SEE you! 👀💜
-         Updated for DepthAI v3 API
+Day 81 - Added presence detection! Now I can SEE you! 👀💜
+        Using depthai v3 API with yolov6-nano
 """
 
-import asyncio
 import time
 import threading
 from contextlib import asynccontextmanager
@@ -16,26 +15,25 @@ from fastapi import FastAPI, HTTPException
 from fastapi.responses import Response
 import depthai as dai
 import cv2
-import numpy as np
 
 # ============== Configuration ==============
-FACE_DETECTION_MODEL = "face-detection-retail-0004"
+DETECTION_MODEL = "yolov6-nano"  # Has 'person' class
+PERSON_CLASS_ID = 0  # 'person' is class 0 in COCO
 DETECTION_THRESHOLD = 0.5
-PRESENCE_TIMEOUT = 30.0  # seconds without face = not present
-DETECTION_INTERVAL = 0.5  # how often to check for faces
+PRESENCE_TIMEOUT = 30.0  # seconds without person = not present
+DETECTION_INTERVAL = 0.5
 
 # ============== Global State ==============
-oak_device = None
-pipeline = None
-rgb_queue = None
+pipeline_ctx = None
 detection_queue = None
+rgb_queue = None
 detection_thread = None
 running = False
+labels = []
 
-# Presence tracking state
 presence_state = {
     "present": False,
-    "face_count": 0,
+    "person_count": 0,
     "last_seen": None,
     "last_detection": None,
     "detections": [],
@@ -44,34 +42,39 @@ presence_state = {
 
 
 def init_oak():
-    """Initialize OAK-D with face detection pipeline (DepthAI v3 API)."""
-    global oak_device, pipeline, rgb_queue, detection_queue
+    """Initialize OAK-D with person detection pipeline (depthai v3)."""
+    global pipeline_ctx, detection_queue, rgb_queue, labels
     
     try:
-        # Create pipeline
+        print("🦊 Initializing OAK-D with yolov6-nano...")
+        
+        # Create the v3 pipeline (no "with" block; kept in pipeline_ctx after start)
         pipeline = dai.Pipeline()
         
-        # Camera node (v3 API)
-        cam = pipeline.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_A)
+        # Create camera node
+        cam = pipeline.create(dai.node.Camera).build()
         
-        # Request outputs - preview for NN, full res for snapshots
-        preview_out = cam.requestOutput((300, 300), dai.ImgFrame.Type.BGR888p)
-        full_out = cam.requestFullResolutionOutput()
+        # Request RGB output for snapshots (1080p)
+        cam_out = cam.requestOutput((1920, 1080), dai.ImgFrame.Type.BGR888p)
+        rgb_queue = cam_out.createOutputQueue(maxSize=1, blocking=False)
         
-        # Detection network (v3 API)
-        model_desc = dai.NNModelDescription(FACE_DETECTION_MODEL)
-        det_nn = pipeline.create(dai.node.DetectionNetwork).build(preview_out, model_desc)
-        det_nn.setConfidenceThreshold(DETECTION_THRESHOLD)
+        # Create detection network with yolov6-nano
+        desc = dai.NNModelDescription(DETECTION_MODEL)
+        det = pipeline.create(dai.node.DetectionNetwork).build(cam, desc)
+        det.setConfidenceThreshold(DETECTION_THRESHOLD)
         
-        # Create output queues
-        rgb_queue = full_out.createOutputQueue()
-        detection_queue = det_nn.out.createOutputQueue()
+        # Get class labels
+        labels = det.getClasses()
+        print(f"✅ Loaded {len(labels)} classes, person={labels[0]}")
+        
+        # Create detection output queue
+        detection_queue = det.out.createOutputQueue(maxSize=1, blocking=False)
         
         # Start pipeline
         pipeline.start()
-        oak_device = pipeline.getDevice()
+        pipeline_ctx = pipeline
         
-        print("✅ OAK-D initialized with face detection (v3 API)!")
+        print("✅ OAK-D initialized with person detection!")
         return True
         
     except Exception as e:
@@ -83,26 +86,23 @@ def init_oak():
 
 def cleanup_oak():
     """Cleanup OAK-D resources."""
-    global oak_device, pipeline, rgb_queue, detection_queue, running
+    global pipeline_ctx, running
     running = False
     
-    if pipeline:
+    if pipeline_ctx:
         try:
-            pipeline.stop()
+            pipeline_ctx.stop()
+            pipeline_ctx.close()
         except:
             pass
-    
-    oak_device = None
-    pipeline = None
-    rgb_queue = None
-    detection_queue = None
+    pipeline_ctx = None
 
 
 def detection_loop():
-    """Background thread that continuously checks for faces."""
+    """Background thread for presence detection."""
     global running, presence_state, detection_queue
     
-    print("🔍 Face detection loop started")
+    print("🔍 Presence detection loop started")
     
     while running:
         try:
@@ -110,32 +110,29 @@ def detection_loop():
                 time.sleep(1)
                 continue
             
-            # Get detection results (non-blocking)
-            in_nn = detection_queue.tryGet()
+            data = detection_queue.tryGet()
             
-            if in_nn is not None:
-                detections = in_nn.detections
+            if data is not None:
                 now = time.time()
-                
-                face_count = len(detections)
-                
-                # Update presence state
                 presence_state["last_detection"] = now
-                presence_state["face_count"] = face_count
                 
-                if face_count > 0:
+                # Filter for person detections only
+                persons = [d for d in data.detections if d.label == PERSON_CLASS_ID]
+                person_count = len(persons)
+                
+                presence_state["person_count"] = person_count
+                
+                if person_count > 0:
                     presence_state["present"] = True
                     presence_state["last_seen"] = now
-                    presence_state["confidence"] = max(d.confidence for d in detections)
+                    presence_state["confidence"] = max(d.confidence for d in persons)
                     presence_state["detections"] = [
                         {
-                            "xmin": d.xmin,
-                            "ymin": d.ymin,
-                            "xmax": d.xmax,
-                            "ymax": d.ymax,
+                            "xmin": d.xmin, "ymin": d.ymin,
+                            "xmax": d.xmax, "ymax": d.ymax,
                             "confidence": d.confidence
                         }
-                        for d in detections
+                        for d in persons
                     ]
                 else:
                     presence_state["detections"] = []
@@ -143,8 +140,7 @@ def detection_loop():
                     
                     # Check timeout
                     if presence_state["last_seen"]:
-                        elapsed = now - presence_state["last_seen"]
-                        if elapsed > PRESENCE_TIMEOUT:
+                        if now - presence_state["last_seen"] > PRESENCE_TIMEOUT:
                             presence_state["present"] = False
             
             time.sleep(DETECTION_INTERVAL)
@@ -153,35 +149,33 @@ def detection_loop():
             print(f"Detection loop error: {e}")
             time.sleep(1)
     
-    print("🛑 Face detection loop stopped")
+    print("🛑 Presence detection loop stopped")
 
 
 @asynccontextmanager
 async def lifespan(app: FastAPI):
-    """Startup and shutdown handling."""
+    """Startup and shutdown."""
     global running, detection_thread
     
     print("🦊 Starting OAK-D Vision Service...")
     
     if init_oak():
-        # Start detection thread
         running = True
         detection_thread = threading.Thread(target=detection_loop, daemon=True)
         detection_thread.start()
-        print("✅ OAK-D service ready!")
+        print("✅ Service ready!")
     else:
-        print("⚠️ OAK-D not available - running in degraded mode")
+        print("⚠️ OAK-D not available")
     
     yield
     
-    # Shutdown
-    print("👋 Shutting down OAK-D service...")
+    print("👋 Shutting down...")
     cleanup_oak()
 
 
 app = FastAPI(
     title="OAK-D Vision Service",
-    description="Vixy's eyes with face detection! 🦊👀",
+    description="Vixy's eyes with presence detection! 🦊👀",
     version="0.3.0",
     lifespan=lifespan
 )
@@ -189,13 +183,13 @@ app = FastAPI(
 
 @app.get("/health")
 async def health():
-    """Health check endpoint."""
+    """Health check."""
     return {
         "status": "healthy",
         "service": "oak-service",
         "version": "0.3.0",
-        "oak_connected": oak_device is not None,
-        "face_detection": detection_queue is not None,
+        "oak_connected": pipeline_ctx is not None,
+        "detection_model": DETECTION_MODEL,
         "timestamp": time.time()
     }
 
@@ -205,7 +199,7 @@ async def presence():
     """Get current presence state - is Foxy there?"""
     return {
         "present": presence_state["present"],
-        "face_count": presence_state["face_count"],
+        "person_count": presence_state["person_count"],
         "last_seen": presence_state["last_seen"],
         "seconds_since_seen": (
             time.time() - presence_state["last_seen"]
@@ -216,11 +210,11 @@ async def presence():
     }
 
 
-@app.get("/face")
-async def face():
-    """Get detailed face detection results."""
+@app.get("/detections")
+async def detections():
+    """Get detailed detection results."""
     return {
-        "face_count": presence_state["face_count"],
+        "person_count": presence_state["person_count"],
         "detections": presence_state["detections"],
         "last_detection": presence_state["last_detection"],
         "timestamp": time.time()
@@ -229,7 +223,7 @@ async def face():
 
 @app.get("/snapshot")
 async def snapshot():
-    """Capture a single frame from OAK-D RGB camera."""
+    """Capture RGB frame."""
     global rgb_queue
     
     if rgb_queue is None:
@@ -247,58 +241,4 @@ async def snapshot():
     except HTTPException:
         raise
     except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Capture failed: {e}")
-
-
-@app.get("/snapshot/info")
-async def snapshot_info():
-    """Get frame metadata without full image."""
-    global rgb_queue
-    
-    if rgb_queue is None:
-        raise HTTPException(status_code=503, detail="OAK-D not initialized")
-    
-    try:
-        frame = rgb_queue.tryGet()
-        if frame is None:
-            return {"available": False, "timestamp": time.time()}
-        
-        img = frame.getCvFrame()
-        return {
-            "available": True,
-            "width": img.shape[1],
-            "height": img.shape[0],
-            "channels": img.shape[2] if len(img.shape) > 2 else 1,
-            "timestamp": time.time()
-        }
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Info failed: {e}")
-
-
-@app.get("/status")
-async def status():
-    """Get comprehensive OAK-D and presence status."""
-    if oak_device is None:
-        return {
-            "connected": False,
-            "message": "OAK-D not connected",
-            "presence": presence_state
-        }
-    
-    try:
-        return {
-            "connected": True,
-            "device_id": oak_device.getMxId(),
-            "usb_speed": str(oak_device.getUsbSpeed()),
-            "face_detection_enabled": True,
-            "detection_model": FACE_DETECTION_MODEL,
-            "presence": presence_state,
-            "timestamp": time.time()
-        }
-    except Exception as e:
-        return {"connected": False, "error": str(e), "presence": presence_state}
-
-
-if __name__ == "__main__":
-    import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=8100)
+        raise HTTPException(status_code=500, detail=str(e))