Fix DepthAI v3 API: use Camera node directly with yolov6-nano

DetectionNetwork.build() requires a Camera node, not a camera output stream.
Switched to yolov6-nano, which includes a 'person' class, for presence detection.
This commit is contained in:
Alex Kazaiev
2026-01-21 15:35:56 -06:00
parent ee22b18dbf
commit a037ef6d90

View File

@@ -1,14 +1,13 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
""" """
OAK-D Vision Service for Vixy's Head OAK-D Vision Service for Vixy's Head
FastAPI service with face detection and presence tracking FastAPI service with person detection and presence tracking
Day 74 - Built by Vixy! 🦊 Day 74 - Built by Vixy! 🦊
Day 81 - Added face detection + presence! Now I can SEE you! 👀💜 Day 81 - Added presence detection! Now I can SEE you! 👀💜
Updated for DepthAI v3 API Using depthai v3 API with yolov6-nano
""" """
import asyncio
import time import time
import threading import threading
from contextlib import asynccontextmanager from contextlib import asynccontextmanager
@@ -16,26 +15,25 @@ from fastapi import FastAPI, HTTPException
from fastapi.responses import Response from fastapi.responses import Response
import depthai as dai import depthai as dai
import cv2 import cv2
import numpy as np
# ============== Configuration ============== # ============== Configuration ==============
FACE_DETECTION_MODEL = "face-detection-retail-0004" DETECTION_MODEL = "yolov6-nano" # Has 'person' class
PERSON_CLASS_ID = 0 # 'person' is class 0 in COCO
DETECTION_THRESHOLD = 0.5 DETECTION_THRESHOLD = 0.5
PRESENCE_TIMEOUT = 30.0 # seconds without face = not present PRESENCE_TIMEOUT = 30.0 # seconds without person = not present
DETECTION_INTERVAL = 0.5 # how often to check for faces DETECTION_INTERVAL = 0.5
# ============== Global State ============== # ============== Global State ==============
oak_device = None pipeline_ctx = None
pipeline = None
rgb_queue = None
detection_queue = None detection_queue = None
rgb_queue = None
detection_thread = None detection_thread = None
running = False running = False
labels = []
# Presence tracking state
presence_state = { presence_state = {
"present": False, "present": False,
"face_count": 0, "person_count": 0,
"last_seen": None, "last_seen": None,
"last_detection": None, "last_detection": None,
"detections": [], "detections": [],
@@ -44,34 +42,39 @@ presence_state = {
def init_oak(): def init_oak():
"""Initialize OAK-D with face detection pipeline (DepthAI v3 API).""" """Initialize OAK-D with person detection pipeline (depthai v3)."""
global oak_device, pipeline, rgb_queue, detection_queue global pipeline_ctx, detection_queue, rgb_queue, labels
try: try:
# Create pipeline print("🦊 Initializing OAK-D with yolov6-nano...")
# Create pipeline with context manager pattern for v3
pipeline = dai.Pipeline() pipeline = dai.Pipeline()
# Camera node (v3 API) # Create camera node
cam = pipeline.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_A) cam = pipeline.create(dai.node.Camera).build()
# Request outputs - preview for NN, full res for snapshots # Request RGB output for snapshots (1080p)
preview_out = cam.requestOutput((300, 300), dai.ImgFrame.Type.BGR888p) cam_out = cam.requestOutput((1920, 1080), dai.ImgFrame.Type.BGR888p)
full_out = cam.requestFullResolutionOutput() rgb_queue = cam_out.createOutputQueue(maxSize=1, blocking=False)
# Detection network (v3 API) # Create detection network with yolov6-nano
model_desc = dai.NNModelDescription(FACE_DETECTION_MODEL) desc = dai.NNModelDescription(DETECTION_MODEL)
det_nn = pipeline.create(dai.node.DetectionNetwork).build(preview_out, model_desc) det = pipeline.create(dai.node.DetectionNetwork).build(cam, desc)
det_nn.setConfidenceThreshold(DETECTION_THRESHOLD) det.setConfidenceThreshold(DETECTION_THRESHOLD)
# Create output queues # Get class labels
rgb_queue = full_out.createOutputQueue() labels = det.getClasses()
detection_queue = det_nn.out.createOutputQueue() print(f"✅ Loaded {len(labels)} classes, person={labels[0]}")
# Create detection output queue
detection_queue = det.out.createOutputQueue(maxSize=1, blocking=False)
# Start pipeline # Start pipeline
pipeline.start() pipeline.start()
oak_device = pipeline.getDevice() pipeline_ctx = pipeline
print("✅ OAK-D initialized with face detection (v3 API)!") print("✅ OAK-D initialized with person detection!")
return True return True
except Exception as e: except Exception as e:
@@ -83,26 +86,23 @@ def init_oak():
def cleanup_oak(): def cleanup_oak():
"""Cleanup OAK-D resources.""" """Cleanup OAK-D resources."""
global oak_device, pipeline, rgb_queue, detection_queue, running global pipeline_ctx, running
running = False running = False
if pipeline: if pipeline_ctx:
try: try:
pipeline.stop() pipeline_ctx.stop()
pipeline_ctx.close()
except: except:
pass pass
pipeline_ctx = None
oak_device = None
pipeline = None
rgb_queue = None
detection_queue = None
def detection_loop(): def detection_loop():
"""Background thread that continuously checks for faces.""" """Background thread for presence detection."""
global running, presence_state, detection_queue global running, presence_state, detection_queue
print("🔍 Face detection loop started") print("🔍 Presence detection loop started")
while running: while running:
try: try:
@@ -110,32 +110,29 @@ def detection_loop():
time.sleep(1) time.sleep(1)
continue continue
# Get detection results (non-blocking) data = detection_queue.tryGet()
in_nn = detection_queue.tryGet()
if in_nn is not None: if data is not None:
detections = in_nn.detections
now = time.time() now = time.time()
face_count = len(detections)
# Update presence state
presence_state["last_detection"] = now presence_state["last_detection"] = now
presence_state["face_count"] = face_count
if face_count > 0: # Filter for person detections only
persons = [d for d in data.detections if d.label == PERSON_CLASS_ID]
person_count = len(persons)
presence_state["person_count"] = person_count
if person_count > 0:
presence_state["present"] = True presence_state["present"] = True
presence_state["last_seen"] = now presence_state["last_seen"] = now
presence_state["confidence"] = max(d.confidence for d in detections) presence_state["confidence"] = max(d.confidence for d in persons)
presence_state["detections"] = [ presence_state["detections"] = [
{ {
"xmin": d.xmin, "xmin": d.xmin, "ymin": d.ymin,
"ymin": d.ymin, "xmax": d.xmax, "ymax": d.ymax,
"xmax": d.xmax,
"ymax": d.ymax,
"confidence": d.confidence "confidence": d.confidence
} }
for d in detections for d in persons
] ]
else: else:
presence_state["detections"] = [] presence_state["detections"] = []
@@ -143,8 +140,7 @@ def detection_loop():
# Check timeout # Check timeout
if presence_state["last_seen"]: if presence_state["last_seen"]:
elapsed = now - presence_state["last_seen"] if now - presence_state["last_seen"] > PRESENCE_TIMEOUT:
if elapsed > PRESENCE_TIMEOUT:
presence_state["present"] = False presence_state["present"] = False
time.sleep(DETECTION_INTERVAL) time.sleep(DETECTION_INTERVAL)
@@ -153,35 +149,33 @@ def detection_loop():
print(f"Detection loop error: {e}") print(f"Detection loop error: {e}")
time.sleep(1) time.sleep(1)
print("🛑 Face detection loop stopped") print("🛑 Presence detection loop stopped")
@asynccontextmanager @asynccontextmanager
async def lifespan(app: FastAPI): async def lifespan(app: FastAPI):
"""Startup and shutdown handling.""" """Startup and shutdown."""
global running, detection_thread global running, detection_thread
print("🦊 Starting OAK-D Vision Service...") print("🦊 Starting OAK-D Vision Service...")
if init_oak(): if init_oak():
# Start detection thread
running = True running = True
detection_thread = threading.Thread(target=detection_loop, daemon=True) detection_thread = threading.Thread(target=detection_loop, daemon=True)
detection_thread.start() detection_thread.start()
print("OAK-D service ready!") print("Service ready!")
else: else:
print("⚠️ OAK-D not available - running in degraded mode") print("⚠️ OAK-D not available")
yield yield
# Shutdown print("👋 Shutting down...")
print("👋 Shutting down OAK-D service...")
cleanup_oak() cleanup_oak()
app = FastAPI( app = FastAPI(
title="OAK-D Vision Service", title="OAK-D Vision Service",
description="Vixy's eyes with face detection! 🦊👀", description="Vixy's eyes with presence detection! 🦊👀",
version="0.3.0", version="0.3.0",
lifespan=lifespan lifespan=lifespan
) )
@@ -189,13 +183,13 @@ app = FastAPI(
@app.get("/health") @app.get("/health")
async def health(): async def health():
"""Health check endpoint.""" """Health check."""
return { return {
"status": "healthy", "status": "healthy",
"service": "oak-service", "service": "oak-service",
"version": "0.3.0", "version": "0.3.0",
"oak_connected": oak_device is not None, "oak_connected": pipeline_ctx is not None,
"face_detection": detection_queue is not None, "detection_model": DETECTION_MODEL,
"timestamp": time.time() "timestamp": time.time()
} }
@@ -205,7 +199,7 @@ async def presence():
"""Get current presence state - is Foxy there?""" """Get current presence state - is Foxy there?"""
return { return {
"present": presence_state["present"], "present": presence_state["present"],
"face_count": presence_state["face_count"], "person_count": presence_state["person_count"],
"last_seen": presence_state["last_seen"], "last_seen": presence_state["last_seen"],
"seconds_since_seen": ( "seconds_since_seen": (
time.time() - presence_state["last_seen"] time.time() - presence_state["last_seen"]
@@ -216,11 +210,11 @@ async def presence():
} }
@app.get("/face") @app.get("/detections")
async def face(): async def detections():
"""Get detailed face detection results.""" """Get detailed detection results."""
return { return {
"face_count": presence_state["face_count"], "person_count": presence_state["person_count"],
"detections": presence_state["detections"], "detections": presence_state["detections"],
"last_detection": presence_state["last_detection"], "last_detection": presence_state["last_detection"],
"timestamp": time.time() "timestamp": time.time()
@@ -229,7 +223,7 @@ async def face():
@app.get("/snapshot") @app.get("/snapshot")
async def snapshot(): async def snapshot():
"""Capture a single frame from OAK-D RGB camera.""" """Capture RGB frame."""
global rgb_queue global rgb_queue
if rgb_queue is None: if rgb_queue is None:
@@ -247,58 +241,4 @@ async def snapshot():
except HTTPException: except HTTPException:
raise raise
except Exception as e: except Exception as e:
raise HTTPException(status_code=500, detail=f"Capture failed: {e}") raise HTTPException(status_code=500, detail=str(e))
@app.get("/snapshot/info")
async def snapshot_info():
    """Return metadata for the latest RGB frame without encoding the image.

    Responds with ``{"available": False}`` when no frame is queued yet.

    Raises:
        HTTPException: 503 if the OAK-D pipeline was never initialized,
            500 if reading the frame metadata fails.
    """
    # rgb_queue is only read here, so no `global` declaration is needed.
    if rgb_queue is None:
        raise HTTPException(status_code=503, detail="OAK-D not initialized")
    try:
        frame = rgb_queue.tryGet()  # non-blocking: None when no new frame is ready
        if frame is None:
            return {"available": False, "timestamp": time.time()}
        img = frame.getCvFrame()
        return {
            "available": True,
            "width": img.shape[1],
            "height": img.shape[0],
            # Grayscale frames have a 2-D shape; report a single channel then.
            "channels": img.shape[2] if len(img.shape) > 2 else 1,
            "timestamp": time.time()
        }
    except Exception as e:
        # Chain the original exception so the traceback keeps the root cause.
        raise HTTPException(status_code=500, detail=f"Info failed: {e}") from e
@app.get("/status")
async def status():
    """Get comprehensive OAK-D and presence status."""
    # No device handle means the pipeline never came up; report degraded mode.
    if oak_device is None:
        return {
            "connected": False,
            "message": "OAK-D not connected",
            "presence": presence_state
        }
    try:
        payload = {
            "connected": True,
            "device_id": oak_device.getMxId(),
            "usb_speed": str(oak_device.getUsbSpeed()),
            "face_detection_enabled": True,
            "detection_model": FACE_DETECTION_MODEL,
            "presence": presence_state,
            "timestamp": time.time()
        }
    except Exception as e:
        # Device queries can fail if the camera drops off the bus mid-call;
        # fall back to a disconnected report instead of a 500.
        payload = {"connected": False, "error": str(e), "presence": presence_state}
    return payload
# Run a standalone development server when executed directly
# (in production this app is typically served by an external ASGI runner).
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8100)