#!/usr/bin/env python3
"""
OAK-D Vision Service for Vixy's Head
FastAPI service with person detection and presence tracking

Day 74 - Built by Vixy! 🦊
Day 81 - Added presence detection! Now I can SEE you! 👀💜

Using depthai v3 API with yolov6-nano
"""

import threading
import time
import traceback
from contextlib import asynccontextmanager
from pathlib import Path

from fastapi import FastAPI, HTTPException
from fastapi.responses import Response
import depthai as dai
import cv2

from pose_estimator import PoseEstimator

# ============== Configuration ==============
SERVICE_VERSION = "0.4.0"

DETECTION_MODEL = "yolov6-nano"  # Has 'person' class
PERSON_CLASS_ID = 0  # 'person' is class 0 in COCO
DETECTION_THRESHOLD = 0.5
PRESENCE_TIMEOUT = 30.0  # seconds without person = not present
DETECTION_INTERVAL = 0.5  # seconds slept between detection polls

# Pose estimation
POSE_MODEL_PATH = (
    Path(__file__).parent
    / "models"
    / "movenet_single_pose_lightning_ptq_edgetpu.tflite"
)
POSE_CORAL_DEVICE = 1  # Second Coral (device 0 is headmic/YAMNet)

# How long shutdown waits for the detection thread to notice `running` is
# False. The loop sleeps at most 1 s per iteration, so 2 s covers one cycle.
THREAD_JOIN_TIMEOUT = 2.0

# ============== Global State ==============
pipeline_ctx = None
detection_queue = None
rgb_queue = None
detection_thread = None
running = False
labels = []
pose_estimator = None

# Written by the detection thread, read by the HTTP handlers.
presence_state = {
    "present": False,
    "person_count": 0,
    "last_seen": None,
    "last_detection": None,
    "detections": [],
    "confidence": 0.0,
}

# Written by the detection thread, read by the /pose endpoints.
pose_state = {
    "active": False,
    "keypoints": [],
    "posture": {},
    "num_valid": 0,
    "mean_confidence": 0.0,
    "inference_ms": 0.0,
    "last_update": None,
}


def _shutdown_pipeline(pipeline) -> None:
    """Best-effort stop and close of a depthai pipeline.

    Each step is attempted independently so that a failing ``stop()`` does
    not prevent ``close()`` from running. Failures are reported, never raised.
    """
    for method_name in ("stop", "close"):
        method = getattr(pipeline, method_name, None)
        if method is None:
            continue
        try:
            method()
        except Exception as e:
            print(f"⚠️ Pipeline {method_name}() failed: {e}")


def init_oak() -> bool:
    """Initialize OAK-D with person detection pipeline (depthai v3).

    Builds the camera and detection nodes, creates the RGB and detection
    output queues, starts the pipeline and then tries to bring up the pose
    estimator.

    Returns:
        True when the pipeline started, False on any failure. On failure the
        module-level pipeline and queue globals are reset to None.
    """
    global pipeline_ctx, detection_queue, rgb_queue, labels

    pipeline = None
    try:
        print("🦊 Initializing OAK-D with yolov6-nano...")

        # Create pipeline with context manager pattern for v3
        pipeline = dai.Pipeline()

        # Create camera node
        cam = pipeline.create(dai.node.Camera).build()

        # Request RGB output for snapshots (1080p)
        cam_out = cam.requestOutput((1920, 1080), dai.ImgFrame.Type.BGR888p)
        rgb_queue = cam_out.createOutputQueue(maxSize=1, blocking=False)

        # Create detection network with yolov6-nano
        desc = dai.NNModelDescription(DETECTION_MODEL)
        det = pipeline.create(dai.node.DetectionNetwork).build(cam, desc)
        det.setConfidenceThreshold(DETECTION_THRESHOLD)

        # Get class labels; tolerate a model that reports none so that a
        # purely informational log line cannot abort initialization.
        labels = det.getClasses() or []
        if len(labels) > PERSON_CLASS_ID:
            person_label = labels[PERSON_CLASS_ID]
        else:
            person_label = "?"
        print(f"✅ Loaded {len(labels)} classes, person={person_label}")

        # Create detection output queue
        detection_queue = det.out.createOutputQueue(maxSize=1, blocking=False)

        # Start pipeline
        pipeline.start()
        pipeline_ctx = pipeline

        print("✅ OAK-D initialized with person detection!")

        # Initialize pose estimator on Coral 2
        _init_pose_estimator()

        return True

    except Exception as e:
        print(f"❌ Failed to initialize OAK-D: {e}")
        traceback.print_exc()
        # Roll back partial state so the rest of the service sees a clean
        # "not connected" picture instead of dangling queues.
        pipeline_ctx = None
        detection_queue = None
        rgb_queue = None
        if pipeline is not None:
            _shutdown_pipeline(pipeline)
        return False


def _init_pose_estimator() -> None:
    """Initialize MoveNet Lightning on the second Coral Edge TPU.

    Leaves ``pose_estimator`` as None when the model file is missing or the
    estimator cannot be constructed; the service then runs without pose.
    """
    global pose_estimator

    if not POSE_MODEL_PATH.exists():
        print(f"⚠️ Pose model not found: {POSE_MODEL_PATH}")
        return

    try:
        pose_estimator = PoseEstimator(
            model_path=str(POSE_MODEL_PATH),
            device_index=POSE_CORAL_DEVICE,
        )
        print("✅ Pose estimator initialized on Coral 2!")
    except Exception as e:
        print(f"⚠️ Pose estimator failed to initialize: {e}")
        pose_estimator = None


def cleanup_oak() -> None:
    """Cleanup OAK-D resources.

    Signals the detection loop to stop, drops the queue references and shuts
    the pipeline down. Safe to call when nothing was initialized.
    """
    global pipeline_ctx, running, detection_queue, rgb_queue

    running = False

    # Detach the globals first so readers see "not connected" immediately.
    pipeline, pipeline_ctx = pipeline_ctx, None
    detection_queue = None
    rgb_queue = None

    if pipeline is not None:
        _shutdown_pipeline(pipeline)


def _clear_pose() -> None:
    """Reset the pose state once no person is visible any more."""
    if not pose_state["active"]:
        return
    pose_state["active"] = False
    pose_state["keypoints"] = []
    pose_state["posture"] = {}
    pose_state["num_valid"] = 0
    pose_state["mean_confidence"] = 0.0


def _update_presence(data, now: float) -> None:
    """Fold one detection packet into ``presence_state``.

    Args:
        data: Detection message from the detection queue; its ``detections``
            are filtered down to the person class.
        now: Timestamp (``time.time()``) associated with this packet.
    """
    presence_state["last_detection"] = now

    # Filter for person detections only
    persons = [d for d in data.detections if d.label == PERSON_CLASS_ID]
    presence_state["person_count"] = len(persons)

    if persons:
        presence_state["present"] = True
        presence_state["last_seen"] = now
        presence_state["confidence"] = max(d.confidence for d in persons)
        presence_state["detections"] = [
            {
                "xmin": d.xmin,
                "ymin": d.ymin,
                "xmax": d.xmax,
                "ymax": d.ymax,
                "confidence": d.confidence,
            }
            for d in persons
        ]
        # Run pose estimation on the latest RGB frame
        _run_pose_estimation()
    else:
        presence_state["detections"] = []
        presence_state["confidence"] = 0.0
        # Clear pose when no person
        _clear_pose()


def _expire_presence(now: float) -> None:
    """Mark presence as lost once PRESENCE_TIMEOUT has elapsed."""
    last_seen = presence_state["last_seen"]
    if last_seen is not None and now - last_seen > PRESENCE_TIMEOUT:
        presence_state["present"] = False


def detection_loop() -> None:
    """Background thread for presence detection + pose estimation.

    Polls the detection queue every DETECTION_INTERVAL seconds while
    ``running`` is True. The presence timeout is evaluated on every
    iteration, including iterations where no packet arrived, so presence
    still expires if the device stops delivering detections.
    """
    print("🔍 Presence detection loop started")

    while running:
        try:
            # Capture the global once: cleanup may set it to None between
            # the check and the call.
            queue = detection_queue
            if queue is None:
                time.sleep(1)
                continue

            data = queue.tryGet()
            now = time.time()

            if data is not None:
                _update_presence(data, now)

            # Check timeout
            _expire_presence(now)

            time.sleep(DETECTION_INTERVAL)

        except Exception as e:
            print(f"Detection loop error: {e}")
            time.sleep(1)

    print("🛑 Presence detection loop stopped")


def _run_pose_estimation() -> None:
    """Grab latest RGB frame and run pose estimation via Coral 2.

    Does nothing when the estimator or RGB queue is unavailable or when no
    frame is currently queued. Errors are logged and swallowed so a pose
    failure never interrupts presence detection.
    """
    estimator = pose_estimator
    queue = rgb_queue
    if estimator is None or queue is None:
        return

    try:
        frame_msg = queue.tryGet()
        if frame_msg is None:
            return

        frame = frame_msg.getCvFrame()
        result = estimator.estimate(frame)

        # Derive posture from keypoints
        posture = estimator.derive_posture(result["keypoints"])

        pose_state["active"] = True
        pose_state["keypoints"] = result["keypoints"]
        pose_state["posture"] = posture
        pose_state["num_valid"] = result["num_valid"]
        pose_state["mean_confidence"] = result["mean_confidence"]
        pose_state["inference_ms"] = result["inference_ms"]
        pose_state["last_update"] = result["timestamp"]

    except Exception as e:
        print(f"Pose estimation error: {e}")


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Startup and shutdown.

    Starts the detection thread when the OAK-D initializes. On shutdown,
    including shutdown caused by an exception, stops the thread before
    releasing the device.
    """
    global running, detection_thread

    print("🦊 Starting OAK-D Vision Service...")
    if init_oak():
        running = True
        detection_thread = threading.Thread(target=detection_loop, daemon=True)
        detection_thread.start()
        print("✅ Service ready!")
    else:
        print("⚠️ OAK-D not available")

    try:
        yield
    finally:
        print("👋 Shutting down...")
        # Let the worker leave its loop before the pipeline goes away so it
        # does not poll a closed queue.
        running = False
        worker = detection_thread
        if worker is not None and worker.is_alive():
            worker.join(timeout=THREAD_JOIN_TIMEOUT)
        cleanup_oak()


app = FastAPI(
    title="OAK-D Vision Service",
    description="Vixy's eyes with presence detection + pose estimation! 🦊👀",
    version=SERVICE_VERSION,
    lifespan=lifespan,
)


@app.get("/health")
async def health():
    """Health check."""
    return {
        "status": "healthy",
        "service": "oak-service",
        "version": SERVICE_VERSION,
        "oak_connected": pipeline_ctx is not None,
        "detection_model": DETECTION_MODEL,
        "pose_model_loaded": pose_estimator is not None,
        "timestamp": time.time(),
    }


@app.get("/presence")
async def presence():
    """Get current presence state - is Foxy there?"""
    # Read the clock and last_seen once so the derived field and the
    # timestamp in one response agree with each other.
    now = time.time()
    last_seen = presence_state["last_seen"]
    return {
        "present": presence_state["present"],
        "person_count": presence_state["person_count"],
        "last_seen": last_seen,
        "seconds_since_seen": (
            now - last_seen if last_seen is not None else None
        ),
        "confidence": presence_state["confidence"],
        "timestamp": now,
    }


@app.get("/detections")
async def detections():
    """Get detailed detection results."""
    return {
        "person_count": presence_state["person_count"],
        "detections": presence_state["detections"],
        "last_detection": presence_state["last_detection"],
        "timestamp": time.time(),
    }


@app.get("/snapshot")
async def snapshot():
    """Capture RGB frame.

    Returns:
        A JPEG-encoded image response.

    Raises:
        HTTPException: 503 when the camera is not initialized or no frame is
            queued, 500 when encoding or frame retrieval fails.
    """
    queue = rgb_queue
    if queue is None:
        raise HTTPException(status_code=503, detail="OAK-D not initialized")

    try:
        # NOTE: the detection thread also pulls frames from this size-1,
        # non-blocking queue for pose estimation, so a frame may
        # occasionally be unavailable here.
        frame = queue.tryGet()
        if frame is None:
            raise HTTPException(status_code=503, detail="No frame available")

        img = frame.getCvFrame()
        ok, jpeg = cv2.imencode(".jpg", img, [cv2.IMWRITE_JPEG_QUALITY, 85])
        if not ok:
            raise HTTPException(status_code=500, detail="JPEG encoding failed")

        return Response(content=jpeg.tobytes(), media_type="image/jpeg")
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


@app.get("/pose")
async def pose():
    """Get current pose keypoints.

    Raises:
        HTTPException: 503 when the pose estimator is not loaded.
    """
    if pose_estimator is None:
        raise HTTPException(status_code=503, detail="Pose estimator not available")

    return {
        "active": pose_state["active"],
        "keypoints": pose_state["keypoints"],
        "num_valid": pose_state["num_valid"],
        "mean_confidence": pose_state["mean_confidence"],
        "inference_ms": pose_state["inference_ms"],
        "last_update": pose_state["last_update"],
        "timestamp": time.time(),
    }


@app.get("/pose/summary")
async def pose_summary():
    """Get derived posture summary.

    Raises:
        HTTPException: 503 when the pose estimator is not loaded.
    """
    if pose_estimator is None:
        raise HTTPException(status_code=503, detail="Pose estimator not available")

    posture = pose_state["posture"]
    return {
        "active": pose_state["active"],
        "posture": posture.get("posture", "unknown"),
        "facing_camera": posture.get("facing_camera", False),
        "arms_raised": posture.get("arms_raised", False),
        "mean_confidence": pose_state["mean_confidence"],
        "num_valid": pose_state["num_valid"],
        "timestamp": time.time(),
    }


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8100)