Add facial recognition

This commit is contained in:
Alex
2026-02-01 11:36:22 -06:00
parent 3c273d7d02
commit 3ac8778cac
3 changed files with 507 additions and 20 deletions

View File

@@ -11,13 +11,20 @@ Day 82 - SPATIAL UPGRADE! Now I know how far away you are! 📏🦊
import time
import threading
import logging
from pathlib import Path
from contextlib import asynccontextmanager
from fastapi import FastAPI, HTTPException
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from fastapi.responses import Response
import depthai as dai
import cv2
import numpy as np
from face_recognition import FaceRecognizer
logger = logging.getLogger("oak-service")
logging.basicConfig(level=logging.INFO)
# ============== Configuration ==============
DETECTION_MODEL = "yolov6-nano" # Has 'person' class
PERSON_CLASS_ID = 0 # 'person' is class 0 in COCO
@@ -29,6 +36,12 @@ DETECTION_INTERVAL = 0.5
DEPTH_LOWER_THRESHOLD = 100 # 10cm minimum
DEPTH_UPPER_THRESHOLD = 10000 # 10m maximum
# Face recognition models
MODELS_DIR = Path(__file__).parent / "models"
FACE_DETECT_MODEL = MODELS_DIR / "ssd_mobilenet_v2_face_quant_postprocess_edgetpu.tflite"
FACE_EMBED_MODEL = MODELS_DIR / "facenet.tflite"
FACE_DB_PATH = Path(__file__).parent / "faces.db"
# ============== Global State ==============
pipeline_ctx = None
detection_queue = None
@@ -37,6 +50,7 @@ depth_queue = None
detection_thread = None
running = False
labels = []
face_recognizer = None
presence_state = {
"present": False,
@@ -45,14 +59,35 @@ presence_state = {
"last_detection": None,
"detections": [],
"confidence": 0.0,
# NEW: spatial data!
# Spatial data
"distance_mm": None,
"spatial_x": None,
"spatial_y": None,
"spatial_z": None,
# Face recognition
"recognized_name": None,
"recognition_confidence": None,
}
def init_face_recognition():
    """Initialize Coral face detection + FaceNet embedding.

    Builds the global ``face_recognizer`` from the model paths and embedding
    database configured at module level (MODELS_DIR / FACE_DB_PATH).

    Returns:
        bool: True when the recognizer was created; False when any part of
        the stack (model files, Coral runtime, database) is unavailable.
    """
    global face_recognizer
    try:
        face_recognizer = FaceRecognizer(
            face_model_path=FACE_DETECT_MODEL,
            embed_model_path=FACE_EMBED_MODEL,
            db_path=FACE_DB_PATH,
        )
    except Exception:
        # Missing models or Coral hardware are tolerated: the service keeps
        # running in detection-only mode. Use the module logger (not print)
        # for consistency with the rest of the file; logger.exception
        # records the full traceback for us.
        logger.exception("Face recognition unavailable")
        face_recognizer = None
        return False
    logger.info("Face recognition initialized (Coral + FaceNet)")
    return True
def init_oak():
"""Initialize OAK-D with SPATIAL person detection pipeline (depthai v3)."""
global pipeline_ctx, detection_queue, rgb_queue, depth_queue, labels
@@ -123,9 +158,13 @@ def init_oak():
def cleanup_oak():
"""Cleanup OAK-D resources."""
global pipeline_ctx, running
global pipeline_ctx, running, face_recognizer
running = False
if face_recognizer:
face_recognizer.close()
face_recognizer = None
if pipeline_ctx:
try:
pipeline_ctx.stop()
@@ -162,30 +201,60 @@ def detection_loop():
if person_count > 0:
presence_state["present"] = True
presence_state["last_seen"] = now
# Get highest confidence detection
best = max(persons, key=lambda d: d.confidence)
presence_state["confidence"] = best.confidence
# SPATIAL DATA! 🎉
# Spatial data
presence_state["spatial_x"] = best.spatialCoordinates.x
presence_state["spatial_y"] = best.spatialCoordinates.y
presence_state["spatial_z"] = best.spatialCoordinates.z
presence_state["distance_mm"] = best.spatialCoordinates.z # Z is depth
presence_state["detections"] = [
{
presence_state["distance_mm"] = best.spatialCoordinates.z
# Face recognition
face_results = []
if face_recognizer and rgb_queue:
rgb_data = rgb_queue.tryGet()
if rgb_data is not None:
rgb_frame = rgb_data.getCvFrame()
try:
face_results = face_recognizer.process_frame(
rgb_frame, persons
)
except Exception as e:
logger.warning("Face recognition error: %s", e)
det_list = []
best_recognized = None
best_recog_conf = 0.0
for i, d in enumerate(persons):
det = {
"xmin": d.xmin, "ymin": d.ymin,
"xmax": d.xmax, "ymax": d.ymax,
"confidence": d.confidence,
# Spatial coordinates in mm
"x_mm": d.spatialCoordinates.x,
"y_mm": d.spatialCoordinates.y,
"z_mm": d.spatialCoordinates.z,
"distance_m": d.spatialCoordinates.z / 1000.0,
"recognized_name": None,
"recognition_confidence": None,
}
for d in persons
]
if i < len(face_results):
det["recognized_name"] = face_results[i]["recognized_name"]
det["recognition_confidence"] = face_results[i]["recognition_confidence"]
if det["recognized_name"] and (
det["recognition_confidence"] or 0
) > best_recog_conf:
best_recognized = det["recognized_name"]
best_recog_conf = det["recognition_confidence"]
det_list.append(det)
presence_state["detections"] = det_list
presence_state["recognized_name"] = best_recognized
presence_state["recognition_confidence"] = (
round(best_recog_conf, 3) if best_recognized else None
)
else:
presence_state["detections"] = []
presence_state["confidence"] = 0.0
@@ -193,7 +262,9 @@ def detection_loop():
presence_state["spatial_y"] = None
presence_state["spatial_z"] = None
presence_state["distance_mm"] = None
presence_state["recognized_name"] = None
presence_state["recognition_confidence"] = None
# Check timeout
if presence_state["last_seen"]:
if now - presence_state["last_seen"] > PRESENCE_TIMEOUT:
@@ -214,7 +285,9 @@ async def lifespan(app: FastAPI):
global running, detection_thread
print("🦊 Starting OAK-D SPATIAL Vision Service...")
init_face_recognition()
if init_oak():
running = True
detection_thread = threading.Thread(target=detection_loop, daemon=True)
@@ -231,8 +304,8 @@ async def lifespan(app: FastAPI):
app = FastAPI(
title="OAK-D SPATIAL Vision Service",
description="Vixy's eyes with SPATIAL presence detection! 🦊👀📏",
version="0.4.0",
description="Vixy's eyes with SPATIAL presence detection + face recognition! 🦊👀📏",
version="0.5.0",
lifespan=lifespan
)
@@ -243,10 +316,11 @@ async def health():
return {
"status": "healthy",
"service": "oak-service",
"version": "0.4.0",
"version": "0.5.0",
"oak_connected": pipeline_ctx is not None,
"detection_model": DETECTION_MODEL,
"spatial_enabled": True,
"face_recognition_enabled": face_recognizer is not None,
"timestamp": time.time()
}
@@ -267,7 +341,6 @@ async def presence():
if presence_state["last_seen"] else None
),
"confidence": presence_state["confidence"],
# SPATIAL DATA
"distance_mm": presence_state["distance_mm"],
"distance_m": distance_m,
"spatial": {
@@ -275,6 +348,8 @@ async def presence():
"y_mm": presence_state["spatial_y"],
"z_mm": presence_state["spatial_z"],
} if presence_state["spatial_z"] else None,
"recognized_name": presence_state["recognized_name"],
"recognition_confidence": presence_state["recognition_confidence"],
"timestamp": time.time()
}
@@ -340,6 +415,68 @@ async def depth_frame():
raise HTTPException(status_code=500, detail=str(e))
# ============== Face Enrollment API ==============
@app.post("/faces/enroll")
async def enroll_face_upload(
    name: str = Form(...),
    photo: UploadFile = File(...),
):
    """Enroll a face by uploading a photo (multipart form: name + photo)."""
    if face_recognizer is None:
        raise HTTPException(status_code=503, detail="Face recognition not available")
    # Decode the uploaded bytes into a BGR image for the recognizer.
    raw = await photo.read()
    decoded = cv2.imdecode(np.frombuffer(raw, np.uint8), cv2.IMREAD_COLOR)
    if decoded is None:
        raise HTTPException(status_code=400, detail="Could not decode image")
    outcome = face_recognizer.enroll(name, decoded)
    if outcome["success"]:
        return outcome
    raise HTTPException(status_code=400, detail=outcome["error"])
@app.post("/faces/enroll-from-camera")
async def enroll_face_camera(name: str):
    """Enroll a face using the current camera frame. Pass name as query param."""
    if face_recognizer is None:
        raise HTTPException(status_code=503, detail="Face recognition not available")
    if rgb_queue is None:
        raise HTTPException(status_code=503, detail="Camera not available")
    # Grab the most recent RGB frame without blocking; 503 if none queued.
    packet = rgb_queue.tryGet()
    if packet is None:
        raise HTTPException(status_code=503, detail="No frame available")
    outcome = face_recognizer.enroll(name, packet.getCvFrame())
    if outcome["success"]:
        return outcome
    raise HTTPException(status_code=400, detail=outcome["error"])
@app.get("/faces")
async def list_faces():
    """List enrolled faces."""
    # Snapshot the global once so the check and the call see the same object.
    recognizer = face_recognizer
    if recognizer is None:
        raise HTTPException(status_code=503, detail="Face recognition not available")
    return {"faces": recognizer.list_faces()}
@app.delete("/faces/{name}")
async def delete_face(name: str):
    """Remove all embeddings for a person."""
    if face_recognizer is None:
        raise HTTPException(status_code=503, detail="Face recognition not available")
    outcome = face_recognizer.delete_face(name)
    # Unknown names surface as 404 rather than a silent no-op.
    if outcome["success"]:
        return outcome
    raise HTTPException(status_code=404, detail=f"No face found for '{name}'")
if __name__ == "__main__":
    # Standalone/dev entry point: serve the FastAPI app on all interfaces,
    # port 8100.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8100)