facial recognition
This commit is contained in:
80
docs/plans/2026-02-01-facial-recognition-design.md
Normal file
80
docs/plans/2026-02-01-facial-recognition-design.md
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
# Facial Recognition: OAK-D + Coral Edge TPU
|
||||||
|
|
||||||
|
Add face detection and recognition to the oak-service spatial pipeline.
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
```
|
||||||
|
OAK-D Lite (Myriad X) Coral Edge TPU Host (Pi 5)
|
||||||
|
────────────────────── ────────────── ───────────
|
||||||
|
yolov6-nano spatial ssd_mobilenet_v2_face crop person bbox
|
||||||
|
→ person bboxes → face bboxes cosine similarity
|
||||||
|
→ spatial coords (X,Y,Z) arcface/facenet edgetpu vs SQLite DB
|
||||||
|
→ RGB frames → 512-dim embedding → name + confidence
|
||||||
|
```
|
||||||
|
|
||||||
|
Per detection cycle (~0.5s):
|
||||||
|
1. OAK-D outputs person detections + spatial coords + RGB frame (unchanged)
|
||||||
|
2. Host crops upper-body region from RGB for each person bbox
|
||||||
|
3. Coral runs face detection on crop (ssd_mobilenet_v2_face edgetpu)
|
||||||
|
4. If face found, crop face, resize to model input, run embedding via Coral
|
||||||
|
5. Host compares embedding against SQLite DB (cosine similarity)
|
||||||
|
6. Attach recognized_name + recognition_confidence to detection
|
||||||
|
|
||||||
|
## Setup: Coral Runtime
|
||||||
|
|
||||||
|
Install pycoral + tflite-runtime in the oak-service venv:
|
||||||
|
```bash
|
||||||
|
pip install tflite-runtime pycoral
|
||||||
|
```
|
||||||
|
|
||||||
|
Download Edge TPU models:
|
||||||
|
- ssd_mobilenet_v2_face_quant_postprocess_edgetpu.tflite
|
||||||
|
- face embedding model (facenet or arcface quantized for edgetpu)
|
||||||
|
|
||||||
|
Models stored in oak-service/models/ directory.
|
||||||
|
|
||||||
|
## SQLite Face Database
|
||||||
|
|
||||||
|
Path: configurable, default `faces.db` in service directory.
|
||||||
|
|
||||||
|
```sql
|
||||||
|
CREATE TABLE faces (
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
name TEXT NOT NULL,
|
||||||
|
embedding BLOB NOT NULL,
|
||||||
|
enrolled_at REAL NOT NULL,
|
||||||
|
source TEXT
|
||||||
|
);
|
||||||
|
CREATE INDEX idx_faces_name ON faces(name);
|
||||||
|
```
|
||||||
|
|
||||||
|
- Multiple embeddings per person (different angles/lighting)
|
||||||
|
- Embedding stored as packed float32 bytes
|
||||||
|
- Matching: cosine similarity, threshold ~0.5 for positive match
|
||||||
|
- Best match across all embeddings for a name wins
|
||||||
|
|
||||||
|
## API Changes
|
||||||
|
|
||||||
|
New endpoints:
|
||||||
|
- `POST /faces/enroll` — multipart: name + photo
- `POST /faces/enroll-from-camera` — name as query parameter, enrolls from the current camera frame
|
||||||
|
- `GET /faces` — list enrolled names with embedding count
|
||||||
|
- `DELETE /faces/{name}` — remove person from DB
|
||||||
|
|
||||||
|
Modified responses:
|
||||||
|
- `/presence` adds: recognized_name, recognition_confidence
|
||||||
|
- `/detections` adds per-detection: recognized_name, recognition_confidence
|
||||||
|
|
||||||
|
## Files
|
||||||
|
|
||||||
|
- `oak_service_spatial.py` — add Coral face pipeline to detection loop
|
||||||
|
- `models/` — Edge TPU model files
|
||||||
|
- `faces.db` — SQLite database (created on first run)
|
||||||
|
|
||||||
|
## Verification
|
||||||
|
|
||||||
|
1. Install Coral runtime, verify device detected
|
||||||
|
2. Download face models, verify inference runs
|
||||||
|
3. Enroll a face via API
|
||||||
|
4. Test recognition: stand in front of camera, check /presence for name
|
||||||
|
5. Test unknown: different person, /presence should report `recognized_name` as null (no name)
|
||||||
270
face_recognition.py
Normal file
270
face_recognition.py
Normal file
@@ -0,0 +1,270 @@
|
|||||||
|
"""
|
||||||
|
Face Recognition Module for OAK-D Vision Service
|
||||||
|
Coral Edge TPU for face detection + CPU FaceNet for embeddings + SQLite DB
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sqlite3
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import ai_edge_litert.interpreter as tfl
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
logger = logging.getLogger("face_recognition")
|
||||||
|
|
||||||
|
# Minimum detector score for a detection to be accepted as a face.
FACE_DETECT_THRESHOLD = 0.5
|
||||||
|
# Minimum similarity score for an embedding match to be reported as a name.
RECOGNITION_THRESHOLD = 0.5
|
||||||
|
# Expected embedding length; stored embeddings of any other length are ignored.
EMBEDDING_DIM = 512
|
||||||
|
|
||||||
|
|
||||||
|
class FaceRecognizer:
    """Detect, embed and identify faces found inside person detections.

    A face detector (loaded with the Edge TPU delegate) locates the face, a
    second TFLite model (loaded without a delegate) turns the face crop into
    an embedding, and the embedding is compared against the embeddings stored
    in a SQLite ``faces`` table.

    Thread-safety: callers may invoke ``process_frame()`` and ``enroll()``
    from different threads. Interpreter use is serialised by ``_infer_lock``
    and database access by ``_lock``; the two locks are never held together.
    """

    def __init__(self, face_model_path, embed_model_path, db_path="faces.db"):
        """Load both models, open (and if needed create) the face database.

        Args:
            face_model_path: path to the Edge TPU face detection model.
            embed_model_path: path to the embedding model.
            db_path: path of the SQLite database file.
        """
        # Serialises access to the shared SQLite connection.
        self._lock = threading.Lock()
        # Serialises set_tensor/invoke/get_tensor sequences: no other call may
        # be made on an interpreter while invoke() is still running.
        self._infer_lock = threading.Lock()

        # Coral face detector
        logger.info("Loading face detection model on Edge TPU...")
        delegate = tfl.load_delegate("libedgetpu.so.1")
        self._face_interp = tfl.Interpreter(
            model_path=str(face_model_path),
            experimental_delegates=[delegate],
        )
        self._face_interp.allocate_tensors()
        self._face_input = self._face_interp.get_input_details()[0]
        self._face_outputs = self._face_interp.get_output_details()
        logger.info(
            "Face detector ready: input %s %s",
            self._face_input["shape"],
            self._face_input["dtype"],
        )

        # CPU FaceNet embedder
        logger.info("Loading FaceNet embedding model on CPU...")
        self._embed_interp = tfl.Interpreter(model_path=str(embed_model_path))
        self._embed_interp.allocate_tensors()
        self._embed_input = self._embed_interp.get_input_details()[0]
        self._embed_output = self._embed_interp.get_output_details()[0]
        logger.info(
            "FaceNet ready: input %s, output %s",
            self._embed_input["shape"],
            self._embed_output["shape"],
        )

        # SQLite DB. check_same_thread=False lets several threads share the
        # connection; every access after construction goes through self._lock.
        self._db_path = str(db_path)
        self._db = sqlite3.connect(self._db_path, check_same_thread=False)
        self._db.execute(
            """CREATE TABLE IF NOT EXISTS faces (
                id INTEGER PRIMARY KEY,
                name TEXT NOT NULL,
                embedding BLOB NOT NULL,
                enrolled_at REAL NOT NULL,
                source TEXT
            )"""
        )
        self._db.execute(
            "CREATE INDEX IF NOT EXISTS idx_faces_name ON faces(name)"
        )
        self._db.commit()

        # Load embedding cache
        self._cache = []  # list of (name, embedding_array)
        self._reload_cache()
        logger.info("Face DB: %d embeddings loaded", len(self._cache))

    def _reload_cache(self):
        """Rebuild the in-memory (name, embedding) list from the database.

        Rows whose embedding does not have EMBEDDING_DIM float32 values are
        left out of the cache and reported in the log. The new list replaces
        ``self._cache`` in a single assignment so readers always see a
        complete list. Callers other than ``__init__`` hold ``self._lock``.
        """
        rows = self._db.execute("SELECT name, embedding FROM faces").fetchall()
        cache = []
        skipped = 0
        for name, blob in rows:
            emb = np.frombuffer(blob, dtype=np.float32).copy()
            if len(emb) == EMBEDDING_DIM:
                cache.append((name, emb))
            else:
                skipped += 1
        if skipped:
            logger.warning(
                "Ignored %d stored embeddings whose length is not %d",
                skipped,
                EMBEDDING_DIM,
            )
        self._cache = cache

    def _detect_face(self, image):
        """Run face detection and return the highest-scoring face.

        Args:
            image: image array (H, W, 3); it is resized to the detector's
                input size and cast to uint8 before inference.

        Returns:
            Tuple ``(box, score)``. ``box`` is ``(y1, x1, y2, x2)`` in pixels
            of ``image``, clamped to the image bounds. When no detection
            reaches FACE_DETECT_THRESHOLD the result is ``(None, 0.0)``.
        """
        h, w = image.shape[:2]
        inp_h, inp_w = self._face_input["shape"][1:3]
        resized = cv2.resize(image, (inp_w, inp_h))
        if resized.dtype != np.uint8:
            resized = resized.astype(np.uint8)

        with self._infer_lock:
            self._face_interp.set_tensor(
                self._face_input["index"], resized[np.newaxis]
            )
            self._face_interp.invoke()

            # Parse outputs: boxes [1,50,4], classes [1,50], scores [1,50], count [1]
            boxes = self._face_interp.get_tensor(self._face_outputs[0]["index"])[0]
            scores = self._face_interp.get_tensor(self._face_outputs[2]["index"])[0]
            count = int(
                self._face_interp.get_tensor(self._face_outputs[3]["index"])[0]
            )

        valid_scores = scores[: min(count, len(scores))]
        if len(valid_scores) == 0:
            return None, 0.0

        # argmax keeps the first of several equal top scores.
        best_idx = int(np.argmax(valid_scores))
        best_score = float(valid_scores[best_idx])
        if best_score < FACE_DETECT_THRESHOLD:
            return None, 0.0

        # boxes are [ymin, xmin, ymax, xmax] normalized 0-1
        ymin, xmin, ymax, xmax = boxes[best_idx]
        best_box = (
            max(0, int(ymin * h)),
            max(0, int(xmin * w)),
            min(h, int(ymax * h)),
            min(w, int(xmax * w)),
        )
        return best_box, best_score

    def _compute_embedding(self, face_image):
        """Compute the embedding of a face crop.

        Args:
            face_image: face crop (H, W, 3); resized to the embedder's input
                size and scaled from [0, 255] to [-1, 1].

        Returns:
            1-D float32 numpy array owned by the caller.
        """
        inp_h, inp_w = self._embed_input["shape"][1:3]
        resized = cv2.resize(face_image, (inp_w, inp_h))
        # FaceNet preprocessing: normalize to [-1, 1]
        normalized = (resized.astype(np.float32) / 127.5) - 1.0

        with self._infer_lock:
            self._embed_interp.set_tensor(
                self._embed_input["index"], normalized[np.newaxis]
            )
            self._embed_interp.invoke()
            output = self._embed_interp.get_tensor(self._embed_output["index"])[0]

        # float32 keeps the byte layout expected when the embedding is stored
        # with tobytes() and read back with np.frombuffer(dtype=float32).
        return np.array(output, dtype=np.float32).ravel()

    def _match_embedding(self, embedding, threshold=None):
        """Find the enrolled name whose embedding is most similar.

        Similarity is the cosine of the angle between the query and each
        cached embedding; both vectors are normalized here, so the result
        does not depend on the model emitting unit-length vectors.

        Args:
            embedding: query embedding (1-D array).
            threshold: minimum similarity for a positive match; defaults to
                RECOGNITION_THRESHOLD.

        Returns:
            ``(name, confidence)`` when the best similarity reaches the
            threshold, ``(None, confidence)`` when it does not, and
            ``(None, 0.0)`` when nothing could be compared.
        """
        cache = self._cache  # snapshot reference
        if not cache:
            return None, 0.0

        query = np.asarray(embedding, dtype=np.float32).ravel()
        query_norm = float(np.linalg.norm(query))
        if query_norm == 0.0:
            return None, 0.0

        best_scores = {}  # name -> best score
        for name, stored_emb in cache:
            if stored_emb.shape != query.shape:
                continue
            stored_norm = float(np.linalg.norm(stored_emb))
            if stored_norm == 0.0:
                continue
            score = float(np.dot(query, stored_emb)) / (query_norm * stored_norm)
            if name not in best_scores or score > best_scores[name]:
                best_scores[name] = score

        if not best_scores:
            return None, 0.0

        best_name = max(best_scores, key=best_scores.get)
        best_conf = best_scores[best_name]

        limit = RECOGNITION_THRESHOLD if threshold is None else threshold
        if best_conf >= limit:
            return best_name, best_conf
        return None, best_conf

    def process_frame(self, rgb_frame, person_detections):
        """Process an RGB frame with person detections, return face recognition results.

        Args:
            rgb_frame: BGR numpy array from OAK-D (H, W, 3)
            person_detections: list of depthai detection objects with
                xmin/ymin/xmax/ymax (normalized 0-1)

        Returns:
            list of dicts (same order as person_detections):
            {recognized_name: str|None, recognition_confidence: float|None}
            The confidence is None when no face was found, and is the best
            similarity (rounded to 3 places) otherwise, even without a match.
        """
        # NOTE(review): the frame is passed to both models in BGR order;
        # enroll() uses the same path, so stored and live embeddings agree.
        # Confirm the channel order the models were trained with.
        h, w = rgb_frame.shape[:2]
        results = []

        for det in person_detections:
            # Crop upper 40% of person bbox (head + shoulders)
            px1 = max(0, int(det.xmin * w))
            py1 = max(0, int(det.ymin * h))
            px2 = min(w, int(det.xmax * w))
            py2 = min(h, int(det.ymax * h))

            bbox_h = py2 - py1
            upper_y2 = py1 + int(bbox_h * 0.4)

            # Add 10% horizontal padding
            pad_x = int((px2 - px1) * 0.1)
            crop_x1 = max(0, px1 - pad_x)
            crop_x2 = min(w, px2 + pad_x)

            crop = rgb_frame[py1:upper_y2, crop_x1:crop_x2]
            if crop.size == 0:
                results.append({"recognized_name": None, "recognition_confidence": None})
                continue

            # Face detection on Coral
            face_box, face_score = self._detect_face(crop)
            if face_box is None:
                results.append({"recognized_name": None, "recognition_confidence": None})
                continue

            # Crop face and compute embedding
            fy1, fx1, fy2, fx2 = face_box
            face_crop = crop[fy1:fy2, fx1:fx2]
            if face_crop.size == 0:
                results.append({"recognized_name": None, "recognition_confidence": None})
                continue

            embedding = self._compute_embedding(face_crop)
            name, confidence = self._match_embedding(embedding)

            results.append({
                "recognized_name": name,
                "recognition_confidence": round(confidence, 3),
            })

        return results

    def enroll(self, name, image):
        """Detect face in image, compute embedding, store in DB.

        Args:
            name: person's name; must be a non-blank string
            image: BGR numpy array containing a face

        Returns:
            ``{"success": True, "name": ..., "embedding_count": ...}`` on
            success, otherwise ``{"success": False, "error": ...}``.
        """
        # A blank name could never be addressed again by name, so refuse it.
        if not isinstance(name, str) or not name.strip():
            return {"success": False, "error": "Name must not be empty"}

        face_box, face_score = self._detect_face(image)
        if face_box is None:
            return {"success": False, "error": "No face detected in image"}

        fy1, fx1, fy2, fx2 = face_box
        face_crop = image[fy1:fy2, fx1:fx2]
        if face_crop.size == 0:
            return {"success": False, "error": "Face crop is empty"}

        embedding = self._compute_embedding(face_crop)
        # _reload_cache() ignores rows of any other length, so storing one
        # would report success for an embedding that can never match.
        if embedding.size != EMBEDDING_DIM:
            return {
                "success": False,
                "error": (
                    f"Unexpected embedding size {embedding.size} "
                    f"(expected {EMBEDDING_DIM})"
                ),
            }

        with self._lock:
            self._db.execute(
                "INSERT INTO faces (name, embedding, enrolled_at, source) VALUES (?, ?, ?, ?)",
                (name, embedding.tobytes(), time.time(), "api"),
            )
            self._db.commit()
            self._reload_cache()
            count = sum(1 for n, _ in self._cache if n == name)

        logger.info("Enrolled face for '%s' (score=%.2f), %d total embeddings", name, face_score, count)
        return {"success": True, "name": name, "embedding_count": count}

    def list_faces(self):
        """Return list of enrolled names with embedding counts.

        Returns:
            list of ``{"name", "embedding_count", "enrolled_at"}`` dicts
            ordered by name; ``enrolled_at`` is the earliest enrollment time
            recorded for that name.
        """
        # The connection is shared between threads, so reads take the same
        # lock as the writers.
        with self._lock:
            rows = self._db.execute(
                "SELECT name, COUNT(*) as cnt, MIN(enrolled_at) as first "
                "FROM faces GROUP BY name ORDER BY name"
            ).fetchall()
        return [
            {"name": r[0], "embedding_count": r[1], "enrolled_at": r[2]}
            for r in rows
        ]

    def delete_face(self, name):
        """Remove all embeddings for a name.

        Returns:
            ``{"success", "name", "deleted"}`` where ``deleted`` is the number
            of rows removed and ``success`` is True when it is non-zero.
        """
        with self._lock:
            cur = self._db.execute("DELETE FROM faces WHERE name = ?", (name,))
            self._db.commit()
            self._reload_cache()
            deleted = cur.rowcount
        logger.info("Deleted %d embeddings for '%s'", deleted, name)
        return {"success": deleted > 0, "name": name, "deleted": deleted}

    def close(self):
        """Close DB connection."""
        with self._lock:
            self._db.close()
|
||||||
@@ -11,13 +11,20 @@ Day 82 - SPATIAL UPGRADE! Now I know how far away you are! 📏🦊
|
|||||||
|
|
||||||
import time
|
import time
|
||||||
import threading
|
import threading
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
from contextlib import asynccontextmanager
|
from contextlib import asynccontextmanager
|
||||||
from fastapi import FastAPI, HTTPException
|
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
|
||||||
from fastapi.responses import Response
|
from fastapi.responses import Response
|
||||||
import depthai as dai
|
import depthai as dai
|
||||||
import cv2
|
import cv2
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
from face_recognition import FaceRecognizer
|
||||||
|
|
||||||
|
logger = logging.getLogger("oak-service")
|
||||||
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
|
||||||
# ============== Configuration ==============
|
# ============== Configuration ==============
|
||||||
DETECTION_MODEL = "yolov6-nano" # Has 'person' class
|
DETECTION_MODEL = "yolov6-nano" # Has 'person' class
|
||||||
PERSON_CLASS_ID = 0 # 'person' is class 0 in COCO
|
PERSON_CLASS_ID = 0 # 'person' is class 0 in COCO
|
||||||
@@ -29,6 +36,12 @@ DETECTION_INTERVAL = 0.5
|
|||||||
DEPTH_LOWER_THRESHOLD = 100 # 10cm minimum
|
DEPTH_LOWER_THRESHOLD = 100 # 10cm minimum
|
||||||
DEPTH_UPPER_THRESHOLD = 10000 # 10m maximum
|
DEPTH_UPPER_THRESHOLD = 10000 # 10m maximum
|
||||||
|
|
||||||
|
# Face recognition models
|
||||||
|
MODELS_DIR = Path(__file__).parent / "models"
|
||||||
|
FACE_DETECT_MODEL = MODELS_DIR / "ssd_mobilenet_v2_face_quant_postprocess_edgetpu.tflite"
|
||||||
|
FACE_EMBED_MODEL = MODELS_DIR / "facenet.tflite"
|
||||||
|
FACE_DB_PATH = Path(__file__).parent / "faces.db"
|
||||||
|
|
||||||
# ============== Global State ==============
|
# ============== Global State ==============
|
||||||
pipeline_ctx = None
|
pipeline_ctx = None
|
||||||
detection_queue = None
|
detection_queue = None
|
||||||
@@ -37,6 +50,7 @@ depth_queue = None
|
|||||||
detection_thread = None
|
detection_thread = None
|
||||||
running = False
|
running = False
|
||||||
labels = []
|
labels = []
|
||||||
|
face_recognizer = None
|
||||||
|
|
||||||
presence_state = {
|
presence_state = {
|
||||||
"present": False,
|
"present": False,
|
||||||
@@ -45,14 +59,35 @@ presence_state = {
|
|||||||
"last_detection": None,
|
"last_detection": None,
|
||||||
"detections": [],
|
"detections": [],
|
||||||
"confidence": 0.0,
|
"confidence": 0.0,
|
||||||
# NEW: spatial data!
|
# Spatial data
|
||||||
"distance_mm": None,
|
"distance_mm": None,
|
||||||
"spatial_x": None,
|
"spatial_x": None,
|
||||||
"spatial_y": None,
|
"spatial_y": None,
|
||||||
"spatial_z": None,
|
"spatial_z": None,
|
||||||
|
# Face recognition
|
||||||
|
"recognized_name": None,
|
||||||
|
"recognition_confidence": None,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def init_face_recognition():
    """Create the global FaceRecognizer from the configured model and DB paths.

    Any failure during construction is reported on stdout together with the
    traceback and leaves face recognition disabled.

    Returns:
        True when the recognizer was created, False otherwise.
    """
    global face_recognizer
    import traceback

    try:
        recognizer = FaceRecognizer(
            db_path=FACE_DB_PATH,
            embed_model_path=FACE_EMBED_MODEL,
            face_model_path=FACE_DETECT_MODEL,
        )
        face_recognizer = recognizer
        print("✅ Face recognition initialized (Coral + FaceNet)")
    except Exception as e:
        # Startup boundary: the service keeps running without recognition.
        print(f"⚠️ Face recognition unavailable: {e}")
        traceback.print_exc()
        return False
    return True
|
||||||
|
|
||||||
|
|
||||||
def init_oak():
|
def init_oak():
|
||||||
"""Initialize OAK-D with SPATIAL person detection pipeline (depthai v3)."""
|
"""Initialize OAK-D with SPATIAL person detection pipeline (depthai v3)."""
|
||||||
global pipeline_ctx, detection_queue, rgb_queue, depth_queue, labels
|
global pipeline_ctx, detection_queue, rgb_queue, depth_queue, labels
|
||||||
@@ -123,9 +158,13 @@ def init_oak():
|
|||||||
|
|
||||||
def cleanup_oak():
|
def cleanup_oak():
|
||||||
"""Cleanup OAK-D resources."""
|
"""Cleanup OAK-D resources."""
|
||||||
global pipeline_ctx, running
|
global pipeline_ctx, running, face_recognizer
|
||||||
running = False
|
running = False
|
||||||
|
|
||||||
|
if face_recognizer:
|
||||||
|
face_recognizer.close()
|
||||||
|
face_recognizer = None
|
||||||
|
|
||||||
if pipeline_ctx:
|
if pipeline_ctx:
|
||||||
try:
|
try:
|
||||||
pipeline_ctx.stop()
|
pipeline_ctx.stop()
|
||||||
@@ -167,25 +206,55 @@ def detection_loop():
|
|||||||
best = max(persons, key=lambda d: d.confidence)
|
best = max(persons, key=lambda d: d.confidence)
|
||||||
presence_state["confidence"] = best.confidence
|
presence_state["confidence"] = best.confidence
|
||||||
|
|
||||||
# SPATIAL DATA! 🎉
|
# Spatial data
|
||||||
presence_state["spatial_x"] = best.spatialCoordinates.x
|
presence_state["spatial_x"] = best.spatialCoordinates.x
|
||||||
presence_state["spatial_y"] = best.spatialCoordinates.y
|
presence_state["spatial_y"] = best.spatialCoordinates.y
|
||||||
presence_state["spatial_z"] = best.spatialCoordinates.z
|
presence_state["spatial_z"] = best.spatialCoordinates.z
|
||||||
presence_state["distance_mm"] = best.spatialCoordinates.z # Z is depth
|
presence_state["distance_mm"] = best.spatialCoordinates.z
|
||||||
|
|
||||||
presence_state["detections"] = [
|
# Face recognition
|
||||||
{
|
face_results = []
|
||||||
|
if face_recognizer and rgb_queue:
|
||||||
|
rgb_data = rgb_queue.tryGet()
|
||||||
|
if rgb_data is not None:
|
||||||
|
rgb_frame = rgb_data.getCvFrame()
|
||||||
|
try:
|
||||||
|
face_results = face_recognizer.process_frame(
|
||||||
|
rgb_frame, persons
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("Face recognition error: %s", e)
|
||||||
|
|
||||||
|
det_list = []
|
||||||
|
best_recognized = None
|
||||||
|
best_recog_conf = 0.0
|
||||||
|
for i, d in enumerate(persons):
|
||||||
|
det = {
|
||||||
"xmin": d.xmin, "ymin": d.ymin,
|
"xmin": d.xmin, "ymin": d.ymin,
|
||||||
"xmax": d.xmax, "ymax": d.ymax,
|
"xmax": d.xmax, "ymax": d.ymax,
|
||||||
"confidence": d.confidence,
|
"confidence": d.confidence,
|
||||||
# Spatial coordinates in mm
|
|
||||||
"x_mm": d.spatialCoordinates.x,
|
"x_mm": d.spatialCoordinates.x,
|
||||||
"y_mm": d.spatialCoordinates.y,
|
"y_mm": d.spatialCoordinates.y,
|
||||||
"z_mm": d.spatialCoordinates.z,
|
"z_mm": d.spatialCoordinates.z,
|
||||||
"distance_m": d.spatialCoordinates.z / 1000.0,
|
"distance_m": d.spatialCoordinates.z / 1000.0,
|
||||||
|
"recognized_name": None,
|
||||||
|
"recognition_confidence": None,
|
||||||
}
|
}
|
||||||
for d in persons
|
if i < len(face_results):
|
||||||
]
|
det["recognized_name"] = face_results[i]["recognized_name"]
|
||||||
|
det["recognition_confidence"] = face_results[i]["recognition_confidence"]
|
||||||
|
if det["recognized_name"] and (
|
||||||
|
det["recognition_confidence"] or 0
|
||||||
|
) > best_recog_conf:
|
||||||
|
best_recognized = det["recognized_name"]
|
||||||
|
best_recog_conf = det["recognition_confidence"]
|
||||||
|
det_list.append(det)
|
||||||
|
|
||||||
|
presence_state["detections"] = det_list
|
||||||
|
presence_state["recognized_name"] = best_recognized
|
||||||
|
presence_state["recognition_confidence"] = (
|
||||||
|
round(best_recog_conf, 3) if best_recognized else None
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
presence_state["detections"] = []
|
presence_state["detections"] = []
|
||||||
presence_state["confidence"] = 0.0
|
presence_state["confidence"] = 0.0
|
||||||
@@ -193,6 +262,8 @@ def detection_loop():
|
|||||||
presence_state["spatial_y"] = None
|
presence_state["spatial_y"] = None
|
||||||
presence_state["spatial_z"] = None
|
presence_state["spatial_z"] = None
|
||||||
presence_state["distance_mm"] = None
|
presence_state["distance_mm"] = None
|
||||||
|
presence_state["recognized_name"] = None
|
||||||
|
presence_state["recognition_confidence"] = None
|
||||||
|
|
||||||
# Check timeout
|
# Check timeout
|
||||||
if presence_state["last_seen"]:
|
if presence_state["last_seen"]:
|
||||||
@@ -215,6 +286,8 @@ async def lifespan(app: FastAPI):
|
|||||||
|
|
||||||
print("🦊 Starting OAK-D SPATIAL Vision Service...")
|
print("🦊 Starting OAK-D SPATIAL Vision Service...")
|
||||||
|
|
||||||
|
init_face_recognition()
|
||||||
|
|
||||||
if init_oak():
|
if init_oak():
|
||||||
running = True
|
running = True
|
||||||
detection_thread = threading.Thread(target=detection_loop, daemon=True)
|
detection_thread = threading.Thread(target=detection_loop, daemon=True)
|
||||||
@@ -231,8 +304,8 @@ async def lifespan(app: FastAPI):
|
|||||||
|
|
||||||
app = FastAPI(
|
app = FastAPI(
|
||||||
title="OAK-D SPATIAL Vision Service",
|
title="OAK-D SPATIAL Vision Service",
|
||||||
description="Vixy's eyes with SPATIAL presence detection! 🦊👀📏",
|
description="Vixy's eyes with SPATIAL presence detection + face recognition! 🦊👀📏",
|
||||||
version="0.4.0",
|
version="0.5.0",
|
||||||
lifespan=lifespan
|
lifespan=lifespan
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -243,10 +316,11 @@ async def health():
|
|||||||
return {
|
return {
|
||||||
"status": "healthy",
|
"status": "healthy",
|
||||||
"service": "oak-service",
|
"service": "oak-service",
|
||||||
"version": "0.4.0",
|
"version": "0.5.0",
|
||||||
"oak_connected": pipeline_ctx is not None,
|
"oak_connected": pipeline_ctx is not None,
|
||||||
"detection_model": DETECTION_MODEL,
|
"detection_model": DETECTION_MODEL,
|
||||||
"spatial_enabled": True,
|
"spatial_enabled": True,
|
||||||
|
"face_recognition_enabled": face_recognizer is not None,
|
||||||
"timestamp": time.time()
|
"timestamp": time.time()
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -267,7 +341,6 @@ async def presence():
|
|||||||
if presence_state["last_seen"] else None
|
if presence_state["last_seen"] else None
|
||||||
),
|
),
|
||||||
"confidence": presence_state["confidence"],
|
"confidence": presence_state["confidence"],
|
||||||
# SPATIAL DATA
|
|
||||||
"distance_mm": presence_state["distance_mm"],
|
"distance_mm": presence_state["distance_mm"],
|
||||||
"distance_m": distance_m,
|
"distance_m": distance_m,
|
||||||
"spatial": {
|
"spatial": {
|
||||||
@@ -275,6 +348,8 @@ async def presence():
|
|||||||
"y_mm": presence_state["spatial_y"],
|
"y_mm": presence_state["spatial_y"],
|
||||||
"z_mm": presence_state["spatial_z"],
|
"z_mm": presence_state["spatial_z"],
|
||||||
} if presence_state["spatial_z"] else None,
|
} if presence_state["spatial_z"] else None,
|
||||||
|
"recognized_name": presence_state["recognized_name"],
|
||||||
|
"recognition_confidence": presence_state["recognition_confidence"],
|
||||||
"timestamp": time.time()
|
"timestamp": time.time()
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -340,6 +415,68 @@ async def depth_frame():
|
|||||||
raise HTTPException(status_code=500, detail=str(e))
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
# ============== Face Enrollment API ==============
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/faces/enroll")
async def enroll_face_upload(
    name: str = Form(...),
    photo: UploadFile = File(...),
):
    """Enroll a face by uploading a photo (multipart form: name + photo).

    Responses:
        200 with the recognizer's enrollment result on success.
        400 when the upload is empty, cannot be decoded as an image, or the
            recognizer rejects it (for example no face detected).
        503 when face recognition is not initialized.
    """
    if face_recognizer is None:
        raise HTTPException(status_code=503, detail="Face recognition not available")

    contents = await photo.read()
    if not contents:
        # cv2.imdecode raises on an empty buffer instead of returning None,
        # which would surface as a 500 rather than a client error.
        raise HTTPException(status_code=400, detail="Uploaded photo is empty")

    nparr = np.frombuffer(contents, np.uint8)
    image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
    if image is None:
        raise HTTPException(status_code=400, detail="Could not decode image")

    result = face_recognizer.enroll(name, image)
    if not result["success"]:
        raise HTTPException(status_code=400, detail=result["error"])
    return result
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/faces/enroll-from-camera")
async def enroll_face_camera(name: str):
    """Enroll ``name`` (query parameter) from the frame currently queued by the camera.

    Responses:
        200 with the recognizer's enrollment result on success.
        400 when the recognizer rejects the frame.
        503 when face recognition, the camera queue, or a frame is unavailable.
    """
    if face_recognizer is None:
        raise HTTPException(status_code=503, detail="Face recognition not available")
    if rgb_queue is None:
        raise HTTPException(status_code=503, detail="Camera not available")

    # Non-blocking read: yields None when no frame is waiting in the queue.
    queued = rgb_queue.tryGet()
    if queued is None:
        raise HTTPException(status_code=503, detail="No frame available")

    outcome = face_recognizer.enroll(name, queued.getCvFrame())
    if outcome["success"]:
        return outcome
    raise HTTPException(status_code=400, detail=outcome["error"])
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/faces")
async def list_faces():
    """Return the enrolled faces under the ``faces`` key; 503 without a recognizer."""
    if face_recognizer is not None:
        enrolled = face_recognizer.list_faces()
        return {"faces": enrolled}
    raise HTTPException(status_code=503, detail="Face recognition not available")
|
||||||
|
|
||||||
|
|
||||||
|
@app.delete("/faces/{name}")
async def delete_face(name: str):
    """Delete every stored embedding for ``name``.

    Responses:
        200 with the recognizer's deletion result.
        404 when nothing was stored under that name.
        503 when face recognition is not initialized.
    """
    if face_recognizer is None:
        raise HTTPException(status_code=503, detail="Face recognition not available")

    outcome = face_recognizer.delete_face(name)
    if outcome["success"]:
        return outcome
    raise HTTPException(status_code=404, detail=f"No face found for '{name}'")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import uvicorn
|
import uvicorn
|
||||||
uvicorn.run(app, host="0.0.0.0", port=8100)
|
uvicorn.run(app, host="0.0.0.0", port=8100)
|
||||||
|
|||||||
Reference in New Issue
Block a user