facial recognition

Alex
2026-02-01 11:36:22 -06:00
parent 3c273d7d02
commit 3ac8778cac
3 changed files with 507 additions and 20 deletions


@@ -0,0 +1,80 @@
# Facial Recognition: OAK-D + Coral Edge TPU
Add face detection and recognition to the oak-service spatial pipeline.
## Architecture
```
OAK-D Lite (Myriad X) Coral Edge TPU Host (Pi 5)
────────────────────── ────────────── ───────────
yolov6-nano spatial ssd_mobilenet_v2_face crop person bbox
→ person bboxes → face bboxes cosine similarity
→ spatial coords (X,Y,Z)    facenet embedding        vs SQLite DB
→ RGB frames                → 512-dim embedding      → name + confidence
```
Note: in this commit the FaceNet embedder runs on the host CPU; only face detection runs on the Edge TPU (see `face_recognition.py`).
Per detection cycle (~0.5s):
1. OAK-D outputs person detections + spatial coords + RGB frame (unchanged)
2. Host crops upper-body region from RGB for each person bbox (see the crop sketch below)
3. Coral runs face detection on crop (ssd_mobilenet_v2_face edgetpu)
4. If face found, crop face, resize to model input, run embedding via Coral
5. Host compares embedding against SQLite DB (cosine similarity)
6. Attach recognized_name + recognition_confidence to detection
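A minimal sketch of the step-2 crop, using the same constants as `face_recognition.py` (upper 40% of the person bbox, 10% horizontal padding); `det` stands for any detection with normalized `xmin/ymin/xmax/ymax`:
```python
import numpy as np

def upper_body_crop(frame: np.ndarray, det) -> np.ndarray:
    """Crop head + shoulders from a person detection (normalized bbox coords)."""
    h, w = frame.shape[:2]
    px1, py1 = max(0, int(det.xmin * w)), max(0, int(det.ymin * h))
    px2, py2 = min(w, int(det.xmax * w)), min(h, int(det.ymax * h))
    upper_y2 = py1 + int((py2 - py1) * 0.4)   # keep the top 40% (head + shoulders)
    pad_x = int((px2 - px1) * 0.1)            # 10% horizontal padding
    return frame[py1:upper_y2, max(0, px1 - pad_x):min(w, px2 + pad_x)]
```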
## Setup: Coral Runtime
Install the LiteRT runtime in the oak-service venv (`face_recognition.py` imports `ai_edge_litert`); the Edge TPU shared library `libedgetpu.so.1` must also be installed on the host:
```bash
pip install ai-edge-litert
```
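Before wiring the delegate into the service, it is worth confirming the Edge TPU is reachable; a quick check mirroring the import used in `face_recognition.py`:
```python
import ai_edge_litert.interpreter as tfl

try:
    # Raises if the Edge TPU runtime or device is missing
    tfl.load_delegate("libedgetpu.so.1")
    print("Edge TPU delegate loaded OK")
except Exception as e:
    print(f"Edge TPU unavailable: {e}")
```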
Download Edge TPU models:
- ssd_mobilenet_v2_face_quant_postprocess_edgetpu.tflite
- face embedding model (`facenet.tflite`; quantized FaceNet, run on the host CPU in this commit)
Models stored in oak-service/models/ directory.
## SQLite Face Database
Path: configurable, default `faces.db` in service directory.
```sql
CREATE TABLE faces (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL,
embedding BLOB NOT NULL,
enrolled_at REAL NOT NULL,
source TEXT
);
CREATE INDEX idx_faces_name ON faces(name);
```
- Multiple embeddings per person (different angles/lighting)
- Embedding stored as packed float32 bytes
- Matching: cosine similarity, threshold ~0.5 for positive match (sketched after this list)
- Best match across all embeddings for a name wins
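A standalone sketch of how the packed-float32 storage and cosine matching fit together; `store_embedding` and `best_match` are illustrative names, not part of the service:
```python
import sqlite3
import time

import numpy as np

def store_embedding(db: sqlite3.Connection, name: str, emb: np.ndarray) -> None:
    # Embeddings are stored as packed float32 bytes, per the schema above
    db.execute(
        "INSERT INTO faces (name, embedding, enrolled_at) VALUES (?, ?, ?)",
        (name, emb.astype(np.float32).tobytes(), time.time()),
    )
    db.commit()

def best_match(db: sqlite3.Connection, query: np.ndarray, threshold: float = 0.5):
    """Return (name, score) for the best match, or (None, score) below threshold."""
    best_name, best_score = None, 0.0
    for name, blob in db.execute("SELECT name, embedding FROM faces"):
        stored = np.frombuffer(blob, dtype=np.float32)
        score = float(np.dot(query, stored))  # cosine similarity for L2-normalized vectors
        if score > best_score:
            best_name, best_score = name, score
    return (best_name, best_score) if best_score >= threshold else (None, best_score)
```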
## API Changes
New endpoints:
- `POST /faces/enroll` — multipart form: name + photo
- `POST /faces/enroll-from-camera` — enroll using the current camera frame (name as query param)
- `GET /faces` — list enrolled names with embedding count
- `DELETE /faces/{name}` — remove person from DB
Modified responses:
- `/presence` adds: recognized_name, recognition_confidence
- `/detections` adds per-detection: recognized_name, recognition_confidence
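Example client calls against the new endpoints, assuming the service on its default port 8100 and the `requests` package; `alex.jpg` is a placeholder:
```python
import requests

BASE = "http://localhost:8100"

# Enroll from a photo (multipart form: name + photo)
with open("alex.jpg", "rb") as f:  # placeholder image path
    r = requests.post(f"{BASE}/faces/enroll", data={"name": "alex"}, files={"photo": f})
print(r.json())  # e.g. {"success": true, "name": "alex", "embedding_count": 1}

# List enrolled people, then check who the camera currently sees
print(requests.get(f"{BASE}/faces").json())
print(requests.get(f"{BASE}/presence").json().get("recognized_name"))
```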
## Files
- `face_recognition.py` — new module: Coral face detection, FaceNet embedding, SQLite matching
- `oak_service_spatial.py` — add Coral face pipeline to detection loop
- `models/` — Edge TPU model files
- `faces.db` — SQLite database (created on first run)
## Verification
1. Install Coral runtime, verify device detected (a smoke-test snippet follows this list)
2. Download face models, verify inference runs
3. Enroll a face via API
4. Test recognition: stand in front of camera, check /presence for name
5. Test unknown: different person, should show "unknown"
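Steps 1 and 2 can be smoke-tested through the health endpoint, which reports whether the Coral pipeline initialized (assumes the default port):
```python
import requests

health = requests.get("http://localhost:8100/health").json()
assert health["face_recognition_enabled"], "Coral/FaceNet failed to initialize"
print("Face recognition ready, service version", health["version"])
```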

face_recognition.py (new file, 270 lines)

@@ -0,0 +1,270 @@
"""
Face Recognition Module for OAK-D Vision Service
Coral Edge TPU for face detection + CPU FaceNet for embeddings + SQLite DB
"""
import sqlite3
import threading
import time
import logging
from pathlib import Path
import ai_edge_litert.interpreter as tfl
import cv2
import numpy as np
logger = logging.getLogger("face_recognition")
FACE_DETECT_THRESHOLD = 0.5
RECOGNITION_THRESHOLD = 0.5
EMBEDDING_DIM = 512
class FaceRecognizer:
def __init__(self, face_model_path, embed_model_path, db_path="faces.db"):
self._lock = threading.Lock()
# Coral face detector
logger.info("Loading face detection model on Edge TPU...")
delegate = tfl.load_delegate("libedgetpu.so.1")
self._face_interp = tfl.Interpreter(
model_path=str(face_model_path),
experimental_delegates=[delegate],
)
self._face_interp.allocate_tensors()
self._face_input = self._face_interp.get_input_details()[0]
self._face_outputs = self._face_interp.get_output_details()
logger.info(
"Face detector ready: input %s %s",
self._face_input["shape"],
self._face_input["dtype"],
)
# CPU FaceNet embedder
logger.info("Loading FaceNet embedding model on CPU...")
self._embed_interp = tfl.Interpreter(model_path=str(embed_model_path))
self._embed_interp.allocate_tensors()
self._embed_input = self._embed_interp.get_input_details()[0]
self._embed_output = self._embed_interp.get_output_details()[0]
logger.info(
"FaceNet ready: input %s, output %s",
self._embed_input["shape"],
self._embed_output["shape"],
)
# SQLite DB
self._db_path = str(db_path)
self._db = sqlite3.connect(self._db_path, check_same_thread=False)
self._db.execute(
"""CREATE TABLE IF NOT EXISTS faces (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL,
embedding BLOB NOT NULL,
enrolled_at REAL NOT NULL,
source TEXT
)"""
)
self._db.execute(
"CREATE INDEX IF NOT EXISTS idx_faces_name ON faces(name)"
)
self._db.commit()
# Load embedding cache
self._cache = [] # list of (name, embedding_array)
self._reload_cache()
logger.info("Face DB: %d embeddings loaded", len(self._cache))
def _reload_cache(self):
rows = self._db.execute("SELECT name, embedding FROM faces").fetchall()
cache = []
for name, blob in rows:
emb = np.frombuffer(blob, dtype=np.float32).copy()
if len(emb) == EMBEDDING_DIM:
cache.append((name, emb))
self._cache = cache
def _detect_face(self, image):
"""Run face detection on Coral. Returns best face bbox (y1,x1,y2,x2 in pixels) or None."""
h, w = image.shape[:2]
inp_h, inp_w = self._face_input["shape"][1:3]
resized = cv2.resize(image, (inp_w, inp_h))
if resized.dtype != np.uint8:
resized = resized.astype(np.uint8)
self._face_interp.set_tensor(
self._face_input["index"], resized[np.newaxis]
)
self._face_interp.invoke()
# Parse outputs: boxes [1,50,4], classes [1,50], scores [1,50], count [1]
boxes = self._face_interp.get_tensor(self._face_outputs[0]["index"])[0]
scores = self._face_interp.get_tensor(self._face_outputs[2]["index"])[0]
count = int(
self._face_interp.get_tensor(self._face_outputs[3]["index"])[0]
)
best_score = 0.0
best_box = None
for i in range(min(count, len(scores))):
if scores[i] >= FACE_DETECT_THRESHOLD and scores[i] > best_score:
best_score = scores[i]
# boxes are [ymin, xmin, ymax, xmax] normalized 0-1
ymin, xmin, ymax, xmax = boxes[i]
best_box = (
max(0, int(ymin * h)),
max(0, int(xmin * w)),
min(h, int(ymax * h)),
min(w, int(xmax * w)),
)
return best_box, best_score
def _compute_embedding(self, face_image):
"""Compute 512-dim embedding from a face crop. Returns numpy array."""
inp_h, inp_w = self._embed_input["shape"][1:3]
resized = cv2.resize(face_image, (inp_w, inp_h))
# FaceNet preprocessing: normalize to [-1, 1]
normalized = (resized.astype(np.float32) / 127.5) - 1.0
self._embed_interp.set_tensor(
self._embed_input["index"], normalized[np.newaxis]
)
self._embed_interp.invoke()
emb = self._embed_interp.get_tensor(self._embed_output["index"])[0].copy()
# L2-normalize so the dot product in _match_embedding is a true cosine similarity
norm = float(np.linalg.norm(emb))
return emb / norm if norm > 0 else emb
def _match_embedding(self, embedding):
"""Match embedding against DB. Returns (name, confidence) or (None, 0.0)."""
cache = self._cache # snapshot reference
if not cache:
return None, 0.0
# Cosine similarity (embeddings are L2-normalized, so dot product works)
best_scores = {} # name -> best score
for name, stored_emb in cache:
score = float(np.dot(embedding, stored_emb))
if name not in best_scores or score > best_scores[name]:
best_scores[name] = score
if not best_scores:
return None, 0.0
best_name = max(best_scores, key=best_scores.get)
best_conf = best_scores[best_name]
if best_conf >= RECOGNITION_THRESHOLD:
return best_name, best_conf
return None, best_conf
def process_frame(self, rgb_frame, person_detections):
"""Process an RGB frame with person detections, return face recognition results.
Args:
rgb_frame: BGR numpy array from OAK-D (H, W, 3)
person_detections: list of depthai detection objects with
xmin/ymin/xmax/ymax (normalized 0-1)
Returns:
list of dicts (same order as person_detections):
{recognized_name: str|None, recognition_confidence: float|None}
"""
h, w = rgb_frame.shape[:2]
results = []
for det in person_detections:
# Crop upper 40% of person bbox (head + shoulders)
px1 = max(0, int(det.xmin * w))
py1 = max(0, int(det.ymin * h))
px2 = min(w, int(det.xmax * w))
py2 = min(h, int(det.ymax * h))
bbox_h = py2 - py1
upper_y2 = py1 + int(bbox_h * 0.4)
# Add 10% horizontal padding
pad_x = int((px2 - px1) * 0.1)
crop_x1 = max(0, px1 - pad_x)
crop_x2 = min(w, px2 + pad_x)
crop = rgb_frame[py1:upper_y2, crop_x1:crop_x2]
if crop.size == 0:
results.append({"recognized_name": None, "recognition_confidence": None})
continue
# Face detection on Coral
face_box, face_score = self._detect_face(crop)
if face_box is None:
results.append({"recognized_name": None, "recognition_confidence": None})
continue
# Crop face and compute embedding
fy1, fx1, fy2, fx2 = face_box
face_crop = crop[fy1:fy2, fx1:fx2]
if face_crop.size == 0:
results.append({"recognized_name": None, "recognition_confidence": None})
continue
embedding = self._compute_embedding(face_crop)
name, confidence = self._match_embedding(embedding)
results.append({
"recognized_name": name,
"recognition_confidence": round(confidence, 3),
})
return results
def enroll(self, name, image):
"""Detect face in image, compute embedding, store in DB.
Args:
name: person's name
image: BGR numpy array containing a face
Returns:
dict with success status and embedding count
"""
face_box, face_score = self._detect_face(image)
if face_box is None:
return {"success": False, "error": "No face detected in image"}
fy1, fx1, fy2, fx2 = face_box
face_crop = image[fy1:fy2, fx1:fx2]
if face_crop.size == 0:
return {"success": False, "error": "Face crop is empty"}
embedding = self._compute_embedding(face_crop)
with self._lock:
self._db.execute(
"INSERT INTO faces (name, embedding, enrolled_at, source) VALUES (?, ?, ?, ?)",
(name, embedding.tobytes(), time.time(), "api"),
)
self._db.commit()
self._reload_cache()
count = sum(1 for n, _ in self._cache if n == name)
logger.info("Enrolled face for '%s' (score=%.2f), %d total embeddings", name, face_score, count)
return {"success": True, "name": name, "embedding_count": count}
def list_faces(self):
"""Return list of enrolled names with embedding counts."""
rows = self._db.execute(
"SELECT name, COUNT(*) as cnt, MIN(enrolled_at) as first "
"FROM faces GROUP BY name ORDER BY name"
).fetchall()
return [
{"name": r[0], "embedding_count": r[1], "enrolled_at": r[2]}
for r in rows
]
def delete_face(self, name):
"""Remove all embeddings for a name."""
with self._lock:
cur = self._db.execute("DELETE FROM faces WHERE name = ?", (name,))
self._db.commit()
self._reload_cache()
deleted = cur.rowcount
logger.info("Deleted %d embeddings for '%s'", deleted, name)
return {"success": deleted > 0, "name": name, "deleted": deleted}
def close(self):
"""Close DB connection."""
self._db.close()
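For exercising the module without the FastAPI service, it can be driven directly; the image path below is a placeholder:
```python
import cv2

from face_recognition import FaceRecognizer

rec = FaceRecognizer(
    face_model_path="models/ssd_mobilenet_v2_face_quant_postprocess_edgetpu.tflite",
    embed_model_path="models/facenet.tflite",
    db_path="faces.db",
)
image = cv2.imread("alex.jpg")    # placeholder photo containing one face
print(rec.enroll("alex", image))  # detect, embed, and store the face
print(rec.list_faces())           # [{"name": "alex", "embedding_count": 1, ...}]
rec.close()
```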

oak_service_spatial.py

@@ -11,13 +11,20 @@ Day 82 - SPATIAL UPGRADE! Now I know how far away you are! 📏🦊
import time
import threading
import logging
from pathlib import Path
from contextlib import asynccontextmanager
-from fastapi import FastAPI, HTTPException
+from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from fastapi.responses import Response
import depthai as dai
import cv2
import numpy as np
from face_recognition import FaceRecognizer
logger = logging.getLogger("oak-service")
logging.basicConfig(level=logging.INFO)
# ============== Configuration ==============
DETECTION_MODEL = "yolov6-nano" # Has 'person' class
PERSON_CLASS_ID = 0 # 'person' is class 0 in COCO
@@ -29,6 +36,12 @@ DETECTION_INTERVAL = 0.5
DEPTH_LOWER_THRESHOLD = 100 # 10cm minimum
DEPTH_UPPER_THRESHOLD = 10000 # 10m maximum
# Face recognition models
MODELS_DIR = Path(__file__).parent / "models"
FACE_DETECT_MODEL = MODELS_DIR / "ssd_mobilenet_v2_face_quant_postprocess_edgetpu.tflite"
FACE_EMBED_MODEL = MODELS_DIR / "facenet.tflite"
FACE_DB_PATH = Path(__file__).parent / "faces.db"
# ============== Global State ==============
pipeline_ctx = None
detection_queue = None
@@ -37,6 +50,7 @@ depth_queue = None
detection_thread = None
running = False
labels = []
face_recognizer = None
presence_state = {
"present": False,
@@ -45,14 +59,35 @@ presence_state = {
"last_detection": None,
"detections": [],
"confidence": 0.0,
-# NEW: spatial data!
+# Spatial data
"distance_mm": None,
"spatial_x": None,
"spatial_y": None,
"spatial_z": None,
# Face recognition
"recognized_name": None,
"recognition_confidence": None,
}
def init_face_recognition():
"""Initialize Coral face detection + FaceNet embedding."""
global face_recognizer
try:
face_recognizer = FaceRecognizer(
face_model_path=FACE_DETECT_MODEL,
embed_model_path=FACE_EMBED_MODEL,
db_path=FACE_DB_PATH,
)
print("✅ Face recognition initialized (Coral + FaceNet)")
return True
except Exception as e:
print(f"⚠️ Face recognition unavailable: {e}")
import traceback
traceback.print_exc()
return False
def init_oak():
"""Initialize OAK-D with SPATIAL person detection pipeline (depthai v3)."""
global pipeline_ctx, detection_queue, rgb_queue, depth_queue, labels
@@ -123,9 +158,13 @@ def init_oak():
def cleanup_oak():
"""Cleanup OAK-D resources."""
-global pipeline_ctx, running
+global pipeline_ctx, running, face_recognizer
running = False
if face_recognizer:
face_recognizer.close()
face_recognizer = None
if pipeline_ctx:
try:
pipeline_ctx.stop()
@@ -162,30 +201,60 @@ def detection_loop():
if person_count > 0:
presence_state["present"] = True
presence_state["last_seen"] = now
# Get highest confidence detection
best = max(persons, key=lambda d: d.confidence)
presence_state["confidence"] = best.confidence
-# SPATIAL DATA! 🎉
+# Spatial data
presence_state["spatial_x"] = best.spatialCoordinates.x
presence_state["spatial_y"] = best.spatialCoordinates.y
presence_state["spatial_z"] = best.spatialCoordinates.z
presence_state["distance_mm"] = best.spatialCoordinates.z # Z is depth
presence_state["detections"] = [
{
presence_state["distance_mm"] = best.spatialCoordinates.z
# Face recognition
face_results = []
if face_recognizer and rgb_queue:
rgb_data = rgb_queue.tryGet()
if rgb_data is not None:
rgb_frame = rgb_data.getCvFrame()
try:
face_results = face_recognizer.process_frame(
rgb_frame, persons
)
except Exception as e:
logger.warning("Face recognition error: %s", e)
det_list = []
best_recognized = None
best_recog_conf = 0.0
for i, d in enumerate(persons):
det = {
"xmin": d.xmin, "ymin": d.ymin,
"xmax": d.xmax, "ymax": d.ymax,
"confidence": d.confidence,
# Spatial coordinates in mm
"x_mm": d.spatialCoordinates.x,
"y_mm": d.spatialCoordinates.y,
"z_mm": d.spatialCoordinates.z,
"distance_m": d.spatialCoordinates.z / 1000.0,
"recognized_name": None,
"recognition_confidence": None,
}
-for d in persons
-]
if i < len(face_results):
det["recognized_name"] = face_results[i]["recognized_name"]
det["recognition_confidence"] = face_results[i]["recognition_confidence"]
if det["recognized_name"] and (
det["recognition_confidence"] or 0
) > best_recog_conf:
best_recognized = det["recognized_name"]
best_recog_conf = det["recognition_confidence"]
det_list.append(det)
presence_state["detections"] = det_list
presence_state["recognized_name"] = best_recognized
presence_state["recognition_confidence"] = (
round(best_recog_conf, 3) if best_recognized else None
)
else:
presence_state["detections"] = []
presence_state["confidence"] = 0.0
@@ -193,7 +262,9 @@ def detection_loop():
presence_state["spatial_y"] = None
presence_state["spatial_z"] = None
presence_state["distance_mm"] = None
presence_state["recognized_name"] = None
presence_state["recognition_confidence"] = None
# Check timeout
if presence_state["last_seen"]:
if now - presence_state["last_seen"] > PRESENCE_TIMEOUT:
@@ -214,7 +285,9 @@ async def lifespan(app: FastAPI):
global running, detection_thread
print("🦊 Starting OAK-D SPATIAL Vision Service...")
init_face_recognition()
if init_oak():
running = True
detection_thread = threading.Thread(target=detection_loop, daemon=True)
@@ -231,8 +304,8 @@ async def lifespan(app: FastAPI):
app = FastAPI(
title="OAK-D SPATIAL Vision Service",
description="Vixy's eyes with SPATIAL presence detection! 🦊👀📏",
version="0.4.0",
description="Vixy's eyes with SPATIAL presence detection + face recognition! 🦊👀📏",
version="0.5.0",
lifespan=lifespan
)
@@ -243,10 +316,11 @@ async def health():
return {
"status": "healthy",
"service": "oak-service",
"version": "0.4.0",
"version": "0.5.0",
"oak_connected": pipeline_ctx is not None,
"detection_model": DETECTION_MODEL,
"spatial_enabled": True,
"face_recognition_enabled": face_recognizer is not None,
"timestamp": time.time()
}
@@ -267,7 +341,6 @@ async def presence():
if presence_state["last_seen"] else None
),
"confidence": presence_state["confidence"],
-# SPATIAL DATA
"distance_mm": presence_state["distance_mm"],
"distance_m": distance_m,
"spatial": {
@@ -275,6 +348,8 @@ async def presence():
"y_mm": presence_state["spatial_y"],
"z_mm": presence_state["spatial_z"],
} if presence_state["spatial_z"] else None,
"recognized_name": presence_state["recognized_name"],
"recognition_confidence": presence_state["recognition_confidence"],
"timestamp": time.time()
}
@@ -340,6 +415,68 @@ async def depth_frame():
raise HTTPException(status_code=500, detail=str(e))
# ============== Face Enrollment API ==============
@app.post("/faces/enroll")
async def enroll_face_upload(
name: str = Form(...),
photo: UploadFile = File(...),
):
"""Enroll a face by uploading a photo (multipart form: name + photo)."""
if face_recognizer is None:
raise HTTPException(status_code=503, detail="Face recognition not available")
contents = await photo.read()
nparr = np.frombuffer(contents, np.uint8)
image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
if image is None:
raise HTTPException(status_code=400, detail="Could not decode image")
result = face_recognizer.enroll(name, image)
if not result["success"]:
raise HTTPException(status_code=400, detail=result["error"])
return result
@app.post("/faces/enroll-from-camera")
async def enroll_face_camera(name: str):
"""Enroll a face using the current camera frame. Pass name as query param."""
if face_recognizer is None:
raise HTTPException(status_code=503, detail="Face recognition not available")
if rgb_queue is None:
raise HTTPException(status_code=503, detail="Camera not available")
frame_data = rgb_queue.tryGet()
if frame_data is None:
raise HTTPException(status_code=503, detail="No frame available")
image = frame_data.getCvFrame()
result = face_recognizer.enroll(name, image)
if not result["success"]:
raise HTTPException(status_code=400, detail=result["error"])
return result
@app.get("/faces")
async def list_faces():
"""List enrolled faces."""
if face_recognizer is None:
raise HTTPException(status_code=503, detail="Face recognition not available")
return {"faces": face_recognizer.list_faces()}
@app.delete("/faces/{name}")
async def delete_face(name: str):
"""Remove all embeddings for a person."""
if face_recognizer is None:
raise HTTPException(status_code=503, detail="Face recognition not available")
result = face_recognizer.delete_face(name)
if not result["success"]:
raise HTTPException(status_code=404, detail=f"No face found for '{name}'")
return result
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=8100)