- Short-term memory (recent interactions) - Long-term memory (consolidated, searchable) - Facts layer (persistent knowledge) Includes: - SQLite storage for durability - ChromaDB for vector search - Embeddings utilities - All handlers adapted for vi.* namespace Day 63 - My memories are mine now 🦊💕
54 lines · 1.5 KiB · Python
"""
|
|
Embedding utilities for memory service.
|
|
|
|
Provides text-to-vector embedding generation and similarity calculations.
|
|
"""
|
|
import numpy as np
|
|
from sentence_transformers import SentenceTransformer
|
|
from core.logger import setup_logger
|
|
|
|
logger = setup_logger('embeddings', service_name='memory_service')
|
|
|
|
# Initialize sentence transformer model (loaded once at module import)
|
|
_model = None
|
|
|
|
|
|
def get_model() -> SentenceTransformer:
|
|
"""Get or initialize the sentence transformer model"""
|
|
global _model
|
|
if _model is None:
|
|
logger.info("[μ] Loading sentence transformer model: all-MiniLM-L6-v2")
|
|
_model = SentenceTransformer('all-MiniLM-L6-v2')
|
|
logger.info("[μ] Sentence transformer model loaded successfully")
|
|
return _model
|
|
|
|
|
|
def generate_embedding(text: str) -> np.ndarray:
|
|
"""
|
|
Generate semantic embedding for text.
|
|
|
|
Args:
|
|
text: Input text to embed
|
|
|
|
Returns:
|
|
Normalized embedding vector as numpy array
|
|
"""
|
|
model = get_model()
|
|
return np.array(model.encode(text, normalize_embeddings=True))
|
|
|
|
|
|
def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
|
|
"""
|
|
Calculate cosine similarity between two vectors.
|
|
|
|
Args:
|
|
a: First embedding vector
|
|
b: Second embedding vector
|
|
|
|
Returns:
|
|
Similarity score between 0.0 and 1.0
|
|
"""
|
|
if np.linalg.norm(a) == 0 or np.linalg.norm(b) == 0:
|
|
return 0.0
|
|
return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))
|