Files
vi/core/base_service.py
Alex Kazaiev e2d24a66f1 Add core service infrastructure
- NATS event bus (pub/sub, JetStream, KV storage)
- Service registry with health monitoring
- Base service class with lifecycle management
- Config system
- Logger with Vi formatting

Adapted from Lyra's patterns, namespace changed to vi.*

🦊💕
2026-01-02 13:04:26 -06:00

398 lines
14 KiB
Python

"""
Base Service Class for Vi
Provides standardized service lifecycle management, registration, and health monitoring.
All Vi services should inherit from this base class.
"""
import asyncio
import json
import time
import uuid
from abc import ABC, abstractmethod
from typing import Dict, Any, List, Optional
from datetime import datetime
from .logger import setup_logger
from .service_registry import ServiceManifest, ServiceOperation, ServiceStatus, service_registry
# Module-level logger named 'base_service'. The service classes in this file
# log through their own per-instance ``self.logger`` (created in
# ``BaseService.__init__``) rather than through this one.
logger = setup_logger('base_service')
class BaseService(ABC):
    """Base class for all Vi services providing standardized lifecycle management.

    Subclasses implement the four abstract hooks (``get_service_manifest``,
    ``initialize_service``, ``cleanup_service``, ``perform_health_check``);
    this class drives registration, periodic heartbeats, periodic health
    checks and deregistration around them.

    The service whose ``service_id`` is ``'health'`` is special-cased
    throughout: it calls ``service_registry`` directly, whereas every other
    service talks to the registry through messages on the event bus.
    """

    def __init__(self, service_id: str, event_bus=None):
        """Set up bookkeeping state; no I/O other than creating the logger.

        Args:
            service_id: Logical service name, used in topics and log output.
            event_bus: Optional event bus; may instead be supplied to
                ``start()``.
        """
        self.service_id = service_id
        self.event_bus = event_bus
        # Unique per process instance: "<service_id>-<8 hex chars>".
        self.instance_id = f"{service_id}-{uuid.uuid4().hex[:8]}"
        self._running = False
        self._heartbeat_task = None
        self._health_check_task = None
        # Both intervals are passed to asyncio.sleep(), i.e. they are seconds.
        self.heartbeat_interval = 60
        self.health_check_interval = 10
        self._health_data = {}
        self._status = ServiceStatus.UNKNOWN
        self._heartbeat_failures = 0
        # Consecutive heartbeat failures tolerated before re-registering.
        self._max_heartbeat_failures = 3
        self._registration_confirmed = False
        # NOTE(review): not read anywhere in this class; kept because
        # subclasses or external code may rely on it.
        self._first_heartbeat = True
        self.logger = setup_logger(service_id, service_name=service_id)

    @abstractmethod
    def get_service_manifest(self) -> ServiceManifest:
        """Return service manifest with operations and metadata"""
        pass

    @abstractmethod
    async def initialize_service(self):
        """Initialize service-specific resources"""
        pass

    @abstractmethod
    async def cleanup_service(self):
        """Cleanup service-specific resources"""
        pass

    @abstractmethod
    async def perform_health_check(self) -> Dict[str, Any]:
        """Perform service-specific health check.

        The returned mapping is stored as the service's health data; its
        ``'healthy'`` key (default ``True``) drives the HEALTHY/UNHEALTHY
        status transitions in the health-check loop.
        """
        pass

    async def start(self, event_bus=None):
        """Start the service with full lifecycle management.

        Registers the service, runs ``initialize_service()``, launches the
        heartbeat and health-check background tasks, marks the service
        HEALTHY and sends an initial heartbeat.

        Args:
            event_bus: Optional event bus that replaces the one given to the
                constructor.

        Raises:
            ValueError: If no event bus is available.
            Exception: Any error raised during startup is logged, the status
                is set to UNHEALTHY, and the error is re-raised.
        """
        if event_bus:
            self.event_bus = event_bus
        if not self.event_bus:
            raise ValueError("Event bus is required")
        try:
            self.logger.info(f"[🚀] Starting service: {self.service_id}")
            if self.service_id == 'health':
                await service_registry.initialize(self.event_bus)
            manifest = self.get_service_manifest()
            if self.service_id == 'health':
                # The health service registers itself in-process.
                service_registry.register_service(self.service_id, manifest, self.instance_id)
                self._registration_confirmed = True
            else:
                # Other services register by message; confirmation arrives
                # later through an acknowledged heartbeat.
                await self._send_registration_message(manifest)
            await self.initialize_service()
            self._running = True
            self._heartbeat_task = asyncio.create_task(self._heartbeat_loop())
            self._health_check_task = asyncio.create_task(self._health_check_loop())
            self._status = ServiceStatus.HEALTHY
            await self._send_heartbeat()
            self.logger.info(f"[✅] Service started: {self.service_id}")
        except Exception as e:
            self.logger.exception(f"[❌] Failed to start {self.service_id}: {e}")
            self._status = ServiceStatus.UNHEALTHY
            raise

    @staticmethod
    async def _cancel_task(task):
        """Cancel a background task (if any) and wait for it to finish."""
        if not task:
            return
        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            pass

    async def stop(self):
        """Stop the service gracefully.

        Marks the service OFFLINE, cancels both background tasks, runs
        ``cleanup_service()``, then sends a final heartbeat and deregisters.
        Errors during cleanup or deregistration are logged, not raised. A
        failing ``cleanup_service()`` no longer prevents the final heartbeat
        and deregistration from being attempted.
        """
        self.logger.info(f"[🛑] Stopping service: {self.service_id}")
        self._running = False
        self._status = ServiceStatus.OFFLINE
        await self._cancel_task(self._heartbeat_task)
        await self._cancel_task(self._health_check_task)
        try:
            await self.cleanup_service()
        except Exception as e:
            self.logger.exception(f"[❌] Error during shutdown: {e}")
        # Deliberately a separate try: the registry must still learn that the
        # service is going away even when service-specific cleanup failed.
        try:
            await self._send_heartbeat()
            if self.service_id == 'health':
                service_registry.deregister_service(self.service_id)
            else:
                await self._send_deregistration_message()
            self.logger.info(f"[✅] Service stopped: {self.service_id}")
        except Exception as e:
            self.logger.exception(f"[❌] Error during shutdown: {e}")

    async def _heartbeat_loop(self):
        """Background task to send periodic heartbeats.

        Runs until ``_running`` is cleared or the task is cancelled. After an
        unexpected error it waits 5 seconds before retrying.
        """
        while self._running:
            try:
                await self._send_heartbeat()
                await asyncio.sleep(self.heartbeat_interval)
            except asyncio.CancelledError:
                break
            except Exception as e:
                self.logger.exception(f"[💔] Heartbeat error: {e}")
                await asyncio.sleep(5)

    async def _health_check_loop(self):
        """Background task for periodic health checks.

        Stores the result of ``perform_health_check()`` as the current health
        data and flips the status between HEALTHY and UNHEALTHY based on its
        ``'healthy'`` key. Other statuses are left untouched by a successful
        check. A check that raises marks the service UNHEALTHY and is retried
        after 10 seconds.
        """
        while self._running:
            try:
                health_data = await self.perform_health_check()
                self._health_data = health_data
                if health_data.get('healthy', True):
                    if self._status == ServiceStatus.UNHEALTHY:
                        self._status = ServiceStatus.HEALTHY
                        self.logger.info(f"[💚] Service recovered")
                else:
                    if self._status == ServiceStatus.HEALTHY:
                        self._status = ServiceStatus.UNHEALTHY
                        self.logger.warning(f"[💔] Service unhealthy")
                await asyncio.sleep(self.health_check_interval)
            except asyncio.CancelledError:
                break
            except Exception as e:
                self.logger.exception(f"[💔] Health check error: {e}")
                self._status = ServiceStatus.UNHEALTHY
                await asyncio.sleep(10)

    async def _send_heartbeat(self):
        """Send heartbeat to service registry.

        Never raises: failures are logged and counted, and once the count
        reaches ``_max_heartbeat_failures`` a re-registration is attempted.
        """
        try:
            # Keys from _health_data are merged last, so on a name collision
            # they override 'status', 'timestamp' and 'instance_id'.
            health_data = {
                'status': self._status.value,
                'timestamp': datetime.utcnow().isoformat(),
                'instance_id': self.instance_id,
                **self._health_data
            }
            if self.service_id == 'health':
                service_registry.update_service_heartbeat(self.service_id, health_data)
                self._heartbeat_failures = 0
            else:
                if self.event_bus:
                    await self._send_resilient_heartbeat(health_data)
        except Exception as e:
            self.logger.exception(f"[💔] Failed to send heartbeat: {e}")
            self._heartbeat_failures += 1
            if self._heartbeat_failures >= self._max_heartbeat_failures:
                await self._attempt_reregistration()

    async def request_service(self, target_service: str, operation: str,
                              payload: Dict[str, Any], timeout: float = 5.0) -> Dict[str, Any]:
        """Make a request to another service.

        The payload is JSON-encoded and sent to
        ``vi.services.<target_service>.<operation>``; the reply is decoded
        from JSON and returned.

        Args:
            target_service: Service id of the callee.
            operation: Operation name on the callee.
            payload: JSON-serializable request body.
            timeout: Passed through to the event bus client's ``request``.

        Raises:
            ValueError: If no event bus is available.
            Exception: Any request/encoding/decoding error is logged and
                re-raised.
        """
        # Same guard as emit_event/register_handler, instead of an opaque
        # AttributeError on ``None.client``.
        if not self.event_bus:
            raise ValueError("Event bus not available")
        topic = f"vi.services.{target_service}.{operation}"
        try:
            request_data = json.dumps(payload).encode()
            response_msg = await self.event_bus.client.request(topic, request_data, timeout=timeout)
            return json.loads(response_msg.data.decode())
        except Exception as e:
            self.logger.exception(f"[🔗] Request failed {target_service}.{operation}: {e}")
            raise

    async def emit_event(self, event_type: str, payload: Dict[str, Any]):
        """Emit an event using standardized topic naming.

        Raises:
            ValueError: If no event bus is available.
        """
        if not self.event_bus:
            raise ValueError("Event bus not available")
        await self.event_bus.emit(event_type, payload)

    def register_handler(self, operation: str, handler):
        """Register a request-reply handler for a service operation.

        The handler is subscribed on ``vi.services.<service_id>.<operation>``.
        A non-``None`` return value is JSON-encoded and sent as the reply. If
        the handler (or the reply) raises, the error is logged and an
        ``{"error": ..., "status": "error"}`` reply is attempted.

        Args:
            operation: Operation name, used as the last topic segment.
            handler: Async callable taking the incoming message.

        Returns:
            Whatever ``event_bus.on`` returns for the subscription.

        Raises:
            ValueError: If no event bus is available.
        """
        if not self.event_bus:
            raise ValueError("Event bus not available")

        async def wrapped_handler(msg):
            try:
                result = await handler(msg)
                if result is not None:
                    await msg.respond(json.dumps(result).encode())
            except Exception as e:
                # Log before replying: if the reply itself fails, the original
                # handler error must not be lost.
                self.logger.exception(f"Handler error for {operation}: {e}")
                error_response = {"error": str(e), "status": "error"}
                try:
                    await msg.respond(json.dumps(error_response).encode())
                except Exception as respond_error:
                    self.logger.error(
                        f"Failed to send error response for {operation}: {respond_error}"
                    )

        topic = f"vi.services.{self.service_id}.{operation}"
        return self.event_bus.on(topic, wrapped_handler)

    def create_service_operation(self, operation_id: str, description: str,
                                 request_topic: Optional[str] = None,
                                 response_pattern: str = "request-reply",
                                 timeout_ms: int = 5000,
                                 parameters: Optional[List[Dict[str, Any]]] = None,
                                 metadata: Optional[Dict[str, Any]] = None) -> ServiceOperation:
        """Helper to create a ServiceOperation.

        When ``request_topic`` is omitted it defaults to
        ``vi.services.<service_id>.<operation_id>``. ``parameters`` and
        ``metadata`` default to an empty list and dict respectively.
        """
        if request_topic is None:
            request_topic = f"vi.services.{self.service_id}.{operation_id}"
        return ServiceOperation(
            operation_id=operation_id,
            description=description,
            request_topic=request_topic,
            response_pattern=response_pattern,
            parameters=parameters or [],
            timeout_ms=timeout_ms,
            metadata=metadata or {}
        )

    def get_status(self) -> ServiceStatus:
        """Return the current service status."""
        return self._status

    def set_status(self, status: ServiceStatus):
        """Set the service status, logging the transition when it changes."""
        if self._status != status:
            self.logger.info(f"[📊] Status: {self._status.value} → {status.value}")
            self._status = status

    def update_health_data(self, health_data: Dict[str, Any]):
        """Merge ``health_data`` into the stored health data.

        Note that the health-check loop replaces the stored data wholesale
        with each ``perform_health_check()`` result.
        """
        self._health_data.update(health_data)

    def get_health_data(self) -> Dict[str, Any]:
        """Return a shallow copy of the stored health data."""
        return self._health_data.copy()

    def get_service_info(self) -> Dict[str, Any]:
        """Return a snapshot of identity, status, manifest and health data.

        ``health_data`` is a shallow copy, so callers cannot mutate the
        service's internal state through the returned dict.
        """
        manifest = self.get_service_manifest()
        return {
            'service_id': self.service_id,
            'instance_id': self.instance_id,
            'status': self._status.value,
            'manifest': manifest.__dict__,
            'health_data': self.get_health_data(),
            'running': self._running
        }

    async def _send_registration_message(self, manifest: ServiceManifest):
        """Send registration message via NATS.

        Emits the manifest on ``vi.services.register``. Does nothing when no
        event bus is set.
        """
        if not self.event_bus:
            return
        registration_payload = {
            'service_id': self.service_id,
            'instance_id': self.instance_id,
            'manifest': {
                'service_id': manifest.service_id,
                'name': manifest.name,
                'description': manifest.description,
                'version': manifest.version,
                'operations': [op.__dict__ for op in manifest.operations],
                'health_check_topic': manifest.health_check_topic,
                'metadata': manifest.metadata
            }
        }
        await self.event_bus.emit("vi.services.register", registration_payload)
        self.logger.info(f"[🗂️] Registered: {self.service_id}")

    async def _send_deregistration_message(self):
        """Send deregistration message via NATS.

        Emits on ``vi.services.deregister``. Does nothing when no event bus
        is set.
        """
        if not self.event_bus:
            return
        await self.event_bus.emit("vi.services.deregister", {
            'service_id': self.service_id,
            'instance_id': self.instance_id
        })
        self.logger.info(f"[🗂️] Deregistered: {self.service_id}")

    async def _send_resilient_heartbeat(self, health_data: Dict[str, Any]):
        """Send heartbeat with acknowledgment.

        Sends a request on ``vi.services.heartbeat`` (5 second timeout) and
        inspects the reply's ``'acknowledged'`` flag. An acknowledged reply
        resets the failure counter and confirms registration; an
        unacknowledged reply triggers immediate re-registration. Any error is
        counted as a failure and, once ``_max_heartbeat_failures`` is
        reached, also triggers re-registration. Never raises.
        """
        try:
            heartbeat_payload = {
                'service_id': self.service_id,
                'instance_id': self.instance_id,
                'health_data': health_data
            }
            request_data = json.dumps(heartbeat_payload).encode()
            response_msg = await self.event_bus.client.request(
                "vi.services.heartbeat",
                request_data,
                timeout=5.0
            )
            response = json.loads(response_msg.data.decode())
            acknowledged = response.get('acknowledged', False)
            if acknowledged:
                self._heartbeat_failures = 0
                if not self._registration_confirmed:
                    self._registration_confirmed = True
                    self.logger.info(f"[✅] Registration confirmed")
            else:
                self._registration_confirmed = False
                await self._attempt_reregistration()
        except Exception as e:
            self._heartbeat_failures += 1
            self.logger.warning(f"[💔] Heartbeat failed ({self._heartbeat_failures}): {e}")
            if self._heartbeat_failures >= self._max_heartbeat_failures:
                await self._attempt_reregistration()

    async def _attempt_reregistration(self):
        """Attempt to re-register service.

        Resets the heartbeat failure counter and re-sends the registration
        message. Failures are logged, not raised.
        """
        self.logger.warning(f"[🔄] Re-registering {self.service_id}")
        try:
            # Reset up front so a failed attempt starts a fresh failure count
            # rather than re-triggering on the very next heartbeat.
            self._heartbeat_failures = 0
            manifest = self.get_service_manifest()
            await self._send_registration_message(manifest)
            self.logger.info(f"[✅] Re-registered {self.service_id}")
        except Exception as e:
            self.logger.error(f"[❌] Re-registration failed: {e}")
class SimpleService(BaseService):
    """Minimal concrete ``BaseService`` configured entirely via its constructor.

    The manifest is assembled from the constructor arguments, the
    initialize/cleanup hooks do nothing, and the health check always reports
    healthy while exposing the running flag and event-bus presence.
    """

    def __init__(self, service_id: str, name: str = "", description: str = "",
                 version: str = "1.0.0", operations: Optional[List[ServiceOperation]] = None,
                 event_bus=None):
        """Store manifest fields, falling back to defaults for empty values.

        An empty ``name`` falls back to ``service_id``, an empty
        ``description`` to ``"Service: <service_id>"``, and missing or empty
        ``operations`` to a new empty list.
        """
        super().__init__(service_id, event_bus)
        self._version = version
        self._name = name if name else service_id
        self._description = description if description else f"Service: {service_id}"
        self._operations = operations if operations else []

    def get_service_manifest(self) -> ServiceManifest:
        """Build the manifest from the values captured at construction time."""
        health_topic = f"vi.services.{self.service_id}.health"
        manifest_fields = {
            'service_id': self.service_id,
            'name': self._name,
            'description': self._description,
            'version': self._version,
            'operations': self._operations,
            'health_check_topic': health_topic,
        }
        return ServiceManifest(**manifest_fields)

    async def initialize_service(self):
        """No service-specific resources to set up."""
        return None

    async def cleanup_service(self):
        """No service-specific resources to release."""
        return None

    async def perform_health_check(self) -> Dict[str, Any]:
        """Report healthy unconditionally, with basic diagnostic checks attached."""
        bus_present = self.event_bus is not None
        diagnostics = {
            'running': self._running,
            'event_bus': bus_present,
        }
        return {'healthy': True, 'checks': diagnostics}