Add core service infrastructure
- NATS event bus (pub/sub, JetStream, KV storage)
- Service registry with health monitoring
- Base service class with lifecycle management
- Config system
- Logger with Vi formatting

Adapted from Lyra's patterns, namespace changed to vi.* 🦊💕
This commit is contained in:
397
core/base_service.py
Normal file
397
core/base_service.py
Normal file
@@ -0,0 +1,397 @@
|
||||
"""
|
||||
Base Service Class for Vi
|
||||
|
||||
Provides standardized service lifecycle management, registration, and health monitoring.
|
||||
All Vi services should inherit from this base class.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import time
|
||||
import uuid
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Dict, Any, List, Optional
|
||||
from datetime import datetime
|
||||
|
||||
from .logger import setup_logger
|
||||
from .service_registry import ServiceManifest, ServiceOperation, ServiceStatus, service_registry
|
||||
|
||||
# Module-level logger for this file. The service classes below log through
# their own per-service `self.logger`, created in BaseService.__init__.
logger = setup_logger('base_service')
|
||||
|
||||
|
||||
class BaseService(ABC):
    """Base class for all Vi services providing standardized lifecycle management.

    Subclasses implement four hooks (``get_service_manifest``,
    ``initialize_service``, ``cleanup_service``, ``perform_health_check``).
    This class drives registration, the heartbeat and health-check background
    tasks, and shutdown.

    The service whose ``service_id`` is ``'health'`` is special-cased: it calls
    the in-process ``service_registry`` directly, while every other service
    registers, heartbeats and deregisters over the event bus.
    """

    def __init__(self, service_id: str, event_bus=None):
        """Set up identity, timing defaults and bookkeeping; starts nothing.

        Args:
            service_id: Logical service name; used in topics and log output.
            event_bus: Optional event bus; may instead be passed to ``start``.
        """
        self.service_id = service_id
        self.event_bus = event_bus
        # Service id plus 8 random hex characters, so instances are distinguishable.
        self.instance_id = f"{service_id}-{uuid.uuid4().hex[:8]}"

        self._running = False
        self._heartbeat_task: Optional[asyncio.Task] = None
        self._health_check_task: Optional[asyncio.Task] = None

        # Loop periods in seconds (both are handed to asyncio.sleep).
        self.heartbeat_interval = 60
        self.health_check_interval = 10

        self._health_data: Dict[str, Any] = {}
        self._status = ServiceStatus.UNKNOWN

        self._heartbeat_failures = 0
        self._max_heartbeat_failures = 3
        self._registration_confirmed = False
        self._first_heartbeat = True

        self.logger = setup_logger(service_id, service_name=service_id)

    @abstractmethod
    def get_service_manifest(self) -> ServiceManifest:
        """Return service manifest with operations and metadata."""

    @abstractmethod
    async def initialize_service(self):
        """Initialize service-specific resources."""

    @abstractmethod
    async def cleanup_service(self):
        """Cleanup service-specific resources."""

    @abstractmethod
    async def perform_health_check(self) -> Dict[str, Any]:
        """Perform service-specific health check.

        The returned mapping is stored as the service's health data; a falsy
        ``'healthy'`` entry marks the service unhealthy (a missing entry counts
        as healthy).
        """

    async def start(self, event_bus=None):
        """Start the service with full lifecycle management.

        Registers the service, runs ``initialize_service``, launches the
        heartbeat and health-check loops, marks the service HEALTHY and sends
        an initial heartbeat. Calling ``start`` on a running service is a
        logged no-op.

        Args:
            event_bus: Optional bus replacing the one given at construction.

        Raises:
            ValueError: If no event bus is available.
            Exception: Any error during registration or initialization is
                logged, the status becomes UNHEALTHY, and it is re-raised.
        """
        if event_bus:
            self.event_bus = event_bus

        if not self.event_bus:
            raise ValueError("Event bus is required")

        if self._running:
            # A second start() would spawn duplicate loops and orphan the first pair.
            self.logger.warning(f"[🚀] Service already running: {self.service_id}")
            return

        try:
            self.logger.info(f"[🚀] Starting service: {self.service_id}")

            is_health_service = self.service_id == 'health'
            if is_health_service:
                await service_registry.initialize(self.event_bus)

            manifest = self.get_service_manifest()
            if is_health_service:
                service_registry.register_service(self.service_id, manifest, self.instance_id)
                self._registration_confirmed = True
            else:
                await self._send_registration_message(manifest)

            await self.initialize_service()

            self._running = True
            self._heartbeat_task = asyncio.create_task(self._heartbeat_loop())
            self._health_check_task = asyncio.create_task(self._health_check_loop())

            self._status = ServiceStatus.HEALTHY
            await self._send_heartbeat()

            self.logger.info(f"[✅] Service started: {self.service_id}")

        except Exception as e:
            self.logger.exception(f"[❌] Failed to start {self.service_id}: {e}")
            self._status = ServiceStatus.UNHEALTHY
            # Do not leave half-started loops running behind a failed start().
            self._running = False
            await self._cancel_background_tasks()
            raise

    async def stop(self):
        """Stop the service gracefully.

        Cancels the background loops, runs ``cleanup_service``, sends a final
        heartbeat (status is OFFLINE by then) and deregisters. Deregistration
        is attempted even when ``cleanup_service`` raises. Shutdown errors are
        logged, never raised.
        """
        self.logger.info(f"[🛑] Stopping service: {self.service_id}")

        self._running = False
        self._status = ServiceStatus.OFFLINE

        await self._cancel_background_tasks()

        clean_shutdown = True

        try:
            await self.cleanup_service()
        except Exception as e:
            clean_shutdown = False
            self.logger.exception(f"[❌] Error during shutdown: {e}")

        # Kept in its own try block so a failing cleanup cannot leave a stale
        # registration behind.
        try:
            await self._send_heartbeat()

            if self.service_id == 'health':
                service_registry.deregister_service(self.service_id)
            else:
                await self._send_deregistration_message()
        except Exception as e:
            clean_shutdown = False
            self.logger.exception(f"[❌] Error during shutdown: {e}")

        if clean_shutdown:
            self.logger.info(f"[✅] Service stopped: {self.service_id}")

    async def _cancel_background_tasks(self):
        """Cancel the heartbeat and health-check tasks and wait for both to end."""
        pending = (self._heartbeat_task, self._health_check_task)
        self._heartbeat_task = None
        self._health_check_task = None

        for task in pending:
            if task is None:
                continue
            task.cancel()
            try:
                await task
            except asyncio.CancelledError:
                pass

    async def _heartbeat_loop(self):
        """Background task to send periodic heartbeats.

        Sends a heartbeat, then sleeps ``heartbeat_interval`` seconds, for as
        long as the service is running. An unexpected error is logged and the
        loop retries after 5 seconds.
        """
        while self._running:
            try:
                await self._send_heartbeat()
                await asyncio.sleep(self.heartbeat_interval)
            except asyncio.CancelledError:
                break
            except Exception as e:
                self.logger.exception(f"[💔] Heartbeat error: {e}")
                await asyncio.sleep(5)

    async def _health_check_loop(self):
        """Background task for periodic health checks.

        Stores each ``perform_health_check`` result as the current health data
        and flips the status between HEALTHY and UNHEALTHY accordingly; any
        other status is left untouched by a check result. If the check itself
        raises, the status becomes UNHEALTHY and the loop retries after 10
        seconds.
        """
        while self._running:
            try:
                health_data = await self.perform_health_check()
                self._health_data = health_data

                if health_data.get('healthy', True):
                    if self._status == ServiceStatus.UNHEALTHY:
                        self._status = ServiceStatus.HEALTHY
                        self.logger.info("[💚] Service recovered")
                else:
                    if self._status == ServiceStatus.HEALTHY:
                        self._status = ServiceStatus.UNHEALTHY
                        self.logger.warning("[💔] Service unhealthy")

                await asyncio.sleep(self.health_check_interval)

            except asyncio.CancelledError:
                break
            except Exception as e:
                self.logger.exception(f"[💔] Health check error: {e}")
                self._status = ServiceStatus.UNHEALTHY
                await asyncio.sleep(10)

    async def _send_heartbeat(self):
        """Send heartbeat to service registry.

        The 'health' service updates the in-process registry directly; other
        services send an acknowledged heartbeat over the event bus. Errors are
        logged and counted; once ``_max_heartbeat_failures`` is reached a
        re-registration is attempted.
        """
        # Function-scope import: only `datetime` is imported at module level.
        from datetime import timezone

        try:
            # datetime.utcnow() is deprecated since Python 3.12. Dropping the
            # tzinfo again yields the same naive-UTC ISO string as before.
            timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

            # NOTE(review): stored health data is merged last, so a colliding
            # 'status', 'timestamp' or 'instance_id' key overrides the values
            # set here. Confirm that this precedence is intended.
            health_data = {
                'status': self._status.value,
                'timestamp': timestamp,
                'instance_id': self.instance_id,
                **self._health_data
            }

            if self.service_id == 'health':
                service_registry.update_service_heartbeat(self.service_id, health_data)
                self._heartbeat_failures = 0
            else:
                if self.event_bus:
                    await self._send_resilient_heartbeat(health_data)

        except Exception as e:
            self.logger.exception(f"[💔] Failed to send heartbeat: {e}")
            self._heartbeat_failures += 1
            if self._heartbeat_failures >= self._max_heartbeat_failures:
                await self._attempt_reregistration()

    async def request_service(self, target_service: str, operation: str,
                              payload: Dict[str, Any], timeout: float = 5.0) -> Dict[str, Any]:
        """Make a request to another service.

        Args:
            target_service: Id of the service to call.
            operation: Operation name; the request goes to
                ``vi.services.<target_service>.<operation>``.
            payload: JSON-serializable request body.
            timeout: Timeout forwarded to the bus client's ``request`` call.

        Returns:
            The JSON-decoded reply.

        Raises:
            ValueError: If no event bus is available.
            Exception: Any encoding, transport or decoding error is logged and
                re-raised.
        """
        if not self.event_bus:
            # Same failure mode as emit_event/register_handler, instead of an
            # AttributeError on `None.client`.
            raise ValueError("Event bus not available")

        topic = f"vi.services.{target_service}.{operation}"

        try:
            request_data = json.dumps(payload).encode()
            response_msg = await self.event_bus.client.request(topic, request_data, timeout=timeout)
            return json.loads(response_msg.data.decode())

        except Exception as e:
            self.logger.exception(f"[🔗] Request failed {target_service}.{operation}: {e}")
            raise

    async def emit_event(self, event_type: str, payload: Dict[str, Any]):
        """Emit an event through the event bus.

        ``event_type`` and ``payload`` are passed to ``event_bus.emit`` unchanged.

        Raises:
            ValueError: If no event bus is available.
        """
        if not self.event_bus:
            raise ValueError("Event bus not available")
        await self.event_bus.emit(event_type, payload)

    def register_handler(self, operation: str, handler):
        """Register a request-reply handler for a service operation.

        ``handler`` is subscribed on ``vi.services.<service_id>.<operation>``
        and awaited with each incoming message. A non-None result is
        JSON-encoded and sent through ``msg.respond``. If the handler, the
        encoding or the reply raises, the error is logged and an
        ``{"error": ..., "status": "error"}`` reply is attempted.

        Args:
            operation: Operation name, the last segment of the topic.
            handler: Awaitable callable taking the incoming message.

        Returns:
            Whatever ``event_bus.on`` returns for the subscription.

        Raises:
            ValueError: If no event bus is available.
        """
        if not self.event_bus:
            raise ValueError("Event bus not available")

        async def wrapped_handler(msg):
            try:
                result = await handler(msg)
                if result is not None:
                    await msg.respond(json.dumps(result).encode())
            except Exception as e:
                # Log first so the root cause is recorded even if replying fails.
                self.logger.error(f"Handler error for {operation}: {e}")
                error_response = {"error": str(e), "status": "error"}
                try:
                    await msg.respond(json.dumps(error_response).encode())
                except Exception as respond_error:
                    # Best effort only: a failed reply must not escape into
                    # the bus callback.
                    self.logger.error(
                        f"Could not send error response for {operation}: {respond_error}"
                    )

        topic = f"vi.services.{self.service_id}.{operation}"
        return self.event_bus.on(topic, wrapped_handler)

    def create_service_operation(self, operation_id: str, description: str,
                                 request_topic: Optional[str] = None,
                                 response_pattern: str = "request-reply",
                                 timeout_ms: int = 5000,
                                 parameters: Optional[List[Dict[str, Any]]] = None,
                                 metadata: Optional[Dict[str, Any]] = None) -> ServiceOperation:
        """Helper to create a ServiceOperation.

        ``request_topic`` defaults to ``vi.services.<service_id>.<operation_id>``;
        ``parameters`` and ``metadata`` default to a fresh empty list and dict.
        """
        if request_topic is None:
            request_topic = f"vi.services.{self.service_id}.{operation_id}"

        return ServiceOperation(
            operation_id=operation_id,
            description=description,
            request_topic=request_topic,
            response_pattern=response_pattern,
            parameters=parameters or [],
            timeout_ms=timeout_ms,
            metadata=metadata or {}
        )

    def get_status(self) -> ServiceStatus:
        """Return the current service status."""
        return self._status

    def set_status(self, status: ServiceStatus):
        """Set the service status, logging the transition when it changes."""
        if self._status != status:
            self.logger.info(f"[📊] Status: {self._status.value} → {status.value}")
            self._status = status

    def update_health_data(self, health_data: Dict[str, Any]):
        """Merge ``health_data`` into the stored health data."""
        self._health_data.update(health_data)

    def get_health_data(self) -> Dict[str, Any]:
        """Return a shallow copy of the stored health data."""
        return self._health_data.copy()

    def get_service_info(self) -> Dict[str, Any]:
        """Return a snapshot of identity, status, manifest and health data.

        ``'manifest'`` is the manifest object's ``__dict__``. ``'health_data'``
        is a shallow copy, so callers cannot mutate the service's own state
        through it.
        """
        manifest = self.get_service_manifest()
        return {
            'service_id': self.service_id,
            'instance_id': self.instance_id,
            'status': self._status.value,
            'manifest': manifest.__dict__,
            'health_data': dict(self._health_data),
            'running': self._running
        }

    async def _send_registration_message(self, manifest: ServiceManifest):
        """Send registration message via NATS.

        Emits the manifest as plain dicts on ``vi.services.register``. Does
        nothing when no event bus is set.
        """
        if not self.event_bus:
            return

        registration_payload = {
            'service_id': self.service_id,
            'instance_id': self.instance_id,
            'manifest': {
                'service_id': manifest.service_id,
                'name': manifest.name,
                'description': manifest.description,
                'version': manifest.version,
                'operations': [op.__dict__ for op in manifest.operations],
                'health_check_topic': manifest.health_check_topic,
                'metadata': manifest.metadata
            }
        }

        await self.event_bus.emit("vi.services.register", registration_payload)
        self.logger.info(f"[🗂️] Registered: {self.service_id}")

    async def _send_deregistration_message(self):
        """Send deregistration message via NATS.

        Emits on ``vi.services.deregister``. Does nothing when no event bus is
        set.
        """
        if not self.event_bus:
            return

        await self.event_bus.emit("vi.services.deregister", {
            'service_id': self.service_id,
            'instance_id': self.instance_id
        })
        self.logger.info(f"[🗂️] Deregistered: {self.service_id}")

    async def _send_resilient_heartbeat(self, health_data: Dict[str, Any]):
        """Send heartbeat with acknowledgment.

        Sends a request on ``vi.services.heartbeat`` (5 second timeout) and
        reads ``'acknowledged'`` from the JSON reply. An acknowledged heartbeat
        resets the failure counter and confirms the registration. An
        unacknowledged one triggers re-registration. Any error counts as a
        failure and triggers re-registration once ``_max_heartbeat_failures``
        is reached.
        """
        try:
            heartbeat_payload = {
                'service_id': self.service_id,
                'instance_id': self.instance_id,
                'health_data': health_data
            }

            request_data = json.dumps(heartbeat_payload).encode()
            response_msg = await self.event_bus.client.request(
                "vi.services.heartbeat",
                request_data,
                timeout=5.0
            )

            response = json.loads(response_msg.data.decode())
            acknowledged = response.get('acknowledged', False)

            if acknowledged:
                self._heartbeat_failures = 0
                if not self._registration_confirmed:
                    self._registration_confirmed = True
                    self.logger.info("[✅] Registration confirmed")
            else:
                self._registration_confirmed = False
                await self._attempt_reregistration()

        except Exception as e:
            self._heartbeat_failures += 1
            self.logger.warning(f"[💔] Heartbeat failed ({self._heartbeat_failures}): {e}")

            if self._heartbeat_failures >= self._max_heartbeat_failures:
                await self._attempt_reregistration()

    async def _attempt_reregistration(self):
        """Attempt to re-register service.

        Resets the failure counter first, so a failed attempt is retried only
        after another full run of failed heartbeats. Errors are logged, not
        raised.
        """
        self.logger.warning(f"[🔄] Re-registering {self.service_id}")

        try:
            self._heartbeat_failures = 0
            manifest = self.get_service_manifest()
            await self._send_registration_message(manifest)
            self.logger.info(f"[✅] Re-registered {self.service_id}")
        except Exception as e:
            self.logger.error(f"[❌] Re-registration failed: {e}")
|
||||
|
||||
|
||||
class SimpleService(BaseService):
    """Minimal concrete BaseService with no-op lifecycle hooks.

    The manifest is assembled from the constructor arguments, initialization
    and cleanup do nothing, and the health check always reports healthy.
    """

    def __init__(self, service_id: str, name: str = "", description: str = "",
                 version: str = "1.0.0", operations: Optional[List[ServiceOperation]] = None,
                 event_bus=None):
        """Store the manifest fields, defaulting empty ones from ``service_id``.

        Args:
            service_id: Logical service name, forwarded to ``BaseService``.
            name: Display name; an empty value falls back to ``service_id``.
            description: Free text; an empty value falls back to
                ``"Service: <service_id>"``.
            version: Version string placed in the manifest.
            operations: Operations to advertise; ``None`` or empty means none.
            event_bus: Optional event bus, forwarded to ``BaseService``.
        """
        super().__init__(service_id, event_bus)
        self._version = version
        self._name = name if name else service_id
        self._description = description if description else f"Service: {service_id}"
        self._operations = operations if operations else []

    def get_service_manifest(self) -> ServiceManifest:
        """Build the manifest from the stored fields.

        The health-check topic is ``vi.services.<service_id>.health``.
        """
        manifest_fields = {
            'service_id': self.service_id,
            'name': self._name,
            'description': self._description,
            'version': self._version,
            'operations': self._operations,
            'health_check_topic': f"vi.services.{self.service_id}.health",
        }
        return ServiceManifest(**manifest_fields)

    async def initialize_service(self):
        """No service-specific resources to set up."""
        return None

    async def cleanup_service(self):
        """No service-specific resources to release."""
        return None

    async def perform_health_check(self) -> Dict[str, Any]:
        """Report healthy, with the running flag and bus presence as details."""
        checks = {'running': self._running}
        checks['event_bus'] = self.event_bus is not None
        return {'healthy': True, 'checks': checks}
|
||||
Reference in New Issue
Block a user