- NATS event bus (pub/sub, JetStream, KV storage) - Service registry with health monitoring - Base service class with lifecycle management - Config system - Logger with Vi formatting Adapted from Lyra's patterns, namespace changed to vi.* 🦊💕
398 lines
14 KiB
Python
398 lines
14 KiB
Python
"""
Base Service Class for Vi

Provides standardized service lifecycle management, registration, and health monitoring.
All Vi services should inherit from this base class.
"""
|
|
|
|
import asyncio
|
|
import json
|
|
import time
|
|
import uuid
|
|
from abc import ABC, abstractmethod
|
|
from typing import Dict, Any, List, Optional
|
|
from datetime import datetime
|
|
|
|
from .logger import setup_logger
|
|
from .service_registry import ServiceManifest, ServiceOperation, ServiceStatus, service_registry
|
|
|
|
# Module-level logger named 'base_service'. BaseService instances additionally
# create their own per-service logger (self.logger) in __init__.
logger = setup_logger('base_service')
|
|
|
|
|
|
class BaseService(ABC):
    """Base class for all Vi services providing standardized lifecycle management.

    Subclasses implement four hooks: ``get_service_manifest``,
    ``initialize_service``, ``cleanup_service`` and ``perform_health_check``.
    ``start`` / ``stop`` drive registration, the background heartbeat and
    health-check loops, and deregistration.

    The service whose ``service_id`` is ``'health'`` is special-cased: it calls
    the in-process ``service_registry`` directly, while every other service
    registers and sends heartbeats through the event bus.
    """

    def __init__(self, service_id: str, event_bus=None):
        """Set up identity, lifecycle state and a per-service logger.

        Args:
            service_id: Logical service name; also used to build topic names.
            event_bus: Optional event bus; one may instead be passed to ``start``.
        """
        self.service_id = service_id
        self.event_bus = event_bus
        # Service id plus 8 random hex characters, so instances can be told apart.
        self.instance_id = f"{service_id}-{uuid.uuid4().hex[:8]}"

        self._running = False
        self._heartbeat_task = None
        self._health_check_task = None

        # Loop periods in seconds (handed to asyncio.sleep).
        self.heartbeat_interval = 60
        self.health_check_interval = 10

        self._health_data = {}
        self._status = ServiceStatus.UNKNOWN

        self._heartbeat_failures = 0
        self._max_heartbeat_failures = 3
        self._registration_confirmed = False
        self._first_heartbeat = True

        self.logger = setup_logger(service_id, service_name=service_id)

    @abstractmethod
    def get_service_manifest(self) -> ServiceManifest:
        """Return service manifest with operations and metadata."""

    @abstractmethod
    async def initialize_service(self):
        """Initialize service-specific resources."""

    @abstractmethod
    async def cleanup_service(self):
        """Cleanup service-specific resources."""

    @abstractmethod
    async def perform_health_check(self) -> Dict[str, Any]:
        """Perform service-specific health check.

        The health-check loop reads the ``'healthy'`` key of the returned dict
        (treated as ``True`` when absent) and stores the whole dict as the
        service's health data.
        """

    async def start(self, event_bus=None):
        """Start the service with full lifecycle management.

        Registers the service, runs ``initialize_service``, launches the
        heartbeat and health-check background tasks, marks the service HEALTHY
        and sends a first heartbeat.

        Args:
            event_bus: Optional bus that replaces the one given at construction.

        Raises:
            ValueError: If no event bus is available.
            Exception: Any startup error is logged, the status is set to
                UNHEALTHY, and the error is re-raised.
        """
        if event_bus:
            self.event_bus = event_bus

        if not self.event_bus:
            raise ValueError("Event bus is required")

        try:
            self.logger.info(f"[🚀] Starting service: {self.service_id}")

            is_health_service = self.service_id == 'health'

            # Only the 'health' service initializes the shared registry.
            if is_health_service:
                await service_registry.initialize(self.event_bus)

            manifest = self.get_service_manifest()
            if is_health_service:
                service_registry.register_service(self.service_id, manifest, self.instance_id)
                self._registration_confirmed = True
            else:
                # Confirmation arrives later, with the first acknowledged heartbeat.
                await self._send_registration_message(manifest)

            await self.initialize_service()

            self._running = True
            self._heartbeat_task = asyncio.create_task(self._heartbeat_loop())
            self._health_check_task = asyncio.create_task(self._health_check_loop())

            self._status = ServiceStatus.HEALTHY
            await self._send_heartbeat()

            self.logger.info(f"[✅] Service started: {self.service_id}")

        except Exception as e:
            self.logger.exception(f"[❌] Failed to start {self.service_id}: {e}")
            self._status = ServiceStatus.UNHEALTHY
            raise

    async def stop(self):
        """Stop the service gracefully.

        Cancels both background tasks, runs ``cleanup_service``, sends a final
        heartbeat carrying the OFFLINE status and deregisters. Errors are logged
        rather than raised. A failing ``cleanup_service`` does not prevent the
        final heartbeat and deregistration, so the registry is not left with a
        stale entry.
        """
        self.logger.info(f"[🛑] Stopping service: {self.service_id}")

        self._running = False
        self._status = ServiceStatus.OFFLINE

        for task in (self._heartbeat_task, self._health_check_task):
            if task:
                task.cancel()
                try:
                    await task
                except asyncio.CancelledError:
                    # Expected outcome of cancel(); nothing to handle.
                    pass

        try:
            await self.cleanup_service()
        except Exception as e:
            self.logger.exception(f"[❌] Error during shutdown: {e}")

        try:
            await self._send_heartbeat()

            if self.service_id == 'health':
                service_registry.deregister_service(self.service_id)
            else:
                await self._send_deregistration_message()

            self.logger.info(f"[✅] Service stopped: {self.service_id}")

        except Exception as e:
            self.logger.exception(f"[❌] Error during shutdown: {e}")

    async def _heartbeat_loop(self):
        """Background task to send periodic heartbeats.

        Runs while ``_running`` is true; after an unexpected error it waits
        5 seconds before the next attempt.
        """
        while self._running:
            try:
                await self._send_heartbeat()
                await asyncio.sleep(self.heartbeat_interval)
            except asyncio.CancelledError:
                break
            except Exception as e:
                self.logger.exception(f"[💔] Heartbeat error: {e}")
                await asyncio.sleep(5)

    async def _health_check_loop(self):
        """Background task for periodic health checks.

        Toggles the status between HEALTHY and UNHEALTHY according to the
        ``'healthy'`` key of each check result. A check that raises marks the
        service UNHEALTHY and is retried after 10 seconds.
        """
        while self._running:
            try:
                health_data = await self.perform_health_check()
                # Replaces (does not merge with) the stored health data,
                # including anything added through update_health_data().
                self._health_data = health_data

                if health_data.get('healthy', True):
                    if self._status == ServiceStatus.UNHEALTHY:
                        self._status = ServiceStatus.HEALTHY
                        self.logger.info("[💚] Service recovered")
                else:
                    if self._status == ServiceStatus.HEALTHY:
                        self._status = ServiceStatus.UNHEALTHY
                        self.logger.warning("[💔] Service unhealthy")

                await asyncio.sleep(self.health_check_interval)
            except asyncio.CancelledError:
                break
            except Exception as e:
                self.logger.exception(f"[💔] Health check error: {e}")
                self._status = ServiceStatus.UNHEALTHY
                await asyncio.sleep(10)

    async def _send_heartbeat(self):
        """Send heartbeat to service registry.

        The ``'health'`` service updates the registry directly; other services
        send the heartbeat over the event bus. Failures are logged and counted;
        once the count reaches ``_max_heartbeat_failures`` a re-registration is
        attempted. This method does not raise for ordinary exceptions.
        """
        # Local import: the module imports only `datetime` from the datetime package.
        from datetime import timezone

        try:
            health_data = {
                'status': self._status.value,
                # Naive UTC timestamp, same text format as the deprecated
                # datetime.utcnow().isoformat().
                'timestamp': datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
                'instance_id': self.instance_id,
                # Spread last, so keys in the stored health data take precedence.
                **self._health_data
            }

            if self.service_id == 'health':
                service_registry.update_service_heartbeat(self.service_id, health_data)
                self._heartbeat_failures = 0
            elif self.event_bus:
                await self._send_resilient_heartbeat(health_data)

        except Exception as e:
            self.logger.exception(f"[💔] Failed to send heartbeat: {e}")
            self._heartbeat_failures += 1
            if self._heartbeat_failures >= self._max_heartbeat_failures:
                await self._attempt_reregistration()

    async def request_service(self, target_service: str, operation: str,
                              payload: Dict[str, Any], timeout: float = 5.0) -> Dict[str, Any]:
        """Make a request to another service.

        Sends the JSON-encoded ``payload`` to
        ``vi.services.<target_service>.<operation>`` through
        ``event_bus.client.request`` and returns the JSON-decoded reply.

        Args:
            target_service: Service id of the callee.
            operation: Operation name on that service.
            payload: JSON-serializable request body.
            timeout: Passed through to the client's ``request`` call
                (presumably seconds; confirm against the bus client).

        Raises:
            ValueError: If no event bus is available.
            Exception: Any request or decoding error is logged and re-raised.
        """
        # Same guard as emit_event/register_handler, instead of an
        # AttributeError on None.
        if not self.event_bus:
            raise ValueError("Event bus not available")

        topic = f"vi.services.{target_service}.{operation}"

        try:
            request_data = json.dumps(payload).encode()
            response_msg = await self.event_bus.client.request(topic, request_data, timeout=timeout)
            return json.loads(response_msg.data.decode())

        except Exception as e:
            self.logger.exception(f"[🔗] Request failed {target_service}.{operation}: {e}")
            raise

    async def emit_event(self, event_type: str, payload: Dict[str, Any]):
        """Emit an event by forwarding ``event_type`` and ``payload`` to the event bus.

        Raises:
            ValueError: If no event bus is available.
        """
        if not self.event_bus:
            raise ValueError("Event bus not available")
        await self.event_bus.emit(event_type, payload)

    def register_handler(self, operation: str, handler):
        """Register a request-reply handler for a service operation.

        The handler is subscribed on ``vi.services.<service_id>.<operation>``.
        A non-``None`` return value is JSON-encoded and sent as the reply. If
        the handler (or sending the reply) raises, the error is logged and an
        ``{"error": ..., "status": "error"}`` reply is attempted.

        Args:
            operation: Operation name, used as the last topic segment.
            handler: Async callable taking the incoming message.

        Returns:
            Whatever ``event_bus.on`` returns for the subscription.

        Raises:
            ValueError: If no event bus is available.
        """
        if not self.event_bus:
            raise ValueError("Event bus not available")

        async def wrapped_handler(msg):
            try:
                result = await handler(msg)
                if result is not None:
                    await msg.respond(json.dumps(result).encode())
            except Exception as e:
                # Log first, so the failure is recorded even if replying fails too.
                self.logger.exception(f"Handler error for {operation}: {e}")
                error_response = {"error": str(e), "status": "error"}
                try:
                    await msg.respond(json.dumps(error_response).encode())
                except Exception as respond_error:
                    self.logger.exception(
                        f"Failed to send error response for {operation}: {respond_error}"
                    )

        topic = f"vi.services.{self.service_id}.{operation}"
        return self.event_bus.on(topic, wrapped_handler)

    def create_service_operation(self, operation_id: str, description: str,
                                 request_topic: Optional[str] = None,
                                 response_pattern: str = "request-reply",
                                 timeout_ms: int = 5000,
                                 parameters: Optional[List[Dict[str, Any]]] = None,
                                 metadata: Optional[Dict[str, Any]] = None) -> ServiceOperation:
        """Helper to create a ServiceOperation.

        ``request_topic`` defaults to ``vi.services.<service_id>.<operation_id>``;
        ``parameters`` and ``metadata`` default to an empty list and dict.
        """
        if request_topic is None:
            request_topic = f"vi.services.{self.service_id}.{operation_id}"

        return ServiceOperation(
            operation_id=operation_id,
            description=description,
            request_topic=request_topic,
            response_pattern=response_pattern,
            parameters=parameters or [],
            timeout_ms=timeout_ms,
            metadata=metadata or {}
        )

    def get_status(self) -> ServiceStatus:
        """Return the current service status."""
        return self._status

    def set_status(self, status: ServiceStatus):
        """Set the service status, logging the transition when it changes."""
        if self._status != status:
            self.logger.info(f"[📊] Status: {self._status.value} → {status.value}")
            self._status = status

    def update_health_data(self, health_data: Dict[str, Any]):
        """Merge ``health_data`` into the stored health data."""
        self._health_data.update(health_data)

    def get_health_data(self) -> Dict[str, Any]:
        """Return a shallow copy of the stored health data."""
        return self._health_data.copy()

    def get_service_info(self) -> Dict[str, Any]:
        """Return a snapshot of identity, status, manifest and health data."""
        manifest = self.get_service_manifest()
        return {
            'service_id': self.service_id,
            'instance_id': self.instance_id,
            'status': self._status.value,
            'manifest': manifest.__dict__,
            # Shallow copy, as in get_health_data(), so callers cannot mutate
            # the internal dict.
            'health_data': self._health_data.copy(),
            'running': self._running
        }

    async def _send_registration_message(self, manifest: ServiceManifest):
        """Send registration message via the event bus.

        Emits ``vi.services.register`` with the service id, instance id and the
        manifest fields. Does nothing when no event bus is set.
        """
        if not self.event_bus:
            return

        registration_payload = {
            'service_id': self.service_id,
            'instance_id': self.instance_id,
            'manifest': {
                'service_id': manifest.service_id,
                'name': manifest.name,
                'description': manifest.description,
                'version': manifest.version,
                'operations': [op.__dict__ for op in manifest.operations],
                'health_check_topic': manifest.health_check_topic,
                'metadata': manifest.metadata
            }
        }

        await self.event_bus.emit("vi.services.register", registration_payload)
        self.logger.info(f"[🗂️] Registered: {self.service_id}")

    async def _send_deregistration_message(self):
        """Send deregistration message via the event bus.

        Emits ``vi.services.deregister`` with the service and instance ids.
        Does nothing when no event bus is set.
        """
        if not self.event_bus:
            return

        await self.event_bus.emit("vi.services.deregister", {
            'service_id': self.service_id,
            'instance_id': self.instance_id
        })
        self.logger.info(f"[🗂️] Deregistered: {self.service_id}")

    async def _send_resilient_heartbeat(self, health_data: Dict[str, Any]):
        """Send heartbeat with acknowledgment.

        Issues a request on ``vi.services.heartbeat`` and reads the
        ``'acknowledged'`` flag of the reply. An acknowledged heartbeat resets
        the failure counter and confirms registration; an unacknowledged one
        triggers re-registration. Request errors are counted, and
        re-registration is attempted once ``_max_heartbeat_failures`` is
        reached. Ordinary exceptions are not propagated.
        """
        try:
            heartbeat_payload = {
                'service_id': self.service_id,
                'instance_id': self.instance_id,
                'health_data': health_data
            }

            request_data = json.dumps(heartbeat_payload).encode()
            response_msg = await self.event_bus.client.request(
                "vi.services.heartbeat",
                request_data,
                timeout=5.0
            )

            response = json.loads(response_msg.data.decode())
            acknowledged = response.get('acknowledged', False)

            if acknowledged:
                self._heartbeat_failures = 0
                if not self._registration_confirmed:
                    self._registration_confirmed = True
                    self.logger.info("[✅] Registration confirmed")
            else:
                self._registration_confirmed = False
                await self._attempt_reregistration()

        except Exception as e:
            self._heartbeat_failures += 1
            self.logger.warning(f"[💔] Heartbeat failed ({self._heartbeat_failures}): {e}")

            if self._heartbeat_failures >= self._max_heartbeat_failures:
                await self._attempt_reregistration()

    async def _attempt_reregistration(self):
        """Attempt to re-register the service.

        Resets the failure counter and re-sends the registration message.
        Failures are logged, not raised. Skipped when the service is not
        running, so the final heartbeat sent by ``stop`` cannot re-register a
        service that is shutting down.
        """
        if not self._running:
            return

        self.logger.warning(f"[🔄] Re-registering {self.service_id}")

        try:
            self._heartbeat_failures = 0
            manifest = self.get_service_manifest()
            await self._send_registration_message(manifest)
            self.logger.info(f"[✅] Re-registered {self.service_id}")
        except Exception as e:
            self.logger.error(f"[❌] Re-registration failed: {e}")
|
|
|
|
|
|
class SimpleService(BaseService):
    """Minimal concrete BaseService driven entirely by constructor arguments.

    The manifest is assembled from the values given at construction time, the
    initialize/cleanup hooks are no-ops, and the health check always reports
    healthy.
    """

    def __init__(self, service_id: str, name: str = "", description: str = "",
                 version: str = "1.0.0", operations: Optional[List[ServiceOperation]] = None,
                 event_bus=None):
        """Store manifest fields, falling back to defaults derived from ``service_id``.

        Args:
            service_id: Logical service name.
            name: Display name; the service id is used when empty.
            description: Description; ``"Service: <service_id>"`` when empty.
            version: Version string placed in the manifest.
            operations: Operations to advertise; an empty list when omitted.
            event_bus: Optional event bus forwarded to the base class.
        """
        super().__init__(service_id, event_bus)
        self._name = name if name else service_id
        self._description = description if description else f"Service: {service_id}"
        self._version = version
        self._operations = operations if operations else []

    def get_service_manifest(self) -> ServiceManifest:
        """Build the manifest from the stored constructor values."""
        health_topic = f"vi.services.{self.service_id}.health"
        return ServiceManifest(
            service_id=self.service_id,
            name=self._name,
            description=self._description,
            version=self._version,
            operations=self._operations,
            health_check_topic=health_topic,
        )

    async def initialize_service(self):
        """No service-specific resources to set up."""
        return None

    async def cleanup_service(self):
        """No service-specific resources to release."""
        return None

    async def perform_health_check(self) -> Dict[str, Any]:
        """Report healthy, with the running flag and event-bus presence as checks."""
        checks = {
            'running': self._running,
            'event_bus': self.event_bus is not None,
        }
        return {'healthy': True, 'checks': checks}
|