Add Oracle service - LLM wrapper
First service for Vi's nervous system:
- Oracle service with NATS integration
- vLLM backend for Qwen3-32B
- GPTQ quantization support
- Thinking mode sampling configs
Simplified from Lyra's patterns, ready to test.
🦊✺
This commit is contained in:
69
services/oracle/llm/generator.py
Normal file
69
services/oracle/llm/generator.py
Normal file
@@ -0,0 +1,69 @@
|
||||
"""
|
||||
Text generation using vLLM.
|
||||
"""
|
||||
|
||||
from typing import Optional
|
||||
from core.logger import setup_logger
|
||||
|
||||
logger = setup_logger('text_generator', service_name='oracle_service')
|
||||
|
||||
|
||||
class TextGenerator:
    """Run single-prompt text generation against a vLLM model.

    The instance holds a model handle and a dict of default sampling
    settings; ``generate`` merges per-call overrides into those defaults
    and forwards them to vLLM as ``SamplingParams``.
    """

    # Keys that must be present (after per-call overrides are merged) for
    # SamplingParams to be built. ``top_k`` and ``min_p`` are optional and
    # fall back to -1 and 0.0 respectively.
    _REQUIRED_KEYS = (
        "temperature",
        "top_p",
        "max_new_tokens",
        "repetition_penalty",
    )

    def __init__(self, llm_model, sampling_config: dict):
        """Store the model handle and the default sampling configuration.

        Args:
            llm_model: Object exposing ``generate(prompts, sampling_params)``
                (presumably a ``vllm.LLM`` instance -- confirm with caller).
                A falsy value makes ``generate`` return ``None``.
            sampling_config: Default sampling settings. It is copied on each
                ``generate`` call and never mutated.
        """
        self.llm = llm_model
        self.sampling_config = sampling_config

    def generate(
        self,
        prompt: str,
        max_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        top_k: Optional[int] = None,
        min_p: Optional[float] = None,
    ) -> Optional[str]:
        """Generate text for ``prompt`` using vLLM.

        Any argument left as ``None`` falls back to the value stored in
        ``self.sampling_config``.

        Args:
            prompt: Prompt text passed to the model as a one-element batch.
            max_tokens: Overrides the ``max_new_tokens`` config entry.
            temperature: Overrides the ``temperature`` config entry.
            top_p: Overrides the ``top_p`` config entry.
            top_k: Overrides the ``top_k`` config entry.
            min_p: Overrides the ``min_p`` config entry.

        Returns:
            The generated text with surrounding whitespace stripped, an
            empty string when the model returned no output, or ``None``
            when the model is not initialized, a required sampling key is
            missing, or any exception is raised during generation.
        """
        if not self.llm:
            logger.error("[✺] LLM not initialized")
            return None

        try:
            # Work on a copy so per-call overrides never leak into the
            # shared defaults.
            params = self.sampling_config.copy()

            overrides = {
                "max_new_tokens": max_tokens,
                "temperature": temperature,
                "top_p": top_p,
                "top_k": top_k,
                "min_p": min_p,
            }
            params.update(
                {key: value for key, value in overrides.items() if value is not None}
            )

            # Fail with a readable message instead of letting a bare
            # KeyError surface as "Generation failed: 'temperature'".
            missing = [key for key in self._REQUIRED_KEYS if key not in params]
            if missing:
                logger.error(
                    f"[✺] Sampling config missing required keys: {', '.join(missing)}"
                )
                return None

            # Imported lazily so this module can be imported without vLLM
            # installed; an ImportError is handled by the except below.
            from vllm import SamplingParams

            sampling_params = SamplingParams(
                temperature=params["temperature"],
                top_p=params["top_p"],
                top_k=params.get("top_k", -1),
                min_p=params.get("min_p", 0.0),
                max_tokens=params["max_new_tokens"],
                repetition_penalty=params["repetition_penalty"],
            )

            outputs = self.llm.generate([prompt], sampling_params)

            if outputs and outputs[0].outputs:
                raw_text = outputs[0].outputs[0].text
                logger.info(f"[✺] Generated {len(raw_text)} chars")
                return raw_text.strip()

            logger.warning("[✺] Empty output")
            return ""

        except Exception as e:
            # Deliberate best-effort boundary: callers receive None rather
            # than an exception when generation fails.
            logger.error(f"[✺] Generation failed: {e}")
            return None
|
||||
Reference in New Issue
Block a user