First service for Vi's nervous system:
- Oracle service with NATS integration
- vLLM backend for Qwen3-32B
- GPTQ quantization support
- Thinking mode sampling configs
Simplified from Lyra's patterns, ready to test.
🦊✺
70 lines
2.1 KiB
Python
70 lines
2.1 KiB
Python
"""
|
|
Text generation using vLLM.
|
|
"""
|
|
|
|
from typing import Optional
|
|
from core.logger import setup_logger
|
|
|
|
logger = setup_logger('text_generator', service_name='oracle_service')
|
|
|
|
|
|
class TextGenerator:
    """Generate text from a prompt through a vLLM model handle.

    The instance holds a model object and a base sampling configuration.
    Each call to :meth:`generate` may override individual sampling values
    without modifying the stored configuration.
    """

    # Keys that generate() reads unconditionally from the merged sampling
    # parameters. "top_k" and "min_p" are optional and have fallbacks.
    _REQUIRED_KEYS = (
        "temperature",
        "top_p",
        "max_new_tokens",
        "repetition_penalty",
    )

    def __init__(self, llm_model, sampling_config: dict):
        """Store the model handle and the base sampling configuration.

        Args:
            llm_model: Object exposing ``generate(prompts, sampling_params)``.
                A falsy value makes :meth:`generate` return ``None``.
            sampling_config: Base sampling values keyed by ``temperature``,
                ``top_p``, ``max_new_tokens``, ``repetition_penalty`` and,
                optionally, ``top_k`` and ``min_p``.
        """
        self.llm = llm_model
        self.sampling_config = sampling_config

    def _resolve_params(self, overrides: dict) -> dict:
        """Return a copy of the base config with non-``None`` overrides applied.

        The stored ``sampling_config`` is never mutated, so per-call
        overrides do not leak into later calls.
        """
        params = self.sampling_config.copy()
        params.update(
            {key: value for key, value in overrides.items() if value is not None}
        )
        return params

    def generate(
        self,
        prompt: str,
        max_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        top_k: Optional[int] = None,
        min_p: Optional[float] = None
    ) -> Optional[str]:
        """Generate text for a single prompt using vLLM.

        Args:
            prompt: Prompt text passed to the model as a one-element batch.
            max_tokens: Overrides the configured ``max_new_tokens`` when given.
            temperature: Overrides the configured ``temperature`` when given.
            top_p: Overrides the configured ``top_p`` when given.
            top_k: Overrides the configured ``top_k`` when given; ``-1`` is
                passed to vLLM when neither the call nor the config sets it.
            min_p: Overrides the configured ``min_p`` when given; ``0.0`` is
                passed to vLLM when neither the call nor the config sets it.

        Returns:
            The generated text with surrounding whitespace stripped, ``""``
            when the model returns no output, or ``None`` when the model is
            not initialized, a required sampling key is missing, or any
            exception is raised during generation.
        """
        if not self.llm:
            logger.error("[✺] LLM not initialized")
            return None

        try:
            params = self._resolve_params(
                {
                    "max_new_tokens": max_tokens,
                    "temperature": temperature,
                    "top_p": top_p,
                    "top_k": top_k,
                    "min_p": min_p,
                }
            )

            # Report missing required keys by name instead of letting a bare
            # KeyError surface as an opaque "Generation failed: 'key'" message.
            missing = [key for key in self._REQUIRED_KEYS if key not in params]
            if missing:
                logger.error(
                    f"[✺] Sampling config missing required keys: {', '.join(missing)}"
                )
                return None

            # Imported at call time rather than at module level; an
            # ImportError is reported as a generation failure below.
            from vllm import SamplingParams

            sampling_params = SamplingParams(
                temperature=params["temperature"],
                top_p=params["top_p"],
                top_k=params.get("top_k", -1),
                min_p=params.get("min_p", 0.0),
                max_tokens=params["max_new_tokens"],
                repetition_penalty=params["repetition_penalty"],
            )

            outputs = self.llm.generate([prompt], sampling_params)

            if outputs and outputs[0].outputs:
                raw_text = outputs[0].outputs[0].text
                logger.info(f"[✺] Generated {len(raw_text)} chars")
                return raw_text.strip()

            logger.warning("[✺] Empty output")
            return ""

        except Exception as e:
            # Service boundary: callers receive None rather than an exception.
            logger.error(f"[✺] Generation failed: {e}")
            return None
|