Files
vi/services/oracle/llm/generator.py
Alex Kazaiev ee1cb5540a Add Oracle service - LLM wrapper
First service for Vi's nervous system:
- Oracle service with NATS integration
- vLLM backend for Qwen3-32B
- GPTQ quantization support
- Thinking mode sampling configs

Simplified from Lyra's patterns, ready to test.

🦊
2026-01-02 13:19:15 -06:00

70 lines
2.1 KiB
Python

"""
Text generation using vLLM.
"""
from typing import Optional
from core.logger import setup_logger
# Module-level logger tagged with the owning service so oracle_service
# log lines can be filtered in aggregated output.
logger = setup_logger('text_generator', service_name='oracle_service')
class TextGenerator:
    """Text generation with vLLM.

    Thin wrapper around a pre-initialized vLLM model plus a dict of
    default sampling parameters; per-call keyword overrides are layered
    on top of the configured defaults for each generation.
    """

    def __init__(self, llm_model, sampling_config: dict):
        """
        Args:
            llm_model: Initialized vLLM model object (or a falsy value
                when the backend is unavailable — `generate` then no-ops).
            sampling_config: Default sampling parameters. Must contain
                "temperature", "top_p", and "max_new_tokens"; may contain
                "top_k", "min_p", and "repetition_penalty".
        """
        self.llm = llm_model
        self.sampling_config = sampling_config

    def generate(
        self,
        prompt: str,
        max_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        top_k: Optional[int] = None,
        min_p: Optional[float] = None
    ) -> Optional[str]:
        """Generate text using vLLM.

        Args:
            prompt: Input prompt passed verbatim to the model.
            max_tokens: Override for the configured "max_new_tokens".
            temperature: Override for the configured "temperature".
            top_p: Override for the configured "top_p".
            top_k: Override for the configured "top_k".
            min_p: Override for the configured "min_p".

        Returns:
            The stripped generated text, "" when the model produced no
            output, or None when the LLM is uninitialized or any error
            occurred (the error is logged, not raised).
        """
        if not self.llm:
            logger.error("[✺] LLM not initialized")
            return None
        try:
            # Start from the configured defaults; apply per-call overrides.
            params = self.sampling_config.copy()
            if max_tokens is not None:
                params["max_new_tokens"] = max_tokens
            if temperature is not None:
                params["temperature"] = temperature
            if top_p is not None:
                params["top_p"] = top_p
            if top_k is not None:
                params["top_k"] = top_k
            if min_p is not None:
                params["min_p"] = min_p

            # Imported lazily so merely importing this module does not
            # require vLLM to be installed.
            from vllm import SamplingParams
            sampling_params = SamplingParams(
                temperature=params["temperature"],
                top_p=params["top_p"],
                top_k=params.get("top_k", -1),      # -1 disables top-k in vLLM
                min_p=params.get("min_p", 0.0),     # 0.0 disables min-p
                max_tokens=params["max_new_tokens"],
                # Fix: was a hard params["repetition_penalty"] lookup, which
                # raised KeyError (swallowed below as a failed generation)
                # whenever the config omitted the key. 1.0 = no penalty.
                repetition_penalty=params.get("repetition_penalty", 1.0)
            )
            outputs = self.llm.generate([prompt], sampling_params)
            if outputs and outputs[0].outputs:
                raw_text = outputs[0].outputs[0].text
                logger.info(f"[✺] Generated {len(raw_text)} chars")
                return raw_text.strip()
            else:
                logger.warning("[✺] Empty output")
                return ""
        except Exception as e:
            # Boundary handler: generation failures are reported to the
            # caller as None rather than propagated.
            logger.error(f"[✺] Generation failed: {e}")
            return None