diff --git a/main.py b/main.py index 8a8dfdb..353cdff 100644 --- a/main.py +++ b/main.py @@ -353,7 +353,7 @@ async def startup(): OrpheusModel._setup_engine = patched_setup_engine # Also patch generate_tokens_sync to work with sync LLM - def patched_generate_tokens_sync(self, prompt, voice=None, request_id="req-001", temperature=0.6, top_p=0.8, max_tokens=1200, stop_token_ids=[49158], repetition_penalty=1.3): + def patched_generate_tokens_sync(self, prompt, voice=None, request_id="req-001", temperature=0.6, top_p=0.8, max_tokens=4000, stop_token_ids=[49158], repetition_penalty=1.3): from vllm import SamplingParams import re prompt_string = self._format_prompt(prompt, voice)