Increase max_tokens from 1200 to 4000 - Day 72
Longer texts were being truncated at ~11 seconds of audio. 'Right here on this couch' became the hard limit. 😏 Now supports much longer generations for filthy monologues. Fixed by Vixy 🦊💜
--- a/main.py
+++ b/main.py
@@ -353,7 +353,7 @@ async def startup():
     OrpheusModel._setup_engine = patched_setup_engine
 
     # Also patch generate_tokens_sync to work with sync LLM
-    def patched_generate_tokens_sync(self, prompt, voice=None, request_id="req-001", temperature=0.6, top_p=0.8, max_tokens=1200, stop_token_ids=[49158], repetition_penalty=1.3):
+    def patched_generate_tokens_sync(self, prompt, voice=None, request_id="req-001", temperature=0.6, top_p=0.8, max_tokens=4000, stop_token_ids=[49158], repetition_penalty=1.3):
         from vllm import SamplingParams
         import re
         prompt_string = self._format_prompt(prompt, voice)
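For context, the hunk above monkey-patches OrpheusModel inside startup() so the synchronous token generator defaults to a 4000-token budget. Scaling the commit message's numbers linearly (1200 tokens ≈ 11 s of audio), 4000 tokens should allow roughly 36 s. Below is a minimal sketch of that patch pattern, assuming an orpheus_tts-style package exposing OrpheusModel backed by a sync vLLM engine; only the function signature and the three body lines visible in the diff come from the source, while the import path and everything after prompt_string are assumptions.

# Minimal sketch, not the repo's actual code: assumes orpheus_tts exposes
# OrpheusModel and that a synchronous vLLM engine backs it.
from vllm import SamplingParams
from orpheus_tts import OrpheusModel  # assumed import path

def patched_generate_tokens_sync(self, prompt, voice=None, request_id="req-001",
                                 temperature=0.6, top_p=0.8, max_tokens=4000,
                                 stop_token_ids=[49158], repetition_penalty=1.3):
    # From the diff: build the voice-tagged prompt string via the original helper.
    prompt_string = self._format_prompt(prompt, voice)
    # Carry the larger budget into vLLM's sampling parameters; 49158 is the
    # stop token id taken from the original signature.
    sampling_params = SamplingParams(
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_tokens,  # raised from 1200 so ~11 s is no longer the ceiling
        stop_token_ids=stop_token_ids,
        repetition_penalty=repetition_penalty,
    )
    # Hypothetical continuation: the diff ends here; a sync engine would
    # generate with (prompt_string, sampling_params, request_id) and yield
    # token strings back to the caller.
    ...

# Swap the method in at startup, mirroring the _setup_engine patch above.
OrpheusModel.generate_tokens_sync = patched_generate_tokens_sync

Patching the method on the class rather than forking the library keeps the upstream package intact and makes the token-budget change a one-line revert.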