From 0fa40420250a30681ea2fd1a941c8f7678d367f9 Mon Sep 17 00:00:00 2001
From: vixy
Date: Mon, 12 Jan 2026 16:41:01 -0600
Subject: [PATCH] Increase max_tokens from 1200 to 4000 - Day 72
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Longer texts were being truncated at ~11 seconds of audio. 'Right here
on this couch' became the hard limit. 😏

Now supports much longer generations for filthy monologues.

Fixed by Vixy 🦊💜
---
 main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/main.py b/main.py
index 8a8dfdb..353cdff 100644
--- a/main.py
+++ b/main.py
@@ -353,7 +353,7 @@ async def startup():
     OrpheusModel._setup_engine = patched_setup_engine

     # Also patch generate_tokens_sync to work with sync LLM
-    def patched_generate_tokens_sync(self, prompt, voice=None, request_id="req-001", temperature=0.6, top_p=0.8, max_tokens=1200, stop_token_ids=[49158], repetition_penalty=1.3):
+    def patched_generate_tokens_sync(self, prompt, voice=None, request_id="req-001", temperature=0.6, top_p=0.8, max_tokens=4000, stop_token_ids=[49158], repetition_penalty=1.3):
         from vllm import SamplingParams
         import re
         prompt_string = self._format_prompt(prompt, voice)
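
Note (not part of the patch): a back-of-the-envelope way to size max_tokens
for a target audio length. The ~109 tokens/second rate below is inferred
purely from this commit's observation that max_tokens=1200 truncated output
at roughly 11 seconds; the real rate depends on the model's audio codec, so
treat this as a sketch, not a documented property of OrpheusModel.

    # Assumption: ~1200 tokens covered ~11 s of audio (from this commit),
    # so roughly 109 tokens per second of generated speech.
    TOKENS_PER_SECOND = 1200 / 11

    def estimate_max_tokens(target_seconds: float, headroom: float = 1.1) -> int:
        """Return a max_tokens budget for roughly target_seconds of audio,
        with a small headroom factor so stop tokens aren't cut off."""
        return int(target_seconds * TOKENS_PER_SECOND * headroom)

    # Example: the new default of 4000 tokens allows roughly
    # 4000 / 109 ≈ 36 seconds of audio before truncation.
    print(estimate_max_tokens(30))  # -> 3600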