diff --git a/main.py b/main.py
index 8264009..7932fbe 100644
--- a/main.py
+++ b/main.py
@@ -357,6 +357,7 @@ async def startup():
     # Also patch generate_tokens_sync to work with sync LLM
     def patched_generate_tokens_sync(self, prompt, voice=None, request_id="req-001", temperature=0.6, top_p=0.8, max_tokens=1200, stop_token_ids=[49158], repetition_penalty=1.3):
         from vllm import SamplingParams
+        import re
         prompt_string = self._format_prompt(prompt, voice)
         print(prompt)
         sampling_params = SamplingParams(
@@ -371,8 +372,11 @@ async def startup():
         # Yield individual tokens from the output text
         for output in outputs:
             text = output.outputs[0].text
-            # Tokens are space-separated custom_token_XXX
-            for token in text.split():
+            print(f"Raw output (first 500 chars): {text[:500]}")
+            # Extract every <custom_token_N> marker (N = one or more digits) in order of appearance; any other text is ignored
+            tokens = re.findall(r'<custom_token_\d+>', text)
+            print(f"Found {len(tokens)} tokens")
+            for token in tokens:
                 yield token
     OrpheusModel.generate_tokens_sync = patched_generate_tokens_sync