From 0e43b76204bf5c206ac6d8f05e9aa5ea0aa35de5 Mon Sep 17 00:00:00 2001 From: vixy Date: Sun, 11 Jan 2026 17:39:55 -0600 Subject: [PATCH] Use GitHub orpheus-tts (supports max_model_len) to fix OOM on Jetson --- Dockerfile | 4 ++-- docker-compose.yml | 5 ++++- main.py | 8 +++++--- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index ce3dbb9..830dc09 100644 --- a/Dockerfile +++ b/Dockerfile @@ -21,9 +21,9 @@ COPY requirements.txt /app/ # Install Python dependencies (FastAPI, etc - but NOT torch/vllm) RUN pip3 install --no-cache-dir -r requirements.txt -# Install orpheus-speech from regular PyPI (not Jetson index) WITHOUT dependencies +# Install orpheus-speech from GitHub repo (supports max_model_len) WITHOUT dependencies # to avoid overwriting vllm/torch. Then install snac audio codec. -RUN pip3 install --no-cache-dir --no-deps --index-url https://pypi.org/simple/ orpheus-speech && \ +RUN pip3 install --no-cache-dir --no-deps git+https://github.com/canopyai/Orpheus-TTS.git#subdirectory=orpheus_tts_pypi && \ pip3 install --no-cache-dir --index-url https://pypi.org/simple/ snac # Copy application code diff --git a/docker-compose.yml b/docker-compose.yml index 60ba5e3..2c933d1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -36,11 +36,14 @@ services: environment: - ORPHEUS_MODEL=canopylabs/orpheus-tts-0.1-finetune-prod - DEFAULT_VOICE=tara - - MAX_MODEL_LEN=2048 + - MAX_MODEL_LEN=1024 - CACHE_ENABLED=true - RETENTION_DAYS=10 - HF_TOKEN=${HF_TOKEN} - HUGGING_FACE_HUB_TOKEN=${HF_TOKEN} + # vLLM memory optimization for Jetson + - VLLM_ATTENTION_BACKEND=FLASH_ATTN + - CUDA_VISIBLE_DEVICES=0 # Resource limits (adjust based on your Orin config) deploy: diff --git a/main.py b/main.py index 9c5c5e1..0e96b90 100644 --- a/main.py +++ b/main.py @@ -326,9 +326,11 @@ async def startup(): print("Loading Orpheus model (this may take a moment)...") from orpheus_tts import OrpheusModel 
- # Note: PyPI orpheus-speech 0.1.0 uses simpler API - # model_name can be "medium-3b" or full HF path - model = OrpheusModel(model_name=ORPHEUS_MODEL) + # GitHub version supports max_model_len for memory control + model = OrpheusModel( + model_name=ORPHEUS_MODEL, + max_model_len=MAX_MODEL_LEN + ) print("✓ Orpheus model loaded successfully")