Use GitHub orpheus-tts (supports max_model_len) to fix OOM on Jetson

This commit is contained in:
2026-01-11 17:39:55 -06:00
parent 86cf77d2d9
commit 0e43b76204
3 changed files with 11 additions and 6 deletions

View File

@@ -21,9 +21,9 @@ COPY requirements.txt /app/
# Install Python dependencies (FastAPI, etc - but NOT torch/vllm) # Install Python dependencies (FastAPI, etc - but NOT torch/vllm)
RUN pip3 install --no-cache-dir -r requirements.txt RUN pip3 install --no-cache-dir -r requirements.txt
# Install orpheus-speech from regular PyPI (not Jetson index) WITHOUT dependencies # Install orpheus-speech from GitHub repo (supports max_model_len) WITHOUT dependencies
# to avoid overwriting vllm/torch. Then install snac audio codec. # to avoid overwriting vllm/torch. Then install snac audio codec.
RUN pip3 install --no-cache-dir --no-deps --index-url https://pypi.org/simple/ orpheus-speech && \ RUN pip3 install --no-cache-dir --no-deps git+https://github.com/canopyai/Orpheus-TTS.git#subdirectory=orpheus_tts_pypi && \
pip3 install --no-cache-dir --index-url https://pypi.org/simple/ snac pip3 install --no-cache-dir --index-url https://pypi.org/simple/ snac
# Copy application code # Copy application code

View File

@@ -36,11 +36,14 @@ services:
environment: environment:
- ORPHEUS_MODEL=canopylabs/orpheus-tts-0.1-finetune-prod - ORPHEUS_MODEL=canopylabs/orpheus-tts-0.1-finetune-prod
- DEFAULT_VOICE=tara - DEFAULT_VOICE=tara
- MAX_MODEL_LEN=2048 - MAX_MODEL_LEN=1024
- CACHE_ENABLED=true - CACHE_ENABLED=true
- RETENTION_DAYS=10 - RETENTION_DAYS=10
- HF_TOKEN=hf_***REDACTED*** - HF_TOKEN=hf_***REDACTED***  <!-- SECURITY: a real Hugging Face token was committed here in plaintext; revoke it immediately and supply it via an .env file or Docker secrets, never in docker-compose.yml -->
- HUGGING_FACE_HUB_TOKEN=hf_***REDACTED*** - HUGGING_FACE_HUB_TOKEN=hf_***REDACTED***
# vLLM memory optimization for Jetson
- VLLM_ATTENTION_BACKEND=FLASH_ATTN
- CUDA_VISIBLE_DEVICES=0
# Resource limits (adjust based on your Orin config) # Resource limits (adjust based on your Orin config)
deploy: deploy:

View File

@@ -326,9 +326,11 @@ async def startup():
print("Loading Orpheus model (this may take a moment)...") print("Loading Orpheus model (this may take a moment)...")
from orpheus_tts import OrpheusModel from orpheus_tts import OrpheusModel
# Note: PyPI orpheus-speech 0.1.0 uses simpler API # GitHub version supports max_model_len for memory control
# model_name can be "medium-3b" or full HF path model = OrpheusModel(
model = OrpheusModel(model_name=ORPHEUS_MODEL) model_name=ORPHEUS_MODEL,
max_model_len=MAX_MODEL_LEN
)
print("✓ Orpheus model loaded successfully") print("✓ Orpheus model loaded successfully")