From 0e43b76204bf5c206ac6d8f05e9aa5ea0aa35de5 Mon Sep 17 00:00:00 2001 From: vixy Date: Sun, 11 Jan 2026 17:39:55 -0600 Subject: [PATCH] Use GitHub orpheus-tts (supports max_model_len) to fix OOM on Jetson --- Dockerfile | 4 ++-- docker-compose.yml | 5 ++++- main.py | 8 +++++--- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index ce3dbb9..830dc09 100644 --- a/Dockerfile +++ b/Dockerfile @@ -21,9 +21,9 @@ COPY requirements.txt /app/ # Install Python dependencies (FastAPI, etc - but NOT torch/vllm) RUN pip3 install --no-cache-dir -r requirements.txt -# Install orpheus-speech from regular PyPI (not Jetson index) WITHOUT dependencies +# Install orpheus-speech from GitHub repo (supports max_model_len) WITHOUT dependencies # to avoid overwriting vllm/torch. Then install snac audio codec. -RUN pip3 install --no-cache-dir --no-deps --index-url https://pypi.org/simple/ orpheus-speech && \ +RUN pip3 install --no-cache-dir --no-deps git+https://github.com/canopyai/Orpheus-TTS.git#subdirectory=orpheus_tts_pypi && \ pip3 install --no-cache-dir --index-url https://pypi.org/simple/ snac # Copy application code diff --git a/docker-compose.yml b/docker-compose.yml index 60ba5e3..2c933d1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -36,11 +36,14 @@ services: environment: - ORPHEUS_MODEL=canopylabs/orpheus-tts-0.1-finetune-prod - DEFAULT_VOICE=tara - - MAX_MODEL_LEN=2048 + - MAX_MODEL_LEN=1024 - CACHE_ENABLED=true - RETENTION_DAYS=10 - HF_TOKEN=${HF_TOKEN} - HUGGING_FACE_HUB_TOKEN=${HF_TOKEN} + # vLLM memory optimization for Jetson + - VLLM_ATTENTION_BACKEND=FLASH_ATTN + - CUDA_VISIBLE_DEVICES=0 # Resource limits (adjust based on your Orin config) deploy: diff --git a/main.py b/main.py index 9c5c5e1..0e96b90 100644 --- a/main.py +++ b/main.py @@ -326,9 +326,11 @@ async def startup(): print("Loading Orpheus model (this may take a moment)...") from orpheus_tts import OrpheusModel 
- # Note: PyPI orpheus-speech 0.1.0 uses simpler API - # model_name can be "medium-3b" or full HF path - model = OrpheusModel(model_name=ORPHEUS_MODEL) + # GitHub version supports max_model_len for memory control + model = OrpheusModel( + model_name=ORPHEUS_MODEL, + max_model_len=MAX_MODEL_LEN + ) print("✓ Orpheus model loaded successfully")