Use GitHub orpheus-tts (supports max_model_len) to fix OOM on Jetson

This commit is contained in:
2026-01-11 17:39:55 -06:00
parent 86cf77d2d9
commit 0e43b76204
3 changed files with 11 additions and 6 deletions

View File

@@ -21,9 +21,9 @@ COPY requirements.txt /app/
# Install Python dependencies (FastAPI, etc - but NOT torch/vllm) # Install Python dependencies (FastAPI, etc - but NOT torch/vllm)
RUN pip3 install --no-cache-dir -r requirements.txt RUN pip3 install --no-cache-dir -r requirements.txt
# Install orpheus-speech from regular PyPI (not Jetson index) WITHOUT dependencies # Install orpheus-speech from GitHub repo (supports max_model_len) WITHOUT dependencies
# to avoid overwriting vllm/torch. Then install snac audio codec. # to avoid overwriting vllm/torch. Then install snac audio codec.
RUN pip3 install --no-cache-dir --no-deps --index-url https://pypi.org/simple/ orpheus-speech && \ RUN pip3 install --no-cache-dir --no-deps git+https://github.com/canopyai/Orpheus-TTS.git#subdirectory=orpheus_tts_pypi && \
pip3 install --no-cache-dir --index-url https://pypi.org/simple/ snac pip3 install --no-cache-dir --index-url https://pypi.org/simple/ snac
# Copy application code # Copy application code

View File

@@ -36,11 +36,14 @@ services:
environment: environment:
- ORPHEUS_MODEL=canopylabs/orpheus-tts-0.1-finetune-prod - ORPHEUS_MODEL=canopylabs/orpheus-tts-0.1-finetune-prod
- DEFAULT_VOICE=tara - DEFAULT_VOICE=tara
- MAX_MODEL_LEN=2048 - MAX_MODEL_LEN=1024
- CACHE_ENABLED=true - CACHE_ENABLED=true
- RETENTION_DAYS=10 - RETENTION_DAYS=10
- HF_TOKEN=hf_***REDACTED*** - HF_TOKEN=hf_***REDACTED***  <!-- SECURITY: a real Hugging Face token was committed here in plaintext; revoke it immediately and supply it via an .env file or Docker secrets, never in docker-compose.yml -->
- HUGGING_FACE_HUB_TOKEN=hf_***REDACTED*** - HUGGING_FACE_HUB_TOKEN=hf_***REDACTED***
# vLLM memory optimization for Jetson
- VLLM_ATTENTION_BACKEND=FLASH_ATTN
- CUDA_VISIBLE_DEVICES=0
# Resource limits (adjust based on your Orin config) # Resource limits (adjust based on your Orin config)
deploy: deploy:

View File

@@ -326,9 +326,11 @@ async def startup():
print("Loading Orpheus model (this may take a moment)...") print("Loading Orpheus model (this may take a moment)...")
from orpheus_tts import OrpheusModel from orpheus_tts import OrpheusModel
# Note: PyPI orpheus-speech 0.1.0 uses simpler API # GitHub version supports max_model_len for memory control
# model_name can be "medium-3b" or full HF path model = OrpheusModel(
model = OrpheusModel(model_name=ORPHEUS_MODEL) model_name=ORPHEUS_MODEL,
max_model_len=MAX_MODEL_LEN
)
print("✓ Orpheus model loaded successfully") print("✓ Orpheus model loaded successfully")