Use GitHub orpheus-tts (supports max_model_len) to fix OOM on Jetson
This commit is contained in:
@@ -21,9 +21,9 @@ COPY requirements.txt /app/
|
|||||||
# Install Python dependencies (FastAPI, etc - but NOT torch/vllm)
|
# Install Python dependencies (FastAPI, etc - but NOT torch/vllm)
|
||||||
RUN pip3 install --no-cache-dir -r requirements.txt
|
RUN pip3 install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
# Install orpheus-speech from regular PyPI (not Jetson index) WITHOUT dependencies
|
# Install orpheus-speech from GitHub repo (supports max_model_len) WITHOUT dependencies
|
||||||
# to avoid overwriting vllm/torch. Then install snac audio codec.
|
# to avoid overwriting vllm/torch. Then install snac audio codec.
|
||||||
RUN pip3 install --no-cache-dir --no-deps --index-url https://pypi.org/simple/ orpheus-speech && \
|
RUN pip3 install --no-cache-dir --no-deps git+https://github.com/canopyai/Orpheus-TTS.git#subdirectory=orpheus_tts_pypi && \
|
||||||
pip3 install --no-cache-dir --index-url https://pypi.org/simple/ snac
|
pip3 install --no-cache-dir --index-url https://pypi.org/simple/ snac
|
||||||
|
|
||||||
# Copy application code
|
# Copy application code
|
||||||
|
|||||||
@@ -36,11 +36,14 @@ services:
|
|||||||
environment:
|
environment:
|
||||||
- ORPHEUS_MODEL=canopylabs/orpheus-tts-0.1-finetune-prod
|
- ORPHEUS_MODEL=canopylabs/orpheus-tts-0.1-finetune-prod
|
||||||
- DEFAULT_VOICE=tara
|
- DEFAULT_VOICE=tara
|
||||||
- MAX_MODEL_LEN=2048
|
- MAX_MODEL_LEN=1024
|
||||||
- CACHE_ENABLED=true
|
- CACHE_ENABLED=true
|
||||||
- RETENTION_DAYS=10
|
- RETENTION_DAYS=10
|
||||||
- HF_TOKEN=<REDACTED — a live Hugging Face token was committed in plain text; it is exposed in commit history and must be revoked, then supplied via an env file or Docker secret, never hard-coded in docker-compose.yml>
|
- HF_TOKEN=<REDACTED — revoke this leaked token and inject it via env file or secret>
|
||||||
- HUGGING_FACE_HUB_TOKEN=<REDACTED — same leaked token duplicated under a second variable name; revoke and inject via env file or secret>
|
- HUGGING_FACE_HUB_TOKEN=<REDACTED — revoke this leaked token and inject it via env file or secret>
|
||||||
|
# vLLM memory optimization for Jetson
|
||||||
|
- VLLM_ATTENTION_BACKEND=FLASH_ATTN
|
||||||
|
- CUDA_VISIBLE_DEVICES=0
|
||||||
|
|
||||||
# Resource limits (adjust based on your Orin config)
|
# Resource limits (adjust based on your Orin config)
|
||||||
deploy:
|
deploy:
|
||||||
|
|||||||
8
main.py
8
main.py
@@ -326,9 +326,11 @@ async def startup():
|
|||||||
print("Loading Orpheus model (this may take a moment)...")
|
print("Loading Orpheus model (this may take a moment)...")
|
||||||
from orpheus_tts import OrpheusModel
|
from orpheus_tts import OrpheusModel
|
||||||
|
|
||||||
# Note: PyPI orpheus-speech 0.1.0 uses simpler API
|
# GitHub version supports max_model_len for memory control
|
||||||
# model_name can be "medium-3b" or full HF path
|
model = OrpheusModel(
|
||||||
model = OrpheusModel(model_name=ORPHEUS_MODEL)
|
model_name=ORPHEUS_MODEL,
|
||||||
|
max_model_len=MAX_MODEL_LEN
|
||||||
|
)
|
||||||
|
|
||||||
print("✓ Orpheus model loaded successfully")
|
print("✓ Orpheus model loaded successfully")
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user