Use GitHub orpheus-tts (supports max_model_len) to fix OOM on Jetson
This commit is contained in:
@@ -36,11 +36,14 @@ services:
|
||||
environment:
  # Model and default voice used by the Orpheus TTS service.
  - ORPHEUS_MODEL=canopylabs/orpheus-tts-0.1-finetune-prod
  - DEFAULT_VOICE=tara
  # Single MAX_MODEL_LEN entry: the diff view listed both 2048 (old) and
  # 1024 (new). Only the lowered value is kept, matching the commit's
  # stated goal of fixing OOM on Jetson.
  - MAX_MODEL_LEN=1024
  - CACHE_ENABLED=true
  - RETENTION_DAYS=10
  # Secrets must not be committed. The token is read from the host
  # environment or a git-ignored .env file; Compose aborts with the message
  # below if it is missing. NOTE(review): the token that was previously
  # hard-coded here is exposed in history and should be revoked.
  - "HF_TOKEN=${HF_TOKEN:?set HF_TOKEN in the environment or .env}"
  - "HUGGING_FACE_HUB_TOKEN=${HF_TOKEN:?set HF_TOKEN in the environment or .env}"
  # vLLM memory optimization for Jetson
  - VLLM_ATTENTION_BACKEND=FLASH_ATTN
  - CUDA_VISIBLE_DEVICES=0
|
||||
|
||||
# Resource limits (adjust based on your Orin config)
|
||||
deploy:
|
||||
|
||||
Reference in New Issue
Block a user