# OrpheusTail - Orpheus TTS Service for NVIDIA Jetson AGX Orin
#
# Uses dustynv's vLLM container which has proper Jetson CUDA support.
# Orpheus uses vLLM under the hood for fast inference.

FROM dustynv/vllm:0.8.6-r36.4-cu128-24.04

# Set working directory
WORKDIR /app

# Install system dependencies.
# NOTE: curl is required by the HEALTHCHECK below — the Ubuntu 24.04 base
# does not ship it by default, so it must be installed explicitly here.
# --no-install-recommends keeps the layer small; the apt list cleanup keeps
# the cache out of the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    git \
    wget \
    curl \
    libsndfile1 \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first for better layer caching
COPY requirements.txt /app/

# Install Python dependencies (FastAPI, etc - but NOT torch/vllm, which the
# base image already provides with Jetson-specific builds)
RUN pip3 install --no-cache-dir -r requirements.txt

# Install orpheus-speech from the GitHub repo (supports max_model_len)
# WITHOUT dependencies, to avoid overwriting the base image's vllm/torch.
# Then install the snac audio codec from PyPI.
RUN pip3 install --no-cache-dir --no-deps \
        "git+https://github.com/canopyai/Orpheus-TTS.git#subdirectory=orpheus_tts_pypi" && \
    pip3 install --no-cache-dir --index-url https://pypi.org/simple/ snac

# Copy application code
COPY main.py /app/

# Create directories for cache, output, and custom voices
RUN mkdir -p /app/cache /app/output /app/voices

# Expose API port (same as VoiceTail for drop-in replacement)
EXPOSE 8766

# Environment defaults (all overridable at `docker run` time).
# Single ENV instruction → single image layer.
ENV PYTHONUNBUFFERED=1 \
    CACHE_DIR=/app/cache \
    OUTPUT_DIR=/app/output \
    VOICES_DIR=/app/voices \
    ORPHEUS_MODEL=canopylabs/orpheus-tts-0.1-finetune-prod \
    DEFAULT_VOICE=tara \
    MAX_MODEL_LEN=2048

# Health check (long start period - model loading takes time).
# Relies on curl installed in the apt layer above.
HEALTHCHECK --interval=30s --timeout=10s --start-period=180s --retries=3 \
    CMD curl -f http://localhost:8766/health || exit 1

# Run the FastAPI application
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8766"]