# orpheus-tts/Dockerfile
# OrpheusTail - Orpheus TTS Service for NVIDIA Jetson AGX Orin
#
# Uses dustynv's vLLM container which has proper Jetson CUDA support
# Orpheus uses vLLM under the hood for fast inference
# NOTE(review): tag is pinned (good); pinning by digest would make the
# Jetson CUDA stack fully reproducible.
FROM dustynv/vllm:0.8.6-r36.4-cu128-24.04
# Set working directory; WORKDIR creates /app if missing, and all later
# COPY/RUN steps resolve relative paths against it.
WORKDIR /app
# Install system dependencies:
# - curl: used by the HEALTHCHECK below (not guaranteed present in the base)
# - git/wget: fetching models and tooling
# - libsndfile1: native audio I/O library needed for TTS output handling
# --no-install-recommends keeps the layer minimal; apt lists are removed in
# the SAME layer so they never persist in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    git \
    libsndfile1 \
    wget \
    && rm -rf /var/lib/apt/lists/*
# Copy only the requirements manifest first so this layer (and the pip
# install below) stays cached when application code changes.
COPY requirements.txt /app/
# Install Python dependencies (FastAPI, etc - but NOT torch/vllm; those ship
# with the dustynv base image as Jetson-specific builds).
RUN pip3 install --no-cache-dir -r requirements.txt
# Install orpheus-speech from regular PyPI WITHOUT dependencies (--no-deps)
# to avoid overwriting vllm/torch. Then install snac audio codec.
# NOTE(review): orpheus-speech and snac are unpinned — pin exact versions
# for reproducible builds.
RUN pip3 install --no-cache-dir --no-deps --index-url https://pypi.org/simple/ orpheus-speech && \
pip3 install --no-cache-dir --index-url https://pypi.org/simple/ snac
# Copy application code last so source edits do not invalidate the cached
# dependency layers above.
COPY main.py /app/
# Create directories for cache, output, and custom voices; the CACHE_DIR /
# OUTPUT_DIR / VOICES_DIR environment variables point the app at these paths.
RUN mkdir -p /app/cache /app/output /app/voices
# Expose API port (same as VoiceTail for drop-in replacement).
# EXPOSE is documentation only — publish with `-p 8766:8766` at run time.
EXPOSE 8766
# Runtime configuration, grouped into a single ENV instruction for
# readability. Values are unchanged:
# - PYTHONUNBUFFERED: stream logs immediately instead of buffering
# - CACHE_DIR / OUTPUT_DIR / VOICES_DIR: app data paths created above
# - ORPHEUS_MODEL: HuggingFace model id loaded at startup
# - DEFAULT_VOICE / MAX_MODEL_LEN: synthesis defaults
ENV PYTHONUNBUFFERED=1 \
    CACHE_DIR=/app/cache \
    OUTPUT_DIR=/app/output \
    VOICES_DIR=/app/voices \
    ORPHEUS_MODEL=canopylabs/orpheus-tts-0.1-finetune-prod \
    DEFAULT_VOICE=tara \
    MAX_MODEL_LEN=2048
# Health check (longer start period - model loading into vLLM takes time,
# hence the 180s grace before failures count).
# -f: treat HTTP errors as failure; -sS: silent except for real errors, so
# the probe does not spam progress bars into the health log.
# NOTE(review): requires curl in the image — confirm the base provides it or
# add it to the apt dependencies.
HEALTHCHECK --interval=30s --timeout=10s --start-period=180s --retries=3 \
CMD curl -fsS http://localhost:8766/health || exit 1
# Run the FastAPI application. Exec (JSON-array) form keeps uvicorn as PID 1
# so it receives SIGTERM directly from `docker stop` for clean shutdown.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8766"]