- FastAPI service replacing VoiceTail (Bark)
- Emotion tags: <laugh>, <sigh>, <gasp>, etc.
- Voice cloning endpoint (implementation pending)
- Streaming support for head playback
- Same port 8766 for drop-in replacement
Created by Vixy on Day 71 🦊
55 lines
1.5 KiB
Docker
55 lines
1.5 KiB
Docker
# OrpheusTail - Orpheus TTS Service for NVIDIA Jetson AGX Orin
#
# Replaces VoiceTail (Bark) with Orpheus for better emotion control
# and voice cloning capabilities.
#
# Based on NVIDIA L4T PyTorch container optimized for Jetson

# Base image, pinned to an explicit tag (PyTorch 2.1 on the r36.2.0 release).
FROM dustynv/pytorch:2.1-r36.2.0

# Everything below (COPY targets, pip, the uvicorn entry point) lives in /app.
WORKDIR /app

# Install system dependencies (one layer; apt lists removed in the same layer
# so they never reach the image).
#   ca-certificates - TLS roots for git/wget/curl over HTTPS; listed explicitly
#                     because recommended packages are skipped
#   curl            - invoked by the HEALTHCHECK probe, so it must not be left
#                     to whatever the base image happens to ship
#   git, wget       - fetch tooling
#   libsndfile1     - audio file I/O shared library
#                     (presumably required by the Python audio deps - confirm)
# --no-install-recommends keeps optional extras out of the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        git \
        libsndfile1 \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Bring in only the dependency manifest at this point, so the pip layer below
# is rebuilt when requirements.txt changes rather than on every source edit.
COPY requirements.txt /app/requirements.txt

# Python dependencies. torch and torchvision are intentionally absent here:
# the base image already provides them.
RUN pip3 install --no-cache-dir --requirement requirements.txt

# Install orpheus-speech (uses vllm under the hood).
# Note: vllm version compatibility may need adjustment.
# --no-cache-dir matches the requirements install above and keeps pip's
# download cache out of the image layer.
# TODO(review): pin orpheus-speech to a version known to work on this base
# image so rebuilds are reproducible.
RUN pip3 install --no-cache-dir orpheus-speech

# Application entry module (served as main:app by uvicorn).
COPY main.py /app/main.py

# Working directories referenced by CACHE_DIR, OUTPUT_DIR and VOICES_DIR.
RUN mkdir -p \
        /app/cache \
        /app/output \
        /app/voices

# API port. Kept identical to VoiceTail's so this image is a drop-in
# replacement. EXPOSE is documentation only; publish with -p at run time.
EXPOSE 8766/tcp

# Runtime configuration, grouped in a single ENV instruction.
#   PYTHONUNBUFFERED        - unbuffered stdout/stderr so logs appear immediately
#   CACHE_DIR / OUTPUT_DIR
#   / VOICES_DIR            - the directories created under /app
#   ORPHEUS_MODEL           - model identifier
#   DEFAULT_VOICE           - default voice name
#   MAX_MODEL_LEN           - presumably the model context length limit;
#                             confirm against main.py
ENV PYTHONUNBUFFERED=1 \
    CACHE_DIR=/app/cache \
    OUTPUT_DIR=/app/output \
    VOICES_DIR=/app/voices \
    ORPHEUS_MODEL=canopylabs/orpheus-tts-0.1-finetune-prod \
    DEFAULT_VOICE=tara \
    MAX_MODEL_LEN=2048

# Liveness probe against the service's /health endpoint.
# The 180s start period gives the model time to load before failures count.
HEALTHCHECK --start-period=180s --interval=30s --timeout=10s --retries=3 \
    CMD curl --fail http://localhost:8766/health || exit 1

# Start the FastAPI app under uvicorn, listening on all interfaces.
# Exec (JSON array) form: uvicorn runs as PID 1 and receives signals directly.
CMD ["uvicorn", "main:app", \
     "--host", "0.0.0.0", \
     "--port", "8766"]