From d0d7633a00620dd1ed6d18837431e17b4f8b6e52 Mon Sep 17 00:00:00 2001
From: vixy
Date: Sun, 11 Jan 2026 17:52:33 -0600
Subject: [PATCH] Monkey-patch OrpheusModel to support max_model_len on Jetson

---
 Dockerfile |  4 ++--
 main.py    | 21 ++++++++++++++++-----
 2 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 830dc09..bda7673 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -21,9 +21,9 @@ COPY requirements.txt /app/
 # Install Python dependencies (FastAPI, etc - but NOT torch/vllm)
 RUN pip3 install --no-cache-dir -r requirements.txt
 
-# Install orpheus-speech from GitHub repo (supports max_model_len) WITHOUT dependencies
+# Install orpheus-speech from regular PyPI WITHOUT dependencies
 # to avoid overwriting vllm/torch. Then install snac audio codec.
-RUN pip3 install --no-cache-dir --no-deps git+https://github.com/canopyai/Orpheus-TTS.git#subdirectory=orpheus_tts_pypi && \
+RUN pip3 install --no-cache-dir --no-deps --index-url https://pypi.org/simple/ orpheus-speech && \
     pip3 install --no-cache-dir --index-url https://pypi.org/simple/ snac
 
 # Copy application code
diff --git a/main.py b/main.py
index 0e96b90..261c09a 100644
--- a/main.py
+++ b/main.py
@@ -325,12 +325,23 @@ async def startup():
     # Import and load Orpheus model
     print("Loading Orpheus model (this may take a moment)...")
     from orpheus_tts import OrpheusModel
+    from vllm import AsyncLLMEngine
+    from vllm.engine.arg_utils import AsyncEngineArgs
 
-    # GitHub version supports max_model_len for memory control
-    model = OrpheusModel(
-        model_name=ORPHEUS_MODEL,
-        max_model_len=MAX_MODEL_LEN
-    )
+    # Monkey-patch OrpheusModel to support max_model_len (PyPI version doesn't)
+    original_setup_engine = OrpheusModel._setup_engine
+    def patched_setup_engine(self):
+        model_name = self._map_model_params()
+        engine_args = AsyncEngineArgs(
+            model=model_name,
+            max_model_len=MAX_MODEL_LEN,  # Our custom limit!
+            gpu_memory_utilization=0.85,  # Leave some headroom
+            enforce_eager=False,
+        )
+        return AsyncLLMEngine.from_engine_args(engine_args)
+    OrpheusModel._setup_engine = patched_setup_engine
+
+    model = OrpheusModel(model_name=ORPHEUS_MODEL)
 
     print("✓ Orpheus model loaded successfully")
 