diff --git a/main.py b/main.py index 261c09a..dccc8fe 100644 --- a/main.py +++ b/main.py @@ -331,7 +331,8 @@ async def startup(): # Monkey-patch OrpheusModel to support max_model_len (PyPI version doesn't) original_setup_engine = OrpheusModel._setup_engine def patched_setup_engine(self): - model_name = self._map_model_params() + # Get the mapped model name (handles "medium-3b" -> full path) + model_name = self._map_model_params(self.model_name) engine_args = AsyncEngineArgs( model=model_name, max_model_len=MAX_MODEL_LEN, # Our custom limit!