diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index 414838280..f84a43c56 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -193,12 +193,12 @@ const sidebars = { "vertex_ai" ], }, + "scheduler", { type: "category", label: "🚅 LiteLLM Python SDK", items: [ "routing", - "scheduler", "set_keys", "completion/token_usage", "sdk_custom_pricing", diff --git a/litellm/router.py b/litellm/router.py index 74562566d..fb9af9618 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -654,7 +654,12 @@ class Router: timeout = kwargs.get("request_timeout", self.timeout) kwargs.setdefault("metadata", {}).update({"model_group": model}) - response = await self.async_function_with_fallbacks(**kwargs) + if kwargs.get("priority", None) is not None and isinstance( + kwargs.get("priority"), int + ): + response = await self.schedule_acompletion(**kwargs) + else: + response = await self.async_function_with_fallbacks(**kwargs) return response except Exception as e: @@ -1097,6 +1102,10 @@ class Router: _response = await self.acompletion( model=model, messages=messages, stream=stream, **kwargs ) + _response._hidden_params.setdefault("additional_headers", {}) + _response._hidden_params["additional_headers"].update( + {"x-litellm-request-prioritization-used": True} + ) return _response except Exception as e: setattr(e, "priority", priority)