feat(router.py): allow using .acompletion() for request prioritization

allows the /chat/completions endpoint to work for request prioritization calls
Krrish Dholakia 2024-08-07 16:43:12 -07:00
parent e585dfba92
commit 400653992c
2 changed files with 11 additions and 2 deletions
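
With this change, passing an integer priority kwarg to Router.acompletion() routes the request through schedule_acompletion() (litellm's scheduler queue) instead of the default fallback path. A minimal usage sketch; the model_list entry and messages below are placeholders, not part of this commit:

import asyncio

from litellm import Router

# Placeholder router config; only the `priority` kwarg is what this commit wires up.
router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {"model": "gpt-3.5-turbo"},
        }
    ]
)

async def main():
    # An integer priority sends the call through the scheduler queue;
    # in litellm's scheduler, lower values are served first.
    response = await router.acompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hey, how's it going?"}],
        priority=0,
    )
    print(response)

asyncio.run(main())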

docs/my-website/sidebars.js

@@ -193,12 +193,12 @@ const sidebars = {
         "vertex_ai"
       ],
     },
-    "scheduler",
     {
       type: "category",
       label: "🚅 LiteLLM Python SDK",
       items: [
         "routing",
+        "scheduler",
         "set_keys",
         "completion/token_usage",
         "sdk_custom_pricing",

litellm/router.py

@@ -654,6 +654,11 @@ class Router:
             timeout = kwargs.get("request_timeout", self.timeout)
             kwargs.setdefault("metadata", {}).update({"model_group": model})
-            response = await self.async_function_with_fallbacks(**kwargs)
+            if kwargs.get("priority", None) is not None and isinstance(
+                kwargs.get("priority"), int
+            ):
+                response = await self.schedule_acompletion(**kwargs)
+            else:
+                response = await self.async_function_with_fallbacks(**kwargs)
             return response
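
Note that the guard takes the scheduler path only when priority is present and an int; anything else falls through to async_function_with_fallbacks(). A standalone sketch of the same check (the helper name is invented for illustration):

def uses_scheduler(kwargs: dict) -> bool:
    # Mirrors the branch above: priority must be set and be an int.
    priority = kwargs.get("priority", None)
    return priority is not None and isinstance(priority, int)

assert uses_scheduler({"priority": 0}) is True         # scheduled
assert uses_scheduler({"priority": "high"}) is False   # normal fallback path
assert uses_scheduler({}) is False                     # normal fallback path
# Caveat: bool is a subclass of int in Python, so priority=True also schedules.
assert uses_scheduler({"priority": True}) is True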
@@ -1097,6 +1102,10 @@ class Router:
             _response = await self.acompletion(
                 model=model, messages=messages, stream=stream, **kwargs
             )
+            _response._hidden_params.setdefault("additional_headers", {})
+            _response._hidden_params["additional_headers"].update(
+                {"x-litellm-request-prioritization-used": True}
+            )
             return _response
         except Exception as e:
             setattr(e, "priority", priority)
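
The second hunk stamps a marker into the response's hidden params so callers can confirm the scheduler path ran. A sketch of reading it back; the router config is a placeholder as above, while _hidden_params and the header key come from this diff:

import asyncio

from litellm import Router

router = Router(
    model_list=[
        {"model_name": "gpt-3.5-turbo", "litellm_params": {"model": "gpt-3.5-turbo"}}
    ]
)

async def main():
    response = await router.acompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hey!"}],
        priority=0,
    )
    # additional_headers is set on _hidden_params by the hunk above.
    headers = response._hidden_params.get("additional_headers", {})
    print(headers.get("x-litellm-request-prioritization-used"))  # True

asyncio.run(main())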