forked from phoenix/litellm-mirror
feat(router.py): allow using .acompletion() for request prioritization
Allows the /chat/completions endpoint to work for request prioritization calls.
This commit is contained in:
parent
e585dfba92
commit
400653992c
2 changed files with 11 additions and 2 deletions
|
@@ -193,12 +193,12 @@ const sidebars = {
|
||||||
"vertex_ai"
|
"vertex_ai"
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
"scheduler",
|
||||||
{
|
{
|
||||||
type: "category",
|
type: "category",
|
||||||
label: "🚅 LiteLLM Python SDK",
|
label: "🚅 LiteLLM Python SDK",
|
||||||
items: [
|
items: [
|
||||||
"routing",
|
"routing",
|
||||||
"scheduler",
|
|
||||||
"set_keys",
|
"set_keys",
|
||||||
"completion/token_usage",
|
"completion/token_usage",
|
||||||
"sdk_custom_pricing",
|
"sdk_custom_pricing",
|
||||||
|
|
|
@@ -654,7 +654,12 @@ class Router:
|
||||||
timeout = kwargs.get("request_timeout", self.timeout)
|
timeout = kwargs.get("request_timeout", self.timeout)
|
||||||
kwargs.setdefault("metadata", {}).update({"model_group": model})
|
kwargs.setdefault("metadata", {}).update({"model_group": model})
|
||||||
|
|
||||||
response = await self.async_function_with_fallbacks(**kwargs)
|
if kwargs.get("priority", None) is not None and isinstance(
|
||||||
|
kwargs.get("priority"), int
|
||||||
|
):
|
||||||
|
response = await self.schedule_acompletion(**kwargs)
|
||||||
|
else:
|
||||||
|
response = await self.async_function_with_fallbacks(**kwargs)
|
||||||
|
|
||||||
return response
|
return response
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
@@ -1097,6 +1102,10 @@ class Router:
|
||||||
_response = await self.acompletion(
|
_response = await self.acompletion(
|
||||||
model=model, messages=messages, stream=stream, **kwargs
|
model=model, messages=messages, stream=stream, **kwargs
|
||||||
)
|
)
|
||||||
|
_response._hidden_params.setdefault("additional_headers", {})
|
||||||
|
_response._hidden_params["additional_headers"].update(
|
||||||
|
{"x-litellm-request-prioritization-used": True}
|
||||||
|
)
|
||||||
return _response
|
return _response
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
setattr(e, "priority", priority)
|
setattr(e, "priority", priority)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue