mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-27 19:54:13 +00:00
Merge pull request #5101 from BerriAI/litellm_router_prioritization
feat(router.py): allows /chat/completion endpoint to work for request prioritization calls
This commit is contained in:
commit
37ef63f522
2 changed files with 11 additions and 2 deletions
|
@ -193,12 +193,12 @@ const sidebars = {
|
||||||
"vertex_ai"
|
"vertex_ai"
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
"scheduler",
|
||||||
{
|
{
|
||||||
type: "category",
|
type: "category",
|
||||||
label: "🚅 LiteLLM Python SDK",
|
label: "🚅 LiteLLM Python SDK",
|
||||||
items: [
|
items: [
|
||||||
"routing",
|
"routing",
|
||||||
"scheduler",
|
|
||||||
"set_keys",
|
"set_keys",
|
||||||
"completion/token_usage",
|
"completion/token_usage",
|
||||||
"sdk_custom_pricing",
|
"sdk_custom_pricing",
|
||||||
|
|
|
@ -654,7 +654,12 @@ class Router:
|
||||||
timeout = kwargs.get("request_timeout", self.timeout)
|
timeout = kwargs.get("request_timeout", self.timeout)
|
||||||
kwargs.setdefault("metadata", {}).update({"model_group": model})
|
kwargs.setdefault("metadata", {}).update({"model_group": model})
|
||||||
|
|
||||||
response = await self.async_function_with_fallbacks(**kwargs)
|
if kwargs.get("priority", None) is not None and isinstance(
|
||||||
|
kwargs.get("priority"), int
|
||||||
|
):
|
||||||
|
response = await self.schedule_acompletion(**kwargs)
|
||||||
|
else:
|
||||||
|
response = await self.async_function_with_fallbacks(**kwargs)
|
||||||
|
|
||||||
return response
|
return response
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
@ -1097,6 +1102,10 @@ class Router:
|
||||||
_response = await self.acompletion(
|
_response = await self.acompletion(
|
||||||
model=model, messages=messages, stream=stream, **kwargs
|
model=model, messages=messages, stream=stream, **kwargs
|
||||||
)
|
)
|
||||||
|
_response._hidden_params.setdefault("additional_headers", {})
|
||||||
|
_response._hidden_params["additional_headers"].update(
|
||||||
|
{"x-litellm-request-prioritization-used": True}
|
||||||
|
)
|
||||||
return _response
|
return _response
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
setattr(e, "priority", priority)
|
setattr(e, "priority", priority)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue