forked from phoenix/litellm-mirror
feat(router.py): allow using .acompletion() for request prioritization
Allows the /chat/completions endpoint to work for request prioritization calls.
This commit is contained in:
parent
e585dfba92
commit
400653992c
2 changed files with 11 additions and 2 deletions
|
@@ -654,7 +654,12 @@ class Router:
|
|||
timeout = kwargs.get("request_timeout", self.timeout)
|
||||
kwargs.setdefault("metadata", {}).update({"model_group": model})
|
||||
|
||||
- response = await self.async_function_with_fallbacks(**kwargs)
|
||||
+ if kwargs.get("priority", None) is not None and isinstance(
|
||||
+     kwargs.get("priority"), int
|
||||
+ ):
|
||||
+     response = await self.schedule_acompletion(**kwargs)
|
||||
+ else:
|
||||
+     response = await self.async_function_with_fallbacks(**kwargs)
|
||||
|
||||
return response
|
||||
except Exception as e:
|
||||
|
@@ -1097,6 +1102,10 @@ class Router:
|
|||
_response = await self.acompletion(
|
||||
model=model, messages=messages, stream=stream, **kwargs
|
||||
)
|
||||
+ _response._hidden_params.setdefault("additional_headers", {})
|
||||
+ _response._hidden_params["additional_headers"].update(
|
||||
+     {"x-litellm-request-prioritization-used": True}
|
||||
+ )
|
||||
return _response
|
||||
except Exception as e:
|
||||
setattr(e, "priority", priority)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue