mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 11:14:04 +00:00
Merge pull request #3283 from BerriAI/litellm_debug_lowest_latency
[Fix] Add better observability for debugging lowest latency routing
This commit is contained in:
commit
ad637c15ce
4 changed files with 198 additions and 1 deletions
|
@ -454,6 +454,7 @@ class Router:
|
|||
model=model,
|
||||
messages=messages,
|
||||
specific_deployment=kwargs.pop("specific_deployment", None),
|
||||
request_kwargs=kwargs,
|
||||
)
|
||||
|
||||
# debug how often this deployment is picked
|
||||
|
@ -2831,6 +2832,7 @@ class Router:
|
|||
messages: Optional[List[Dict[str, str]]] = None,
|
||||
input: Optional[Union[str, List]] = None,
|
||||
specific_deployment: Optional[bool] = False,
|
||||
request_kwargs: Optional[Dict] = None,
|
||||
):
|
||||
"""
|
||||
Async implementation of 'get_available_deployments'.
|
||||
|
@ -2846,6 +2848,7 @@ class Router:
|
|||
messages=messages,
|
||||
input=input,
|
||||
specific_deployment=specific_deployment,
|
||||
request_kwargs=request_kwargs,
|
||||
)
|
||||
|
||||
model, healthy_deployments = self._common_checks_available_deployment(
|
||||
|
@ -2949,6 +2952,7 @@ class Router:
|
|||
messages: Optional[List[Dict[str, str]]] = None,
|
||||
input: Optional[Union[str, List]] = None,
|
||||
specific_deployment: Optional[bool] = False,
|
||||
request_kwargs: Optional[Dict] = None,
|
||||
):
|
||||
"""
|
||||
Returns the deployment based on routing strategy
|
||||
|
@ -3035,7 +3039,9 @@ class Router:
|
|||
and self.lowestlatency_logger is not None
|
||||
):
|
||||
deployment = self.lowestlatency_logger.get_available_deployments(
|
||||
- model_group=model, healthy_deployments=healthy_deployments
|
||||
+ model_group=model,
|
||||
+ healthy_deployments=healthy_deployments,
|
||||
+ request_kwargs=request_kwargs,
|
||||
)
|
||||
elif (
|
||||
self.routing_strategy == "usage-based-routing"
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue