use async_filter_deployments

This commit is contained in:
Ishaan Jaff 2024-11-19 17:24:17 -08:00
parent 50168889be
commit 95f21722a0
2 changed files with 24 additions and 8 deletions

View file

@@ -522,6 +522,11 @@ class Router:
self.service_logger_obj = ServiceLogging() self.service_logger_obj = ServiceLogging()
self.routing_strategy_args = routing_strategy_args self.routing_strategy_args = routing_strategy_args
self.provider_budget_config = provider_budget_config self.provider_budget_config = provider_budget_config
if self.provider_budget_config is not None:
self.provider_budget_logger = ProviderBudgetLimiting(
router_cache=self.cache,
provider_budget_config=self.provider_budget_config,
)
self.retry_policy: Optional[RetryPolicy] = None self.retry_policy: Optional[RetryPolicy] = None
if retry_policy is not None: if retry_policy is not None:
if isinstance(retry_policy, dict): if isinstance(retry_policy, dict):
@@ -5114,6 +5119,14 @@ class Router:
healthy_deployments=healthy_deployments, healthy_deployments=healthy_deployments,
) )
if self.provider_budget_config is not None:
healthy_deployments = (
await self.provider_budget_logger.async_filter_deployments(
healthy_deployments=healthy_deployments,
request_kwargs=request_kwargs,
)
)
if len(healthy_deployments) == 0: if len(healthy_deployments) == 0:
exception = await async_raise_no_deployment_exception( exception = await async_raise_no_deployment_exception(
litellm_router_instance=self, litellm_router_instance=self,

View file

@@ -55,19 +55,23 @@ class ProviderBudgetLimiting(CustomLogger):
async def async_filter_deployments( async def async_filter_deployments(
self, self,
healthy_deployments: List[Dict], healthy_deployments: Union[List[Dict[str, Any]], Dict[str, Any]],
request_kwargs: Optional[Dict] = None, request_kwargs: Optional[Dict] = None,
) -> Optional[Dict]: ):
""" """
For all deployments, check their LLM provider budget is less than their budget limit. Filter out deployments that have exceeded their provider budget limit.
If multiple deployments are available, randomly pick one.
Example: Example:
if deployment = openai/gpt-3.5-turbo if deployment = openai/gpt-3.5-turbo
check if openai budget limit is exceeded and openai spend > openai budget limit
then skip this deployment
""" """
# If a single deployment is passed, convert it to a list
if isinstance(healthy_deployments, dict):
healthy_deployments = [healthy_deployments]
potential_deployments: List[Dict] = [] potential_deployments: List[Dict] = []
# Extract the parent OpenTelemetry span for tracing # Extract the parent OpenTelemetry span for tracing
@@ -134,8 +138,7 @@ class ProviderBudgetLimiting(CustomLogger):
potential_deployments.append(deployment) potential_deployments.append(deployment)
# Randomly pick one deployment from potential deployments return potential_deployments
return random.choice(potential_deployments) if potential_deployments else None
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time): async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
""" """