forked from phoenix/litellm-mirror
fix: raise a clear error message when a provider's budget has been exceeded
This commit is contained in:
parent
06eb5ecab5
commit
caf37e2190
2 changed files with 17 additions and 3 deletions
|
@@ -29,6 +29,7 @@ from litellm.types.router import (
|
|||
LiteLLM_Params,
|
||||
ProviderBudgetConfigType,
|
||||
ProviderBudgetInfo,
|
||||
RouterErrors,
|
||||
)
|
||||
from litellm.types.utils import StandardLoggingPayload
|
||||
|
||||
|
@@ -83,6 +84,10 @@ class ProviderBudgetLimiting(CustomLogger):
|
|||
if isinstance(healthy_deployments, dict):
|
||||
healthy_deployments = [healthy_deployments]
|
||||
|
||||
# Don't do any filtering if there are no healthy deployments
|
||||
if len(healthy_deployments) == 0:
|
||||
return healthy_deployments
|
||||
|
||||
potential_deployments: List[Dict] = []
|
||||
|
||||
# Extract the parent OpenTelemetry span for tracing
|
||||
|
@@ -125,6 +130,7 @@ class ProviderBudgetLimiting(CustomLogger):
|
|||
provider_spend_map[provider] = float(current_spends[idx] or 0.0)
|
||||
|
||||
# Filter healthy deployments based on budget constraints
|
||||
deployment_above_budget_info: str = "" # used to return in error message
|
||||
for deployment in healthy_deployments:
|
||||
provider = self._get_llm_provider_for_deployment(deployment)
|
||||
if provider is None:
|
||||
|
@@ -142,13 +148,18 @@ class ProviderBudgetLimiting(CustomLogger):
|
|||
)
|
||||
|
||||
if current_spend >= budget_limit:
|
||||
verbose_router_logger.debug(
|
||||
f"Skipping deployment {deployment} for provider {provider} as spend limit exceeded"
|
||||
)
|
||||
debug_msg = f"Exceeded budget for provider {provider}: {current_spend} >= {budget_limit}"
|
||||
verbose_router_logger.debug(debug_msg)
|
||||
deployment_above_budget_info += f"{debug_msg}\n"
|
||||
continue
|
||||
|
||||
potential_deployments.append(deployment)
|
||||
|
||||
if len(potential_deployments) == 0:
|
||||
raise ValueError(
|
||||
f"{RouterErrors.no_deployments_with_provider_budget_routing.value}: {deployment_above_budget_info}"
|
||||
)
|
||||
|
||||
return potential_deployments
|
||||
|
||||
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
|
||||
|
|
|
@@ -434,6 +434,9 @@ class RouterErrors(enum.Enum):
|
|||
no_deployments_with_tag_routing = (
|
||||
"Not allowed to access model due to tags configuration"
|
||||
)
|
||||
no_deployments_with_provider_budget_routing = (
|
||||
"No deployments available - crossed budget for provider"
|
||||
)
|
||||
|
||||
|
||||
class AllowedFailsPolicy(BaseModel):
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue