ResponsesApiDeploymentCheck

Ishaan Jaff 2025-04-21 16:39:42 -07:00
parent cddbac5b28
commit a460304208
2 changed files with 28 additions and 17 deletions

View file

@@ -98,6 +98,9 @@ from litellm.router_utils.handle_error import (
 from litellm.router_utils.pre_call_checks.prompt_caching_deployment_check import (
     PromptCachingDeploymentCheck,
 )
+from litellm.router_utils.pre_call_checks.responses_api_deployment_check import (
+    ResponsesApiDeploymentCheck,
+)
 from litellm.router_utils.router_callbacks.track_deployment_metrics import (
     increment_deployment_failures_for_current_minute,
     increment_deployment_successes_for_current_minute,
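Only the import and registration of ResponsesApiDeploymentCheck appear in this commit view; the class itself lives in litellm/router_utils/pre_call_checks/responses_api_deployment_check.py. As a rough sketch only, not the actual implementation: a pre-call check of this shape filters candidate deployments before routing, so that a follow-up Responses API request is pinned to the deployment that served the original response. The method name async_filter_deployments and the cache call below are assumptions modeled on the existing PromptCachingDeploymentCheck pattern.

from typing import Any, Dict, List, Optional


class ResponsesApiDeploymentCheckSketch:
    """Hypothetical sketch of the check's shape; not the shipped class."""

    def __init__(self, cache: Any):
        self.cache = cache  # the router's cache, per the registration below

    async def async_filter_deployments(
        self,
        model: str,
        healthy_deployments: List[Dict[str, Any]],
        request_kwargs: Optional[dict] = None,
        **kwargs: Any,
    ) -> List[Dict[str, Any]]:
        # A follow-up Responses API request carries previous_response_id and
        # should land on the deployment that produced that response.
        previous_response_id = (request_kwargs or {}).get("previous_response_id")
        if previous_response_id is None:
            return healthy_deployments
        # Assumed cache lookup: response id -> deployment model id.
        model_id = await self.cache.async_get_cache(key=previous_response_id)
        if model_id is None:
            return healthy_deployments
        return [
            d
            for d in healthy_deployments
            if d.get("model_info", {}).get("id") == model_id
        ]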
@@ -339,9 +342,9 @@ class Router:
         )  # names of models under litellm_params. ex. azure/chatgpt-v-2
         self.deployment_latency_map = {}
         ### CACHING ###
-        cache_type: Literal[
-            "local", "redis", "redis-semantic", "s3", "disk"
-        ] = "local"  # default to an in-memory cache
+        cache_type: Literal["local", "redis", "redis-semantic", "s3", "disk"] = (
+            "local"  # default to an in-memory cache
+        )
         redis_cache = None
         cache_config: Dict[str, Any] = {}
@@ -562,9 +565,9 @@ class Router:
             )
         )
-        self.model_group_retry_policy: Optional[
-            Dict[str, RetryPolicy]
-        ] = model_group_retry_policy
+        self.model_group_retry_policy: Optional[Dict[str, RetryPolicy]] = (
+            model_group_retry_policy
+        )
 
         self.allowed_fails_policy: Optional[AllowedFailsPolicy] = None
         if allowed_fails_policy is not None:
@@ -765,6 +768,10 @@ class Router:
                     provider_budget_config=self.provider_budget_config,
                     model_list=self.model_list,
                 )
+            elif pre_call_check == "responses_api_deployment_check":
+                _callback = ResponsesApiDeploymentCheck(
+                    cache=self.cache,
+                )
             if _callback is not None:
                 litellm.logging_callback_manager.add_litellm_callback(_callback)
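The branch above is driven by the router's optional pre-call checks. Assuming the Router constructor kwarg is optional_pre_call_checks (the OptionalPreCallChecks list that feeds this loop), enabling the new check would look roughly like:

from litellm import Router

router = Router(
    model_list=[
        {
            # placeholder deployment, not part of this diff
            "model_name": "gpt-4o",
            "litellm_params": {"model": "azure/gpt-4o"},
        }
    ],
    # registers ResponsesApiDeploymentCheck via the elif branch above
    optional_pre_call_checks=["responses_api_deployment_check"],
)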
@@ -3247,11 +3254,11 @@ class Router:
             if isinstance(e, litellm.ContextWindowExceededError):
                 if context_window_fallbacks is not None:
-                    fallback_model_group: Optional[
-                        List[str]
-                    ] = self._get_fallback_model_group_from_fallbacks(
-                        fallbacks=context_window_fallbacks,
-                        model_group=model_group,
-                    )
+                    fallback_model_group: Optional[List[str]] = (
+                        self._get_fallback_model_group_from_fallbacks(
+                            fallbacks=context_window_fallbacks,
+                            model_group=model_group,
+                        )
+                    )
                     if fallback_model_group is None:
                         raise original_exception
@@ -3283,11 +3290,11 @@ class Router:
                     e.message += "\n{}".format(error_message)
             elif isinstance(e, litellm.ContentPolicyViolationError):
                 if content_policy_fallbacks is not None:
-                    fallback_model_group: Optional[
-                        List[str]
-                    ] = self._get_fallback_model_group_from_fallbacks(
-                        fallbacks=content_policy_fallbacks,
-                        model_group=model_group,
-                    )
+                    fallback_model_group: Optional[List[str]] = (
+                        self._get_fallback_model_group_from_fallbacks(
+                            fallbacks=content_policy_fallbacks,
+                            model_group=model_group,
+                        )
+                    )
                     if fallback_model_group is None:
                         raise original_exception
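Both reformatted branches consume the fallback maps passed to the Router. A hedged usage example (model names are placeholders, not from this diff):

from litellm import Router

router = Router(
    model_list=[
        {"model_name": "gpt-4o", "litellm_params": {"model": "openai/gpt-4o"}},
        {"model_name": "gpt-4o-mini", "litellm_params": {"model": "openai/gpt-4o-mini"}},
    ],
    # on ContextWindowExceededError, retry on the mapped model group
    context_window_fallbacks=[{"gpt-4o": ["gpt-4o-mini"]}],
    # on ContentPolicyViolationError, same mechanism via the second branch
    content_policy_fallbacks=[{"gpt-4o": ["gpt-4o-mini"]}],
)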

View file

@@ -709,7 +709,11 @@ class GenericBudgetWindowDetails(BaseModel):
     ttl_seconds: int
 
 
-OptionalPreCallChecks = List[Literal["prompt_caching", "router_budget_limiting"]]
+OptionalPreCallChecks = List[
+    Literal[
+        "prompt_caching", "router_budget_limiting", "responses_api_deployment_check"
+    ]
+]
 
 
 class LiteLLM_RouterFileObject(TypedDict, total=False):
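Widening the Literal means a type checker now accepts the new string anywhere OptionalPreCallChecks is expected. A quick illustration, assuming this file is litellm/types/router.py:

from litellm.types.router import OptionalPreCallChecks

checks: OptionalPreCallChecks = [
    "prompt_caching",
    "responses_api_deployment_check",  # accepted only after this change
]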