ResponsesApiDeploymentCheck

Ishaan Jaff 2025-04-21 16:39:42 -07:00
parent cddbac5b28
commit a460304208
2 changed files with 28 additions and 17 deletions

@@ -98,6 +98,9 @@ from litellm.router_utils.handle_error import (
 from litellm.router_utils.pre_call_checks.prompt_caching_deployment_check import (
     PromptCachingDeploymentCheck,
 )
+from litellm.router_utils.pre_call_checks.responses_api_deployment_check import (
+    ResponsesApiDeploymentCheck,
+)
 from litellm.router_utils.router_callbacks.track_deployment_metrics import (
     increment_deployment_failures_for_current_minute,
     increment_deployment_successes_for_current_minute,
@@ -339,9 +342,9 @@ class Router:
         )  # names of models under litellm_params. ex. azure/chatgpt-v-2
         self.deployment_latency_map = {}
         ### CACHING ###
-        cache_type: Literal[
-            "local", "redis", "redis-semantic", "s3", "disk"
-        ] = "local"  # default to an in-memory cache
+        cache_type: Literal["local", "redis", "redis-semantic", "s3", "disk"] = (
+            "local"  # default to an in-memory cache
+        )
         redis_cache = None
         cache_config: Dict[str, Any] = {}
@@ -562,9 +565,9 @@ class Router:
             )
         )
-        self.model_group_retry_policy: Optional[
-            Dict[str, RetryPolicy]
-        ] = model_group_retry_policy
+        self.model_group_retry_policy: Optional[Dict[str, RetryPolicy]] = (
+            model_group_retry_policy
+        )
         self.allowed_fails_policy: Optional[AllowedFailsPolicy] = None
         if allowed_fails_policy is not None:
@@ -765,6 +768,10 @@
                         provider_budget_config=self.provider_budget_config,
                         model_list=self.model_list,
                     )
+                elif pre_call_check == "responses_api_deployment_check":
+                    _callback = ResponsesApiDeploymentCheck(
+                        cache=self.cache,
+                    )
                 if _callback is not None:
                     litellm.logging_callback_manager.add_litellm_callback(_callback)
@@ -3247,11 +3254,11 @@ class Router:
             if isinstance(e, litellm.ContextWindowExceededError):
                 if context_window_fallbacks is not None:
-                    fallback_model_group: Optional[
-                        List[str]
-                    ] = self._get_fallback_model_group_from_fallbacks(
-                        fallbacks=context_window_fallbacks,
-                        model_group=model_group,
+                    fallback_model_group: Optional[List[str]] = (
+                        self._get_fallback_model_group_from_fallbacks(
+                            fallbacks=context_window_fallbacks,
+                            model_group=model_group,
+                        )
                     )
                     if fallback_model_group is None:
                         raise original_exception
@@ -3283,11 +3290,11 @@ class Router:
                     e.message += "\n{}".format(error_message)
             elif isinstance(e, litellm.ContentPolicyViolationError):
                 if content_policy_fallbacks is not None:
-                    fallback_model_group: Optional[
-                        List[str]
-                    ] = self._get_fallback_model_group_from_fallbacks(
-                        fallbacks=content_policy_fallbacks,
-                        model_group=model_group,
+                    fallback_model_group: Optional[List[str]] = (
+                        self._get_fallback_model_group_from_fallbacks(
+                            fallbacks=content_policy_fallbacks,
+                            model_group=model_group,
+                        )
                     )
                     if fallback_model_group is None:
                         raise original_exception
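
The hunks above wire the new check into the router: the import is added, and the pre-call-check registration loop maps the string "responses_api_deployment_check" to a ResponsesApiDeploymentCheck instance that receives the router's cache and is registered via litellm.logging_callback_manager.add_litellm_callback. The sketch below is illustrative only and is not the implementation added by this commit; it assumes the check follows the same CustomLogger-based pattern as the existing PromptCachingDeploymentCheck, and the async_filter_deployments hook name, the cache key format, and the previous_response_id affinity behavior are all assumptions made for illustration.

from typing import List, Optional

from litellm.integrations.custom_logger import CustomLogger


class ExampleResponsesApiDeploymentCheck(CustomLogger):  # hypothetical, illustrative class
    def __init__(self, cache):
        # `cache` is the router's DualCache, passed in as cache=self.cache above
        self.cache = cache

    async def async_filter_deployments(  # hook name assumed, mirroring the prompt-caching check
        self,
        model: str,
        healthy_deployments: List[dict],
        messages: Optional[List[dict]] = None,
        request_kwargs: Optional[dict] = None,
        parent_otel_span=None,
    ) -> List[dict]:
        # Hypothetical behavior: pin follow-up Responses API calls (those that
        # carry a previous_response_id) to the deployment that produced the
        # original response; otherwise leave the candidate list untouched.
        previous_response_id = (request_kwargs or {}).get("previous_response_id")
        if previous_response_id is None:
            return healthy_deployments
        model_id = await self.cache.async_get_cache(
            key=f"responses_api_deployment:{previous_response_id}"  # assumed key format
        )
        if model_id is None:
            return healthy_deployments
        return [
            d
            for d in healthy_deployments
            if d.get("model_info", {}).get("id") == model_id
        ]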

@@ -709,7 +709,11 @@ class GenericBudgetWindowDetails(BaseModel):
     ttl_seconds: int
 
 
-OptionalPreCallChecks = List[Literal["prompt_caching", "router_budget_limiting"]]
+OptionalPreCallChecks = List[
+    Literal[
+        "prompt_caching", "router_budget_limiting", "responses_api_deployment_check"
+    ]
+]
 
 
 class LiteLLM_RouterFileObject(TypedDict, total=False):
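
The second changed file widens the OptionalPreCallChecks literal so the new check can be requested by name. A minimal usage sketch, assuming the Router exposes these flags through an optional_pre_call_checks argument (that argument is not shown in this diff), with a placeholder model list:

from litellm import Router

# Hypothetical model list; replace with real deployments and credentials.
router = Router(
    model_list=[
        {
            "model_name": "gpt-4o",
            "litellm_params": {"model": "openai/gpt-4o"},
        }
    ],
    optional_pre_call_checks=["responses_api_deployment_check"],
)

Because OptionalPreCallChecks is a List of Literal values, a misspelled check name is caught by a type checker; at runtime an unrecognized string would simply match no branch in the registration loop and no callback would be added.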