Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-25 02:34:29 +00:00
ResponsesApiDeploymentCheck
commit a460304208 (parent cddbac5b28)
2 changed files with 28 additions and 17 deletions
litellm/router.py

@@ -98,6 +98,9 @@ from litellm.router_utils.handle_error import (
 from litellm.router_utils.pre_call_checks.prompt_caching_deployment_check import (
     PromptCachingDeploymentCheck,
 )
+from litellm.router_utils.pre_call_checks.responses_api_deployment_check import (
+    ResponsesApiDeploymentCheck,
+)
 from litellm.router_utils.router_callbacks.track_deployment_metrics import (
     increment_deployment_failures_for_current_minute,
     increment_deployment_successes_for_current_minute,
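The newly imported check is the core of this commit. Below is a hypothetical skeleton, shown only to illustrate the pre-call-check pattern the router wires up further down; the async_filter_deployments hook name is borrowed from the prompt-caching check and is an assumption, as is the pinning behavior described in the comment.

from typing import Any, Dict, List, Optional

from litellm.caching.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger


class ResponsesApiDeploymentCheckSketch(CustomLogger):
    """Hypothetical sketch; the real class lives in
    litellm/router_utils/pre_call_checks/responses_api_deployment_check.py."""

    def __init__(self, cache: DualCache):
        self.cache = cache

    async def async_filter_deployments(
        self,
        model: str,
        healthy_deployments: List[Dict[str, Any]],
        messages: Optional[List[Any]] = None,
        request_kwargs: Optional[Dict[str, Any]] = None,
        parent_otel_span: Optional[Any] = None,
    ) -> List[Dict[str, Any]]:
        # Judging by the class name, a follow-up Responses API request that
        # carries a previous_response_id would be routed back to the
        # deployment that produced that response (inferred, not in the diff);
        # otherwise all healthy deployments pass through unchanged.
        return healthy_deployments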
@@ -339,9 +342,9 @@ class Router:
         )  # names of models under litellm_params. ex. azure/chatgpt-v-2
         self.deployment_latency_map = {}
         ### CACHING ###
-        cache_type: Literal[
-            "local", "redis", "redis-semantic", "s3", "disk"
-        ] = "local"  # default to an in-memory cache
+        cache_type: Literal["local", "redis", "redis-semantic", "s3", "disk"] = (
+            "local"  # default to an in-memory cache
+        )
         redis_cache = None
         cache_config: Dict[str, Any] = {}

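This hunk, like the retry-policy and fallback hunks further down, changes formatting only: the right-hand side of a long annotated assignment is wrapped in parentheses, the style newer formatters such as black emit. A quick check that the new form binds the same value:

from typing import Literal

cache_type: Literal["local", "redis", "redis-semantic", "s3", "disk"] = (
    "local"  # default to an in-memory cache
)
assert cache_type == "local"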
@@ -562,9 +565,9 @@ class Router:
             )
         )

-        self.model_group_retry_policy: Optional[
-            Dict[str, RetryPolicy]
-        ] = model_group_retry_policy
+        self.model_group_retry_policy: Optional[Dict[str, RetryPolicy]] = (
+            model_group_retry_policy
+        )

         self.allowed_fails_policy: Optional[AllowedFailsPolicy] = None
         if allowed_fails_policy is not None:
@@ -765,6 +768,10 @@ class Router:
                     provider_budget_config=self.provider_budget_config,
                     model_list=self.model_list,
                 )
+            elif pre_call_check == "responses_api_deployment_check":
+                _callback = ResponsesApiDeploymentCheck(
+                    cache=self.cache,
+                )
             if _callback is not None:
                 litellm.logging_callback_manager.add_litellm_callback(_callback)

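This is the functional heart of the commit: when "responses_api_deployment_check" appears among the optional pre-call checks, a ResponsesApiDeploymentCheck backed by the router's cache is registered as a callback. A minimal usage sketch, assuming Router forwards an optional_pre_call_checks argument into this code path (the parameter name is inferred from the OptionalPreCallChecks alias, not shown in this hunk):

from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-4o",
            "litellm_params": {"model": "openai/gpt-4o", "api_key": "sk-..."},
        }
    ],
    optional_pre_call_checks=["responses_api_deployment_check"],
)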
@@ -3247,11 +3254,11 @@ class Router:

             if isinstance(e, litellm.ContextWindowExceededError):
                 if context_window_fallbacks is not None:
-                    fallback_model_group: Optional[
-                        List[str]
-                    ] = self._get_fallback_model_group_from_fallbacks(
-                        fallbacks=context_window_fallbacks,
-                        model_group=model_group,
-                    )
+                    fallback_model_group: Optional[List[str]] = (
+                        self._get_fallback_model_group_from_fallbacks(
+                            fallbacks=context_window_fallbacks,
+                            model_group=model_group,
+                        )
+                    )
                     if fallback_model_group is None:
                         raise original_exception
|
@ -3283,11 +3290,11 @@ class Router:
|
||||||
e.message += "\n{}".format(error_message)
|
e.message += "\n{}".format(error_message)
|
||||||
elif isinstance(e, litellm.ContentPolicyViolationError):
|
elif isinstance(e, litellm.ContentPolicyViolationError):
|
||||||
if content_policy_fallbacks is not None:
|
if content_policy_fallbacks is not None:
|
||||||
fallback_model_group: Optional[
|
fallback_model_group: Optional[List[str]] = (
|
||||||
List[str]
|
self._get_fallback_model_group_from_fallbacks(
|
||||||
] = self._get_fallback_model_group_from_fallbacks(
|
fallbacks=content_policy_fallbacks,
|
||||||
fallbacks=content_policy_fallbacks,
|
model_group=model_group,
|
||||||
model_group=model_group,
|
)
|
||||||
)
|
)
|
||||||
if fallback_model_group is None:
|
if fallback_model_group is None:
|
||||||
raise original_exception
|
raise original_exception
|
||||||
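Both fallback hunks only re-wrap the assignment around _get_fallback_model_group_from_fallbacks; the lookup logic is untouched. For orientation, a simplified sketch of what such a lookup does (the helper body here is an assumption; litellm fallbacks are conventionally a list of {model_group: [fallback_groups]} dicts):

from typing import Dict, List, Optional


def get_fallback_model_group(
    fallbacks: List[Dict[str, List[str]]], model_group: str
) -> Optional[List[str]]:
    # Return the fallback groups mapped to this model group, if any.
    for mapping in fallbacks:
        if model_group in mapping:
            return mapping[model_group]
    return None


# e.g. context_window_fallbacks=[{"gpt-3.5-turbo": ["gpt-4"]}]
assert get_fallback_model_group(
    fallbacks=[{"gpt-3.5-turbo": ["gpt-4"]}], model_group="gpt-3.5-turbo"
) == ["gpt-4"]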
litellm/types/router.py

@@ -709,7 +709,11 @@ class GenericBudgetWindowDetails(BaseModel):
     ttl_seconds: int


-OptionalPreCallChecks = List[Literal["prompt_caching", "router_budget_limiting"]]
+OptionalPreCallChecks = List[
+    Literal[
+        "prompt_caching", "router_budget_limiting", "responses_api_deployment_check"
+    ]
+]


 class LiteLLM_RouterFileObject(TypedDict, total=False):
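Because OptionalPreCallChecks is a List of a Literal, widening the Literal is what lets callers pass the new check name without tripping static type checkers, while anything outside the Literal is still rejected:

from typing import List, Literal

OptionalPreCallChecks = List[
    Literal[
        "prompt_caching", "router_budget_limiting", "responses_api_deployment_check"
    ]
]

checks: OptionalPreCallChecks = ["responses_api_deployment_check"]  # accepted
# checks = ["no_such_check"]  # mypy/pyright would flag this value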