mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 10:44:24 +00:00
fix(router.py): fallback on 400-status code requests
This commit is contained in:
parent
f8b0118ecd
commit
7b6db63d30
5 changed files with 31 additions and 30 deletions
|
@ -1,8 +1,24 @@
|
|||
model_list:
|
||||
- model_name: "*"
|
||||
litellm_params:
|
||||
model: "*"
|
||||
general_settings:
|
||||
store_model_in_db: true
|
||||
database_connection_pool_limit: 20
|
||||
|
||||
model_list:
|
||||
- model_name: fake-openai-endpoint
|
||||
litellm_params:
|
||||
model: openai/my-fake-model
|
||||
api_key: my-fake-key
|
||||
api_base: https://exampleopenaiendpoint-production.up.railway.app/
|
||||
litellm_settings:
|
||||
max_internal_user_budget: 0.001
|
||||
internal_user_budget_duration: "5m"
|
||||
drop_params: True
|
||||
success_callback: ["prometheus"]
|
||||
failure_callback: ["prometheus"]
|
||||
service_callback: ["prometheus_system"]
|
||||
_langfuse_default_tags: ["user_api_key_alias", "user_api_key_user_id", "user_api_key_user_email", "user_api_key_team_alias", "semantic-similarity", "proxy_base_url"]
|
||||
|
||||
router_settings:
|
||||
routing_strategy: "latency-based-routing"
|
||||
routing_strategy_args: {"ttl": 86400} # Average the last 10 calls to compute avg latency per model
|
||||
allowed_fails: 1
|
||||
num_retries: 3
|
||||
retry_after: 5 # seconds to wait before retrying a failed request
|
||||
cooldown_time: 30 # seconds to cooldown a deployment after failure
|
||||
|
|
|
@ -420,6 +420,7 @@ async def update_team(
|
|||
@management_endpoint_wrapper
|
||||
async def team_member_add(
|
||||
data: TeamMemberAddRequest,
|
||||
http_request: Request,
|
||||
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
|
||||
):
|
||||
"""
|
||||
|
|
|
@ -2364,18 +2364,6 @@ class Router:
|
|||
fallback_failure_exception_str = ""
|
||||
try:
|
||||
verbose_router_logger.debug("Trying to fallback b/w models")
|
||||
if (
|
||||
hasattr(e, "status_code")
|
||||
and e.status_code == 400 # type: ignore
|
||||
and not (
|
||||
isinstance(e, litellm.ContextWindowExceededError)
|
||||
or isinstance(e, litellm.ContentPolicyViolationError)
|
||||
)
|
||||
): # don't retry a malformed request
|
||||
verbose_router_logger.debug(
|
||||
"Not retrying request as it's malformed. Status code=400."
|
||||
)
|
||||
raise e
|
||||
if isinstance(e, litellm.ContextWindowExceededError):
|
||||
if context_window_fallbacks is not None:
|
||||
fallback_model_group = None
|
||||
|
@ -2730,16 +2718,6 @@ class Router:
|
|||
original_exception = e
|
||||
verbose_router_logger.debug(f"An exception occurs {original_exception}")
|
||||
try:
|
||||
if (
|
||||
hasattr(e, "status_code")
|
||||
and e.status_code == 400 # type: ignore
|
||||
and not (
|
||||
isinstance(e, litellm.ContextWindowExceededError)
|
||||
or isinstance(e, litellm.ContentPolicyViolationError)
|
||||
)
|
||||
): # don't retry a malformed request
|
||||
raise e
|
||||
|
||||
verbose_router_logger.debug(
|
||||
f"Trying to fallback b/w models. Initial model group: {model_group}"
|
||||
)
|
||||
|
|
|
@ -865,6 +865,8 @@ async def test_create_user_default_budget(prisma_client, user_role):
|
|||
async def test_create_team_member_add(prisma_client, new_member_method):
|
||||
import time
|
||||
|
||||
from fastapi import Request
|
||||
|
||||
from litellm.proxy._types import LiteLLM_TeamTableCachedObj
|
||||
from litellm.proxy.proxy_server import hash_token, user_api_key_cache
|
||||
|
||||
|
@ -906,7 +908,11 @@ async def test_create_team_member_add(prisma_client, new_member_method):
|
|||
mock_litellm_usertable.find_many = AsyncMock(return_value=None)
|
||||
|
||||
await team_member_add(
|
||||
data=team_member_add_request, user_api_key_dict=UserAPIKeyAuth()
|
||||
data=team_member_add_request,
|
||||
user_api_key_dict=UserAPIKeyAuth(),
|
||||
http_request=Request(
|
||||
scope={"type": "http", "path": "/user/new"},
|
||||
),
|
||||
)
|
||||
|
||||
mock_client.assert_called()
|
||||
|
|
|
@ -143,7 +143,7 @@ class GenericLiteLLMParams(BaseModel):
|
|||
## VERTEX AI ##
|
||||
vertex_project: Optional[str] = None
|
||||
vertex_location: Optional[str] = None
|
||||
vertex_credentials: Optional[str] = None
|
||||
vertex_credentials: Optional[Union[str, dict]] = None
|
||||
## AWS BEDROCK / SAGEMAKER ##
|
||||
aws_access_key_id: Optional[str] = None
|
||||
aws_secret_access_key: Optional[str] = None
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue