diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 711cb2d31b..33a272fa13 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -1,8 +1,24 @@
-model_list:
-  - model_name: "*"
-    litellm_params:
-      model: "*"
+general_settings:
+  store_model_in_db: true
+  database_connection_pool_limit: 20
+model_list:
+  - model_name: fake-openai-endpoint
+    litellm_params:
+      model: openai/my-fake-model
+      api_key: my-fake-key
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
 
 litellm_settings:
-  max_internal_user_budget: 0.001
-  internal_user_budget_duration: "5m"
+  drop_params: True
+  success_callback: ["prometheus"]
+  failure_callback: ["prometheus"]
+  service_callback: ["prometheus_system"]
+  _langfuse_default_tags: ["user_api_key_alias", "user_api_key_user_id", "user_api_key_user_email", "user_api_key_team_alias", "semantic-similarity", "proxy_base_url"]
+
+router_settings:
+  routing_strategy: "latency-based-routing"
+  routing_strategy_args: {"ttl": 86400} # Average the last 10 calls to compute avg latency per model
+  allowed_fails: 1
+  num_retries: 3
+  retry_after: 5 # seconds to wait before retrying a failed request
+  cooldown_time: 30 # seconds to cooldown a deployment after failure
diff --git a/litellm/proxy/management_endpoints/team_endpoints.py b/litellm/proxy/management_endpoints/team_endpoints.py
index 43e458465a..09e077c965 100644
--- a/litellm/proxy/management_endpoints/team_endpoints.py
+++ b/litellm/proxy/management_endpoints/team_endpoints.py
@@ -420,6 +420,7 @@ async def update_team(
 @management_endpoint_wrapper
 async def team_member_add(
     data: TeamMemberAddRequest,
+    http_request: Request,
     user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
 ):
     """
diff --git a/litellm/router.py b/litellm/router.py
index fb9af96188..0af5c10485 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -2364,18 +2364,6 @@ class Router:
         fallback_failure_exception_str = ""
         try:
             verbose_router_logger.debug("Trying to fallback b/w models")
-            if (
-                hasattr(e, "status_code")
-                and e.status_code == 400  # type: ignore
-                and not (
-                    isinstance(e, litellm.ContextWindowExceededError)
-                    or isinstance(e, litellm.ContentPolicyViolationError)
-                )
-            ):  # don't retry a malformed request
-                verbose_router_logger.debug(
-                    "Not retrying request as it's malformed. Status code=400."
-                )
-                raise e
             if isinstance(e, litellm.ContextWindowExceededError):
                 if context_window_fallbacks is not None:
                     fallback_model_group = None
@@ -2730,16 +2718,6 @@ class Router:
         original_exception = e
         verbose_router_logger.debug(f"An exception occurs {original_exception}")
         try:
-            if (
-                hasattr(e, "status_code")
-                and e.status_code == 400  # type: ignore
-                and not (
-                    isinstance(e, litellm.ContextWindowExceededError)
-                    or isinstance(e, litellm.ContentPolicyViolationError)
-                )
-            ):  # don't retry a malformed request
-                raise e
-
             verbose_router_logger.debug(
                 f"Trying to fallback b/w models. Initial model group: {model_group}"
             )
diff --git a/litellm/tests/test_proxy_server.py b/litellm/tests/test_proxy_server.py
index ffc2600ba6..dee20a273c 100644
--- a/litellm/tests/test_proxy_server.py
+++ b/litellm/tests/test_proxy_server.py
@@ -865,6 +865,8 @@ async def test_create_user_default_budget(prisma_client, user_role):
 async def test_create_team_member_add(prisma_client, new_member_method):
     import time
 
+    from fastapi import Request
+
     from litellm.proxy._types import LiteLLM_TeamTableCachedObj
     from litellm.proxy.proxy_server import hash_token, user_api_key_cache
 
@@ -906,7 +908,11 @@ async def test_create_team_member_add(prisma_client, new_member_method):
         mock_litellm_usertable.find_many = AsyncMock(return_value=None)
 
         await team_member_add(
-            data=team_member_add_request, user_api_key_dict=UserAPIKeyAuth()
+            data=team_member_add_request,
+            user_api_key_dict=UserAPIKeyAuth(),
+            http_request=Request(
+                scope={"type": "http", "path": "/user/new"},
+            ),
         )
         mock_client.assert_called()
 
diff --git a/litellm/types/router.py b/litellm/types/router.py
index dda6968f09..cadc454105 100644
--- a/litellm/types/router.py
+++ b/litellm/types/router.py
@@ -143,7 +143,7 @@ class GenericLiteLLMParams(BaseModel):
     ## VERTEX AI ##
     vertex_project: Optional[str] = None
     vertex_location: Optional[str] = None
-    vertex_credentials: Optional[str] = None
+    vertex_credentials: Optional[Union[str, dict]] = None
     ## AWS BEDROCK / SAGEMAKER ##
     aws_access_key_id: Optional[str] = None
     aws_secret_access_key: Optional[str] = None