Merge branch 'main' into litellm_add_bedrock_guardrails

Ishaan Jaff 2024-08-22 17:28:49 -07:00 committed by GitHub
commit c23cf18a70
14 changed files with 173 additions and 22 deletions

View file

@@ -2,12 +2,3 @@ model_list:
   - model_name: "*"
     litellm_params:
       model: "*"
-litellm_settings:
-  success_callback: ["s3"]
-  cache: true
-  s3_callback_params:
-    s3_bucket_name: mytestbucketlitellm # AWS Bucket Name for S3
-    s3_region_name: us-west-2 # AWS Region Name for S3
-    s3_aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID # us os.environ/<variable name> to pass environment variables. This is AWS Access Key ID for S3
-    s3_aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY # AWS Secret Access Key for S3
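The s3_callback_params block being removed above uses litellm's os.environ/<variable name> convention to pull secrets from the environment instead of hard-coding them. litellm resolves these references internally; purely as an illustration of the convention (a hypothetical helper, not litellm's actual implementation), a resolver might look like this:

import os


def resolve_secret(value):
    """Hypothetical helper: turn 'os.environ/VAR_NAME' into the value of VAR_NAME."""
    prefix = "os.environ/"
    if isinstance(value, str) and value.startswith(prefix):
        return os.environ[value[len(prefix):]]
    return value


# resolve_secret("os.environ/AWS_ACCESS_KEY_ID") reads AWS_ACCESS_KEY_ID from the
# environment; resolve_secret("us-west-2") is returned unchanged.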

View file

@@ -66,7 +66,7 @@ def common_checks(
         raise Exception(
             f"Team={team_object.team_id} is blocked. Update via `/team/unblock` if your admin."
         )
-    # 2. If user can call model
+    # 2. If team can call model
     if (
         _model is not None
         and team_object is not None
@@ -74,7 +74,11 @@ def common_checks(
         and _model not in team_object.models
     ):
         # this means the team has access to all models on the proxy
-        if "all-proxy-models" in team_object.models:
+        if (
+            "all-proxy-models" in team_object.models
+            or "*" in team_object.models
+            or "openai/*" in team_object.models
+        ):
             # this means the team has access to all models on the proxy
             pass
     # check if the team model is an access_group
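The widened condition above treats a team whose models list contains "all-proxy-models", "*", or "openai/*" as having access to every model on the proxy. A standalone sketch of that check (the function name here is made up, and litellm's real common_checks also handles access groups and other cases):

WILDCARD_TEAM_MODELS = {"all-proxy-models", "*", "openai/*"}


def team_can_call_model(model: str, team_models: list) -> bool:
    """Simplified sketch of the team-model check shown in the diff above."""
    if model in team_models:
        return True  # model is explicitly allowed for this team
    # any wildcard entry grants the team access to all models on the proxy
    return any(wildcard in team_models for wildcard in WILDCARD_TEAM_MODELS)


assert team_can_call_model("gpt-4o", ["openai/*"]) is True
assert team_can_call_model("gpt-4o", ["claude-3-opus"]) is False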

View file

@@ -1,8 +1,9 @@
 model_list:
   - model_name: fake-openai-endpoint
     litellm_params:
-      model: openai/gpt-4
-      api_key: os.environ/OPENAI_API_KEY
+      model: openai/fake
+      api_key: fake-key
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
 guardrails:
   - guardrail_name: "bedrock-pre-guard"
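The new guardrails entry above is truncated before its parameters, but it registers an AWS Bedrock guardrail with the proxy. A Bedrock guardrail can also be exercised directly through the bedrock-runtime ApplyGuardrail API; the boto3 sketch below shows that call with placeholder guardrail ID/version values, as an illustration of the underlying AWS primitive rather than of litellm's integration:

import boto3

# Placeholder values; substitute your own guardrail ID, version, and region.
GUARDRAIL_ID = "gr-example123"
GUARDRAIL_VERSION = "DRAFT"

client = boto3.client("bedrock-runtime", region_name="us-west-2")

response = client.apply_guardrail(
    guardrailIdentifier=GUARDRAIL_ID,
    guardrailVersion=GUARDRAIL_VERSION,
    source="INPUT",  # evaluate a user prompt; use "OUTPUT" for model responses
    content=[{"text": {"text": "example user input to screen"}}],
)

# "GUARDRAIL_INTERVENED" means the guardrail blocked or modified the content.
print(response.get("action"))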

View file

@@ -1588,7 +1588,7 @@ class ProxyConfig:
                     verbose_proxy_logger.debug(  # noqa
                         f"{blue_color_code}Set Cache on LiteLLM Proxy: {vars(litellm.cache.cache)}{reset_color_code}"
                     )
-                elif key == "cache" and value == False:
+                elif key == "cache" and value is False:
                     pass
                 elif key == "guardrails":
                     if premium_user is not True:
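The one-line change above from value == False to value is False is not just style: == False also matches other values that compare equal to False, such as 0, while is False only matches the boolean singleton. A quick illustration:

value = 0               # falsy, and 0 == False in Python
print(value == False)   # True  -> the old check would take this branch
print(value is False)   # False -> the new check skips it

flag = False
print(flag is False)    # True  -> only an explicit boolean False matches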
@@ -2672,6 +2672,13 @@ def giveup(e):
         and isinstance(e.message, str)
         and "Max parallel request limit reached" in e.message
     )
+
+    if (
+        general_settings.get("disable_retry_on_max_parallel_request_limit_error")
+        is True
+    ):
+        return True  # giveup if queuing max parallel request limits is disabled
+
     if result:
         verbose_proxy_logger.info(json.dumps({"event": "giveup", "exception": str(e)}))
     return result
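For context, a giveup predicate like this is typically consumed by the backoff library's on_exception decorator, where returning True stops further retries; that is what the new disable_retry_on_max_parallel_request_limit_error general setting forces. A minimal sketch of the pattern (the settings dict and decorated function are illustrative stand-ins, not the proxy's actual wiring):

import backoff

general_settings = {"disable_retry_on_max_parallel_request_limit_error": True}


def giveup(e):
    # Mirror of the logic above: never retry when the operator has disabled
    # retries for max-parallel-request errors.
    if general_settings.get("disable_retry_on_max_parallel_request_limit_error") is True:
        return True
    return "Max parallel request limit reached" in str(e)


@backoff.on_exception(backoff.expo, Exception, max_tries=3, giveup=giveup)
def call_downstream():
    # Illustrative stand-in for the proxied LLM call.
    raise RuntimeError("Max parallel request limit reached")

# With the setting enabled, backoff gives up after the first failure instead of
# retrying up to max_tries times.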