Merge branch 'main' into litellm_add_bedrock_guardrails

Ishaan Jaff 2024-08-22 17:28:49 -07:00 committed by GitHub
commit c23cf18a70
14 changed files with 173 additions and 22 deletions

View file

@@ -2,12 +2,3 @@ model_list:
   - model_name: "*"
     litellm_params:
       model: "*"
-litellm_settings:
-  success_callback: ["s3"]
-  cache: true
-  s3_callback_params:
-    s3_bucket_name: mytestbucketlitellm # AWS Bucket Name for S3
-    s3_region_name: us-west-2 # AWS Region Name for S3
-    s3_aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID # us os.environ/<variable name> to pass environment variables. This is AWS Access Key ID for S3
-    s3_aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY # AWS Secret Access Key for S3
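The s3_callback_params block being removed above uses litellm's os.environ/<variable name> convention to pull secrets from the environment instead of hard-coding them. litellm resolves these references internally; purely as an illustration of the convention (a hypothetical helper, not litellm's actual implementation), a resolver might look like this:

import os


def resolve_secret(value):
    """Hypothetical helper: turn 'os.environ/VAR_NAME' into the value of VAR_NAME."""
    prefix = "os.environ/"
    if isinstance(value, str) and value.startswith(prefix):
        return os.environ[value[len(prefix):]]
    return value


# resolve_secret("os.environ/AWS_ACCESS_KEY_ID") reads AWS_ACCESS_KEY_ID from the
# environment; resolve_secret("us-west-2") is returned unchanged.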

View file

@@ -66,7 +66,7 @@ def common_checks(
         raise Exception(
             f"Team={team_object.team_id} is blocked. Update via `/team/unblock` if your admin."
         )
-    # 2. If user can call model
+    # 2. If team can call model
     if (
         _model is not None
         and team_object is not None
@@ -74,7 +74,11 @@ def common_checks(
         and _model not in team_object.models
     ):
         # this means the team has access to all models on the proxy
-        if "all-proxy-models" in team_object.models:
+        if (
+            "all-proxy-models" in team_object.models
+            or "*" in team_object.models
+            or "openai/*" in team_object.models
+        ):
             # this means the team has access to all models on the proxy
             pass
     # check if the team model is an access_group
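The widened condition above treats a team whose models list contains "all-proxy-models", "*", or "openai/*" as having access to every model on the proxy. A standalone sketch of that check (the function name here is made up, and litellm's real common_checks also handles access groups and other cases):

WILDCARD_TEAM_MODELS = {"all-proxy-models", "*", "openai/*"}


def team_can_call_model(model: str, team_models: list) -> bool:
    """Simplified sketch of the team-model check shown in the diff above."""
    if model in team_models:
        return True  # model is explicitly allowed for this team
    # any wildcard entry grants the team access to all models on the proxy
    return any(wildcard in team_models for wildcard in WILDCARD_TEAM_MODELS)


assert team_can_call_model("gpt-4o", ["openai/*"]) is True
assert team_can_call_model("gpt-4o", ["claude-3-opus"]) is False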

View file

@@ -1,8 +1,9 @@
 model_list:
   - model_name: fake-openai-endpoint
     litellm_params:
-      model: openai/gpt-4
-      api_key: os.environ/OPENAI_API_KEY
+      model: openai/fake
+      api_key: fake-key
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
 guardrails:
   - guardrail_name: "bedrock-pre-guard"
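The new guardrails entry above is truncated before its parameters, but it registers an AWS Bedrock guardrail with the proxy. A Bedrock guardrail can also be exercised directly through the bedrock-runtime ApplyGuardrail API; the boto3 sketch below shows that call with placeholder guardrail ID/version values, as an illustration of the underlying AWS primitive rather than of litellm's integration:

import boto3

# Placeholder values; substitute your own guardrail ID, version, and region.
GUARDRAIL_ID = "gr-example123"
GUARDRAIL_VERSION = "DRAFT"

client = boto3.client("bedrock-runtime", region_name="us-west-2")

response = client.apply_guardrail(
    guardrailIdentifier=GUARDRAIL_ID,
    guardrailVersion=GUARDRAIL_VERSION,
    source="INPUT",  # evaluate a user prompt; use "OUTPUT" for model responses
    content=[{"text": {"text": "example user input to screen"}}],
)

# "GUARDRAIL_INTERVENED" means the guardrail blocked or modified the content.
print(response.get("action"))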

View file

@@ -1588,7 +1588,7 @@ class ProxyConfig:
                     verbose_proxy_logger.debug(  # noqa
                         f"{blue_color_code}Set Cache on LiteLLM Proxy: {vars(litellm.cache.cache)}{reset_color_code}"
                     )
-                elif key == "cache" and value == False:
+                elif key == "cache" and value is False:
                     pass
                 elif key == "guardrails":
                     if premium_user is not True:
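The one-line change above from value == False to value is False is not just style: == False also matches other values that compare equal to False, such as 0, while is False only matches the boolean singleton. A quick illustration:

value = 0               # falsy, and 0 == False in Python
print(value == False)   # True  -> the old check would take this branch
print(value is False)   # False -> the new check skips it

flag = False
print(flag is False)    # True  -> only an explicit boolean False matches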
@@ -2672,6 +2672,13 @@ def giveup(e):
         and isinstance(e.message, str)
         and "Max parallel request limit reached" in e.message
     )
+
+    if (
+        general_settings.get("disable_retry_on_max_parallel_request_limit_error")
+        is True
+    ):
+        return True  # giveup if queuing max parallel request limits is disabled
+
     if result:
         verbose_proxy_logger.info(json.dumps({"event": "giveup", "exception": str(e)}))
     return result
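For context, a giveup predicate like this is typically consumed by the backoff library's on_exception decorator, where returning True stops further retries; that is what the new disable_retry_on_max_parallel_request_limit_error general setting forces. A minimal sketch of the pattern (the settings dict and decorated function are illustrative stand-ins, not the proxy's actual wiring):

import backoff

general_settings = {"disable_retry_on_max_parallel_request_limit_error": True}


def giveup(e):
    # Mirror of the logic above: never retry when the operator has disabled
    # retries for max-parallel-request errors.
    if general_settings.get("disable_retry_on_max_parallel_request_limit_error") is True:
        return True
    return "Max parallel request limit reached" in str(e)


@backoff.on_exception(backoff.expo, Exception, max_tries=3, giveup=giveup)
def call_downstream():
    # Illustrative stand-in for the proxied LLM call.
    raise RuntimeError("Max parallel request limit reached")

# With the setting enabled, backoff gives up after the first failure instead of
# retrying up to max_tries times.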