From 30e985d79c16a080b8914dbeee6234eb0847ee2d Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Tue, 20 Aug 2024 08:16:28 -0700
Subject: [PATCH] feat control guardrails per API Key

---
 litellm/proxy/_types.py                   |  1 +
 .../key_management_endpoints.py           | 16 +++++-
 litellm/tests/test_key_generate_prisma.py | 54 +++++++++++++++++++
 3 files changed, 70 insertions(+), 1 deletion(-)

diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py
index 4f9d39d77..75934ee1f 100644
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@@ -587,6 +587,7 @@ class GenerateKeyRequest(GenerateRequestBase):
     send_invite_email: Optional[bool] = None
     model_rpm_limit: Optional[dict] = None
     model_tpm_limit: Optional[dict] = None
+    guardrails: Optional[List[str]] = None
 
 
 class GenerateKeyResponse(GenerateKeyRequest):
diff --git a/litellm/proxy/management_endpoints/key_management_endpoints.py b/litellm/proxy/management_endpoints/key_management_endpoints.py
index 79e2dcc2d..1758b416d 100644
--- a/litellm/proxy/management_endpoints/key_management_endpoints.py
+++ b/litellm/proxy/management_endpoints/key_management_endpoints.py
@@ -66,6 +66,7 @@ async def generate_key_fn(
     - budget_duration: Optional[str] - Budget is reset at the end of specified duration. If not set, budget is never reset. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d").
     - max_parallel_requests: Optional[int] - Rate limit a user based on the number of parallel requests. Raises 429 error, if user's parallel requests > x.
     - metadata: Optional[dict] - Metadata for key, store information for key. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }
+    - guardrails: Optional[List[str]] - List of active guardrails for the key
     - permissions: Optional[dict] - key-specific permissions. Currently just used for turning off pii masking (if connected). Example - {"pii": false}
     - model_max_budget: Optional[dict] - key-specific model budget in USD. Example - {"text-davinci-002": 0.5, "gpt-3.5-turbo": 0.5}. IF null or {} then no model specific budget.
     - model_rpm_limit: Optional[dict] - key-specific model rpm limit. Example - {"text-davinci-002": 1000, "gpt-3.5-turbo": 1000}. IF null or {} then no model specific rpm limit.
@@ -321,11 +322,12 @@ async def update_key_fn(
                     detail={"error": f"Team not found, passed team_id={data.team_id}"},
                 )
 
+        _metadata_fields = ["model_rpm_limit", "model_tpm_limit", "guardrails"]
         # get non default values for key
         non_default_values = {}
         for k, v in data_json.items():
             # this field gets stored in metadata
-            if key == "model_rpm_limit" or key == "model_tpm_limit":
+            if key in _metadata_fields:
                 continue
             if v is not None and v not in (
                 [],
@@ -366,6 +368,14 @@ async def update_key_fn(
             non_default_values["metadata"] = _metadata
             non_default_values.pop("model_rpm_limit", None)
 
+        if data.guardrails:
+            _metadata = existing_key_row.metadata or {}
+            _metadata["guardrails"] = data.guardrails
+
+            # update values that will be written to the DB
+            non_default_values["metadata"] = _metadata
+            non_default_values.pop("guardrails", None)
+
         response = await prisma_client.update_data(
             token=key, data={**non_default_values, "token": key}
         )
@@ -734,6 +744,7 @@ async def generate_key_helper_fn(
     model_max_budget: Optional[dict] = {},
     model_rpm_limit: Optional[dict] = {},
     model_tpm_limit: Optional[dict] = {},
+    guardrails: Optional[list] = None,
     teams: Optional[list] = None,
     organization_id: Optional[str] = None,
     table_name: Optional[Literal["key", "user"]] = None,
@@ -783,6 +794,9 @@ async def generate_key_helper_fn(
     if model_tpm_limit is not None:
         metadata = metadata or {}
         metadata["model_tpm_limit"] = model_tpm_limit
+    if guardrails is not None:
+        metadata = metadata or {}
+        metadata["guardrails"] = guardrails
 
     metadata_json = json.dumps(metadata)
     model_max_budget_json = json.dumps(model_max_budget)
diff --git a/litellm/tests/test_key_generate_prisma.py b/litellm/tests/test_key_generate_prisma.py
index 907262d48..2641edbb4 100644
--- a/litellm/tests/test_key_generate_prisma.py
+++ b/litellm/tests/test_key_generate_prisma.py
@@ -2770,6 +2770,60 @@ async def test_generate_key_with_model_tpm_limit(prisma_client):
     }
 
 
+@pytest.mark.asyncio()
+async def test_generate_key_with_guardrails(prisma_client):
+    print("prisma client=", prisma_client)
+
+    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
+    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
+    await litellm.proxy.proxy_server.prisma_client.connect()
+    request = GenerateKeyRequest(
+        guardrails=["aporia-pre-call"],
+        metadata={
+            "team": "litellm-team3",
+        },
+    )
+    key = await generate_key_fn(
+        data=request,
+        user_api_key_dict=UserAPIKeyAuth(
+            user_role=LitellmUserRoles.PROXY_ADMIN,
+            api_key="sk-1234",
+            user_id="1234",
+        ),
+    )
+    print("generated key=", key)
+
+    generated_key = key.key
+
+    # use generated key to auth in
+    result = await info_key_fn(key=generated_key)
+    print("result from info_key_fn", result)
+    assert result["key"] == generated_key
+    print("\n info for key=", result["info"])
+    assert result["info"]["metadata"] == {
+        "team": "litellm-team3",
+        "guardrails": ["aporia-pre-call"],
+    }
+
+    # Update the guardrails attached to the key
+    request = UpdateKeyRequest(
+        key=generated_key,
+        guardrails=["aporia-pre-call", "aporia-post-call"],
+    )
+    _request = Request(scope={"type": "http"})
+    _request._url = URL(url="/update/key")
+
+    await update_key_fn(data=request, request=_request)
+    result = await info_key_fn(key=generated_key)
+    print("result from info_key_fn", result)
+    assert result["key"] == generated_key
+    print("\n info for key=", result["info"])
+    assert result["info"]["metadata"] == {
+        "team": "litellm-team3",
+        "guardrails": ["aporia-pre-call", "aporia-post-call"],
+    }
+
+
 @pytest.mark.asyncio()
 async def test_team_access_groups(prisma_client):
     """
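
Usage note: below is a minimal sketch of how the new `guardrails` field can be exercised against a running proxy over HTTP, rather than by calling the endpoint functions directly as the test above does. It assumes a LiteLLM proxy listening at http://localhost:4000 with master key "sk-1234" and guardrails named "aporia-pre-call" / "aporia-post-call" configured in the proxy config; the request and response shapes follow the /key/generate, /key/update, and /key/info handlers referenced in this patch, so adjust names and URLs to your own setup.

import requests

PROXY_BASE_URL = "http://localhost:4000"  # assumed local proxy address
HEADERS = {"Authorization": "Bearer sk-1234"}  # assumed master key

# Create a key with one guardrail; per this patch the proxy stores the
# list under the key's metadata["guardrails"].
generate_resp = requests.post(
    f"{PROXY_BASE_URL}/key/generate",
    headers=HEADERS,
    json={"guardrails": ["aporia-pre-call"], "metadata": {"team": "litellm-team3"}},
)
generate_resp.raise_for_status()
new_key = generate_resp.json()["key"]

# Update the same key to attach both a pre-call and a post-call guardrail.
update_resp = requests.post(
    f"{PROXY_BASE_URL}/key/update",
    headers=HEADERS,
    json={"key": new_key, "guardrails": ["aporia-pre-call", "aporia-post-call"]},
)
update_resp.raise_for_status()

# Inspect the key; its metadata should now list both guardrail names.
info_resp = requests.get(
    f"{PROXY_BASE_URL}/key/info", headers=HEADERS, params={"key": new_key}
)
print(info_resp.json()["info"]["metadata"]["guardrails"])

The patch itself only persists the guardrail names in the key's metadata; enforcing them at request time is expected to happen in the proxy's guardrail hooks, which are outside this diff.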