diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py
index 2eab4a66d0..4f9d39d773 100644
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@@ -585,6 +585,8 @@ class GenerateKeyRequest(GenerateRequestBase):
     model_config = ConfigDict(protected_namespaces=())
     send_invite_email: Optional[bool] = None
+    model_rpm_limit: Optional[dict] = None
+    model_tpm_limit: Optional[dict] = None
 
 
 class GenerateKeyResponse(GenerateKeyRequest):
diff --git a/litellm/proxy/management_endpoints/key_management_endpoints.py b/litellm/proxy/management_endpoints/key_management_endpoints.py
index 968b3ede91..e0c082b5f7 100644
--- a/litellm/proxy/management_endpoints/key_management_endpoints.py
+++ b/litellm/proxy/management_endpoints/key_management_endpoints.py
@@ -68,7 +68,8 @@ async def generate_key_fn(
     - metadata: Optional[dict] - Metadata for key, store information for key. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }
     - permissions: Optional[dict] - key-specific permissions. Currently just used for turning off pii masking (if connected). Example - {"pii": false}
     - model_max_budget: Optional[dict] - key-specific model budget in USD. Example - {"text-davinci-002": 0.5, "gpt-3.5-turbo": 0.5}. IF null or {} then no model specific budget.
-
+    - model_rpm_limit: Optional[dict] - key-specific model rpm limit. Example - {"text-davinci-002": 1000, "gpt-3.5-turbo": 1000}. IF null or {} then no model specific rpm limit.
+    - model_tpm_limit: Optional[dict] - key-specific model tpm limit. Example - {"text-davinci-002": 1000, "gpt-3.5-turbo": 1000}. IF null or {} then no model specific tpm limit.
 
     Examples:
 
     1. Allow users to turn on/off pii masking
@@ -343,6 +344,11 @@ async def update_key_fn(
             key_reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
             non_default_values["budget_reset_at"] = key_reset_at
 
+        # Update metadata for virtual Key
+        _metadata = existing_key_row.metadata or {}
+        _metadata.update(data_json.get("metadata", {}))
+        non_default_values["metadata"] = _metadata
+
         response = await prisma_client.update_data(
             token=key, data={**non_default_values, "token": key}
         )
@@ -709,6 +715,8 @@ async def generate_key_helper_fn(
     allowed_cache_controls: Optional[list] = [],
     permissions: Optional[dict] = {},
     model_max_budget: Optional[dict] = {},
+    model_rpm_limit: Optional[dict] = {},
+    model_tpm_limit: Optional[dict] = {},
     teams: Optional[list] = None,
     organization_id: Optional[str] = None,
     table_name: Optional[Literal["key", "user"]] = None,
@@ -750,6 +758,15 @@ async def generate_key_helper_fn(
         aliases_json = json.dumps(aliases)
         config_json = json.dumps(config)
         permissions_json = json.dumps(permissions)
+
+        # Add model_rpm_limit and model_tpm_limit to metadata
+        if model_rpm_limit is not None:
+            metadata = metadata or {}
+            metadata["model_rpm_limit"] = model_rpm_limit
+        if model_tpm_limit is not None:
+            metadata = metadata or {}
+            metadata["model_tpm_limit"] = model_tpm_limit
+
         metadata_json = json.dumps(metadata)
         model_max_budget_json = json.dumps(model_max_budget)
         user_role = user_role
diff --git a/litellm/tests/test_key_generate_prisma.py b/litellm/tests/test_key_generate_prisma.py
index a757476a55..66d49f886d 100644
--- a/litellm/tests/test_key_generate_prisma.py
+++ b/litellm/tests/test_key_generate_prisma.py
@@ -2710,3 +2710,60 @@ async def test_custom_api_key_header_name(prisma_client):
         pass
 
     # this should pass because X-Litellm-Key is valid
+
+
+@pytest.mark.asyncio()
+async def test_generate_key_with_model_tpm_limit(prisma_client):
+    print("prisma client=", prisma_client)
+
+    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
+    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
+    await litellm.proxy.proxy_server.prisma_client.connect()
+    request = GenerateKeyRequest(
+        metadata={
+            "team": "litellm-team3",
+            "model_tpm_limit": {"gpt-4": 100},
+            "model_rpm_limit": {"gpt-4": 2},
+        }
+    )
+    key = await generate_key_fn(
+        data=request,
+        user_api_key_dict=UserAPIKeyAuth(
+            user_role=LitellmUserRoles.PROXY_ADMIN,
+            api_key="sk-1234",
+            user_id="1234",
+        ),
+    )
+    print(key)
+
+    generated_key = key.key
+
+    # use generated key to auth in
+    result = await info_key_fn(key=generated_key)
+    print("result from info_key_fn", result)
+    assert result["key"] == generated_key
+    print("\n info for key=", result["info"])
+    assert result["info"]["metadata"] == {
+        "team": "litellm-team3",
+        "model_tpm_limit": {"gpt-4": 100},
+        "model_rpm_limit": {"gpt-4": 2},
+    }
+
+    # Update model tpm_limit and rpm_limit
+    request = UpdateKeyRequest(
+        key=generated_key,
+        metadata={"model_tpm_limit": {"gpt-4": 200}, "model_rpm_limit": {"gpt-4": 3}},
+    )
+    _request = Request(scope={"type": "http"})
+    _request._url = URL(url="/update/key")
+
+    await update_key_fn(data=request, request=_request)
+    result = await info_key_fn(key=generated_key)
+    print("result from info_key_fn", result)
+    assert result["key"] == generated_key
+    print("\n info for key=", result["info"])
+    assert result["info"]["metadata"] == {
+        "team": "litellm-team3",
+        "model_tpm_limit": {"gpt-4": 200},
+        "model_rpm_limit": {"gpt-4": 3},
+    }