forked from phoenix/litellm-mirror

add tpm limits per api key per model

This commit is contained in:
parent fa96610bbc
commit 68b54bed85

3 changed files with 77 additions and 1 deletion
@@ -585,6 +585,8 @@ class GenerateKeyRequest(GenerateRequestBase):
     model_config = ConfigDict(protected_namespaces=())
     send_invite_email: Optional[bool] = None
+    model_rpm_limit: Optional[dict] = None
+    model_tpm_limit: Optional[dict] = None


 class GenerateKeyResponse(GenerateKeyRequest):
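For orientation, the two new fields are plain per-model dictionaries on the Pydantic request model. A minimal sketch of constructing a request with them (the field names come from this diff; the import path is an assumption):

# Hypothetical usage sketch of the new request fields; import path assumed.
from litellm.proxy._types import GenerateKeyRequest

req = GenerateKeyRequest(
    model_rpm_limit={"gpt-4": 2},    # requests per minute, keyed by model
    model_tpm_limit={"gpt-4": 100},  # tokens per minute, keyed by model
)
print(req.model_dump(exclude_none=True))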
@@ -68,7 +68,8 @@ async def generate_key_fn(
     - metadata: Optional[dict] - Metadata for key, store information for key. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }
     - permissions: Optional[dict] - key-specific permissions. Currently just used for turning off pii masking (if connected). Example - {"pii": false}
     - model_max_budget: Optional[dict] - key-specific model budget in USD. Example - {"text-davinci-002": 0.5, "gpt-3.5-turbo": 0.5}. IF null or {} then no model specific budget.
+    - model_rpm_limit: Optional[dict] - key-specific model rpm limit. Example - {"text-davinci-002": 1000, "gpt-3.5-turbo": 1000}. IF null or {} then no model specific rpm limit.
+    - model_tpm_limit: Optional[dict] - key-specific model tpm limit. Example - {"text-davinci-002": 1000, "gpt-3.5-turbo": 1000}. IF null or {} then no model specific tpm limit.

     Examples:

     1. Allow users to turn on/off pii masking
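Over HTTP, the documented fields become request-body keys on the proxy's /key/generate endpoint. A hedged sketch of such a call; the proxy URL and master key below are placeholders:

# Sketch: requesting a key with per-model limits over HTTP.
import requests

resp = requests.post(
    "http://localhost:4000/key/generate",   # placeholder proxy address
    headers={"Authorization": "Bearer sk-1234"},  # placeholder master key
    json={
        "model_rpm_limit": {"gpt-3.5-turbo": 1000},
        "model_tpm_limit": {"gpt-3.5-turbo": 1000},
    },
)
print(resp.json()["key"])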
@@ -343,6 +344,11 @@ async def update_key_fn(
             key_reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
             non_default_values["budget_reset_at"] = key_reset_at

+        # Update metadata for virtual Key
+        _metadata = existing_key_row.metadata or {}
+        _metadata.update(data_json.get("metadata", {}))
+        non_default_values["metadata"] = _metadata
+
         response = await prisma_client.update_data(
             token=key, data={**non_default_values, "token": key}
         )
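The metadata merge above is a shallow dict.update: keys already stored on the key row survive, while keys named in the update overwrite them wholesale, including nested dicts like model_tpm_limit. A standalone sketch of that behavior:

# Sketch of the shallow-merge semantics used in update_key_fn above.
existing = {"team": "litellm-team3", "model_tpm_limit": {"gpt-4": 100}}
incoming = {"model_tpm_limit": {"gpt-4": 200}}

merged = dict(existing)
merged.update(incoming)  # whole values are replaced, not deep-merged
assert merged == {"team": "litellm-team3", "model_tpm_limit": {"gpt-4": 200}}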
@@ -709,6 +715,8 @@ async def generate_key_helper_fn(
     allowed_cache_controls: Optional[list] = [],
     permissions: Optional[dict] = {},
     model_max_budget: Optional[dict] = {},
+    model_rpm_limit: Optional[dict] = {},
+    model_tpm_limit: Optional[dict] = {},
     teams: Optional[list] = None,
     organization_id: Optional[str] = None,
     table_name: Optional[Literal["key", "user"]] = None,
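One behavioral wrinkle worth noting: the new parameters default to {} rather than None, and an empty dict is not None, so the guards in the next hunk fire even when no limits were requested. A tiny sketch of why:

# {} is falsy, but it is not None, so an "is not None" guard still passes.
model_rpm_limit: dict = {}  # the declared default above
if model_rpm_limit is not None:
    # with the default, an empty dict still ends up written into metadata
    print("guard fires even for empty limits:", model_rpm_limit)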
@@ -750,6 +758,15 @@ async def generate_key_helper_fn(
     aliases_json = json.dumps(aliases)
     config_json = json.dumps(config)
     permissions_json = json.dumps(permissions)
+
+    # Add model_rpm_limit and model_tpm_limit to metadata
+    if model_rpm_limit is not None:
+        metadata = metadata or {}
+        metadata["model_rpm_limit"] = model_rpm_limit
+    if model_tpm_limit is not None:
+        metadata = metadata or {}
+        metadata["model_tpm_limit"] = model_tpm_limit
+
     metadata_json = json.dumps(metadata)
     model_max_budget_json = json.dumps(model_max_budget)
     user_role = user_role
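A design note: the limits are folded into the key's existing metadata JSON blob before serialization rather than stored in new database columns, which sidesteps a schema migration. A sketch of the stored shape, using the same illustrative values the test below asserts on:

# Sketch: the metadata blob after the block above runs.
import json

metadata = {
    "team": "litellm-team3",
    "model_rpm_limit": {"gpt-4": 2},
    "model_tpm_limit": {"gpt-4": 100},
}
metadata_json = json.dumps(metadata)  # this string is what gets persisted
print(metadata_json)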
@@ -2710,3 +2710,60 @@ async def test_custom_api_key_header_name(prisma_client):
     pass

     # this should pass because X-Litellm-Key is valid


+@pytest.mark.asyncio()
+async def test_generate_key_with_model_tpm_limit(prisma_client):
+    print("prisma client=", prisma_client)
+
+    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
+    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
+    await litellm.proxy.proxy_server.prisma_client.connect()
+    request = GenerateKeyRequest(
+        metadata={
+            "team": "litellm-team3",
+            "model_tpm_limit": {"gpt-4": 100},
+            "model_rpm_limit": {"gpt-4": 2},
+        }
+    )
+    key = await generate_key_fn(
+        data=request,
+        user_api_key_dict=UserAPIKeyAuth(
+            user_role=LitellmUserRoles.PROXY_ADMIN,
+            api_key="sk-1234",
+            user_id="1234",
+        ),
+    )
+    print(key)
+
+    generated_key = key.key
+
+    # use generated key to auth in
+    result = await info_key_fn(key=generated_key)
+    print("result from info_key_fn", result)
+    assert result["key"] == generated_key
+    print("\n info for key=", result["info"])
+    assert result["info"]["metadata"] == {
+        "team": "litellm-team3",
+        "model_tpm_limit": {"gpt-4": 100},
+        "model_rpm_limit": {"gpt-4": 2},
+    }
+
+    # Update model tpm_limit and rpm_limit
+    request = UpdateKeyRequest(
+        key=generated_key,
+        metadata={"model_tpm_limit": {"gpt-4": 200}, "model_rpm_limit": {"gpt-4": 3}},
+    )
+    _request = Request(scope={"type": "http"})
+    _request._url = URL(url="/update/key")
+
+    await update_key_fn(data=request, request=_request)
+    result = await info_key_fn(key=generated_key)
+    print("result from info_key_fn", result)
+    assert result["key"] == generated_key
+    print("\n info for key=", result["info"])
+    assert result["info"]["metadata"] == {
+        "team": "litellm-team3",
+        "model_tpm_limit": {"gpt-4": 200},
+        "model_rpm_limit": {"gpt-4": 3},
+    }