add tpm limits per api key per model

This commit is contained in:
Ishaan Jaff 2024-08-17 13:20:55 -07:00
parent fa96610bbc
commit 68b54bed85
3 changed files with 77 additions and 1 deletion

View file

@@ -68,7 +68,8 @@ async def generate_key_fn(
- metadata: Optional[dict] - Metadata for key, store information for key. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }
- permissions: Optional[dict] - key-specific permissions. Currently just used for turning off pii masking (if connected). Example - {"pii": false}
- model_max_budget: Optional[dict] - key-specific model budget in USD. Example - {"text-davinci-002": 0.5, "gpt-3.5-turbo": 0.5}. If null or {}, no model-specific budget is applied.
- model_rpm_limit: Optional[dict] - key-specific model rpm (requests per minute) limit. Example - {"text-davinci-002": 1000, "gpt-3.5-turbo": 1000}. If null or {}, no model-specific rpm limit is applied.
- model_tpm_limit: Optional[dict] - key-specific model tpm (tokens per minute) limit. Example - {"text-davinci-002": 1000, "gpt-3.5-turbo": 1000}. If null or {}, no model-specific tpm limit is applied.
Examples:
1. Allow users to turn on/off pii masking
@@ -343,6 +344,11 @@ async def update_key_fn(
key_reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
non_default_values["budget_reset_at"] = key_reset_at
# Update metadata for virtual Key
_metadata = existing_key_row.metadata or {}
_metadata.update(data_json.get("metadata", {}))
non_default_values["metadata"] = _metadata
response = await prisma_client.update_data(
token=key, data={**non_default_values, "token": key}
)
@@ -709,6 +715,8 @@ async def generate_key_helper_fn(
allowed_cache_controls: Optional[list] = [],
permissions: Optional[dict] = {},
model_max_budget: Optional[dict] = {},
model_rpm_limit: Optional[dict] = {},
model_tpm_limit: Optional[dict] = {},
teams: Optional[list] = None,
organization_id: Optional[str] = None,
table_name: Optional[Literal["key", "user"]] = None,
@@ -750,6 +758,15 @@ async def generate_key_helper_fn(
aliases_json = json.dumps(aliases)
config_json = json.dumps(config)
permissions_json = json.dumps(permissions)
# Add model_rpm_limit and model_tpm_limit to metadata
if model_rpm_limit is not None:
metadata = metadata or {}
metadata["model_rpm_limit"] = model_rpm_limit
if model_tpm_limit is not None:
metadata = metadata or {}
metadata["model_tpm_limit"] = model_tpm_limit
metadata_json = json.dumps(metadata)
model_max_budget_json = json.dumps(model_max_budget)
user_role = user_role