From a2f1d2ee526b098041601fac779eef145e72cf92 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Fri, 16 Feb 2024 15:44:34 -0800 Subject: [PATCH] (feat) set key-model budgets --- litellm/proxy/_types.py | 2 ++ litellm/proxy/proxy_server.py | 5 +++++ litellm/proxy/schema.prisma | 2 ++ schema.prisma | 2 ++ 4 files changed, 11 insertions(+) diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py index 372b953e0..b791f0dd9 100644 --- a/litellm/proxy/_types.py +++ b/litellm/proxy/_types.py @@ -383,6 +383,8 @@ class LiteLLM_VerificationToken(LiteLLMBase): budget_reset_at: Optional[datetime] = None allowed_cache_controls: Optional[list] = [] permissions: Dict = {} + model_spend: Dict = {} + model_max_budget: Dict = {} class UserAPIKeyAuth( diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 37f55072e..6866b142a 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -1635,6 +1635,7 @@ async def generate_key_helper_fn( key_alias: Optional[str] = None, allowed_cache_controls: Optional[list] = [], permissions: Optional[dict] = {}, + model_max_budget: Optional[dict] = {}, ): global prisma_client, custom_db_client, user_api_key_cache @@ -1668,6 +1669,8 @@ async def generate_key_helper_fn( config_json = json.dumps(config) permissions_json = json.dumps(permissions) metadata_json = json.dumps(metadata) + model_max_budget_json = json.dumps(model_max_budget) + user_id = user_id or str(uuid.uuid4()) user_role = user_role or "app_user" tpm_limit = tpm_limit @@ -1710,6 +1713,7 @@ async def generate_key_helper_fn( "budget_reset_at": key_reset_at, "allowed_cache_controls": allowed_cache_controls, "permissions": permissions_json, + "model_max_budget": model_max_budget_json, } if ( general_settings.get("allow_user_auth", False) == True @@ -3059,6 +3063,7 @@ async def generate_key_fn( - max_parallel_requests: Optional[int] - Rate limit a user based on the number of parallel requests. Raises 429 error, if user's parallel requests > x. - metadata: Optional[dict] - Metadata for key, store information for key. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" } - permissions: Optional[dict] - key-specific permissions. Currently just used for turning off pii masking (if connected). Example - {"pii": false} + - model_max_budget: Optional[dict] - key-specific model budget in USD. Example - {"text-davinci-002": 0.5, "gpt-3.5-turbo": 0.5}. IF null or {} then no model specific budget. Returns: - key: (str) The generated api key diff --git a/litellm/proxy/schema.prisma b/litellm/proxy/schema.prisma index 5a57b8808..df840a9ee 100644 --- a/litellm/proxy/schema.prisma +++ b/litellm/proxy/schema.prisma @@ -64,6 +64,8 @@ model LiteLLM_VerificationToken { budget_duration String? budget_reset_at DateTime? allowed_cache_controls String[] @default([]) + model_spend Json @default("{}") + model_max_budget Json @default("{}") } // store proxy config.yaml diff --git a/schema.prisma b/schema.prisma index 5a57b8808..df840a9ee 100644 --- a/schema.prisma +++ b/schema.prisma @@ -64,6 +64,8 @@ model LiteLLM_VerificationToken { budget_duration String? budget_reset_at DateTime? allowed_cache_controls String[] @default([]) + model_spend Json @default("{}") + model_max_budget Json @default("{}") } // store proxy config.yaml