From 3aba9019ac27481361c3fa87956907f5c41dd4ca Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Sat, 2 Mar 2024 10:06:33 -0800 Subject: [PATCH 01/17] (feat) track used api_base in response --- litellm/router.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/litellm/router.py b/litellm/router.py index 6f33d0b0d..59cbf43d5 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -357,6 +357,11 @@ class Router: verbose_router_logger.info( f"litellm.completion(model={model_name})\033[32m 200 OK\033[0m" ) + # set used api_base in response + if hasattr(response, "_hidden_params"): + response._hidden_params["api_base"] = deployment.get( + "litellm_params", {} + ).get("api_base", None) return response except Exception as e: verbose_router_logger.info( @@ -446,6 +451,12 @@ class Router: verbose_router_logger.info( f"litellm.acompletion(model={model_name})\033[32m 200 OK\033[0m" ) + + # set used api_base in response + if hasattr(response, "_hidden_params"): + response._hidden_params["api_base"] = deployment.get( + "litellm_params", {} + ).get("api_base", None) return response except Exception as e: verbose_router_logger.info( @@ -602,6 +613,12 @@ class Router: verbose_router_logger.info( f"litellm.aimage_generation(model={model_name})\033[32m 200 OK\033[0m" ) + + # set used api_base in response + if hasattr(response, "_hidden_params"): + response._hidden_params["api_base"] = deployment.get( + "litellm_params", {} + ).get("api_base", None) return response except Exception as e: verbose_router_logger.info( @@ -694,6 +711,11 @@ class Router: verbose_router_logger.info( f"litellm.amoderation(model={model_name})\033[32m 200 OK\033[0m" ) + # set used api_base in response + if hasattr(response, "_hidden_params"): + response._hidden_params["api_base"] = deployment.get( + "litellm_params", {} + ).get("api_base", None) return response except Exception as e: verbose_router_logger.info( @@ -825,6 +847,12 @@ class Router: verbose_router_logger.info( f"litellm.atext_completion(model={model_name})\033[32m 200 OK\033[0m" ) + + # set used api_base in response + if hasattr(response, "_hidden_params"): + response._hidden_params["api_base"] = deployment.get( + "litellm_params", {} + ).get("api_base", None) return response except Exception as e: verbose_router_logger.info( @@ -954,6 +982,12 @@ class Router: verbose_router_logger.info( f"litellm.aembedding(model={model_name})\033[32m 200 OK\033[0m" ) + + # set used api_base in response + if hasattr(response, "_hidden_params"): + response._hidden_params["api_base"] = deployment.get( + "litellm_params", {} + ).get("api_base", None) return response except Exception as e: verbose_router_logger.info( From 0bb45b33fd1050abeeb48ce3bcc173d604219b36 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Sat, 2 Mar 2024 11:06:03 -0800 Subject: [PATCH 02/17] (feat) send api_base --- litellm/proxy/utils.py | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index 1cc52401a..948e686dd 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -64,6 +64,7 @@ class ProxyLogging: litellm.callbacks.append(self.max_parallel_request_limiter) litellm.callbacks.append(self.max_budget_limiter) litellm.callbacks.append(self.cache_control_check) + litellm.callbacks.append(self.response_taking_too_long_callback) for callback in litellm.callbacks: if callback not in litellm.input_callback: litellm.input_callback.append(callback) @@ -142,6 +143,30 @@ class ProxyLogging: raise e return data + async def response_taking_too_long_callback( + self, + kwargs, # kwargs to completion + completion_response, # response from completion + start_time, + end_time, # start/end time + ): + if self.alerting is None: + return + time_difference = end_time - start_time + # Convert the timedelta to float (in seconds) + time_difference_float = time_difference.total_seconds() + litellm_params = kwargs.get("litellm_params", {}) + api_base = litellm_params.get("api_base", "") + model = kwargs.get("model", "") + messages = kwargs.get("messages", "") + request_info = f"\nRequest Model: `{model}`\nAPI Base: `{api_base}`\nMessages: `{messages}`" + slow_message = f"`Responses are slow - {round(time_difference_float,2)}s response time > Alerting threshold: {self.alerting_threshold}s`" + if time_difference_float > self.alerting_threshold: + await self.alerting_handler( + message=slow_message + request_info, + level="Low", + ) + async def response_taking_too_long( self, start_time: Optional[float] = None, @@ -189,16 +214,6 @@ class ProxyLogging: level="Medium", ) - elif ( - type == "slow_response" and start_time is not None and end_time is not None - ): - slow_message = f"`Responses are slow - {round(end_time-start_time,2)}s response time > Alerting threshold: {self.alerting_threshold}s`" - if end_time - start_time > self.alerting_threshold: - await self.alerting_handler( - message=slow_message + request_info, - level="Low", - ) - async def budget_alerts( self, type: Literal[ From 127bc743b2b298a3b79e80ec453b78aab5ca6e79 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Sat, 2 Mar 2024 11:09:40 -0800 Subject: [PATCH 03/17] (feat) cleanup --- litellm/proxy/proxy_server.py | 42 ----------------------------------- 1 file changed, 42 deletions(-) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 869de6dde..17db8c3ab 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -2138,14 +2138,6 @@ async def async_data_generator(response, user_api_key_dict): except Exception as e: yield f"data: {str(e)}\n\n" - ### ALERTING ### - end_time = time.time() - asyncio.create_task( - proxy_logging_obj.response_taking_too_long( - start_time=start_time, end_time=end_time, type="slow_response" - ) - ) - # Streaming is done, yield the [DONE] chunk done_message = "[DONE]" yield f"data: {done_message}\n\n" @@ -2494,14 +2486,6 @@ async def completion( headers=custom_headers, ) - ### ALERTING ### - end_time = time.time() - asyncio.create_task( - proxy_logging_obj.response_taking_too_long( - start_time=start_time, end_time=end_time, type="slow_response" - ) - ) - fastapi_response.headers["x-litellm-model-id"] = model_id return response except Exception as e: @@ -2700,14 +2684,6 @@ async def chat_completion( headers=custom_headers, ) - ### ALERTING ### - end_time = time.time() - asyncio.create_task( - proxy_logging_obj.response_taking_too_long( - start_time=start_time, end_time=end_time, type="slow_response" - ) - ) - fastapi_response.headers["x-litellm-model-id"] = model_id ### CALL HOOKS ### - modify outgoing data @@ -2915,12 +2891,6 @@ async def embeddings( ### ALERTING ### data["litellm_status"] = "success" # used for alerting - end_time = time.time() - asyncio.create_task( - proxy_logging_obj.response_taking_too_long( - start_time=start_time, end_time=end_time, type="slow_response" - ) - ) return response except Exception as e: @@ -3066,12 +3036,6 @@ async def image_generation( ### ALERTING ### data["litellm_status"] = "success" # used for alerting - end_time = time.time() - asyncio.create_task( - proxy_logging_obj.response_taking_too_long( - start_time=start_time, end_time=end_time, type="slow_response" - ) - ) return response except Exception as e: @@ -3225,12 +3189,6 @@ async def moderations( ### ALERTING ### data["litellm_status"] = "success" # used for alerting - end_time = time.time() - asyncio.create_task( - proxy_logging_obj.response_taking_too_long( - start_time=start_time, end_time=end_time, type="slow_response" - ) - ) return response except Exception as e: From 868a415aa0160985ee27850f0faef04e06b1dfb1 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Sat, 2 Mar 2024 11:12:09 -0800 Subject: [PATCH 04/17] Revert "(feat) track used api_base in response" This reverts commit 3aba9019ac27481361c3fa87956907f5c41dd4ca. --- litellm/router.py | 34 ---------------------------------- 1 file changed, 34 deletions(-) diff --git a/litellm/router.py b/litellm/router.py index 59cbf43d5..6f33d0b0d 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -357,11 +357,6 @@ class Router: verbose_router_logger.info( f"litellm.completion(model={model_name})\033[32m 200 OK\033[0m" ) - # set used api_base in response - if hasattr(response, "_hidden_params"): - response._hidden_params["api_base"] = deployment.get( - "litellm_params", {} - ).get("api_base", None) return response except Exception as e: verbose_router_logger.info( @@ -451,12 +446,6 @@ class Router: verbose_router_logger.info( f"litellm.acompletion(model={model_name})\033[32m 200 OK\033[0m" ) - - # set used api_base in response - if hasattr(response, "_hidden_params"): - response._hidden_params["api_base"] = deployment.get( - "litellm_params", {} - ).get("api_base", None) return response except Exception as e: verbose_router_logger.info( @@ -613,12 +602,6 @@ class Router: verbose_router_logger.info( f"litellm.aimage_generation(model={model_name})\033[32m 200 OK\033[0m" ) - - # set used api_base in response - if hasattr(response, "_hidden_params"): - response._hidden_params["api_base"] = deployment.get( - "litellm_params", {} - ).get("api_base", None) return response except Exception as e: verbose_router_logger.info( @@ -711,11 +694,6 @@ class Router: verbose_router_logger.info( f"litellm.amoderation(model={model_name})\033[32m 200 OK\033[0m" ) - # set used api_base in response - if hasattr(response, "_hidden_params"): - response._hidden_params["api_base"] = deployment.get( - "litellm_params", {} - ).get("api_base", None) return response except Exception as e: verbose_router_logger.info( @@ -847,12 +825,6 @@ class Router: verbose_router_logger.info( f"litellm.atext_completion(model={model_name})\033[32m 200 OK\033[0m" ) - - # set used api_base in response - if hasattr(response, "_hidden_params"): - response._hidden_params["api_base"] = deployment.get( - "litellm_params", {} - ).get("api_base", None) return response except Exception as e: verbose_router_logger.info( @@ -982,12 +954,6 @@ class Router: verbose_router_logger.info( f"litellm.aembedding(model={model_name})\033[32m 200 OK\033[0m" ) - - # set used api_base in response - if hasattr(response, "_hidden_params"): - response._hidden_params["api_base"] = deployment.get( - "litellm_params", {} - ).get("api_base", None) return response except Exception as e: verbose_router_logger.info( From 5feea0483188af59a09e63bada96dc7a48fada7c Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 2 Mar 2024 11:59:17 -0800 Subject: [PATCH 05/17] build: update schema.prisma --- litellm/proxy/schema.prisma | 46 ++++++++++++++++++++++++++++++++----- schema.prisma | 43 ++++++++++++++++++++++++++++++---- 2 files changed, 78 insertions(+), 11 deletions(-) diff --git a/litellm/proxy/schema.prisma b/litellm/proxy/schema.prisma index 6a9b72728..f31fa130a 100644 --- a/litellm/proxy/schema.prisma +++ b/litellm/proxy/schema.prisma @@ -7,10 +7,42 @@ generator client { provider = "prisma-client-py" } +// Budget / Rate Limits for an org +model LiteLLM_BudgetTable { + budget_id String @id @default(uuid()) + max_budget Float? + max_parallel_requests Int? + tpm_limit BigInt? + rpm_limit BigInt? + model_max_budget Json @default("{}") + budget_duration String? + budget_reset_at DateTime? + created_at DateTime @default(now()) @map("created_at") + created_by String + updated_at DateTime @default(now()) @updatedAt @map("updated_at") + updated_by String +} + +model LiteLLM_OrganizationTable { + organization_id String @id @default(uuid()) + organization_alias String? + budget_id String + metadata Json @default("{}") + models String[] + spend Float @default(0.0) + model_spend Json @default("{}") + created_at DateTime @default(now()) @map("created_at") + created_by String + updated_at DateTime @default(now()) @updatedAt @map("updated_at") + updated_by String + litellm_budget_table LiteLLM_BudgetTable @relation(fields: [budget_id], references: [budget_id]) +} + // Assign prod keys to groups, not individuals model LiteLLM_TeamTable { - team_id String @unique + team_id String @id @default(uuid()) team_alias String? + organization_id String? admins String[] members String[] members_with_roles Json @default("{}") @@ -27,11 +59,12 @@ model LiteLLM_TeamTable { updated_at DateTime @default(now()) @updatedAt @map("updated_at") model_spend Json @default("{}") model_max_budget Json @default("{}") + litellm_organization_table LiteLLM_OrganizationTable @relation(fields: [organization_id], references: [organization_id]) } // Track spend, rate limit, budget Users model LiteLLM_UserTable { - user_id String @unique + user_id String @id team_id String? teams String[] @default([]) user_role String? @@ -51,7 +84,7 @@ model LiteLLM_UserTable { // Generate Tokens for Proxy model LiteLLM_VerificationToken { - token String @unique + token String @id key_name String? key_alias String? spend Float @default(0.0) @@ -82,7 +115,7 @@ model LiteLLM_Config { // View spend, model, api_key per request model LiteLLM_SpendLogs { - request_id String @unique + request_id String @id call_type String api_key String @default ("") spend Float @default(0.0) @@ -98,11 +131,12 @@ model LiteLLM_SpendLogs { cache_key String @default("") request_tags Json @default("[]") team_id String? - end_user String? + end_user String? } + // Beta - allow team members to request access to a model model LiteLLM_UserNotifications { - request_id String @unique + request_id String @id user_id String models String[] justification String diff --git a/schema.prisma b/schema.prisma index d08295e10..f31fa130a 100644 --- a/schema.prisma +++ b/schema.prisma @@ -7,10 +7,42 @@ generator client { provider = "prisma-client-py" } +// Budget / Rate Limits for an org +model LiteLLM_BudgetTable { + budget_id String @id @default(uuid()) + max_budget Float? + max_parallel_requests Int? + tpm_limit BigInt? + rpm_limit BigInt? + model_max_budget Json @default("{}") + budget_duration String? + budget_reset_at DateTime? + created_at DateTime @default(now()) @map("created_at") + created_by String + updated_at DateTime @default(now()) @updatedAt @map("updated_at") + updated_by String +} + +model LiteLLM_OrganizationTable { + organization_id String @id @default(uuid()) + organization_alias String? + budget_id String + metadata Json @default("{}") + models String[] + spend Float @default(0.0) + model_spend Json @default("{}") + created_at DateTime @default(now()) @map("created_at") + created_by String + updated_at DateTime @default(now()) @updatedAt @map("updated_at") + updated_by String + litellm_budget_table LiteLLM_BudgetTable @relation(fields: [budget_id], references: [budget_id]) +} + // Assign prod keys to groups, not individuals model LiteLLM_TeamTable { - team_id String @unique + team_id String @id @default(uuid()) team_alias String? + organization_id String? admins String[] members String[] members_with_roles Json @default("{}") @@ -27,11 +59,12 @@ model LiteLLM_TeamTable { updated_at DateTime @default(now()) @updatedAt @map("updated_at") model_spend Json @default("{}") model_max_budget Json @default("{}") + litellm_organization_table LiteLLM_OrganizationTable @relation(fields: [organization_id], references: [organization_id]) } // Track spend, rate limit, budget Users model LiteLLM_UserTable { - user_id String @unique + user_id String @id team_id String? teams String[] @default([]) user_role String? @@ -51,7 +84,7 @@ model LiteLLM_UserTable { // Generate Tokens for Proxy model LiteLLM_VerificationToken { - token String @unique + token String @id key_name String? key_alias String? spend Float @default(0.0) @@ -82,7 +115,7 @@ model LiteLLM_Config { // View spend, model, api_key per request model LiteLLM_SpendLogs { - request_id String @unique + request_id String @id call_type String api_key String @default ("") spend Float @default(0.0) @@ -103,7 +136,7 @@ model LiteLLM_SpendLogs { // Beta - allow team members to request access to a model model LiteLLM_UserNotifications { - request_id String @unique + request_id String @id user_id String models String[] justification String From 0418ba5fc3a487313856d1af4fe6b6c14e69fb45 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Sat, 2 Mar 2024 12:02:06 -0800 Subject: [PATCH 06/17] (feat) log api_base in spend logs --- litellm/proxy/_types.py | 1 + litellm/proxy/schema.prisma | 1 + litellm/proxy/utils.py | 1 + schema.prisma | 1 + 4 files changed, 4 insertions(+) diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py index ce4de2d14..175f801da 100644 --- a/litellm/proxy/_types.py +++ b/litellm/proxy/_types.py @@ -542,6 +542,7 @@ class LiteLLM_SpendLogs(LiteLLMBase): request_id: str api_key: str model: Optional[str] = "" + api_base: Optional[str] = "" call_type: str spend: Optional[float] = 0.0 total_tokens: Optional[int] = 0 diff --git a/litellm/proxy/schema.prisma b/litellm/proxy/schema.prisma index 6a9b72728..9c5c00d13 100644 --- a/litellm/proxy/schema.prisma +++ b/litellm/proxy/schema.prisma @@ -92,6 +92,7 @@ model LiteLLM_SpendLogs { startTime DateTime // Assuming start_time is a DateTime field endTime DateTime // Assuming end_time is a DateTime field model String @default("") + api_base String @default("") user String @default("") metadata Json @default("{}") cache_hit String @default("") diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index 948e686dd..c67448c86 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -1600,6 +1600,7 @@ def get_logging_payload(kwargs, response_obj, start_time, end_time): "completion_tokens": usage.get("completion_tokens", 0), "request_tags": metadata.get("tags", []), "end_user": kwargs.get("user", ""), + "api_base": litellm_params.get("api_base", ""), } verbose_proxy_logger.debug(f"SpendTable: created payload - payload: {payload}\n\n") diff --git a/schema.prisma b/schema.prisma index d08295e10..06964e4a6 100644 --- a/schema.prisma +++ b/schema.prisma @@ -92,6 +92,7 @@ model LiteLLM_SpendLogs { startTime DateTime // Assuming start_time is a DateTime field endTime DateTime // Assuming end_time is a DateTime field model String @default("") + api_base String @default("") user String @default("") metadata Json @default("{}") cache_hit String @default("") From e60ae9388ea9ec9ea0ca481b16aa4aa886ef72c8 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Sat, 2 Mar 2024 12:08:34 -0800 Subject: [PATCH 07/17] =?UTF-8?q?bump:=20version=201.28.8=20=E2=86=92=201.?= =?UTF-8?q?28.9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 0dbe465c3..65e8645fc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "1.28.8" +version = "1.28.9" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT" @@ -74,7 +74,7 @@ requires = ["poetry-core", "wheel"] build-backend = "poetry.core.masonry.api" [tool.commitizen] -version = "1.28.8" +version = "1.28.9" version_files = [ "pyproject.toml:^version" ] From 8a249b5fe91255458f22c325f9217a582280976c Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 2 Mar 2024 12:18:28 -0800 Subject: [PATCH 08/17] build(schema.prisma): update schema with new orgs and budget table --- litellm/proxy/schema.prisma | 8 +++++--- schema.prisma | 8 +++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/litellm/proxy/schema.prisma b/litellm/proxy/schema.prisma index 2744ae5c0..7eb59ee48 100644 --- a/litellm/proxy/schema.prisma +++ b/litellm/proxy/schema.prisma @@ -21,11 +21,12 @@ model LiteLLM_BudgetTable { created_by String updated_at DateTime @default(now()) @updatedAt @map("updated_at") updated_by String + organization LiteLLM_OrganizationTable[] // multiple orgs can have the same budget } model LiteLLM_OrganizationTable { organization_id String @id @default(uuid()) - organization_alias String? + organization_alias String budget_id String metadata Json @default("{}") models String[] @@ -35,7 +36,8 @@ model LiteLLM_OrganizationTable { created_by String updated_at DateTime @default(now()) @updatedAt @map("updated_at") updated_by String - litellm_budget_table LiteLLM_BudgetTable @relation(fields: [budget_id], references: [budget_id]) + litellm_budget_table LiteLLM_BudgetTable? @relation(fields: [budget_id], references: [budget_id]) + teams LiteLLM_TeamTable[] } // Assign prod keys to groups, not individuals @@ -59,7 +61,7 @@ model LiteLLM_TeamTable { updated_at DateTime @default(now()) @updatedAt @map("updated_at") model_spend Json @default("{}") model_max_budget Json @default("{}") - litellm_organization_table LiteLLM_OrganizationTable @relation(fields: [organization_id], references: [organization_id]) + litellm_organization_table LiteLLM_OrganizationTable? @relation(fields: [organization_id], references: [organization_id]) } // Track spend, rate limit, budget Users diff --git a/schema.prisma b/schema.prisma index 2744ae5c0..7eb59ee48 100644 --- a/schema.prisma +++ b/schema.prisma @@ -21,11 +21,12 @@ model LiteLLM_BudgetTable { created_by String updated_at DateTime @default(now()) @updatedAt @map("updated_at") updated_by String + organization LiteLLM_OrganizationTable[] // multiple orgs can have the same budget } model LiteLLM_OrganizationTable { organization_id String @id @default(uuid()) - organization_alias String? + organization_alias String budget_id String metadata Json @default("{}") models String[] @@ -35,7 +36,8 @@ model LiteLLM_OrganizationTable { created_by String updated_at DateTime @default(now()) @updatedAt @map("updated_at") updated_by String - litellm_budget_table LiteLLM_BudgetTable @relation(fields: [budget_id], references: [budget_id]) + litellm_budget_table LiteLLM_BudgetTable? @relation(fields: [budget_id], references: [budget_id]) + teams LiteLLM_TeamTable[] } // Assign prod keys to groups, not individuals @@ -59,7 +61,7 @@ model LiteLLM_TeamTable { updated_at DateTime @default(now()) @updatedAt @map("updated_at") model_spend Json @default("{}") model_max_budget Json @default("{}") - litellm_organization_table LiteLLM_OrganizationTable @relation(fields: [organization_id], references: [organization_id]) + litellm_organization_table LiteLLM_OrganizationTable? @relation(fields: [organization_id], references: [organization_id]) } // Track spend, rate limit, budget Users From b042b5dc3b94564e4291a31c2954c0014975c04f Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Sat, 2 Mar 2024 12:25:40 -0800 Subject: [PATCH 09/17] (feat) set soft_budgets on keys --- litellm/proxy/schema.prisma | 2 ++ schema.prisma | 2 ++ 2 files changed, 4 insertions(+) diff --git a/litellm/proxy/schema.prisma b/litellm/proxy/schema.prisma index 7eb59ee48..1fe55f24e 100644 --- a/litellm/proxy/schema.prisma +++ b/litellm/proxy/schema.prisma @@ -11,6 +11,7 @@ generator client { model LiteLLM_BudgetTable { budget_id String @id @default(uuid()) max_budget Float? + soft_budget Float? max_parallel_requests Int? tpm_limit BigInt? rpm_limit BigInt? @@ -107,6 +108,7 @@ model LiteLLM_VerificationToken { allowed_cache_controls String[] @default([]) model_spend Json @default("{}") model_max_budget Json @default("{}") + budget_id String? } // store proxy config.yaml diff --git a/schema.prisma b/schema.prisma index 7eb59ee48..1fe55f24e 100644 --- a/schema.prisma +++ b/schema.prisma @@ -11,6 +11,7 @@ generator client { model LiteLLM_BudgetTable { budget_id String @id @default(uuid()) max_budget Float? + soft_budget Float? max_parallel_requests Int? tpm_limit BigInt? rpm_limit BigInt? @@ -107,6 +108,7 @@ model LiteLLM_VerificationToken { allowed_cache_controls String[] @default([]) model_spend Json @default("{}") model_max_budget Json @default("{}") + budget_id String? } // store proxy config.yaml From eb4f90115d42207290ed4b3beba8447ea1168a69 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Sat, 2 Mar 2024 12:52:09 -0800 Subject: [PATCH 10/17] (feat) create soft budget --- litellm/proxy/_types.py | 15 +++++++++++++++ litellm/proxy/proxy_server.py | 14 ++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py index 175f801da..e4b28001d 100644 --- a/litellm/proxy/_types.py +++ b/litellm/proxy/_types.py @@ -324,6 +324,21 @@ class TeamRequest(LiteLLMBase): teams: List[str] +class LiteLLM_BudgetTable(LiteLLMBase): + """Represents user-controllable params for a LiteLLM_BudgetTable record""" + + max_budget: Optional[float] = None + soft_budget: Optional[float] = None + max_parallel_requests: Optional[int] = None + tpm_limit: Optional[int] = None + rpm_limit: Optional[int] = None + model_max_budget: dict + budget_duration: Optional[str] = None + budget_reset_at: Optional[datetime] = None + created_by: str + updated_by: str + + class KeyManagementSystem(enum.Enum): GOOGLE_KMS = "google_kms" AZURE_KEY_VAULT = "azure_key_vault" diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 17db8c3ab..37b28baea 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -1869,6 +1869,19 @@ async def generate_key_helper_fn( rpm_limit = rpm_limit allowed_cache_controls = allowed_cache_controls + # TODO: @ishaan-jaff: Migrate all budget tracking to use LiteLLM_BudgetTable + if prisma_client is not None: + # create the Budget Row for the LiteLLM Verification Token + budget_row = LiteLLM_BudgetTable( + soft_budget=50, + model_max_budget=model_max_budget or {}, + created_by=user_id, + updated_by=user_id, + ) + new_budget = prisma_client.jsonify_object(budget_row.json(exclude_none=True)) + _budget = await prisma_client.db.litellm_budgettable.create(data={**new_budget}) # type: ignore + _budget_id = getattr(_budget, "id", None) + try: # Create a new verification token (you may want to enhance this logic based on your needs) user_data = { @@ -1906,6 +1919,7 @@ async def generate_key_helper_fn( "allowed_cache_controls": allowed_cache_controls, "permissions": permissions_json, "model_max_budget": model_max_budget_json, + "budget_id": _budget_id, } if ( general_settings.get("allow_user_auth", False) == True From fd9f8b7010fbd249d8b8587b66478011e2d29146 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Sat, 2 Mar 2024 13:05:00 -0800 Subject: [PATCH 11/17] (docs) setting soft budgets --- docs/my-website/docs/proxy/virtual_keys.md | 2 ++ litellm/__init__.py | 3 +++ litellm/proxy/proxy_server.py | 7 ++++++- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/docs/my-website/docs/proxy/virtual_keys.md b/docs/my-website/docs/proxy/virtual_keys.md index e350ce9d5..70fd6e6a8 100644 --- a/docs/my-website/docs/proxy/virtual_keys.md +++ b/docs/my-website/docs/proxy/virtual_keys.md @@ -79,6 +79,7 @@ curl 'http://0.0.0.0:8000/key/generate' \ "metadata": {"user": "ishaan@berri.ai"}, "team_id": "core-infra", "max_budget": 10, + "soft_budget": 5, }' ``` @@ -93,6 +94,7 @@ Request Params: - `config`: *Optional[dict]* - any key-specific configs, overrides config in config.yaml - `spend`: *Optional[int]* - Amount spent by key. Default is 0. Will be updated by proxy whenever key is used. https://docs.litellm.ai/docs/proxy/virtual_keys#managing-auth---tracking-spend - `max_budget`: *Optional[float]* - Specify max budget for a given key. +- `soft_budget`: *Optional[float]* - Specify soft limit budget for a given key. Get Alerts when key hits its soft budget - `model_max_budget`: *Optional[dict[str, float]]* - Specify max budget for each model, `model_max_budget={"gpt4": 0.5, "gpt-5": 0.01}` - `max_parallel_requests`: *Optional[int]* - Rate limit a user based on the number of parallel requests. Raises 429 error, if user's parallel requests > x. - `metadata`: *Optional[dict]* - Metadata for key, store information for key. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" } diff --git a/litellm/__init__.py b/litellm/__init__.py index cd639ddb9..f218fe036 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -79,6 +79,9 @@ max_budget: float = 0.0 # set the max budget across all providers budget_duration: Optional[str] = ( None # proxy only - resets budget after fixed duration. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d"). ) +default_soft_budget: float = ( + 50.0 # by default all litellm proxy keys have a soft budget of 50.0 +) _openai_finish_reasons = ["stop", "length", "function_call", "content_filter", "null"] _openai_completion_params = [ "functions", diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 37b28baea..dcd4283ba 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -1810,6 +1810,9 @@ async def generate_key_helper_fn( spend: float, key_max_budget: Optional[float] = None, # key_max_budget is used to Budget Per key key_budget_duration: Optional[str] = None, + key_soft_budget: Optional[ + float + ] = None, # key_soft_budget is used to Budget Per key max_budget: Optional[float] = None, # max_budget is used to Budget Per user budget_duration: Optional[str] = None, # max_budget is used to Budget Per user token: Optional[str] = None, @@ -1873,7 +1876,7 @@ async def generate_key_helper_fn( if prisma_client is not None: # create the Budget Row for the LiteLLM Verification Token budget_row = LiteLLM_BudgetTable( - soft_budget=50, + soft_budget=key_soft_budget or litellm.default_soft_budget, model_max_budget=model_max_budget or {}, created_by=user_id, updated_by=user_id, @@ -3347,6 +3350,8 @@ async def generate_key_fn( # if we get max_budget passed to /key/generate, then use it as key_max_budget. Since generate_key_helper_fn is used to make new users if "max_budget" in data_json: data_json["key_max_budget"] = data_json.pop("max_budget", None) + if "soft_budget" in data_json: + data_json["key_soft_budget"] = data_json.pop("soft_budget", None) if "budget_duration" in data_json: data_json["key_budget_duration"] = data_json.pop("budget_duration", None) From 1bb8263c922bdb133c13db2ece74b2d783900fbe Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Sat, 2 Mar 2024 14:43:01 -0800 Subject: [PATCH 12/17] (feat) set soft_budget with /key/generate --- litellm/proxy/_types.py | 1 + litellm/proxy/proxy_server.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py index e4b28001d..6196f18a2 100644 --- a/litellm/proxy/_types.py +++ b/litellm/proxy/_types.py @@ -151,6 +151,7 @@ class GenerateRequestBase(LiteLLMBase): rpm_limit: Optional[int] = None budget_duration: Optional[str] = None allowed_cache_controls: Optional[list] = [] + soft_budget: Optional[float] = None class GenerateKeyRequest(GenerateRequestBase): diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index dcd4283ba..482397b86 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -1995,6 +1995,9 @@ async def generate_key_helper_fn( except Exception as e: traceback.print_exc() raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR) + + # Add budget related info in key_data - this ensures it's returned + key_data["soft_budget"] = key_soft_budget return key_data From 163c8f1c5a137e9167c511ef228261cef6b14cc7 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Sat, 2 Mar 2024 14:58:02 -0800 Subject: [PATCH 13/17] (feat) set soft budget limits on ui --- ui/litellm-dashboard/src/components/create_key_button.tsx | 3 +++ ui/litellm-dashboard/src/components/view_key_spend_report.tsx | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/ui/litellm-dashboard/src/components/create_key_button.tsx b/ui/litellm-dashboard/src/components/create_key_button.tsx index 3dddaf8b7..b6cec81e4 100644 --- a/ui/litellm-dashboard/src/components/create_key_button.tsx +++ b/ui/litellm-dashboard/src/components/create_key_button.tsx @@ -108,6 +108,9 @@ const CreateKey: React.FC = ({ ))} + + + diff --git a/ui/litellm-dashboard/src/components/view_key_spend_report.tsx b/ui/litellm-dashboard/src/components/view_key_spend_report.tsx index 0788af209..f0916ec01 100644 --- a/ui/litellm-dashboard/src/components/view_key_spend_report.tsx +++ b/ui/litellm-dashboard/src/components/view_key_spend_report.tsx @@ -105,7 +105,7 @@ const ViewKeySpendReport: React.FC = ({ return (
- Date: Sat, 2 Mar 2024 15:31:59 -0800 Subject: [PATCH 14/17] (feat) set soft limits per key --- .../src/components/create_key_button.tsx | 50 ++++++++++++------- 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/ui/litellm-dashboard/src/components/create_key_button.tsx b/ui/litellm-dashboard/src/components/create_key_button.tsx index b6cec81e4..e76e2d0c2 100644 --- a/ui/litellm-dashboard/src/components/create_key_button.tsx +++ b/ui/litellm-dashboard/src/components/create_key_button.tsx @@ -2,7 +2,7 @@ import React, { useState, useEffect, useRef } from "react"; import { Button, TextInput, Grid, Col } from "@tremor/react"; -import { Card, Metric, Text } from "@tremor/react"; +import { Card, Metric, Text, Title, Subtitle } from "@tremor/react"; import { Button as Button2, Modal, @@ -38,6 +38,7 @@ const CreateKey: React.FC = ({ const [form] = Form.useForm(); const [isModalVisible, setIsModalVisible] = useState(false); const [apiKey, setApiKey] = useState(null); + const [softBudget, setSoftBudget] = useState(null); const handleOk = () => { setIsModalVisible(false); form.resetFields(); @@ -54,8 +55,11 @@ const CreateKey: React.FC = ({ message.info("Making API Call"); setIsModalVisible(true); const response = await keyCreateCall(accessToken, userID, formValues); + + console.log("key create Response:", response); setData((prevData) => (prevData ? [...prevData, response] : [response])); // Check if prevData is null setApiKey(response["key"]); + setSoftBudget(response["soft_budget"]); message.success("API Key Created"); form.resetFields(); localStorage.removeItem("userData" + userID); @@ -108,7 +112,7 @@ const CreateKey: React.FC = ({ ))} - + @@ -157,28 +161,38 @@ const CreateKey: React.FC = ({ {apiKey && ( - -

- Please save this secret key somewhere safe and accessible. For - security reasons, you will not be able to view it again{" "} - through your LiteLLM account. If you lose this secret key, you - will need to generate a new one. -

- - - {apiKey != null ? ( - API Key: {apiKey} - ) : ( - Key being created, this might take 30s - )} - + + Save your Key + +

+ Please save this secret key somewhere safe and accessible. For + security reasons, you will not be able to view it again{" "} + through your LiteLLM account. If you lose this secret key, you + will need to generate a new one. +

+ + + {apiKey != null ? ( +
+ API Key: {apiKey} + Budgets + Soft Limit Budget: ${softBudget} + + +
+ ) : ( + Key being created, this might take 30s + )} + +
)} From 1ef19fbc9c690363de689203a8513a98f8cdff00 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 2 Mar 2024 15:54:37 -0800 Subject: [PATCH 15/17] feat: enable user to test slack budget alerting when creating a key --- litellm/proxy/proxy_server.py | 37 +++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 482397b86..99bdb579c 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -787,6 +787,7 @@ async def user_api_key_auth( "/global/spend/keys", "/global/spend/models", "/global/predict/spend/logs", + "/health/services", ] # check if the current route startswith any of the allowed routes if ( @@ -6479,6 +6480,42 @@ async def test_endpoint(request: Request): return {"route": request.url.path} +@router.get( + "/health/services", + tags=["health"], + dependencies=[Depends(user_api_key_auth)], + include_in_schema=False, +) +async def health_services_endpoint( + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), + service: Literal["slack_budget_alerts"] = fastapi.Query( + description="Specify the service being hit." + ), +): + """ + Hidden endpoint. + + Used by the UI to let user check if slack alerting is working as expected. + """ + global general_settings, proxy_logging_obj + + if service is None: + raise HTTPException( + status_code=400, detail={"error": "Service must be specified."} + ) + + if service not in ["slack_budget_alerts"]: + raise HTTPException( + status_code=400, + detail={ + "error": f"Service must be in list. Service={service}. List={['slack_budget_alerts']}" + }, + ) + + if "slack" in general_settings.get("alerting", []): + await proxy_logging_obj.alerting_handler(message="This is a test", level="Low") + + @router.get("/health", tags=["health"], dependencies=[Depends(user_api_key_auth)]) async def health_endpoint( user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), From cbd085125700c4ba3b83985c1fe505c48639d01f Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 2 Mar 2024 15:56:42 -0800 Subject: [PATCH 16/17] fix(proxy_server.py): raise 422 error if no slack connection setup when calling `/health/services` --- litellm/proxy/proxy_server.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 99bdb579c..ffae102a0 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -6514,6 +6514,11 @@ async def health_services_endpoint( if "slack" in general_settings.get("alerting", []): await proxy_logging_obj.alerting_handler(message="This is a test", level="Low") + else: + raise HTTPException( + status_code=422, + detail={"error": "No slack connection setup. Unable to test this."}, + ) @router.get("/health", tags=["health"], dependencies=[Depends(user_api_key_auth)]) From b30cbd0d55d489d91bc5a54513b40e0488fd7bc4 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 2 Mar 2024 16:04:36 -0800 Subject: [PATCH 17/17] refactor(proxy_server.py): format the message for slack budget alerts --- litellm/proxy/proxy_server.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index ffae102a0..eca5fb30a 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -6512,8 +6512,11 @@ async def health_services_endpoint( }, ) + test_message = f"""\n🚨 `ProjectedLimitExceededError` 💸\n\n`Key Alias:` my-secret-project \n`Expected Day of Error`: 28th March \n`Current Spend`: 100 \n`Projected Spend at end of month`: 1000 \n + """ + if "slack" in general_settings.get("alerting", []): - await proxy_logging_obj.alerting_handler(message="This is a test", level="Low") + await proxy_logging_obj.alerting_handler(message=test_message, level="Low") else: raise HTTPException( status_code=422,