From 3aba9019ac27481361c3fa87956907f5c41dd4ca Mon Sep 17 00:00:00 2001
From: ishaan-jaff <ishaanjaffer0324@gmail.com>
Date: Sat, 2 Mar 2024 10:06:33 -0800
Subject: [PATCH 01/17] (feat) track used api_base in response

---
 litellm/router.py | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/litellm/router.py b/litellm/router.py
index 6f33d0b0d..59cbf43d5 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -357,6 +357,11 @@ class Router:
             verbose_router_logger.info(
                 f"litellm.completion(model={model_name})\033[32m 200 OK\033[0m"
             )
+            # set used api_base in response
+            if hasattr(response, "_hidden_params"):
+                response._hidden_params["api_base"] = deployment.get(
+                    "litellm_params", {}
+                ).get("api_base", None)
             return response
         except Exception as e:
             verbose_router_logger.info(
@@ -446,6 +451,12 @@ class Router:
             verbose_router_logger.info(
                 f"litellm.acompletion(model={model_name})\033[32m 200 OK\033[0m"
             )
+
+            # set used api_base in response
+            if hasattr(response, "_hidden_params"):
+                response._hidden_params["api_base"] = deployment.get(
+                    "litellm_params", {}
+                ).get("api_base", None)
             return response
         except Exception as e:
             verbose_router_logger.info(
@@ -602,6 +613,12 @@ class Router:
             verbose_router_logger.info(
                 f"litellm.aimage_generation(model={model_name})\033[32m 200 OK\033[0m"
             )
+
+            # set used api_base in response
+            if hasattr(response, "_hidden_params"):
+                response._hidden_params["api_base"] = deployment.get(
+                    "litellm_params", {}
+                ).get("api_base", None)
             return response
         except Exception as e:
             verbose_router_logger.info(
@@ -694,6 +711,11 @@ class Router:
             verbose_router_logger.info(
                 f"litellm.amoderation(model={model_name})\033[32m 200 OK\033[0m"
             )
+            # set used api_base in response
+            if hasattr(response, "_hidden_params"):
+                response._hidden_params["api_base"] = deployment.get(
+                    "litellm_params", {}
+                ).get("api_base", None)
             return response
         except Exception as e:
             verbose_router_logger.info(
@@ -825,6 +847,12 @@ class Router:
             verbose_router_logger.info(
                 f"litellm.atext_completion(model={model_name})\033[32m 200 OK\033[0m"
             )
+
+            # set used api_base in response
+            if hasattr(response, "_hidden_params"):
+                response._hidden_params["api_base"] = deployment.get(
+                    "litellm_params", {}
+                ).get("api_base", None)
             return response
         except Exception as e:
             verbose_router_logger.info(
@@ -954,6 +982,12 @@ class Router:
             verbose_router_logger.info(
                 f"litellm.aembedding(model={model_name})\033[32m 200 OK\033[0m"
             )
+
+            # set used api_base in response
+            if hasattr(response, "_hidden_params"):
+                response._hidden_params["api_base"] = deployment.get(
+                    "litellm_params", {}
+                ).get("api_base", None)
             return response
         except Exception as e:
             verbose_router_logger.info(

From 0bb45b33fd1050abeeb48ce3bcc173d604219b36 Mon Sep 17 00:00:00 2001
From: ishaan-jaff <ishaanjaffer0324@gmail.com>
Date: Sat, 2 Mar 2024 11:06:03 -0800
Subject: [PATCH 02/17] (feat) send api_base in slow-response alerts

---
 litellm/proxy/utils.py | 35 +++++++++++++++++++++++++----------
 1 file changed, 25 insertions(+), 10 deletions(-)

diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index 1cc52401a..948e686dd 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -64,6 +64,7 @@ class ProxyLogging:
         litellm.callbacks.append(self.max_parallel_request_limiter)
         litellm.callbacks.append(self.max_budget_limiter)
         litellm.callbacks.append(self.cache_control_check)
+        litellm.callbacks.append(self.response_taking_too_long_callback)
         for callback in litellm.callbacks:
             if callback not in litellm.input_callback:
                 litellm.input_callback.append(callback)
@@ -142,6 +143,30 @@ class ProxyLogging:
                 raise e
         return data
 
+    async def response_taking_too_long_callback(
+        self,
+        kwargs,  # kwargs to completion
+        completion_response,  # response from completion
+        start_time,
+        end_time,  # start/end time
+    ):
+        if self.alerting is None:
+            return
+        time_difference = end_time - start_time
+        # Convert the timedelta to float (in seconds)
+        time_difference_float = time_difference.total_seconds()
+        litellm_params = kwargs.get("litellm_params", {})
+        api_base = litellm_params.get("api_base", "")
+        model = kwargs.get("model", "")
+        messages = kwargs.get("messages", "")
+        request_info = f"\nRequest Model: `{model}`\nAPI Base: `{api_base}`\nMessages: `{messages}`"
+        slow_message = f"`Responses are slow - {round(time_difference_float,2)}s response time > Alerting threshold: {self.alerting_threshold}s`"
+        if time_difference_float > self.alerting_threshold:
+            await self.alerting_handler(
+                message=slow_message + request_info,
+                level="Low",
+            )
+
     async def response_taking_too_long(
         self,
         start_time: Optional[float] = None,
@@ -189,16 +214,6 @@ class ProxyLogging:
                     level="Medium",
                 )
 
-        elif (
-            type == "slow_response" and start_time is not None and end_time is not None
-        ):
-            slow_message = f"`Responses are slow - {round(end_time-start_time,2)}s response time > Alerting threshold: {self.alerting_threshold}s`"
-            if end_time - start_time > self.alerting_threshold:
-                await self.alerting_handler(
-                    message=slow_message + request_info,
-                    level="Low",
-                )
-
     async def budget_alerts(
         self,
         type: Literal[

From 127bc743b2b298a3b79e80ec453b78aab5ca6e79 Mon Sep 17 00:00:00 2001
From: ishaan-jaff <ishaanjaffer0324@gmail.com>
Date: Sat, 2 Mar 2024 11:09:40 -0800
Subject: [PATCH 03/17] (feat) cleanup: drop inline slow-response alerting

---
 litellm/proxy/proxy_server.py | 42 -----------------------------------
 1 file changed, 42 deletions(-)

diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 869de6dde..17db8c3ab 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -2138,14 +2138,6 @@ async def async_data_generator(response, user_api_key_dict):
             except Exception as e:
                 yield f"data: {str(e)}\n\n"
 
-        ### ALERTING ###
-        end_time = time.time()
-        asyncio.create_task(
-            proxy_logging_obj.response_taking_too_long(
-                start_time=start_time, end_time=end_time, type="slow_response"
-            )
-        )
-
         # Streaming is done, yield the [DONE] chunk
         done_message = "[DONE]"
         yield f"data: {done_message}\n\n"
@@ -2494,14 +2486,6 @@ async def completion(
                 headers=custom_headers,
             )
 
-        ### ALERTING ###
-        end_time = time.time()
-        asyncio.create_task(
-            proxy_logging_obj.response_taking_too_long(
-                start_time=start_time, end_time=end_time, type="slow_response"
-            )
-        )
-
         fastapi_response.headers["x-litellm-model-id"] = model_id
         return response
     except Exception as e:
@@ -2700,14 +2684,6 @@ async def chat_completion(
                 headers=custom_headers,
             )
 
-        ### ALERTING ###
-        end_time = time.time()
-        asyncio.create_task(
-            proxy_logging_obj.response_taking_too_long(
-                start_time=start_time, end_time=end_time, type="slow_response"
-            )
-        )
-
         fastapi_response.headers["x-litellm-model-id"] = model_id
 
         ### CALL HOOKS ### - modify outgoing data
@@ -2915,12 +2891,6 @@ async def embeddings(
 
         ### ALERTING ###
         data["litellm_status"] = "success"  # used for alerting
-        end_time = time.time()
-        asyncio.create_task(
-            proxy_logging_obj.response_taking_too_long(
-                start_time=start_time, end_time=end_time, type="slow_response"
-            )
-        )
 
         return response
     except Exception as e:
@@ -3066,12 +3036,6 @@ async def image_generation(
 
         ### ALERTING ###
         data["litellm_status"] = "success"  # used for alerting
-        end_time = time.time()
-        asyncio.create_task(
-            proxy_logging_obj.response_taking_too_long(
-                start_time=start_time, end_time=end_time, type="slow_response"
-            )
-        )
 
         return response
     except Exception as e:
@@ -3225,12 +3189,6 @@ async def moderations(
 
         ### ALERTING ###
         data["litellm_status"] = "success"  # used for alerting
-        end_time = time.time()
-        asyncio.create_task(
-            proxy_logging_obj.response_taking_too_long(
-                start_time=start_time, end_time=end_time, type="slow_response"
-            )
-        )
 
         return response
     except Exception as e:

From 868a415aa0160985ee27850f0faef04e06b1dfb1 Mon Sep 17 00:00:00 2001
From: ishaan-jaff <ishaanjaffer0324@gmail.com>
Date: Sat, 2 Mar 2024 11:12:09 -0800
Subject: [PATCH 04/17] Revert "(feat) track used api_base in response"

This reverts commit 3aba9019ac27481361c3fa87956907f5c41dd4ca.
---
 litellm/router.py | 34 ----------------------------------
 1 file changed, 34 deletions(-)

diff --git a/litellm/router.py b/litellm/router.py
index 59cbf43d5..6f33d0b0d 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -357,11 +357,6 @@ class Router:
             verbose_router_logger.info(
                 f"litellm.completion(model={model_name})\033[32m 200 OK\033[0m"
             )
-            # set used api_base in response
-            if hasattr(response, "_hidden_params"):
-                response._hidden_params["api_base"] = deployment.get(
-                    "litellm_params", {}
-                ).get("api_base", None)
             return response
         except Exception as e:
             verbose_router_logger.info(
@@ -451,12 +446,6 @@ class Router:
             verbose_router_logger.info(
                 f"litellm.acompletion(model={model_name})\033[32m 200 OK\033[0m"
             )
-
-            # set used api_base in response
-            if hasattr(response, "_hidden_params"):
-                response._hidden_params["api_base"] = deployment.get(
-                    "litellm_params", {}
-                ).get("api_base", None)
             return response
         except Exception as e:
             verbose_router_logger.info(
@@ -613,12 +602,6 @@ class Router:
             verbose_router_logger.info(
                 f"litellm.aimage_generation(model={model_name})\033[32m 200 OK\033[0m"
             )
-
-            # set used api_base in response
-            if hasattr(response, "_hidden_params"):
-                response._hidden_params["api_base"] = deployment.get(
-                    "litellm_params", {}
-                ).get("api_base", None)
             return response
         except Exception as e:
             verbose_router_logger.info(
@@ -711,11 +694,6 @@ class Router:
             verbose_router_logger.info(
                 f"litellm.amoderation(model={model_name})\033[32m 200 OK\033[0m"
             )
-            # set used api_base in response
-            if hasattr(response, "_hidden_params"):
-                response._hidden_params["api_base"] = deployment.get(
-                    "litellm_params", {}
-                ).get("api_base", None)
             return response
         except Exception as e:
             verbose_router_logger.info(
@@ -847,12 +825,6 @@ class Router:
             verbose_router_logger.info(
                 f"litellm.atext_completion(model={model_name})\033[32m 200 OK\033[0m"
             )
-
-            # set used api_base in response
-            if hasattr(response, "_hidden_params"):
-                response._hidden_params["api_base"] = deployment.get(
-                    "litellm_params", {}
-                ).get("api_base", None)
             return response
         except Exception as e:
             verbose_router_logger.info(
@@ -982,12 +954,6 @@ class Router:
             verbose_router_logger.info(
                 f"litellm.aembedding(model={model_name})\033[32m 200 OK\033[0m"
             )
-
-            # set used api_base in response
-            if hasattr(response, "_hidden_params"):
-                response._hidden_params["api_base"] = deployment.get(
-                    "litellm_params", {}
-                ).get("api_base", None)
             return response
         except Exception as e:
             verbose_router_logger.info(

From 5feea0483188af59a09e63bada96dc7a48fada7c Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Sat, 2 Mar 2024 11:59:17 -0800
Subject: [PATCH 05/17] build: update schema.prisma

---
 litellm/proxy/schema.prisma | 46 ++++++++++++++++++++++++++++++++-----
 schema.prisma               | 43 ++++++++++++++++++++++++++++++----
 2 files changed, 78 insertions(+), 11 deletions(-)

diff --git a/litellm/proxy/schema.prisma b/litellm/proxy/schema.prisma
index 6a9b72728..f31fa130a 100644
--- a/litellm/proxy/schema.prisma
+++ b/litellm/proxy/schema.prisma
@@ -7,10 +7,42 @@ generator client {
   provider = "prisma-client-py"
 }
 
+// Budget / Rate Limits for an org
+model LiteLLM_BudgetTable {
+  budget_id String @id @default(uuid())
+  max_budget Float?
+  max_parallel_requests Int?
+  tpm_limit     BigInt?
+  rpm_limit     BigInt?
+  model_max_budget Json @default("{}")
+  budget_duration String? 
+  budget_reset_at DateTime?
+  created_at    DateTime               @default(now()) @map("created_at")
+  created_by String
+  updated_at    DateTime               @default(now()) @updatedAt @map("updated_at")
+  updated_by String
+}
+
+model LiteLLM_OrganizationTable {
+		organization_id String @id @default(uuid())
+    organization_alias  String? 
+    budget_id String
+    metadata  Json  @default("{}")
+    models     String[]
+    spend      Float    @default(0.0)
+    model_spend      Json @default("{}")
+    created_at    DateTime               @default(now()) @map("created_at")
+    created_by String
+    updated_at    DateTime               @default(now()) @updatedAt @map("updated_at")
+    updated_by String
+    litellm_budget_table LiteLLM_BudgetTable   @relation(fields: [budget_id], references: [budget_id])
+}
+
 // Assign prod keys to groups, not individuals 
 model LiteLLM_TeamTable {
-		team_id    String @unique
+		team_id    String @id @default(uuid())
     team_alias  String? 
+    organization_id String?
     admins String[]
     members String[]
     members_with_roles Json @default("{}")
@@ -27,11 +59,12 @@ model LiteLLM_TeamTable {
     updated_at    DateTime               @default(now()) @updatedAt @map("updated_at")
     model_spend      Json @default("{}")
     model_max_budget Json @default("{}")
+    litellm_organization_table LiteLLM_OrganizationTable   @relation(fields: [organization_id], references: [organization_id])
 }
 
 // Track spend, rate limit, budget Users
 model LiteLLM_UserTable {
-		user_id    String @unique
+		user_id    String @id
     team_id    String?
     teams    String[] @default([])
     user_role  String?
@@ -51,7 +84,7 @@ model LiteLLM_UserTable {
 
 // Generate Tokens for Proxy
 model LiteLLM_VerificationToken {
-    token      String   @unique
+    token      String   @id
     key_name   String?
     key_alias   String?
     spend      Float    @default(0.0)
@@ -82,7 +115,7 @@ model LiteLLM_Config {
 
 // View spend, model, api_key per request
 model LiteLLM_SpendLogs {
-  request_id          String @unique
+  request_id          String @id
   call_type           String
   api_key             String  @default ("")
   spend               Float    @default(0.0)
@@ -98,11 +131,12 @@ model LiteLLM_SpendLogs {
   cache_key           String   @default("")
   request_tags        Json     @default("[]")
   team_id             String? 
-  end_user            String? 
+  end_user            String?
 }
+
 // Beta - allow team members to request access to a model
 model LiteLLM_UserNotifications {
-  request_id          String @unique
+  request_id          String @id
   user_id             String 
   models              String[]
   justification       String
diff --git a/schema.prisma b/schema.prisma
index d08295e10..f31fa130a 100644
--- a/schema.prisma
+++ b/schema.prisma
@@ -7,10 +7,42 @@ generator client {
   provider = "prisma-client-py"
 }
 
+// Budget / Rate Limits for an org
+model LiteLLM_BudgetTable {
+  budget_id String @id @default(uuid())
+  max_budget Float?
+  max_parallel_requests Int?
+  tpm_limit     BigInt?
+  rpm_limit     BigInt?
+  model_max_budget Json @default("{}")
+  budget_duration String? 
+  budget_reset_at DateTime?
+  created_at    DateTime               @default(now()) @map("created_at")
+  created_by String
+  updated_at    DateTime               @default(now()) @updatedAt @map("updated_at")
+  updated_by String
+}
+
+model LiteLLM_OrganizationTable {
+		organization_id String @id @default(uuid())
+    organization_alias  String? 
+    budget_id String
+    metadata  Json  @default("{}")
+    models     String[]
+    spend      Float    @default(0.0)
+    model_spend      Json @default("{}")
+    created_at    DateTime               @default(now()) @map("created_at")
+    created_by String
+    updated_at    DateTime               @default(now()) @updatedAt @map("updated_at")
+    updated_by String
+    litellm_budget_table LiteLLM_BudgetTable   @relation(fields: [budget_id], references: [budget_id])
+}
+
 // Assign prod keys to groups, not individuals 
 model LiteLLM_TeamTable {
-		team_id    String @unique
+		team_id    String @id @default(uuid())
     team_alias  String? 
+    organization_id String?
     admins String[]
     members String[]
     members_with_roles Json @default("{}")
@@ -27,11 +59,12 @@ model LiteLLM_TeamTable {
     updated_at    DateTime               @default(now()) @updatedAt @map("updated_at")
     model_spend      Json @default("{}")
     model_max_budget Json @default("{}")
+    litellm_organization_table LiteLLM_OrganizationTable   @relation(fields: [organization_id], references: [organization_id])
 }
 
 // Track spend, rate limit, budget Users
 model LiteLLM_UserTable {
-		user_id    String @unique
+		user_id    String @id
     team_id    String?
     teams    String[] @default([])
     user_role  String?
@@ -51,7 +84,7 @@ model LiteLLM_UserTable {
 
 // Generate Tokens for Proxy
 model LiteLLM_VerificationToken {
-    token      String   @unique
+    token      String   @id
     key_name   String?
     key_alias   String?
     spend      Float    @default(0.0)
@@ -82,7 +115,7 @@ model LiteLLM_Config {
 
 // View spend, model, api_key per request
 model LiteLLM_SpendLogs {
-  request_id          String @unique
+  request_id          String @id
   call_type           String
   api_key             String  @default ("")
   spend               Float    @default(0.0)
@@ -103,7 +136,7 @@ model LiteLLM_SpendLogs {
 
 // Beta - allow team members to request access to a model
 model LiteLLM_UserNotifications {
-  request_id          String @unique
+  request_id          String @id
   user_id             String 
   models              String[]
   justification       String

From 0418ba5fc3a487313856d1af4fe6b6c14e69fb45 Mon Sep 17 00:00:00 2001
From: ishaan-jaff <ishaanjaffer0324@gmail.com>
Date: Sat, 2 Mar 2024 12:02:06 -0800
Subject: [PATCH 06/17] (feat) log api_base in spend logs

---
 litellm/proxy/_types.py     | 1 +
 litellm/proxy/schema.prisma | 1 +
 litellm/proxy/utils.py      | 1 +
 schema.prisma               | 1 +
 4 files changed, 4 insertions(+)

diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py
index ce4de2d14..175f801da 100644
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@@ -542,6 +542,7 @@ class LiteLLM_SpendLogs(LiteLLMBase):
     request_id: str
     api_key: str
     model: Optional[str] = ""
+    api_base: Optional[str] = ""
     call_type: str
     spend: Optional[float] = 0.0
     total_tokens: Optional[int] = 0
diff --git a/litellm/proxy/schema.prisma b/litellm/proxy/schema.prisma
index 6a9b72728..9c5c00d13 100644
--- a/litellm/proxy/schema.prisma
+++ b/litellm/proxy/schema.prisma
@@ -92,6 +92,7 @@ model LiteLLM_SpendLogs {
   startTime           DateTime // Assuming start_time is a DateTime field
   endTime             DateTime // Assuming end_time is a DateTime field
   model               String   @default("")
+  api_base            String   @default("")
   user                String   @default("")
   metadata            Json     @default("{}")
   cache_hit           String   @default("")
diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index 948e686dd..c67448c86 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -1600,6 +1600,7 @@ def get_logging_payload(kwargs, response_obj, start_time, end_time):
         "completion_tokens": usage.get("completion_tokens", 0),
         "request_tags": metadata.get("tags", []),
         "end_user": kwargs.get("user", ""),
+        "api_base": litellm_params.get("api_base", ""),
     }
 
     verbose_proxy_logger.debug(f"SpendTable: created payload - payload: {payload}\n\n")
diff --git a/schema.prisma b/schema.prisma
index d08295e10..06964e4a6 100644
--- a/schema.prisma
+++ b/schema.prisma
@@ -92,6 +92,7 @@ model LiteLLM_SpendLogs {
   startTime           DateTime // Assuming start_time is a DateTime field
   endTime             DateTime // Assuming end_time is a DateTime field
   model               String   @default("")
+  api_base            String   @default("")
   user                String   @default("")
   metadata            Json     @default("{}")
   cache_hit           String   @default("")

From e60ae9388ea9ec9ea0ca481b16aa4aa886ef72c8 Mon Sep 17 00:00:00 2001
From: ishaan-jaff <ishaanjaffer0324@gmail.com>
Date: Sat, 2 Mar 2024 12:08:34 -0800
Subject: [PATCH 07/17] =?UTF-8?q?bump:=20version=201.28.8=20=E2=86=92=201.?=
 =?UTF-8?q?28.9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 0dbe465c3..65e8645fc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "1.28.8"
+version = "1.28.9"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT"
@@ -74,7 +74,7 @@ requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.commitizen]
-version = "1.28.8"
+version = "1.28.9"
 version_files = [
     "pyproject.toml:^version"
 ]

From 8a249b5fe91255458f22c325f9217a582280976c Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Sat, 2 Mar 2024 12:18:28 -0800
Subject: [PATCH 08/17] build(schema.prisma): update schema with new orgs and
 budget table

---
 litellm/proxy/schema.prisma | 8 +++++---
 schema.prisma               | 8 +++++---
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/litellm/proxy/schema.prisma b/litellm/proxy/schema.prisma
index 2744ae5c0..7eb59ee48 100644
--- a/litellm/proxy/schema.prisma
+++ b/litellm/proxy/schema.prisma
@@ -21,11 +21,12 @@ model LiteLLM_BudgetTable {
   created_by String
   updated_at    DateTime               @default(now()) @updatedAt @map("updated_at")
   updated_by String
+  organization LiteLLM_OrganizationTable[] // multiple orgs can have the same budget
 }
 
 model LiteLLM_OrganizationTable {
 		organization_id String @id @default(uuid())
-    organization_alias  String? 
+    organization_alias  String
     budget_id String
     metadata  Json  @default("{}")
     models     String[]
@@ -35,7 +36,8 @@ model LiteLLM_OrganizationTable {
     created_by String
     updated_at    DateTime               @default(now()) @updatedAt @map("updated_at")
     updated_by String
-    litellm_budget_table LiteLLM_BudgetTable   @relation(fields: [budget_id], references: [budget_id])
+    litellm_budget_table LiteLLM_BudgetTable?   @relation(fields: [budget_id], references: [budget_id])
+    teams LiteLLM_TeamTable[] 
 }
 
 // Assign prod keys to groups, not individuals 
@@ -59,7 +61,7 @@ model LiteLLM_TeamTable {
     updated_at    DateTime               @default(now()) @updatedAt @map("updated_at")
     model_spend      Json @default("{}")
     model_max_budget Json @default("{}")
-    litellm_organization_table LiteLLM_OrganizationTable   @relation(fields: [organization_id], references: [organization_id])
+    litellm_organization_table LiteLLM_OrganizationTable?   @relation(fields: [organization_id], references: [organization_id])
 }
 
 // Track spend, rate limit, budget Users
diff --git a/schema.prisma b/schema.prisma
index 2744ae5c0..7eb59ee48 100644
--- a/schema.prisma
+++ b/schema.prisma
@@ -21,11 +21,12 @@ model LiteLLM_BudgetTable {
   created_by String
   updated_at    DateTime               @default(now()) @updatedAt @map("updated_at")
   updated_by String
+  organization LiteLLM_OrganizationTable[] // multiple orgs can have the same budget
 }
 
 model LiteLLM_OrganizationTable {
 		organization_id String @id @default(uuid())
-    organization_alias  String? 
+    organization_alias  String
     budget_id String
     metadata  Json  @default("{}")
     models     String[]
@@ -35,7 +36,8 @@ model LiteLLM_OrganizationTable {
     created_by String
     updated_at    DateTime               @default(now()) @updatedAt @map("updated_at")
     updated_by String
-    litellm_budget_table LiteLLM_BudgetTable   @relation(fields: [budget_id], references: [budget_id])
+    litellm_budget_table LiteLLM_BudgetTable?   @relation(fields: [budget_id], references: [budget_id])
+    teams LiteLLM_TeamTable[] 
 }
 
 // Assign prod keys to groups, not individuals 
@@ -59,7 +61,7 @@ model LiteLLM_TeamTable {
     updated_at    DateTime               @default(now()) @updatedAt @map("updated_at")
     model_spend      Json @default("{}")
     model_max_budget Json @default("{}")
-    litellm_organization_table LiteLLM_OrganizationTable   @relation(fields: [organization_id], references: [organization_id])
+    litellm_organization_table LiteLLM_OrganizationTable?   @relation(fields: [organization_id], references: [organization_id])
 }
 
 // Track spend, rate limit, budget Users

From b042b5dc3b94564e4291a31c2954c0014975c04f Mon Sep 17 00:00:00 2001
From: ishaan-jaff <ishaanjaffer0324@gmail.com>
Date: Sat, 2 Mar 2024 12:25:40 -0800
Subject: [PATCH 09/17] (feat) set soft_budgets on keys

---
 litellm/proxy/schema.prisma | 2 ++
 schema.prisma               | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/litellm/proxy/schema.prisma b/litellm/proxy/schema.prisma
index 7eb59ee48..1fe55f24e 100644
--- a/litellm/proxy/schema.prisma
+++ b/litellm/proxy/schema.prisma
@@ -11,6 +11,7 @@ generator client {
 model LiteLLM_BudgetTable {
   budget_id String @id @default(uuid())
   max_budget Float?
+  soft_budget Float?
   max_parallel_requests Int?
   tpm_limit     BigInt?
   rpm_limit     BigInt?
@@ -107,6 +108,7 @@ model LiteLLM_VerificationToken {
     allowed_cache_controls String[] @default([])
     model_spend      Json @default("{}")
     model_max_budget Json @default("{}")
+    budget_id String?
 }
 
 // store proxy config.yaml
diff --git a/schema.prisma b/schema.prisma
index 7eb59ee48..1fe55f24e 100644
--- a/schema.prisma
+++ b/schema.prisma
@@ -11,6 +11,7 @@ generator client {
 model LiteLLM_BudgetTable {
   budget_id String @id @default(uuid())
   max_budget Float?
+  soft_budget Float?
   max_parallel_requests Int?
   tpm_limit     BigInt?
   rpm_limit     BigInt?
@@ -107,6 +108,7 @@ model LiteLLM_VerificationToken {
     allowed_cache_controls String[] @default([])
     model_spend      Json @default("{}")
     model_max_budget Json @default("{}")
+    budget_id String?
 }
 
 // store proxy config.yaml

From eb4f90115d42207290ed4b3beba8447ea1168a69 Mon Sep 17 00:00:00 2001
From: ishaan-jaff <ishaanjaffer0324@gmail.com>
Date: Sat, 2 Mar 2024 12:52:09 -0800
Subject: [PATCH 10/17] (feat) create soft budget

---
 litellm/proxy/_types.py       | 15 +++++++++++++++
 litellm/proxy/proxy_server.py | 14 ++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py
index 175f801da..e4b28001d 100644
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@@ -324,6 +324,21 @@ class TeamRequest(LiteLLMBase):
     teams: List[str]
 
 
+class LiteLLM_BudgetTable(LiteLLMBase):
+    """Represents user-controllable params for a LiteLLM_BudgetTable record"""
+
+    max_budget: Optional[float] = None
+    soft_budget: Optional[float] = None
+    max_parallel_requests: Optional[int] = None
+    tpm_limit: Optional[int] = None
+    rpm_limit: Optional[int] = None
+    model_max_budget: dict
+    budget_duration: Optional[str] = None
+    budget_reset_at: Optional[datetime] = None
+    created_by: str
+    updated_by: str
+
+
 class KeyManagementSystem(enum.Enum):
     GOOGLE_KMS = "google_kms"
     AZURE_KEY_VAULT = "azure_key_vault"
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 17db8c3ab..37b28baea 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -1869,6 +1869,19 @@ async def generate_key_helper_fn(
     rpm_limit = rpm_limit
     allowed_cache_controls = allowed_cache_controls
 
+    # TODO: @ishaan-jaff: Migrate all budget tracking to use LiteLLM_BudgetTable
+    if prisma_client is not None:
+        # create the Budget Row for the LiteLLM Verification Token
+        budget_row = LiteLLM_BudgetTable(
+            soft_budget=50,
+            model_max_budget=model_max_budget or {},
+            created_by=user_id,
+            updated_by=user_id,
+        )
+        new_budget = prisma_client.jsonify_object(budget_row.json(exclude_none=True))
+        _budget = await prisma_client.db.litellm_budgettable.create(data={**new_budget})  # type: ignore
+        _budget_id = getattr(_budget, "id", None)
+
     try:
         # Create a new verification token (you may want to enhance this logic based on your needs)
         user_data = {
@@ -1906,6 +1919,7 @@ async def generate_key_helper_fn(
             "allowed_cache_controls": allowed_cache_controls,
             "permissions": permissions_json,
             "model_max_budget": model_max_budget_json,
+            "budget_id": _budget_id,
         }
         if (
             general_settings.get("allow_user_auth", False) == True

From fd9f8b7010fbd249d8b8587b66478011e2d29146 Mon Sep 17 00:00:00 2001
From: ishaan-jaff <ishaanjaffer0324@gmail.com>
Date: Sat, 2 Mar 2024 13:05:00 -0800
Subject: [PATCH 11/17] (docs) setting soft budgets

---
 docs/my-website/docs/proxy/virtual_keys.md | 2 ++
 litellm/__init__.py                        | 3 +++
 litellm/proxy/proxy_server.py              | 7 ++++++-
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/docs/my-website/docs/proxy/virtual_keys.md b/docs/my-website/docs/proxy/virtual_keys.md
index e350ce9d5..70fd6e6a8 100644
--- a/docs/my-website/docs/proxy/virtual_keys.md
+++ b/docs/my-website/docs/proxy/virtual_keys.md
@@ -79,6 +79,7 @@ curl 'http://0.0.0.0:8000/key/generate' \
   "metadata": {"user": "ishaan@berri.ai"},
   "team_id": "core-infra",
   "max_budget": 10,
+  "soft_budget": 5
 }'
 ```
 
@@ -93,6 +94,7 @@ Request Params:
 - `config`: *Optional[dict]* - any key-specific configs, overrides config in config.yaml
 - `spend`: *Optional[int]* - Amount spent by key. Default is 0. Will be updated by proxy whenever key is used. https://docs.litellm.ai/docs/proxy/virtual_keys#managing-auth---tracking-spend
 - `max_budget`: *Optional[float]* - Specify max budget for a given key.
+- `soft_budget`: *Optional[float]* - Specify a soft budget limit for a given key. Alerts are sent when the key reaches its soft budget.
 - `model_max_budget`: *Optional[dict[str, float]]* - Specify max budget for each model, `model_max_budget={"gpt4": 0.5, "gpt-5": 0.01}`
 - `max_parallel_requests`: *Optional[int]* - Rate limit a user based on the number of parallel requests. Raises 429 error, if user's parallel requests > x.
 - `metadata`: *Optional[dict]* - Metadata for key, store information for key. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }
diff --git a/litellm/__init__.py b/litellm/__init__.py
index cd639ddb9..f218fe036 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -79,6 +79,9 @@ max_budget: float = 0.0  # set the max budget across all providers
 budget_duration: Optional[str] = (
     None  # proxy only - resets budget after fixed duration. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d").
 )
+default_soft_budget: float = (
+    50.0  # by default all litellm proxy keys have a soft budget of 50.0
+)
 _openai_finish_reasons = ["stop", "length", "function_call", "content_filter", "null"]
 _openai_completion_params = [
     "functions",
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 37b28baea..dcd4283ba 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -1810,6 +1810,9 @@ async def generate_key_helper_fn(
     spend: float,
     key_max_budget: Optional[float] = None,  # key_max_budget is used to Budget Per key
     key_budget_duration: Optional[str] = None,
+    key_soft_budget: Optional[
+        float
+    ] = None,  # key_soft_budget is the per-key soft budget (alerting threshold)
     max_budget: Optional[float] = None,  # max_budget is used to Budget Per user
     budget_duration: Optional[str] = None,  # max_budget is used to Budget Per user
     token: Optional[str] = None,
@@ -1873,7 +1876,7 @@ async def generate_key_helper_fn(
     if prisma_client is not None:
         # create the Budget Row for the LiteLLM Verification Token
         budget_row = LiteLLM_BudgetTable(
-            soft_budget=50,
+            soft_budget=key_soft_budget or litellm.default_soft_budget,
             model_max_budget=model_max_budget or {},
             created_by=user_id,
             updated_by=user_id,
@@ -3347,6 +3350,8 @@ async def generate_key_fn(
         # if we get max_budget passed to /key/generate, then use it as key_max_budget. Since generate_key_helper_fn is used to make new users
         if "max_budget" in data_json:
             data_json["key_max_budget"] = data_json.pop("max_budget", None)
+        if "soft_budget" in data_json:
+            data_json["key_soft_budget"] = data_json.pop("soft_budget", None)
 
         if "budget_duration" in data_json:
             data_json["key_budget_duration"] = data_json.pop("budget_duration", None)

From 1bb8263c922bdb133c13db2ece74b2d783900fbe Mon Sep 17 00:00:00 2001
From: ishaan-jaff <ishaanjaffer0324@gmail.com>
Date: Sat, 2 Mar 2024 14:43:01 -0800
Subject: [PATCH 12/17] (feat) set soft_budget with /key/generate

---
 litellm/proxy/_types.py       | 1 +
 litellm/proxy/proxy_server.py | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py
index e4b28001d..6196f18a2 100644
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@@ -151,6 +151,7 @@ class GenerateRequestBase(LiteLLMBase):
     rpm_limit: Optional[int] = None
     budget_duration: Optional[str] = None
     allowed_cache_controls: Optional[list] = []
+    soft_budget: Optional[float] = None
 
 
 class GenerateKeyRequest(GenerateRequestBase):
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index dcd4283ba..482397b86 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -1995,6 +1995,9 @@ async def generate_key_helper_fn(
     except Exception as e:
         traceback.print_exc()
         raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR)
+
+    # Add budget related info in key_data - this ensures it's returned
+    key_data["soft_budget"] = key_soft_budget
     return key_data
 
 

From 163c8f1c5a137e9167c511ef228261cef6b14cc7 Mon Sep 17 00:00:00 2001
From: ishaan-jaff <ishaanjaffer0324@gmail.com>
Date: Sat, 2 Mar 2024 14:58:02 -0800
Subject: [PATCH 13/17] (feat) set soft budget limits on ui

---
 ui/litellm-dashboard/src/components/create_key_button.tsx     | 3 +++
 ui/litellm-dashboard/src/components/view_key_spend_report.tsx | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/ui/litellm-dashboard/src/components/create_key_button.tsx b/ui/litellm-dashboard/src/components/create_key_button.tsx
index 3dddaf8b7..b6cec81e4 100644
--- a/ui/litellm-dashboard/src/components/create_key_button.tsx
+++ b/ui/litellm-dashboard/src/components/create_key_button.tsx
@@ -108,6 +108,9 @@ const CreateKey: React.FC<CreateKeyProps> = ({
                   ))}
                 </Select>
               </Form.Item>
+              <Form.Item label="Soft Budget (USD) Monthly" name="soft_budget">
+                <InputNumber step={0.01} precision={2} defaultValue={50.00} width={200} />
+              </Form.Item>
               <Form.Item label="Max Budget (USD)" name="max_budget">
                 <InputNumber step={0.01} precision={2} width={200} />
               </Form.Item>
diff --git a/ui/litellm-dashboard/src/components/view_key_spend_report.tsx b/ui/litellm-dashboard/src/components/view_key_spend_report.tsx
index 0788af209..f0916ec01 100644
--- a/ui/litellm-dashboard/src/components/view_key_spend_report.tsx
+++ b/ui/litellm-dashboard/src/components/view_key_spend_report.tsx
@@ -105,7 +105,7 @@ const ViewKeySpendReport: React.FC<ViewKeySpendReportProps> = ({
 
   return (
     <div>
-      <Button size = "xs" onClick={showModal}>
+      <Button size = "xs" onClick={showModal} variant="secondary">
         View Spend Report
       </Button>
       <Modal

From 709c2518e7eb81c488e382fc2cb4fa1134724946 Mon Sep 17 00:00:00 2001
From: ishaan-jaff <ishaanjaffer0324@gmail.com>
Date: Sat, 2 Mar 2024 15:31:59 -0800
Subject: [PATCH 14/17] (feat) set soft limits per key

---
 .../src/components/create_key_button.tsx      | 50 ++++++++++++-------
 1 file changed, 32 insertions(+), 18 deletions(-)

diff --git a/ui/litellm-dashboard/src/components/create_key_button.tsx b/ui/litellm-dashboard/src/components/create_key_button.tsx
index b6cec81e4..e76e2d0c2 100644
--- a/ui/litellm-dashboard/src/components/create_key_button.tsx
+++ b/ui/litellm-dashboard/src/components/create_key_button.tsx
@@ -2,7 +2,7 @@
 
 import React, { useState, useEffect, useRef } from "react";
 import { Button, TextInput, Grid, Col } from "@tremor/react";
-import { Card, Metric, Text } from "@tremor/react";
+import { Card, Metric, Text, Title, Subtitle } from "@tremor/react";
 import {
   Button as Button2,
   Modal,
@@ -38,6 +38,7 @@ const CreateKey: React.FC<CreateKeyProps> = ({
   const [form] = Form.useForm();
   const [isModalVisible, setIsModalVisible] = useState(false);
   const [apiKey, setApiKey] = useState(null);
+  const [softBudget, setSoftBudget] = useState(null);
   const handleOk = () => {
     setIsModalVisible(false);
     form.resetFields();
@@ -54,8 +55,11 @@ const CreateKey: React.FC<CreateKeyProps> = ({
       message.info("Making API Call");
       setIsModalVisible(true);
       const response = await keyCreateCall(accessToken, userID, formValues);
+
+      console.log("key create Response:", response);
       setData((prevData) => (prevData ? [...prevData, response] : [response])); // Check if prevData is null
       setApiKey(response["key"]);
+      setSoftBudget(response["soft_budget"]);
       message.success("API Key Created");
       form.resetFields();
       localStorage.removeItem("userData" + userID);
@@ -108,7 +112,7 @@ const CreateKey: React.FC<CreateKeyProps> = ({
                   ))}
                 </Select>
               </Form.Item>
-              <Form.Item label="Soft Budget (USD) Monthly" name="soft_budget">
+              <Form.Item label="Soft Budget (USD) Monthly" name="soft_budget" initialValue={50.00}>
                 <InputNumber step={0.01} precision={2} defaultValue={50.00} width={200} />
               </Form.Item>
               <Form.Item label="Max Budget (USD)" name="max_budget">
@@ -157,28 +161,38 @@ const CreateKey: React.FC<CreateKeyProps> = ({
       </Modal>
       {apiKey && (
         <Modal
-          title="Save your key"
           visible={isModalVisible}
           onOk={handleOk}
           onCancel={handleCancel}
           footer={null}
         >
           <Grid numItems={1} className="gap-2 w-full">
-            <Col numColSpan={1}>
-              <p>
-                Please save this secret key somewhere safe and accessible. For
-                security reasons, <b>you will not be able to view it again</b>{" "}
-                through your LiteLLM account. If you lose this secret key, you
-                will need to generate a new one.
-              </p>
-            </Col>
-            <Col numColSpan={1}>
-              {apiKey != null ? (
-                <Text>API Key: {apiKey}</Text>
-              ) : (
-                <Text>Key being created, this might take 30s</Text>
-              )}
-            </Col>
+            <Card>
+              <Title>Save your Key</Title>
+              <Col numColSpan={1}>
+                <p>
+                  Please save this secret key somewhere safe and accessible. For
+                  security reasons, <b>you will not be able to view it again</b>{" "}
+                  through your LiteLLM account. If you lose this secret key, you
+                  will need to generate a new one.
+                </p>
+              </Col>
+              <Col numColSpan={1}>
+                {apiKey != null ? (
+                  <div>
+                    <Text>API Key: {apiKey}</Text>
+                    <Title className="mt-6">Budgets</Title>
+                      <Text>Soft Limit Budget: ${softBudget}</Text>
+                      <Button className="mt-3">
+                        Test Alert
+                      </Button>
+
+                  </div>
+                ) : (
+                  <Text>Key being created, this might take 30s</Text>
+                )}
+              </Col>
+            </Card>
           </Grid>
         </Modal>
       )}

From 1ef19fbc9c690363de689203a8513a98f8cdff00 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Sat, 2 Mar 2024 15:54:37 -0800
Subject: [PATCH 15/17] feat: enable user to test slack budget alerting when
 creating a key

---
 litellm/proxy/proxy_server.py | 37 +++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 482397b86..99bdb579c 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -787,6 +787,7 @@ async def user_api_key_auth(
                 "/global/spend/keys",
                 "/global/spend/models",
                 "/global/predict/spend/logs",
+                "/health/services",
             ]
             # check if the current route startswith any of the allowed routes
             if (
@@ -6479,6 +6480,42 @@ async def test_endpoint(request: Request):
     return {"route": request.url.path}
 
 
+@router.get(
+    "/health/services",
+    tags=["health"],
+    dependencies=[Depends(user_api_key_auth)],
+    include_in_schema=False,
+)
+async def health_services_endpoint(
+    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
+    service: Literal["slack_budget_alerts"] = fastapi.Query(
+        description="Specify the service being hit."
+    ),
+):
+    """
+    Hidden endpoint.
+
+    Used by the UI to let user check if slack alerting is working as expected.
+    """
+    global general_settings, proxy_logging_obj
+
+    if service is None:
+        raise HTTPException(
+            status_code=400, detail={"error": "Service must be specified."}
+        )
+
+    if service not in ["slack_budget_alerts"]:
+        raise HTTPException(
+            status_code=400,
+            detail={
+                "error": f"Service must be in list. Service={service}. List={['slack_budget_alerts']}"
+            },
+        )
+
+    if "slack" in general_settings.get("alerting", []):
+        await proxy_logging_obj.alerting_handler(message="This is a test", level="Low")
+
+
 @router.get("/health", tags=["health"], dependencies=[Depends(user_api_key_auth)])
 async def health_endpoint(
     user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),

From cbd085125700c4ba3b83985c1fe505c48639d01f Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Sat, 2 Mar 2024 15:56:42 -0800
Subject: [PATCH 16/17] fix(proxy_server.py): raise 422 error if no slack
 connection setup when calling `/health/services`

---
 litellm/proxy/proxy_server.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 99bdb579c..ffae102a0 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -6514,6 +6514,11 @@ async def health_services_endpoint(
 
     if "slack" in general_settings.get("alerting", []):
         await proxy_logging_obj.alerting_handler(message="This is a test", level="Low")
+    else:
+        raise HTTPException(
+            status_code=422,
+            detail={"error": "No slack connection setup. Unable to test this."},
+        )
 
 
 @router.get("/health", tags=["health"], dependencies=[Depends(user_api_key_auth)])

From b30cbd0d55d489d91bc5a54513b40e0488fd7bc4 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Sat, 2 Mar 2024 16:04:36 -0800
Subject: [PATCH 17/17] refactor(proxy_server.py): format the message for slack
 budget alerts

---
 litellm/proxy/proxy_server.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index ffae102a0..eca5fb30a 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -6512,8 +6512,11 @@ async def health_services_endpoint(
             },
         )
 
+    test_message = f"""\n🚨 `ProjectedLimitExceededError` 💸\n\n`Key Alias:` my-secret-project \n`Expected Day of Error`: 28th March \n`Current Spend`: 100 \n`Projected Spend at end of month`: 1000 \n
+    """
+
     if "slack" in general_settings.get("alerting", []):
-        await proxy_logging_obj.alerting_handler(message="This is a test", level="Low")
+        await proxy_logging_obj.alerting_handler(message=test_message, level="Low")
     else:
         raise HTTPException(
             status_code=422,