Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 11:14:04 +00:00)

Merge branch 'BerriAI:main' into main

Commit: 1b644fa0fc

142 changed files with 19096 additions and 13214 deletions
.github/workflows/ghcr_deploy.yml (vendored, 22 changes)
@@ -25,6 +25,11 @@ jobs:
    if: github.repository == 'BerriAI/litellm'
    runs-on: ubuntu-latest
    steps:
+     -
+       name: Checkout
+       uses: actions/checkout@v4
+       with:
+         ref: ${{ github.event.inputs.commit_hash }}
      -
        name: Set up QEMU
        uses: docker/setup-qemu-action@v3

@@ -41,12 +46,14 @@ jobs:
        name: Build and push
        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
          tags: litellm/litellm:${{ github.event.inputs.tag || 'latest' }}
      -
        name: Build and push litellm-database image
        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
          file: Dockerfile.database
          tags: litellm/litellm-database:${{ github.event.inputs.tag || 'latest' }}

@@ -54,6 +61,7 @@ jobs:
        name: Build and push litellm-spend-logs image
        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
          file: ./litellm-js/spend-logs/Dockerfile
          tags: litellm/litellm-spend_logs:${{ github.event.inputs.tag || 'latest' }}

@@ -68,6 +76,8 @@ jobs:
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
+       with:
+         ref: ${{ github.event.inputs.commit_hash }}
      # Uses the `docker/login-action` action to log in to the Container registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here.
      - name: Log in to the Container registry
        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1

@@ -92,7 +102,7 @@ jobs:
      - name: Build and push Docker image
        uses: docker/build-push-action@4976231911ebf5f32aad765192d35f942aa48cb8
        with:
-         context: https://github.com/BerriAI/litellm.git#${{ github.event.inputs.commit_hash}}
+         context: .
          push: true
          tags: ${{ steps.meta.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }}, ${{ steps.meta.outputs.tags }}-${{ github.event.inputs.release_type }} # if a tag is provided, use that, otherwise use the release tag, and if neither is available, use 'latest'
          labels: ${{ steps.meta.outputs.labels }}

@@ -106,6 +116,8 @@ jobs:
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
+       with:
+         ref: ${{ github.event.inputs.commit_hash }}

      - name: Log in to the Container registry
        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1

@@ -128,7 +140,7 @@ jobs:
      - name: Build and push Database Docker image
        uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
        with:
-         context: https://github.com/BerriAI/litellm.git#${{ github.event.inputs.commit_hash}}
+         context: .
          file: Dockerfile.database
          push: true
          tags: ${{ steps.meta-database.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }}, ${{ steps.meta-database.outputs.tags }}-${{ github.event.inputs.release_type }}

@@ -143,6 +155,8 @@ jobs:
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
+       with:
+         ref: ${{ github.event.inputs.commit_hash }}

      - name: Log in to the Container registry
        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1

@@ -165,7 +179,7 @@ jobs:
      - name: Build and push Database Docker image
        uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
        with:
-         context: https://github.com/BerriAI/litellm.git#${{ github.event.inputs.commit_hash}}
+         context: .
          file: ./litellm-js/spend-logs/Dockerfile
          push: true
          tags: ${{ steps.meta-spend-logs.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }}, ${{ steps.meta-spend-logs.outputs.tags }}-${{ github.event.inputs.release_type }}

@@ -176,6 +190,8 @@ jobs:
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
+       with:
+         ref: ${{ github.event.inputs.commit_hash }}

      - name: Log in to the Container registry
        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
@@ -1,4 +1,19 @@
repos:
  - repo: local
    hooks:
      # - id: mypy
      #   name: mypy
      #   entry: python3 -m mypy --ignore-missing-imports
      #   language: system
      #   types: [python]
      #   files: ^litellm/
      - id: isort
        name: isort
        entry: isort
        language: system
        types: [python]
        files: litellm/.*\.py
        exclude: ^litellm/__init__.py$
  - repo: https://github.com/psf/black
    rev: 24.2.0
    hooks:

@@ -16,11 +31,10 @@ repos:
        name: Check if files match
        entry: python3 ci_cd/check_files_match.py
        language: system
  - repo: local
    hooks:
      - id: mypy
        name: mypy
        entry: python3 -m mypy --ignore-missing-imports
        language: system
        types: [python]
        files: ^litellm/
      # - id: check-file-length
      #   name: Check file length
      #   entry: python check_file_length.py
      #   args: ["10000"] # set your desired maximum number of lines
      #   language: python
      #   files: litellm/.*\.py
      #   exclude: ^litellm/tests/
check_file_length.py (new file, 28 lines)
@@ -0,0 +1,28 @@
import sys


def check_file_length(max_lines, filenames):
    bad_files = []
    for filename in filenames:
        with open(filename, "r") as file:
            lines = file.readlines()
            if len(lines) > max_lines:
                bad_files.append((filename, len(lines)))
    return bad_files


if __name__ == "__main__":
    max_lines = int(sys.argv[1])
    filenames = sys.argv[2:]

    bad_files = check_file_length(max_lines, filenames)
    if bad_files:
        bad_files.sort(
            key=lambda x: x[1], reverse=True
        )  # Sort files by length in descending order
        for filename, length in bad_files:
            print(f"{filename}: {length} lines")

        sys.exit(1)
    else:
        sys.exit(0)
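For reference, a minimal sketch of exercising the new helper directly, rather than through the (currently commented-out) pre-commit hook; the file names below are placeholders and the script is assumed to be importable from the repo root:

```python
from check_file_length import check_file_length

# same check the hook would run as `python check_file_length.py 10000 <files>`
offenders = check_file_length(10000, ["litellm/utils.py", "litellm/main.py"])
for path, n_lines in offenders:
    print(f"{path} has {n_lines} lines (limit: 10000)")
```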
@@ -162,7 +162,7 @@ def completion(

- `function`: *object* - Required.

- `tool_choice`: *string or object (optional)* - Controls which (if any) function is called by the model. none means the model will not call a function and instead generates a message. auto means the model can pick between generating a message or calling a function. Specifying a particular function via {"type: "function", "function": {"name": "my_function"}} forces the model to call that function.
- `tool_choice`: *string or object (optional)* - Controls which (if any) function is called by the model. none means the model will not call a function and instead generates a message. auto means the model can pick between generating a message or calling a function. Specifying a particular function via `{"type: "function", "function": {"name": "my_function"}}` forces the model to call that function.

- `none` is the default when no functions are present. `auto` is the default if functions are present.
@@ -1,90 +0,0 @@
import Image from '@theme/IdealImage';
import QueryParamReader from '../../src/components/queryParamReader.js'

# [Beta] Monitor Logs in Production

:::note

This is in beta. Expect frequent updates, as we improve based on your feedback.

:::

LiteLLM provides an integration to let you monitor logs in production.

👉 Jump to our sample LiteLLM Dashboard: https://admin.litellm.ai/

<Image img={require('../../img/alt_dashboard.png')} alt="Dashboard" />

## Debug your first logs
<a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/liteLLM_OpenAI.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

### 1. Get your LiteLLM Token

Go to [admin.litellm.ai](https://admin.litellm.ai/) and copy the code snippet with your unique token

<Image img={require('../../img/hosted_debugger_usage_page.png')} alt="Usage" />

### 2. Set up your environment

**Add it to your .env**

```python
import os

os.environ["LITELLM_TOKEN"] = "e24c4c06-d027-4c30-9e78-18bc3a50aebb" # replace with your unique token
```

**Turn on LiteLLM Client**

```python
import litellm
litellm.client = True
```

### 3. Make a normal `completion()` call

```python
import litellm
from litellm import completion
import os

# set env variables
os.environ["LITELLM_TOKEN"] = "e24c4c06-d027-4c30-9e78-18bc3a50aebb" # replace with your unique token
os.environ["OPENAI_API_KEY"] = "openai key"

litellm.use_client = True # enable logging dashboard
messages = [{ "content": "Hello, how are you?","role": "user"}]

# openai call
response = completion(model="gpt-3.5-turbo", messages=messages)
```

Your `completion()` call will print a link to your session dashboard (https://admin.litellm.ai/<your_unique_token>)

In the above case it would be: [`admin.litellm.ai/e24c4c06-d027-4c30-9e78-18bc3a50aebb`](https://admin.litellm.ai/e24c4c06-d027-4c30-9e78-18bc3a50aebb)

Click on your personal dashboard link. Here's how you can find it 👇

<Image img={require('../../img/dash_output.png')} alt="Dashboard" />

[👋 Tell us if you need better privacy controls](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version?month=2023-08)

### 4. Review request log

Oh! Looks like our request was made successfully. Let's click on it and see exactly what got sent to the LLM provider.

Ah! So we can see that this request was made to **Baseten** (see litellm_params > custom_llm_provider) for a model with ID - **7qQNLDB** (see model). The message sent was - `"Hey, how's it going?"` and the response received was - `"As an AI language model, I don't have feelings or emotions, but I can assist you with your queries. How can I assist you today?"`

<Image img={require('../../img/dashboard_log.png')} alt="Dashboard Log Row" />

:::info

🎉 Congratulations! You've successfully debugged your first log!

:::
@@ -1,3 +1,5 @@
+import Image from '@theme/IdealImage';
+
# Promptlayer Tutorial

Promptlayer is a platform for prompt engineers. Log OpenAI requests. Search usage history. Track performance. Visually manage prompt templates.
@@ -1,3 +1,6 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
# OpenAI (Text Completion)

LiteLLM supports OpenAI text completion models
@@ -208,7 +208,7 @@ print(response)

Instead of using the `custom_llm_provider` arg to specify which provider you're using (e.g. together ai), you can just pass the provider name as part of the model name, and LiteLLM will parse it out.

-Expected format: <custom_llm_provider>/<model_name>
+Expected format: `<custom_llm_provider>/<model_name>`

e.g. completion(model="together_ai/togethercomputer/Llama-2-7B-32K-Instruct", ...)
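To illustrate the provider-prefix routing described in this hunk, a minimal sketch (the environment variable name follows LiteLLM's Together AI docs; the key value is a placeholder):

```python
import os
from litellm import completion

os.environ["TOGETHERAI_API_KEY"] = "your-together-ai-key"  # placeholder

# the "together_ai/" prefix tells LiteLLM which provider to route to,
# so no custom_llm_provider arg is needed
response = completion(
    model="together_ai/togethercomputer/Llama-2-7B-32K-Instruct",
    messages=[{"role": "user", "content": "Hello!"}],
)
print(response)
```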
@@ -32,41 +32,33 @@ Get a slack webhook url from https://api.slack.com/messaging/webhooks

You can also use Discord Webhooks, see [here](#using-discord-webhooks)

### Step 2: Update config.yaml

- Set `SLACK_WEBHOOK_URL` in your proxy env to enable Slack alerts.
- Just for testing purposes, let's save a bad key to our proxy.

Set `SLACK_WEBHOOK_URL` in your proxy env to enable Slack alerts.

```bash
export SLACK_WEBHOOK_URL="https://hooks.slack.com/services/<>/<>/<>"
```

### Step 2: Setup Proxy

```yaml
model_list:
    model_name: "azure-model"
    litellm_params:
        model: "azure/gpt-35-turbo"
        api_key: "my-bad-key" # 👈 bad key

general_settings:
    alerting: ["slack"]
    alerting_threshold: 300 # sends alerts if requests hang for 5min+ and responses take 5min+

environment_variables:
    SLACK_WEBHOOK_URL: "https://hooks.slack.com/services/<>/<>/<>"
    SLACK_DAILY_REPORT_FREQUENCY: "86400" # 24 hours; Optional: defaults to 12 hours
```

### Step 3: Start proxy

```bash
$ litellm --config /path/to/config.yaml
```

## Testing Alerting is Setup Correctly

Make a GET request to `/health/services`, expect to see a test slack alert in your provided webhook slack channel

### Step 3: Test it!

```shell
curl -X GET 'http://localhost:4000/health/services?service=slack' \
-H 'Authorization: Bearer sk-1234'
```

```bash
curl -X GET 'http://0.0.0.0:4000/health/services?service=slack' \
-H 'Authorization: Bearer sk-1234'
```

## Advanced - Redacting Messages from Alerts

@@ -84,7 +76,34 @@ litellm_settings:
```

## Advanced - Add Metadata to alerts

Add alerting metadata to proxy calls for debugging.

```python
import openai
client = openai.OpenAI(
    api_key="anything",
    base_url="http://0.0.0.0:4000"
)

# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages = [],
    extra_body={
        "metadata": {
            "alerting_metadata": {
                "hello": "world"
            }
        }
    }
)
```

**Expected Response**

<Image img={require('../../img/alerting_metadata.png')}/>

## Advanced - Opting into specific alert types
|
|||
#### 3. Connect to the EC2 Instance and deploy litellm on the EC2 container
|
||||
From the EC2 console, connect to the instance created by the stack (e.g., using SSH).
|
||||
|
||||
Run the following command, replacing <database_url> with the value you copied in step 2
|
||||
Run the following command, replacing `<database_url>` with the value you copied in step 2
|
||||
|
||||
```shell
|
||||
docker run --name litellm-proxy \
|
||||
|
|
|
@@ -5,6 +5,7 @@ import Image from '@theme/IdealImage';

Send an Email to your users when:
- A Proxy API Key is created for them
- Their API Key crosses its Budget
+- All Team members of a LiteLLM Team -> when the team crosses its budget

<Image img={require('../../img/email_notifs.png')} style={{ width: '500px' }}/>
@@ -1,3 +1,6 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
# Model Management
Add new models + Get model info without restarting proxy.
@@ -1,3 +1,5 @@
+import Image from '@theme/IdealImage';
+
# LiteLLM Proxy Performance

### Throughput - 30% Increase
@@ -409,6 +409,28 @@ print(response)

</Tabs>

### Content Policy Fallbacks

Fallback across providers (e.g. from Azure OpenAI to Anthropic) if you hit content policy violation errors.

```yaml
model_list:
  - model_name: gpt-3.5-turbo-small
    litellm_params:
      model: azure/chatgpt-v-2
      api_base: os.environ/AZURE_API_BASE
      api_key: os.environ/AZURE_API_KEY
      api_version: "2023-07-01-preview"

  - model_name: claude-opus
    litellm_params:
      model: claude-3-opus-20240229
      api_key: os.environ/ANTHROPIC_API_KEY

litellm_settings:
  content_policy_fallbacks: [{"gpt-3.5-turbo-small": ["claude-opus"]}]
```

### EU-Region Filtering (Pre-Call Checks)

**Before call is made** check if a call is within model context window with **`enable_pre_call_checks: true`**.
@@ -46,7 +46,7 @@ Possible values for `budget_duration`

| `budget_duration="1m"` | every 1 min |
| `budget_duration="1h"` | every 1 hour |
| `budget_duration="1d"` | every 1 day |
-| `budget_duration="1mo"` | start of every month |
+| `budget_duration="1mo"` | every 1 month |
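For example, creating a team whose budget resets at the `1mo` cadence above might look like the following sketch (the proxy URL and master key are placeholders):

```python
import requests

resp = requests.post(
    "http://0.0.0.0:4000/team/new",
    headers={"Authorization": "Bearer sk-1234", "Content-Type": "application/json"},
    json={"max_budget": 100, "budget_duration": "1mo"},  # budget resets every 1 month
)
print(resp.json())
```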
### 2. Create a key for the `team`
@@ -413,6 +413,52 @@ curl 'http://0.0.0.0:4000/key/generate' \

</TabItem>
</Tabs>

### Reset Budgets

Reset budgets across keys/internal users/teams/customers

`budget_duration`: Budget is reset at the end of specified duration. If not set, budget is never reset. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d").

<Tabs>
<TabItem value="users" label="Internal Users">

```bash
curl 'http://0.0.0.0:4000/user/new' \
--header 'Authorization: Bearer <your-master-key>' \
--header 'Content-Type: application/json' \
--data-raw '{
    "max_budget": 10,
    "budget_duration": "10s" # 👈 KEY CHANGE
}'
```
</TabItem>
<TabItem value="keys" label="Keys">

```bash
curl 'http://0.0.0.0:4000/key/generate' \
--header 'Authorization: Bearer <your-master-key>' \
--header 'Content-Type: application/json' \
--data-raw '{
    "max_budget": 10,
    "budget_duration": "10s" # 👈 KEY CHANGE
}'
```

</TabItem>
<TabItem value="teams" label="Teams">

```bash
curl 'http://0.0.0.0:4000/team/new' \
--header 'Authorization: Bearer <your-master-key>' \
--header 'Content-Type: application/json' \
--data-raw '{
    "max_budget": 10,
    "budget_duration": "10s" # 👈 KEY CHANGE
}'
```
</TabItem>
</Tabs>
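The curl calls above translate one-to-one into plain HTTP; a minimal Python sketch of the key variant (URL and master key are placeholders, and note the corrected `"10s"` quoting):

```python
import requests

resp = requests.post(
    "http://0.0.0.0:4000/key/generate",
    headers={"Authorization": "Bearer sk-1234", "Content-Type": "application/json"},
    json={"max_budget": 10, "budget_duration": "10s"},  # budget resets every 10 seconds
)
print(resp.json()["key"])
```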
## Set Rate Limits

You can set:
@@ -790,84 +790,204 @@ If the error is a context window exceeded error, fall back to a larger model group

Fallbacks are done in-order - ["gpt-3.5-turbo", "gpt-4", "gpt-4-32k"], will do 'gpt-3.5-turbo' first, then 'gpt-4', etc.

You can also set `default_fallbacks`, in case a specific model group is misconfigured / bad.

There are 3 types of fallbacks:
- `content_policy_fallbacks`: For litellm.ContentPolicyViolationError - LiteLLM maps content policy violation errors across providers [**See Code**](https://github.com/BerriAI/litellm/blob/89a43c872a1e3084519fb9de159bf52f5447c6c4/litellm/utils.py#L8495C27-L8495C54)
- `context_window_fallbacks`: For litellm.ContextWindowExceededErrors - LiteLLM maps context window error messages across providers [**See Code**](https://github.com/BerriAI/litellm/blob/89a43c872a1e3084519fb9de159bf52f5447c6c4/litellm/utils.py#L8469)
- `fallbacks`: For all remaining errors - e.g. litellm.RateLimitError

**Content Policy Violation Fallback**

Key change:

```python
content_policy_fallbacks=[{"claude-2": ["my-fallback-model"]}]
```

<Tabs>
<TabItem value="sdk" label="SDK">

Removed (the old combined fallback example):

```python
from litellm import Router

model_list = [
    { # list of model deployments
        "model_name": "azure/gpt-3.5-turbo", # openai model name
        "litellm_params": { # params for litellm completion/embedding call
            "model": "azure/chatgpt-v-2",
            "api_key": "bad-key",
            "api_version": os.getenv("AZURE_API_VERSION"),
            "api_base": os.getenv("AZURE_API_BASE")
        },
        "tpm": 240000,
        "rpm": 1800
    },
    { # list of model deployments
        "model_name": "azure/gpt-3.5-turbo-context-fallback", # openai model name
        "litellm_params": { # params for litellm completion/embedding call
            "model": "azure/chatgpt-v-2",
            "api_key": "bad-key",
            "api_version": os.getenv("AZURE_API_VERSION"),
            "api_base": os.getenv("AZURE_API_BASE")
        },
        "tpm": 240000,
        "rpm": 1800
    },
    {
        "model_name": "azure/gpt-3.5-turbo", # openai model name
        "litellm_params": { # params for litellm completion/embedding call
            "model": "azure/chatgpt-functioncalling",
            "api_key": "bad-key",
            "api_version": os.getenv("AZURE_API_VERSION"),
            "api_base": os.getenv("AZURE_API_BASE")
        },
        "tpm": 240000,
        "rpm": 1800
    },
    {
        "model_name": "gpt-3.5-turbo", # openai model name
        "litellm_params": { # params for litellm completion/embedding call
            "model": "gpt-3.5-turbo",
            "api_key": os.getenv("OPENAI_API_KEY"),
        },
        "tpm": 1000000,
        "rpm": 9000
    },
    {
        "model_name": "gpt-3.5-turbo-16k", # openai model name
        "litellm_params": { # params for litellm completion/embedding call
            "model": "gpt-3.5-turbo-16k",
            "api_key": os.getenv("OPENAI_API_KEY"),
        },
        "tpm": 1000000,
        "rpm": 9000
    }
]

router = Router(model_list=model_list,
                fallbacks=[{"azure/gpt-3.5-turbo": ["gpt-3.5-turbo"]}],
                default_fallbacks=["gpt-3.5-turbo-16k"],
                context_window_fallbacks=[{"azure/gpt-3.5-turbo-context-fallback": ["gpt-3.5-turbo-16k"]}, {"gpt-3.5-turbo": ["gpt-3.5-turbo-16k"]}],
                set_verbose=True)

user_message = "Hello, whats the weather in San Francisco??"
messages = [{"content": user_message, "role": "user"}]

# normal fallback call
response = router.completion(model="azure/gpt-3.5-turbo", messages=messages)

# context window fallback call
response = router.completion(model="azure/gpt-3.5-turbo-context-fallback", messages=messages)

print(f"response: {response}")
```

Added (the new mocked example):

```python
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "claude-2",
            "litellm_params": {
                "model": "claude-2",
                "api_key": "",
                "mock_response": Exception("content filtering policy"),
            },
        },
        {
            "model_name": "my-fallback-model",
            "litellm_params": {
                "model": "claude-2",
                "api_key": "",
                "mock_response": "This works!",
            },
        },
    ],
    content_policy_fallbacks=[{"claude-2": ["my-fallback-model"]}], # 👈 KEY CHANGE
    # fallbacks=[..], # [OPTIONAL]
    # context_window_fallbacks=[..], # [OPTIONAL]
)

response = router.completion(
    model="claude-2",
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">

In your proxy config.yaml just add this line 👇

```yaml
router_settings:
    content_policy_fallbacks=[{"claude-2": ["my-fallback-model"]}]
```

Start proxy

```bash
litellm --config /path/to/config.yaml

# RUNNING on http://0.0.0.0:4000
```

</TabItem>
</Tabs>

**Context Window Exceeded Fallback**

Key change:

```python
context_window_fallbacks=[{"claude-2": ["my-fallback-model"]}]
```

<Tabs>
<TabItem value="sdk" label="SDK">

```python
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "claude-2",
            "litellm_params": {
                "model": "claude-2",
                "api_key": "",
                "mock_response": Exception("prompt is too long"),
            },
        },
        {
            "model_name": "my-fallback-model",
            "litellm_params": {
                "model": "claude-2",
                "api_key": "",
                "mock_response": "This works!",
            },
        },
    ],
    context_window_fallbacks=[{"claude-2": ["my-fallback-model"]}], # 👈 KEY CHANGE
    # fallbacks=[..], # [OPTIONAL]
    # content_policy_fallbacks=[..], # [OPTIONAL]
)

response = router.completion(
    model="claude-2",
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">

In your proxy config.yaml just add this line 👇

```yaml
router_settings:
    context_window_fallbacks=[{"claude-2": ["my-fallback-model"]}]
```

Start proxy

```bash
litellm --config /path/to/config.yaml

# RUNNING on http://0.0.0.0:4000
```

</TabItem>
</Tabs>

**Regular Fallbacks**

Key change:

```python
fallbacks=[{"claude-2": ["my-fallback-model"]}]
```

<Tabs>
<TabItem value="sdk" label="SDK">

```python
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "claude-2",
            "litellm_params": {
                "model": "claude-2",
                "api_key": "",
                "mock_response": Exception("this is a rate limit error"),
            },
        },
        {
            "model_name": "my-fallback-model",
            "litellm_params": {
                "model": "claude-2",
                "api_key": "",
                "mock_response": "This works!",
            },
        },
    ],
    fallbacks=[{"claude-2": ["my-fallback-model"]}], # 👈 KEY CHANGE
    # context_window_fallbacks=[..], # [OPTIONAL]
    # content_policy_fallbacks=[..], # [OPTIONAL]
)

response = router.completion(
    model="claude-2",
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">

In your proxy config.yaml just add this line 👇

```yaml
router_settings:
    fallbacks=[{"claude-2": ["my-fallback-model"]}]
```

Start proxy

```bash
litellm --config /path/to/config.yaml

# RUNNING on http://0.0.0.0:4000
```

</TabItem>
</Tabs>

### Caching
@@ -23,9 +23,13 @@ https://api.together.xyz/playground/chat?model=togethercomputer%2Fllama-2-70b-chat

model_name = "together_ai/togethercomputer/llama-2-70b-chat"
response = completion(model=model_name, messages=messages)
print(response)
```

```
{'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant', 'content': "\n\nI'm not able to provide real-time weather information. However, I can suggest"}}], 'created': 1691629657.9288375, 'model': 'togethercomputer/llama-2-70b-chat', 'usage': {'prompt_tokens': 9, 'completion_tokens': 17, 'total_tokens': 26}}
```

LiteLLM handles the prompt formatting for Together AI's Llama2 models as well, converting your message to the
docs/my-website/img/alerting_metadata.png (new binary file, 207 KiB; not shown)
docs/my-website/package-lock.json (generated, 8607 changes; diff suppressed because it is too large)
@@ -14,17 +14,17 @@
    "write-heading-ids": "docusaurus write-heading-ids"
  },
  "dependencies": {
-   "@docusaurus/core": "2.4.1",
-   "@docusaurus/plugin-google-gtag": "^2.4.1",
-   "@docusaurus/plugin-ideal-image": "^2.4.1",
-   "@docusaurus/preset-classic": "2.4.1",
+   "@docusaurus/core": "3.1.1",
+   "@docusaurus/plugin-google-gtag": "3.1.1",
+   "@docusaurus/plugin-ideal-image": "3.1.1",
+   "@docusaurus/preset-classic": "3.1.1",
    "@mdx-js/react": "^1.6.22",
    "clsx": "^1.2.1",
    "docusaurus": "^1.14.7",
    "docusaurus-lunr-search": "^2.4.1",
    "prism-react-renderer": "^1.3.5",
-   "react": "^17.0.2",
-   "react-dom": "^17.0.2",
+   "react": "^18.1.0",
+   "react-dom": "^18.1.0",
    "sharp": "^0.32.6",
    "uuid": "^9.0.1"
  },
File diff suppressed because it is too large.
@@ -122,236 +122,6 @@ async def ui_get_spend_by_tags(
    return {"spend_per_tag": ui_tags}


async def view_spend_logs_from_clickhouse(
    api_key=None, user_id=None, request_id=None, start_date=None, end_date=None
):
    verbose_logger.debug("Reading logs from Clickhouse")
    import os

    # if user has setup clickhouse
    # TODO: Move this to be a helper function
    # querying clickhouse for this data
    import clickhouse_connect
    from datetime import datetime

    port = os.getenv("CLICKHOUSE_PORT")
    if port is not None and isinstance(port, str):
        port = int(port)

    client = clickhouse_connect.get_client(
        host=os.getenv("CLICKHOUSE_HOST"),
        port=port,
        username=os.getenv("CLICKHOUSE_USERNAME", ""),
        password=os.getenv("CLICKHOUSE_PASSWORD", ""),
    )
    if (
        start_date is not None
        and isinstance(start_date, str)
        and end_date is not None
        and isinstance(end_date, str)
    ):
        # Convert the date strings to datetime objects
        start_date_obj = datetime.strptime(start_date, "%Y-%m-%d")
        end_date_obj = datetime.strptime(end_date, "%Y-%m-%d")

        # get top spend per day
        response = client.query(
            f"""
            SELECT
                toDate(startTime) AS day,
                sum(spend) AS total_spend
            FROM
                spend_logs
            WHERE
                toDate(startTime) BETWEEN toDate('2024-02-01') AND toDate('2024-02-29')
            GROUP BY
                day
            ORDER BY
                total_spend
            """
        )

        results = []
        result_rows = list(response.result_rows)
        for response in result_rows:
            current_row = {}
            current_row["users"] = {"example": 0.0}
            current_row["models"] = {}

            current_row["spend"] = float(response[1])
            current_row["startTime"] = str(response[0])

            # stubbed api_key
            current_row[""] = 0.0  # type: ignore
            results.append(current_row)

        return results
    else:
        # check if spend logs exist, if it does then return last 10 logs, sorted in descending order of startTime
        response = client.query(
            """
            SELECT
                *
            FROM
                default.spend_logs
            ORDER BY
                startTime DESC
            LIMIT
                10
            """
        )

        # get size of spend logs
        num_rows = client.query("SELECT count(*) FROM default.spend_logs")
        num_rows = num_rows.result_rows[0][0]

        # safely access num_rows.result_rows[0][0]
        if num_rows is None:
            num_rows = 0

        raw_rows = list(response.result_rows)
        response_data = {
            "logs": raw_rows,
            "log_count": num_rows,
        }
        return response_data


def _create_clickhouse_material_views(client=None, table_names=[]):
    # Create Materialized Views if they don't exist
    # Materialized Views send new inserted rows to the aggregate tables

    verbose_logger.debug("Clickhouse: Creating Materialized Views")
    if "daily_aggregated_spend_per_model_mv" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_model_mv")
        client.command(
            """
            CREATE MATERIALIZED VIEW daily_aggregated_spend_per_model_mv
            TO daily_aggregated_spend_per_model
            AS
            SELECT
                toDate(startTime) as day,
                sumState(spend) AS DailySpend,
                model as model
            FROM spend_logs
            GROUP BY
                day, model
            """
        )
    if "daily_aggregated_spend_per_api_key_mv" not in table_names:
        verbose_logger.debug(
            "Clickhouse: Creating daily_aggregated_spend_per_api_key_mv"
        )
        client.command(
            """
            CREATE MATERIALIZED VIEW daily_aggregated_spend_per_api_key_mv
            TO daily_aggregated_spend_per_api_key
            AS
            SELECT
                toDate(startTime) as day,
                sumState(spend) AS DailySpend,
                api_key as api_key
            FROM spend_logs
            GROUP BY
                day, api_key
            """
        )
    if "daily_aggregated_spend_per_user_mv" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_user_mv")
        client.command(
            """
            CREATE MATERIALIZED VIEW daily_aggregated_spend_per_user_mv
            TO daily_aggregated_spend_per_user
            AS
            SELECT
                toDate(startTime) as day,
                sumState(spend) AS DailySpend,
                user as user
            FROM spend_logs
            GROUP BY
                day, user
            """
        )
    if "daily_aggregated_spend_mv" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_mv")
        client.command(
            """
            CREATE MATERIALIZED VIEW daily_aggregated_spend_mv
            TO daily_aggregated_spend
            AS
            SELECT
                toDate(startTime) as day,
                sumState(spend) AS DailySpend
            FROM spend_logs
            GROUP BY
                day
            """
        )


def _create_clickhouse_aggregate_tables(client=None, table_names=[]):
    # Basic Logging works without this - this is only used for low latency reporting apis
    verbose_logger.debug("Clickhouse: Creating Aggregate Tables")

    # Create Aggregate Tables if they don't exist
    if "daily_aggregated_spend_per_model" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_model")
        client.command(
            """
            CREATE TABLE daily_aggregated_spend_per_model
            (
                `day` Date,
                `DailySpend` AggregateFunction(sum, Float64),
                `model` String
            )
            ENGINE = SummingMergeTree()
            ORDER BY (day, model);
            """
        )
    if "daily_aggregated_spend_per_api_key" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_api_key")
        client.command(
            """
            CREATE TABLE daily_aggregated_spend_per_api_key
            (
                `day` Date,
                `DailySpend` AggregateFunction(sum, Float64),
                `api_key` String
            )
            ENGINE = SummingMergeTree()
            ORDER BY (day, api_key);
            """
        )
    if "daily_aggregated_spend_per_user" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_user")
        client.command(
            """
            CREATE TABLE daily_aggregated_spend_per_user
            (
                `day` Date,
                `DailySpend` AggregateFunction(sum, Float64),
                `user` String
            )
            ENGINE = SummingMergeTree()
            ORDER BY (day, user);
            """
        )
    if "daily_aggregated_spend" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend")
        client.command(
            """
            CREATE TABLE daily_aggregated_spend
            (
                `day` Date,
                `DailySpend` AggregateFunction(sum, Float64),
            )
            ENGINE = SummingMergeTree()
            ORDER BY (day);
            """
        )
    return


def _forecast_daily_cost(data: list):
    import requests  # type: ignore
    from datetime import datetime, timedelta
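One note on the removed tables: because `DailySpend` is stored as an `AggregateFunction(sum, Float64)` state (populated by the `sumState(...)` materialized views), reading it back requires finalizing the state with `sumMerge`. A minimal, illustrative sketch of that read path, using the same `clickhouse_connect` client setup as the removed code (host and port are placeholders):

```python
import os
import clickhouse_connect

client = clickhouse_connect.get_client(host=os.getenv("CLICKHOUSE_HOST"), port=8123)

# AggregateFunction states must be finalized with sumMerge when queried
rows = client.query(
    "SELECT day, sumMerge(DailySpend) AS daily_spend FROM daily_aggregated_spend GROUP BY day"
).result_rows
```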
@@ -240,6 +240,7 @@ num_retries: Optional[int] = None # per model endpoint
default_fallbacks: Optional[List] = None
fallbacks: Optional[List] = None
context_window_fallbacks: Optional[List] = None
+content_policy_fallbacks: Optional[List] = None
allowed_fails: int = 0
num_retries_per_request: Optional[int] = (
    None  # for the request overall (incl. fallbacks + model retries)

@@ -714,6 +715,7 @@ openai_image_generation_models = ["dall-e-2", "dall-e-3"]

from .timeout import timeout
from .cost_calculator import completion_cost
+from litellm.litellm_core_utils.litellm_logging import Logging
from .utils import (
    client,
    exception_type,

@@ -722,12 +724,10 @@ from .utils import (
    token_counter,
    create_pretrained_tokenizer,
    create_tokenizer,
-   cost_per_token,
    supports_function_calling,
    supports_parallel_function_calling,
    supports_vision,
    get_litellm_params,
-   Logging,
    acreate,
    get_model_list,
    get_max_tokens,

@@ -747,9 +747,10 @@ from .utils import (
    get_first_chars_messages,
    ModelResponse,
    ImageResponse,
-   ImageObject,
    get_provider_fields,
)

+from .types.utils import ImageObject
from .llms.huggingface_restapi import HuggingfaceConfig
from .llms.anthropic import AnthropicConfig
from .llms.databricks import DatabricksConfig, DatabricksEmbeddingConfig

@@ -826,4 +827,4 @@ from .router import Router
from .assistants.main import *
from .batches.main import *
from .scheduler import *
-from .cost_calculator import response_cost_calculator
+from .cost_calculator import response_cost_calculator, cost_per_token
@@ -3,10 +3,17 @@ from logging import Formatter
import traceback

set_verbose = False

+if set_verbose is True:
+    logging.warning(
+        "`litellm.set_verbose` is deprecated. Please set `os.environ['LITELLM_LOG'] = 'DEBUG'` for debug logs."
+    )
json_logs = bool(os.getenv("JSON_LOGS", False))
# Create a handler for the logger (you may need to adapt this based on your needs)
+log_level = os.getenv("LITELLM_LOG", "ERROR")
+numeric_level: str = getattr(logging, log_level.upper())
handler = logging.StreamHandler()
-handler.setLevel(logging.DEBUG)
+handler.setLevel(numeric_level)


class JsonFormatter(Formatter):
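Per the new handling above, log verbosity now comes from the `LITELLM_LOG` env var rather than `litellm.set_verbose`; a minimal sketch of opting into debug logs:

```python
import os

os.environ["LITELLM_LOG"] = "DEBUG"  # read at import time by litellm._logging

import litellm  # the stream handler level now resolves to logging.DEBUG
```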
@@ -1,6 +1,6 @@
# What is this?
## File for 'response_cost' calculation in Logging
-from typing import Optional, Union, Literal, List
+from typing import Optional, Union, Literal, List, Tuple
import litellm._logging
from litellm.utils import (
    ModelResponse,

@@ -9,7 +9,6 @@ from litellm.utils import (
    TranscriptionResponse,
    TextCompletionResponse,
    CallTypes,
-   cost_per_token,
    print_verbose,
    CostPerToken,
    token_counter,

@@ -18,6 +17,224 @@ import litellm
from litellm import verbose_logger


def _cost_per_token_custom_pricing_helper(
    prompt_tokens=0,
    completion_tokens=0,
    response_time_ms=None,
    ### CUSTOM PRICING ###
    custom_cost_per_token: Optional[CostPerToken] = None,
    custom_cost_per_second: Optional[float] = None,
) -> Optional[Tuple[float, float]]:
    """Internal helper function for calculating cost, if custom pricing given"""
    if custom_cost_per_token is None and custom_cost_per_second is None:
        return None

    if custom_cost_per_token is not None:
        input_cost = custom_cost_per_token["input_cost_per_token"] * prompt_tokens
        output_cost = custom_cost_per_token["output_cost_per_token"] * completion_tokens
        return input_cost, output_cost
    elif custom_cost_per_second is not None:
        output_cost = custom_cost_per_second * response_time_ms / 1000  # type: ignore
        return 0, output_cost

    return None


def cost_per_token(
    model: str = "",
    prompt_tokens=0,
    completion_tokens=0,
    response_time_ms=None,
    custom_llm_provider=None,
    region_name=None,
    ### CUSTOM PRICING ###
    custom_cost_per_token: Optional[CostPerToken] = None,
    custom_cost_per_second: Optional[float] = None,
) -> Tuple[float, float]:
    """
    Calculates the cost per token for a given model, prompt tokens, and completion tokens.

    Parameters:
        model (str): The name of the model to use. Default is ""
        prompt_tokens (int): The number of tokens in the prompt.
        completion_tokens (int): The number of tokens in the completion.
        response_time (float): The amount of time, in milliseconds, it took the call to complete.
        custom_llm_provider (str): The llm provider to whom the call was made (see init.py for full list)
        custom_cost_per_token: Optional[CostPerToken]: the cost per input + output token for the llm api call.
        custom_cost_per_second: Optional[float]: the cost per second for the llm api call.

    Returns:
        tuple: A tuple containing the cost in USD dollars for prompt tokens and completion tokens, respectively.
    """
    if model is None:
        raise Exception("Invalid arg. Model cannot be none.")
    ## CUSTOM PRICING ##
    response_cost = _cost_per_token_custom_pricing_helper(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
        response_time_ms=response_time_ms,
        custom_cost_per_second=custom_cost_per_second,
        custom_cost_per_token=custom_cost_per_token,
    )
    if response_cost is not None:
        return response_cost[0], response_cost[1]

    # given
    prompt_tokens_cost_usd_dollar: float = 0
    completion_tokens_cost_usd_dollar: float = 0
    model_cost_ref = litellm.model_cost
    model_with_provider = model
    if custom_llm_provider is not None:
        model_with_provider = custom_llm_provider + "/" + model
        if region_name is not None:
            model_with_provider_and_region = (
                f"{custom_llm_provider}/{region_name}/{model}"
            )
            if (
                model_with_provider_and_region in model_cost_ref
            ):  # use region based pricing, if it's available
                model_with_provider = model_with_provider_and_region

    model_without_prefix = model
    model_parts = model.split("/")
    if len(model_parts) > 1:
        model_without_prefix = model_parts[1]
    else:
        model_without_prefix = model
    """
    Code block that formats model to lookup in litellm.model_cost
    Option1. model = "bedrock/ap-northeast-1/anthropic.claude-instant-v1". This is the most accurate since it is region based. Should always be option 1
    Option2. model = "openai/gpt-4"       - model = provider/model
    Option3. model = "anthropic.claude-3" - model = model
    """
    if (
        model_with_provider in model_cost_ref
    ):  # Option 2. use model with provider, model = "openai/gpt-4"
        model = model_with_provider
    elif model in model_cost_ref:  # Option 1. use model passed, model="gpt-4"
        model = model
    elif (
        model_without_prefix in model_cost_ref
    ):  # Option 3. if user passed model="bedrock/anthropic.claude-3", use model="anthropic.claude-3"
        model = model_without_prefix

    # see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
    print_verbose(f"Looking up model={model} in model_cost_map")
    if model in model_cost_ref:
        print_verbose(f"Success: model={model} in model_cost_map")
        print_verbose(
            f"prompt_tokens={prompt_tokens}; completion_tokens={completion_tokens}"
        )
        if (
            model_cost_ref[model].get("input_cost_per_token", None) is not None
            and model_cost_ref[model].get("output_cost_per_token", None) is not None
        ):
            ## COST PER TOKEN ##
            prompt_tokens_cost_usd_dollar = (
                model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
            )
            completion_tokens_cost_usd_dollar = (
                model_cost_ref[model]["output_cost_per_token"] * completion_tokens
            )
        elif (
            model_cost_ref[model].get("output_cost_per_second", None) is not None
            and response_time_ms is not None
        ):
            print_verbose(
                f"For model={model} - output_cost_per_second: {model_cost_ref[model].get('output_cost_per_second')}; response time: {response_time_ms}"
            )
            ## COST PER SECOND ##
            prompt_tokens_cost_usd_dollar = 0
            completion_tokens_cost_usd_dollar = (
                model_cost_ref[model]["output_cost_per_second"]
                * response_time_ms
                / 1000
            )
        elif (
            model_cost_ref[model].get("input_cost_per_second", None) is not None
            and response_time_ms is not None
        ):
            print_verbose(
                f"For model={model} - input_cost_per_second: {model_cost_ref[model].get('input_cost_per_second')}; response time: {response_time_ms}"
            )
            ## COST PER SECOND ##
            prompt_tokens_cost_usd_dollar = (
                model_cost_ref[model]["input_cost_per_second"] * response_time_ms / 1000
            )
            completion_tokens_cost_usd_dollar = 0.0
        print_verbose(
            f"Returned custom cost for model={model} - prompt_tokens_cost_usd_dollar: {prompt_tokens_cost_usd_dollar}, completion_tokens_cost_usd_dollar: {completion_tokens_cost_usd_dollar}"
        )
        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
    elif "ft:gpt-3.5-turbo" in model:
        print_verbose(f"Cost Tracking: {model} is an OpenAI Fine-Tuned LLM")
        # fuzzy match ft:gpt-3.5-turbo:abcd-id-cool-litellm
        prompt_tokens_cost_usd_dollar = (
            model_cost_ref["ft:gpt-3.5-turbo"]["input_cost_per_token"] * prompt_tokens
        )
        completion_tokens_cost_usd_dollar = (
            model_cost_ref["ft:gpt-3.5-turbo"]["output_cost_per_token"]
            * completion_tokens
        )
        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
    elif "ft:davinci-002" in model:
        print_verbose(f"Cost Tracking: {model} is an OpenAI Fine-Tuned LLM")
        # fuzzy match ft:davinci-002:abcd-id-cool-litellm
        prompt_tokens_cost_usd_dollar = (
            model_cost_ref["ft:davinci-002"]["input_cost_per_token"] * prompt_tokens
        )
        completion_tokens_cost_usd_dollar = (
            model_cost_ref["ft:davinci-002"]["output_cost_per_token"]
            * completion_tokens
        )
        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
    elif "ft:babbage-002" in model:
        print_verbose(f"Cost Tracking: {model} is an OpenAI Fine-Tuned LLM")
        # fuzzy match ft:babbage-002:abcd-id-cool-litellm
        prompt_tokens_cost_usd_dollar = (
            model_cost_ref["ft:babbage-002"]["input_cost_per_token"] * prompt_tokens
        )
        completion_tokens_cost_usd_dollar = (
            model_cost_ref["ft:babbage-002"]["output_cost_per_token"]
            * completion_tokens
        )
        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
    elif model in litellm.azure_llms:
        verbose_logger.debug(f"Cost Tracking: {model} is an Azure LLM")
        model = litellm.azure_llms[model]
        verbose_logger.debug(
            f"applying cost={model_cost_ref[model]['input_cost_per_token']} for prompt_tokens={prompt_tokens}"
        )
        prompt_tokens_cost_usd_dollar = (
            model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
        )
        verbose_logger.debug(
            f"applying cost={model_cost_ref[model]['output_cost_per_token']} for completion_tokens={completion_tokens}"
        )
        completion_tokens_cost_usd_dollar = (
            model_cost_ref[model]["output_cost_per_token"] * completion_tokens
        )
        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
    elif model in litellm.azure_embedding_models:
        verbose_logger.debug(f"Cost Tracking: {model} is an Azure Embedding Model")
        model = litellm.azure_embedding_models[model]
        prompt_tokens_cost_usd_dollar = (
            model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
        )
        completion_tokens_cost_usd_dollar = (
            model_cost_ref[model]["output_cost_per_token"] * completion_tokens
        )
        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
    else:
        # if model is not in model_prices_and_context_window.json. Raise an exception-let users know
        error_str = f"Model not in model_prices_and_context_window.json. You passed model={model}. Register pricing for model - https://docs.litellm.ai/docs/proxy/custom_pricing\n"
        raise litellm.exceptions.NotFoundError(  # type: ignore
            message=error_str,
            model=model,
            llm_provider="",
        )


# Extract the number of billion parameters from the model name
# only used for together_computer LLMs
def get_model_params_and_category(model_name) -> str:
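A quick worked example of the custom-pricing path added above (values are illustrative; `cost_per_token` is re-exported from the top-level `litellm` package by this commit):

```python
from litellm import cost_per_token

# custom per-token pricing short-circuits the model_cost lookup via the helper above
custom = {"input_cost_per_token": 1e-06, "output_cost_per_token": 2e-06}
prompt_cost, completion_cost = cost_per_token(
    model="gpt-3.5-turbo",
    prompt_tokens=1000,
    completion_tokens=500,
    custom_cost_per_token=custom,
)
# prompt_cost == 0.001 (1000 * 1e-06); completion_cost == 0.001 (500 * 2e-06)
```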
@@ -324,7 +324,7 @@ class ContextWindowExceededError(BadRequestError):  # type: ignore
        message,
        model,
        llm_provider,
-       response: httpx.Response,
+       response: Optional[httpx.Response] = None,
        litellm_debug_info: Optional[str] = None,
    ):
        self.status_code = 400

@@ -332,11 +332,13 @@ class ContextWindowExceededError(BadRequestError):  # type: ignore
        self.model = model
        self.llm_provider = llm_provider
        self.litellm_debug_info = litellm_debug_info
+       request = httpx.Request(method="POST", url="https://api.openai.com/v1")
+       self.response = response or httpx.Response(status_code=400, request=request)
        super().__init__(
            message=self.message,
            model=self.model,  # type: ignore
            llm_provider=self.llm_provider,  # type: ignore
-           response=response,
+           response=self.response,
            litellm_debug_info=self.litellm_debug_info,
        )  # Call the base class constructor with the parameters it needs

@@ -407,7 +409,7 @@ class ContentPolicyViolationError(BadRequestError):  # type: ignore
        message,
        model,
        llm_provider,
-       response: httpx.Response,
+       response: Optional[httpx.Response] = None,
        litellm_debug_info: Optional[str] = None,
    ):
        self.status_code = 400

@@ -415,11 +417,13 @@ class ContentPolicyViolationError(BadRequestError):  # type: ignore
        self.model = model
        self.llm_provider = llm_provider
        self.litellm_debug_info = litellm_debug_info
+       request = httpx.Request(method="POST", url="https://api.openai.com/v1")
+       self.response = response or httpx.Response(status_code=500, request=request)
        super().__init__(
            message=self.message,
            model=self.model,  # type: ignore
            llm_provider=self.llm_provider,  # type: ignore
-           response=response,
+           response=self.response,
            litellm_debug_info=self.litellm_debug_info,
        )  # Call the base class constructor with the parameters it needs
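With `response` now optional in these constructors, the errors can be raised without hand-building an httpx response; a minimal sketch:

```python
import litellm

# a default httpx.Response (status 400) is synthesized when none is passed
err = litellm.ContextWindowExceededError(
    message="prompt is too long",
    model="claude-2",
    llm_provider="anthropic",
)
print(err.response.status_code)  # 400
```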
@@ -226,14 +226,6 @@ def _start_clickhouse():
    response = client.query("DESCRIBE default.spend_logs")
    verbose_logger.debug(f"spend logs schema ={response.result_rows}")

-   # RUN Enterprise Clickhouse Setup
-   # TLDR: For Enterprise - we create views / aggregate tables for low latency reporting APIs
-   from litellm.proxy.enterprise.utils import _create_clickhouse_aggregate_tables
-   from litellm.proxy.enterprise.utils import _create_clickhouse_material_views
-
-   _create_clickhouse_aggregate_tables(client=client, table_names=table_names)
-   _create_clickhouse_material_views(client=client, table_names=table_names)


class ClickhouseLogger:
    # Class variables or attributes
litellm/integrations/email_alerting.py (new file, 136 lines)
@@ -0,0 +1,136 @@
"""
Functions for sending Email Alerts
"""

import os
from typing import Optional, List
from litellm.proxy._types import WebhookEvent
import asyncio
from litellm._logging import verbose_logger, verbose_proxy_logger

# we use this for the email header, please send a test email if you change this. verify it looks good on email
LITELLM_LOGO_URL = "https://litellm-listing.s3.amazonaws.com/litellm_logo.png"
LITELLM_SUPPORT_CONTACT = "support@berri.ai"


async def get_all_team_member_emails(team_id: Optional[str] = None) -> list:
    verbose_logger.debug(
        "Email Alerting: Getting all team members for team_id=%s", team_id
    )
    if team_id is None:
        return []
    from litellm.proxy.proxy_server import premium_user, prisma_client

    if prisma_client is None:
        raise Exception("Not connected to DB!")

    team_row = await prisma_client.db.litellm_teamtable.find_unique(
        where={
            "team_id": team_id,
        }
    )

    if team_row is None:
        return []

    _team_members = team_row.members_with_roles
    verbose_logger.debug(
        "Email Alerting: Got team members for team_id=%s Team Members: %s",
        team_id,
        _team_members,
    )
    _team_member_user_ids: List[str] = []
    for member in _team_members:
        if member and isinstance(member, dict) and member.get("user_id") is not None:
            _team_member_user_ids.append(member.get("user_id"))

    sql_query = """
    SELECT user_email
    FROM "LiteLLM_UserTable"
    WHERE user_id = ANY($1::TEXT[]);
    """

    _result = await prisma_client.db.query_raw(sql_query, _team_member_user_ids)

    verbose_logger.debug("Email Alerting: Got all Emails for team, emails=%s", _result)

    if _result is None:
        return []

    emails = []
    for user in _result:
        if user and isinstance(user, dict) and user.get("user_email", None) is not None:
            emails.append(user.get("user_email"))
    return emails


async def send_team_budget_alert(webhook_event: WebhookEvent) -> bool:
    """
    Send an Email Alert to All Team Members when the Team Budget is crossed
    Returns -> True if sent, False if not.
    """
    from litellm.proxy.utils import send_email

    from litellm.proxy.proxy_server import premium_user, prisma_client

    _team_id = webhook_event.team_id
    team_alias = webhook_event.team_alias
    verbose_logger.debug(
        "Email Alerting: Sending Team Budget Alert for team=%s", team_alias
    )

    email_logo_url = os.getenv("SMTP_SENDER_LOGO", os.getenv("EMAIL_LOGO_URL", None))
    email_support_contact = os.getenv("EMAIL_SUPPORT_CONTACT", None)

    # await self._check_if_using_premium_email_feature(
    #     premium_user, email_logo_url, email_support_contact
    # )

    if email_logo_url is None:
        email_logo_url = LITELLM_LOGO_URL
    if email_support_contact is None:
        email_support_contact = LITELLM_SUPPORT_CONTACT
    recipient_emails = await get_all_team_member_emails(_team_id)
    recipient_emails_str: str = ",".join(recipient_emails)
    verbose_logger.debug(
        "Email Alerting: Sending team budget alert to %s", recipient_emails_str
    )

    event_name = webhook_event.event_message
    max_budget = webhook_event.max_budget
    email_html_content = "Alert from LiteLLM Server"

    if recipient_emails_str is None:
        verbose_proxy_logger.error(
            "Email Alerting: Trying to send email alert to no recipient, got recipient_emails=%s",
            recipient_emails_str,
        )

    email_html_content = f"""
    <img src="{email_logo_url}" alt="LiteLLM Logo" width="150" height="50" /> <br/><br/><br/>

    Budget Crossed for Team <b> {team_alias} </b> <br/> <br/>

    Your Team's LLM API usage has crossed its <b> budget of ${max_budget} </b>, current spend is <b>${webhook_event.spend}</b><br /> <br />

    API requests will be rejected until either (a) you increase your budget or (b) your budget gets reset <br /> <br />

    If you have any questions, please send an email to {email_support_contact} <br /> <br />

    Best, <br />
    The LiteLLM team <br />
    """

    email_event = {
        "to": recipient_emails_str,
        "subject": f"LiteLLM {event_name} for Team {team_alias}",
        "html": email_html_content,
    }

    await send_email(
        receiver_email=email_event["to"],
        subject=email_event["subject"],
        html=email_event["html"],
    )

    return False
|
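The two helpers above are the whole email path for team budget alerts: resolve the team's member user_ids from the team table, map them to emails with one raw SQL query, then render and send a single HTML mail to the joined recipient list. A minimal sketch of how the proxy side might drive this, assuming an event built by the budget checks (every field value below is illustrative, not taken from this diff):

    import asyncio
    from litellm.proxy._types import WebhookEvent
    from litellm.integrations.email_alerting import send_team_budget_alert

    # Hypothetical payload; in the proxy these events are constructed by the
    # budget-tracking hooks, not by hand.
    event = WebhookEvent(
        event="budget_crossed",
        event_group="team",
        event_message="Budget Alert",
        token="sk-...",
        spend=105.0,
        max_budget=100.0,
        user_id=None,
        team_id="team-1234",
        team_alias="ml-platform",
    )

    asyncio.run(send_team_budget_alert(webhook_event=event))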
@@ -330,6 +330,7 @@ class SlackAlerting(CustomLogger):
            messages = "Message not logged. litellm.redact_messages_in_exceptions=True"
        request_info = f"\nRequest Model: `{model}`\nAPI Base: `{api_base}`\nMessages: `{messages}`"
        slow_message = f"`Responses are slow - {round(time_difference_float,2)}s response time > Alerting threshold: {self.alerting_threshold}s`"
+       alerting_metadata: dict = {}
        if time_difference_float > self.alerting_threshold:
            # add deployment latencies to alert
            if (
@@ -337,7 +338,7 @@ class SlackAlerting(CustomLogger):
                and "litellm_params" in kwargs
                and "metadata" in kwargs["litellm_params"]
            ):
-               _metadata = kwargs["litellm_params"]["metadata"]
+               _metadata: dict = kwargs["litellm_params"]["metadata"]
                request_info = litellm.utils._add_key_name_and_team_to_alert(
                    request_info=request_info, metadata=_metadata
                )
@@ -349,10 +350,14 @@ class SlackAlerting(CustomLogger):
                request_info += (
                    f"\nAvailable Deployment Latencies\n{_deployment_latency_map}"
                )
+
+           if "alerting_metadata" in _metadata:
+               alerting_metadata = _metadata["alerting_metadata"]
            await self.send_alert(
                message=slow_message + request_info,
                level="Low",
                alert_type="llm_too_slow",
+               alerting_metadata=alerting_metadata,
            )

    async def async_update_daily_reports(
@@ -540,7 +545,12 @@ class SlackAlerting(CustomLogger):
        message += f"\n\nNext Run is at: `{time.time() + self.alerting_args.daily_report_frequency}`s"

        # send alert
-       await self.send_alert(message=message, level="Low", alert_type="daily_reports")
+       await self.send_alert(
+           message=message,
+           level="Low",
+           alert_type="daily_reports",
+           alerting_metadata={},
+       )

        return True
@@ -582,6 +592,7 @@ class SlackAlerting(CustomLogger):
        await asyncio.sleep(
            self.alerting_threshold
        )  # Set it to 5 minutes - i'd imagine this might be different for streaming, non-streaming, non-completion (embedding + img) requests
+       alerting_metadata: dict = {}
        if (
            request_data is not None
            and request_data.get("litellm_status", "") != "success"
@@ -606,7 +617,7 @@ class SlackAlerting(CustomLogger):
            ):
                # In hanging requests, sometimes the deployment has not yet been passed to `request_data`
                # in that case we fall back to the api base set in the request metadata
-               _metadata = request_data["metadata"]
+               _metadata: dict = request_data["metadata"]
                _api_base = _metadata.get("api_base", "")

                request_info = litellm.utils._add_key_name_and_team_to_alert(
@@ -615,6 +626,9 @@ class SlackAlerting(CustomLogger):

                if _api_base is None:
                    _api_base = ""
+
+               if "alerting_metadata" in _metadata:
+                   alerting_metadata = _metadata["alerting_metadata"]
                request_info += f"\nAPI Base: `{_api_base}`"
            # only alert hanging responses if they have not been marked as success
            alerting_message = (
@@ -640,6 +654,7 @@ class SlackAlerting(CustomLogger):
                message=alerting_message + request_info,
                level="Medium",
                alert_type="llm_requests_hanging",
+               alerting_metadata=alerting_metadata,
            )

    async def failed_tracking_alert(self, error_message: str):
@@ -650,7 +665,10 @@ class SlackAlerting(CustomLogger):
        result = await _cache.async_get_cache(key=_cache_key)
        if result is None:
            await self.send_alert(
-               message=message, level="High", alert_type="budget_alerts"
+               message=message,
+               level="High",
+               alert_type="budget_alerts",
+               alerting_metadata={},
            )
            await _cache.async_set_cache(
                key=_cache_key,
@@ -680,7 +698,7 @@ class SlackAlerting(CustomLogger):
            return
        if "budget_alerts" not in self.alert_types:
            return
-       _id: str = "default_id"  # used for caching
+       _id: Optional[str] = "default_id"  # used for caching
        user_info_json = user_info.model_dump(exclude_none=True)
        for k, v in user_info_json.items():
            user_info_str = "\n{}: {}\n".format(k, v)
@@ -751,6 +769,7 @@ class SlackAlerting(CustomLogger):
                    level="High",
                    alert_type="budget_alerts",
                    user_info=webhook_event,
+                   alerting_metadata={},
                )
                await _cache.async_set_cache(
                    key=_cache_key,
@@ -769,7 +788,13 @@ class SlackAlerting(CustomLogger):
        response_cost: Optional[float],
        max_budget: Optional[float],
    ):
-       if end_user_id is not None and token is not None and response_cost is not None:
+       if (
+           self.alerting is not None
+           and "webhook" in self.alerting
+           and end_user_id is not None
+           and token is not None
+           and response_cost is not None
+       ):
            # log customer spend
            event = WebhookEvent(
                spend=response_cost,
@@ -941,7 +966,10 @@ class SlackAlerting(CustomLogger):
                        )
                        # send minor alert
                        await self.send_alert(
-                           message=msg, level="Medium", alert_type="outage_alerts"
+                           message=msg,
+                           level="Medium",
+                           alert_type="outage_alerts",
+                           alerting_metadata={},
                        )
                        # set to true
                        outage_value["minor_alert_sent"] = True
@@ -963,7 +991,12 @@ class SlackAlerting(CustomLogger):
                        )

                        # send major alert
-                       await self.send_alert(message=msg, level="High", alert_type="outage_alerts")
+                       await self.send_alert(
+                           message=msg,
+                           level="High",
+                           alert_type="outage_alerts",
+                           alerting_metadata={},
+                       )
                        # set to true
                        outage_value["major_alert_sent"] = True
@@ -1062,7 +1095,10 @@ class SlackAlerting(CustomLogger):
                        )
                        # send minor alert
                        await self.send_alert(
-                           message=msg, level="Medium", alert_type="outage_alerts"
+                           message=msg,
+                           level="Medium",
+                           alert_type="outage_alerts",
+                           alerting_metadata={},
                        )
                        # set to true
                        outage_value["minor_alert_sent"] = True
@@ -1081,7 +1117,10 @@ class SlackAlerting(CustomLogger):
                        )
                        # send major alert
                        await self.send_alert(
-                           message=msg, level="High", alert_type="outage_alerts"
+                           message=msg,
+                           level="High",
+                           alert_type="outage_alerts",
+                           alerting_metadata={},
                        )
                        # set to true
                        outage_value["major_alert_sent"] = True
@@ -1143,7 +1182,10 @@ Model Info:
        """

        alert_val = self.send_alert(
-           message=message, level="Low", alert_type="new_model_added"
+           message=message,
+           level="Low",
+           alert_type="new_model_added",
+           alerting_metadata={},
        )

        if alert_val is not None and asyncio.iscoroutine(alert_val):
@@ -1159,6 +1201,9 @@ Model Info:
        Currently only implemented for budget alerts

        Returns -> True if sent, False if not.
+
+       Raises Exception
+           - if WEBHOOK_URL is not set
        """

        webhook_url = os.getenv("WEBHOOK_URL", None)
@@ -1297,7 +1342,9 @@ Model Info:
            verbose_proxy_logger.error("Error sending email alert %s", str(e))
            return False

-   async def send_email_alert_using_smtp(self, webhook_event: WebhookEvent) -> bool:
+   async def send_email_alert_using_smtp(
+       self, webhook_event: WebhookEvent, alert_type: str
+   ) -> bool:
        """
        Sends structured Email alert to an SMTP server
@@ -1306,7 +1353,6 @@ Model Info:
        Returns -> True if sent, False if not.
        """
        from litellm.proxy.utils import send_email
-
        from litellm.proxy.proxy_server import premium_user, prisma_client

        email_logo_url = os.getenv(
@@ -1360,6 +1406,10 @@ Model Info:
            subject=email_event["subject"],
            html=email_event["html"],
        )
+       if webhook_event.event_group == "team":
+           from litellm.integrations.email_alerting import send_team_budget_alert
+
+           await send_team_budget_alert(webhook_event=webhook_event)

        return False
@@ -1368,6 +1418,7 @@ Model Info:
        message: str,
        level: Literal["Low", "Medium", "High"],
        alert_type: Literal[AlertType],
+       alerting_metadata: dict,
        user_info: Optional[WebhookEvent] = None,
        **kwargs,
    ):
@@ -1401,7 +1452,9 @@ Model Info:
            and user_info is not None
        ):
            # only send budget alerts over Email
-           await self.send_email_alert_using_smtp(webhook_event=user_info)
+           await self.send_email_alert_using_smtp(
+               webhook_event=user_info, alert_type=alert_type
+           )

        if "slack" not in self.alerting:
            return
@@ -1425,6 +1478,9 @@ Model Info:
        if kwargs:
            for key, value in kwargs.items():
                formatted_message += f"\n\n{key}: `{value}`\n\n"
+       if alerting_metadata:
+           for key, value in alerting_metadata.items():
+               formatted_message += f"\n\n*Alerting Metadata*: \n{key}: `{value}`\n\n"
        if _proxy_base_url is not None:
            formatted_message += f"\n\nProxy URL: `{_proxy_base_url}`"
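The common thread in the hunks above is a new required alerting_metadata argument on send_alert, filled from the request's litellm metadata so callers can tag alerts with their own context. A sketch of how a caller could attach such metadata so it shows up in the Slack message (the metadata key follows the hunks above; the values are illustrative):

    import litellm

    # "alerting_metadata" inside the request metadata is what the slow-response
    # and hanging-request alerters copy into the alert they send.
    response = litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi"}],
        metadata={"alerting_metadata": {"environment": "staging"}},
    )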
@@ -1440,7 +1496,7 @@ Model Info:
        slack_webhook_url = os.getenv("SLACK_WEBHOOK_URL", None)

        if slack_webhook_url is None:
-           raise Exception("Missing SLACK_WEBHOOK_URL from environment")
+           raise ValueError("Missing SLACK_WEBHOOK_URL from environment")
        payload = {"text": formatted_message}
        headers = {"Content-type": "application/json"}
@@ -1453,7 +1509,7 @@ Model Info:
            pass
        else:
            verbose_proxy_logger.debug(
-               "Error sending slack alert. Error=", response.text
+               "Error sending slack alert. Error={}".format(response.text)
            )

    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
@@ -1622,6 +1678,7 @@ Model Info:
                    message=_weekly_spend_message,
                    level="Low",
                    alert_type="spend_reports",
+                   alerting_metadata={},
                )
            except Exception as e:
                verbose_proxy_logger.error("Error sending weekly spend report", e)
@@ -1673,6 +1730,7 @@ Model Info:
                    message=_spend_message,
                    level="Low",
                    alert_type="spend_reports",
+                   alerting_metadata={},
                )
            except Exception as e:
                verbose_proxy_logger.error("Error sending weekly spend report", e)
41
litellm/litellm_core_utils/core_helpers.py
Normal file
@@ -0,0 +1,41 @@
# What is this?
## Helper utilities for the model response objects


def map_finish_reason(
    finish_reason: str,
):  # openai supports 5 stop sequences - 'stop', 'length', 'function_call', 'content_filter', 'null'
    # anthropic mapping
    if finish_reason == "stop_sequence":
        return "stop"
    # cohere mapping - https://docs.cohere.com/reference/generate
    elif finish_reason == "COMPLETE":
        return "stop"
    elif finish_reason == "MAX_TOKENS":  # cohere + vertex ai
        return "length"
    elif finish_reason == "ERROR_TOXIC":
        return "content_filter"
    elif (
        finish_reason == "ERROR"
    ):  # openai currently doesn't support an 'error' finish reason
        return "stop"
    # huggingface mapping https://huggingface.github.io/text-generation-inference/#/Text%20Generation%20Inference/generate_stream
    elif finish_reason == "eos_token" or finish_reason == "stop_sequence":
        return "stop"
    elif (
        finish_reason == "FINISH_REASON_UNSPECIFIED" or finish_reason == "STOP"
    ):  # vertex ai - got from running `print(dir(response_obj.candidates[0].finish_reason))`: ['FINISH_REASON_UNSPECIFIED', 'MAX_TOKENS', 'OTHER', 'RECITATION', 'SAFETY', 'STOP',]
        return "stop"
    elif finish_reason == "SAFETY":  # vertex ai
        return "content_filter"
    elif finish_reason == "STOP":  # vertex ai
        return "stop"
    elif finish_reason == "end_turn" or finish_reason == "stop_sequence":  # anthropic
        return "stop"
    elif finish_reason == "max_tokens":  # anthropic
        return "length"
    elif finish_reason == "tool_use":  # anthropic
        return "tool_calls"
    elif finish_reason == "content_filtered":
        return "content_filter"
    return finish_reason
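Since map_finish_reason is now the single place where provider-specific stop reasons are normalized, its behavior is easy to spot-check; all of the expected values below come straight from the branches above:

    from litellm.litellm_core_utils.core_helpers import map_finish_reason

    assert map_finish_reason("MAX_TOKENS") == "length"        # cohere / vertex ai
    assert map_finish_reason("end_turn") == "stop"            # anthropic
    assert map_finish_reason("tool_use") == "tool_calls"      # anthropic
    assert map_finish_reason("SAFETY") == "content_filter"    # vertex ai
    assert map_finish_reason("stop") == "stop"                # unmapped values pass through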
1780
litellm/litellm_core_utils/litellm_logging.py
Normal file
File diff suppressed because it is too large
@@ -12,7 +12,9 @@ from typing import TYPE_CHECKING, Any
import litellm

if TYPE_CHECKING:
-   from litellm.utils import Logging as _LiteLLMLoggingObject
+   from litellm.litellm_core_utils.litellm_logging import (
+       Logging as _LiteLLMLoggingObject,
+   )

    LiteLLMLoggingObject = _LiteLLMLoggingObject
else:
@@ -30,7 +32,6 @@ def redact_message_input_output_from_logging(
    if litellm.turn_off_message_logging is not True:
        return result

-   _result = copy.deepcopy(result)
    # remove messages, prompts, input, response from logging
    litellm_logging_obj.model_call_details["messages"] = [
        {"role": "user", "content": "redacted-by-litellm"}
@@ -53,8 +54,10 @@ def redact_message_input_output_from_logging(
        elif isinstance(choice, litellm.utils.StreamingChoices):
            choice.delta.content = "redacted-by-litellm"
    else:
-       if _result is not None:
-           if isinstance(_result, litellm.ModelResponse):
+       if result is not None:
+           if isinstance(result, litellm.ModelResponse):
+               # only deep copy litellm.ModelResponse
+               _result = copy.deepcopy(result)
                if hasattr(_result, "choices") and _result.choices is not None:
                    for choice in _result.choices:
                        if isinstance(choice, litellm.Choices):
@@ -63,3 +66,6 @@ def redact_message_input_output_from_logging(
                            choice.delta.content = "redacted-by-litellm"

        return _result
+
+   # by default return result
+   return result
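The redaction change above is a small but real behavior fix: the deep copy now happens only once the result is known to be a litellm.ModelResponse, and everything else falls through to the new trailing `return result` instead of being copied up front. For reference, this whole code path is gated by one flag (a sketch; the logging object itself is wired up internally by litellm):

    import litellm

    # When set, redact_message_input_output_from_logging replaces message and
    # response content with "redacted-by-litellm" in what is handed to logging
    # integrations; the response returned to the caller is unchanged.
    litellm.turn_off_message_logging = True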
@@ -5,10 +5,16 @@ import requests, copy  # type: ignore
import time
from functools import partial
from typing import Callable, Optional, List, Union
-from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper
+import litellm.litellm_core_utils
+from litellm.utils import ModelResponse, Usage, CustomStreamWrapper
+from litellm.litellm_core_utils.core_helpers import map_finish_reason
import litellm
from .prompt_templates.factory import prompt_factory, custom_prompt
-from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
+from litellm.llms.custom_httpx.http_handler import (
+    AsyncHTTPHandler,
+    _get_async_httpx_client,
+    _get_httpx_client,
+)
from .base import BaseLLM
import httpx  # type: ignore
from litellm.types.llms.anthropic import AnthropicMessagesToolChoice
@@ -171,7 +177,7 @@ async def make_call(
    logging_obj,
):
    if client is None:
-       client = AsyncHTTPHandler()  # Create a new client if none provided
+       client = _get_async_httpx_client()  # Create a new client if none provided

    response = await client.post(api_base, headers=headers, data=data, stream=True)
@@ -201,7 +207,7 @@ class AnthropicChatCompletion(BaseLLM):
        response: Union[requests.Response, httpx.Response],
        model_response: ModelResponse,
        stream: bool,
-       logging_obj: litellm.utils.Logging,
+       logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
        optional_params: dict,
        api_key: str,
        data: Union[dict, str],
@@ -316,7 +322,7 @@ class AnthropicChatCompletion(BaseLLM):
        response: Union[requests.Response, httpx.Response],
        model_response: ModelResponse,
        stream: bool,
-       logging_obj: litellm.utils.Logging,
+       logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
        optional_params: dict,
        api_key: str,
        data: Union[dict, str],
@@ -463,9 +469,7 @@ class AnthropicChatCompletion(BaseLLM):
        logger_fn=None,
        headers={},
    ) -> Union[ModelResponse, CustomStreamWrapper]:
-       async_handler = AsyncHTTPHandler(
-           timeout=httpx.Timeout(timeout=600.0, connect=5.0)
-       )
+       async_handler = _get_async_httpx_client()
        response = await async_handler.post(api_base, headers=headers, json=data)
        if stream and _is_function_call:
            return self.process_streaming_response(
@@ -2,7 +2,7 @@
import litellm
import httpx, requests
from typing import Optional, Union
-from litellm.utils import Logging
+from litellm.litellm_core_utils.litellm_logging import Logging


class BaseLLM:
@@ -5,12 +5,10 @@ import time, uuid
from typing import Callable, Optional, Any, Union, List
import litellm
from litellm.utils import (
-    ModelResponse,
    get_secret,
-    Usage,
-    ImageResponse,
-    map_finish_reason,
)
+from litellm.litellm_core_utils.core_helpers import map_finish_reason
+from litellm.types.utils import ImageResponse, ModelResponse, Usage
from .prompt_templates.factory import (
    prompt_factory,
    custom_prompt,
@@ -633,7 +631,11 @@ def init_bedrock_client(
    config = boto3.session.Config()

    ### CHECK STS ###
-   if aws_web_identity_token is not None and aws_role_name is not None and aws_session_name is not None:
+   if (
+       aws_web_identity_token is not None
+       and aws_role_name is not None
+       and aws_session_name is not None
+   ):
        oidc_token = get_secret(aws_web_identity_token)

        if oidc_token is None:
@@ -642,9 +644,7 @@ def init_bedrock_client(
                status_code=401,
            )

-       sts_client = boto3.client(
-           "sts"
-       )
+       sts_client = boto3.client("sts")

        # https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html
        # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts/client/assume_role_with_web_identity.html
@@ -22,13 +22,12 @@ from typing import (
from litellm.utils import (
    ModelResponse,
    Usage,
-    map_finish_reason,
    CustomStreamWrapper,
-    Message,
-    Choices,
    get_secret,
-    Logging,
)
+from litellm.litellm_core_utils.core_helpers import map_finish_reason
+from litellm.litellm_core_utils.litellm_logging import Logging
+from litellm.types.utils import Message, Choices
import litellm, uuid
from .prompt_templates.factory import (
    prompt_factory,
@@ -41,7 +40,12 @@ from .prompt_templates.factory import (
    _bedrock_converse_messages_pt,
    _bedrock_tools_pt,
)
-from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
+from litellm.llms.custom_httpx.http_handler import (
+    AsyncHTTPHandler,
+    HTTPHandler,
+    _get_async_httpx_client,
+    _get_httpx_client,
+)
from .base import BaseLLM
import httpx  # type: ignore
from .bedrock import BedrockError, convert_messages_to_prompt, ModelResponseIterator
@@ -57,6 +61,7 @@ from litellm.caching import DualCache

iam_cache = DualCache()

+
class AmazonCohereChatConfig:
    """
    Reference - https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-cohere-command-r-plus.html
@@ -167,7 +172,7 @@ async def make_call(
    logging_obj,
):
    if client is None:
-       client = AsyncHTTPHandler()  # Create a new client if none provided
+       client = _get_async_httpx_client()  # Create a new client if none provided

    response = await client.post(api_base, headers=headers, data=data, stream=True)
@@ -198,7 +203,7 @@ def make_sync_call(
    logging_obj,
):
    if client is None:
-       client = HTTPHandler()  # Create a new client if none provided
+       client = _get_httpx_client()  # Create a new client if none provided

    response = client.post(api_base, headers=headers, data=data, stream=True)
@@ -327,13 +332,19 @@ class BedrockLLM(BaseLLM):
        ) = params_to_check

        ### CHECK STS ###
-       if aws_web_identity_token is not None and aws_role_name is not None and aws_session_name is not None:
-           iam_creds_cache_key = json.dumps({
+       if (
+           aws_web_identity_token is not None
+           and aws_role_name is not None
+           and aws_session_name is not None
+       ):
+           iam_creds_cache_key = json.dumps(
+               {
                    "aws_web_identity_token": aws_web_identity_token,
                    "aws_role_name": aws_role_name,
                    "aws_session_name": aws_session_name,
                    "aws_region_name": aws_region_name,
-           })
+               }
+           )

            iam_creds_dict = iam_cache.get_cache(iam_creds_cache_key)
            if iam_creds_dict is None:
@@ -348,7 +359,7 @@ class BedrockLLM(BaseLLM):
                sts_client = boto3.client(
                    "sts",
                    region_name=aws_region_name,
-                   endpoint_url=f"https://sts.{aws_region_name}.amazonaws.com"
+                   endpoint_url=f"https://sts.{aws_region_name}.amazonaws.com",
                )

                # https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html
@@ -362,12 +373,18 @@ class BedrockLLM(BaseLLM):

                iam_creds_dict = {
                    "aws_access_key_id": sts_response["Credentials"]["AccessKeyId"],
-                   "aws_secret_access_key": sts_response["Credentials"]["SecretAccessKey"],
+                   "aws_secret_access_key": sts_response["Credentials"][
+                       "SecretAccessKey"
+                   ],
                    "aws_session_token": sts_response["Credentials"]["SessionToken"],
                    "region_name": aws_region_name,
                }

-               iam_cache.set_cache(key=iam_creds_cache_key, value=json.dumps(iam_creds_dict), ttl=3600 - 60)
+               iam_cache.set_cache(
+                   key=iam_creds_cache_key,
+                   value=json.dumps(iam_creds_dict),
+                   ttl=3600 - 60,
+               )

            session = boto3.Session(**iam_creds_dict)
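These STS hunks are mostly line-length reformats, but the logic they touch is worth spelling out: assumed-role credentials are cached in iam_cache, keyed by a JSON dump of the identity parameters, with a TTL of 3600 - 60 seconds, one minute shorter than the default one-hour STS session, so a cached entry always expires before AWS invalidates it. The same pattern in isolation (names and the cache structure here are illustrative, not the litellm internals):

    import json
    import time

    _cache: dict = {}  # cache key -> (expiry_epoch, credentials)

    def get_cached_creds(params: dict, fetch, session_ttl: int = 3600):
        # Key on the exact identity parameters; refresh 60s before STS expiry.
        key = json.dumps(params, sort_keys=True)
        hit = _cache.get(key)
        if hit is not None and hit[0] > time.time():
            return hit[1]
        creds = fetch(params)  # e.g. sts.assume_role_with_web_identity(...)
        _cache[key] = (time.time() + session_ttl - 60, creds)
        return creds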
@@ -976,7 +993,7 @@ class BedrockLLM(BaseLLM):
            if isinstance(timeout, float) or isinstance(timeout, int):
                timeout = httpx.Timeout(timeout)
            _params["timeout"] = timeout
-           self.client = HTTPHandler(**_params)  # type: ignore
+           self.client = _get_httpx_client(_params)  # type: ignore
        else:
            self.client = client
        if (stream is not None and stream == True) and provider != "ai21":
@@ -1058,7 +1075,7 @@ class BedrockLLM(BaseLLM):
            if isinstance(timeout, float) or isinstance(timeout, int):
                timeout = httpx.Timeout(timeout)
            _params["timeout"] = timeout
-           client = AsyncHTTPHandler(**_params)  # type: ignore
+           client = _get_async_httpx_client(_params)  # type: ignore
        else:
            client = client  # type: ignore
@@ -1433,13 +1450,19 @@ class BedrockConverseLLM(BaseLLM):
        ) = params_to_check

        ### CHECK STS ###
-       if aws_web_identity_token is not None and aws_role_name is not None and aws_session_name is not None:
-           iam_creds_cache_key = json.dumps({
+       if (
+           aws_web_identity_token is not None
+           and aws_role_name is not None
+           and aws_session_name is not None
+       ):
+           iam_creds_cache_key = json.dumps(
+               {
                    "aws_web_identity_token": aws_web_identity_token,
                    "aws_role_name": aws_role_name,
                    "aws_session_name": aws_session_name,
                    "aws_region_name": aws_region_name,
-           })
+               }
+           )

            iam_creds_dict = iam_cache.get_cache(iam_creds_cache_key)
            if iam_creds_dict is None:
@@ -1454,7 +1477,7 @@ class BedrockConverseLLM(BaseLLM):
                sts_client = boto3.client(
                    "sts",
                    region_name=aws_region_name,
-                   endpoint_url=f"https://sts.{aws_region_name}.amazonaws.com"
+                   endpoint_url=f"https://sts.{aws_region_name}.amazonaws.com",
                )

                # https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html
@@ -1468,12 +1491,18 @@ class BedrockConverseLLM(BaseLLM):

                iam_creds_dict = {
                    "aws_access_key_id": sts_response["Credentials"]["AccessKeyId"],
-                   "aws_secret_access_key": sts_response["Credentials"]["SecretAccessKey"],
+                   "aws_secret_access_key": sts_response["Credentials"][
+                       "SecretAccessKey"
+                   ],
                    "aws_session_token": sts_response["Credentials"]["SessionToken"],
                    "region_name": aws_region_name,
                }

-               iam_cache.set_cache(key=iam_creds_cache_key, value=json.dumps(iam_creds_dict), ttl=3600 - 60)
+               iam_cache.set_cache(
+                   key=iam_creds_cache_key,
+                   value=json.dumps(iam_creds_dict),
+                   ttl=3600 - 60,
+               )

            session = boto3.Session(**iam_creds_dict)
@@ -1575,7 +1604,7 @@ class BedrockConverseLLM(BaseLLM):
            if isinstance(timeout, float) or isinstance(timeout, int):
                timeout = httpx.Timeout(timeout)
            _params["timeout"] = timeout
-           client = AsyncHTTPHandler(**_params)  # type: ignore
+           client = _get_async_httpx_client(_params)  # type: ignore
        else:
            client = client  # type: ignore
@@ -1847,7 +1876,7 @@ class BedrockConverseLLM(BaseLLM):
            if isinstance(timeout, float) or isinstance(timeout, int):
                timeout = httpx.Timeout(timeout)
            _params["timeout"] = timeout
-           client = HTTPHandler(**_params)  # type: ignore
+           client = _get_httpx_client(_params)  # type: ignore
        else:
            client = client
        try:
@@ -219,3 +219,60 @@ class HTTPHandler:
            self.close()
        except Exception:
            pass


def _get_async_httpx_client(params: Optional[dict] = None) -> AsyncHTTPHandler:
    """
    Retrieves the async HTTP client from the cache
    If not present, creates a new client

    Caches the new client and returns it.
    """
    _params_key_name = ""
    if params is not None:
        for key, value in params.items():
            try:
                _params_key_name += f"{key}_{value}"
            except Exception:
                pass

    _cache_key_name = "async_httpx_client" + _params_key_name
    if _cache_key_name in litellm.in_memory_llm_clients_cache:
        return litellm.in_memory_llm_clients_cache[_cache_key_name]

    if params is not None:
        _new_client = AsyncHTTPHandler(**params)
    else:
        _new_client = AsyncHTTPHandler(
            timeout=httpx.Timeout(timeout=600.0, connect=5.0)
        )
    litellm.in_memory_llm_clients_cache[_cache_key_name] = _new_client
    return _new_client


def _get_httpx_client(params: Optional[dict] = None) -> HTTPHandler:
    """
    Retrieves the HTTP client from the cache
    If not present, creates a new client

    Caches the new client and returns it.
    """
    _params_key_name = ""
    if params is not None:
        for key, value in params.items():
            try:
                _params_key_name += f"{key}_{value}"
            except Exception:
                pass

    _cache_key_name = "httpx_client" + _params_key_name
    if _cache_key_name in litellm.in_memory_llm_clients_cache:
        return litellm.in_memory_llm_clients_cache[_cache_key_name]

    if params is not None:
        _new_client = HTTPHandler(**params)
    else:
        _new_client = HTTPHandler(timeout=httpx.Timeout(timeout=600.0, connect=5.0))

    litellm.in_memory_llm_clients_cache[_cache_key_name] = _new_client
    return _new_client
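The point of the two new helpers is connection reuse: identical params produce the same cache key, so repeated calls return the same pooled handler instead of constructing a fresh httpx client (and TCP/TLS pool) per request. Expected caching behavior, assuming the functions exactly as added above:

    import httpx
    from litellm.llms.custom_httpx.http_handler import _get_httpx_client

    a = _get_httpx_client()
    b = _get_httpx_client()
    assert a is b  # same (empty) params -> same cache key -> same client

    c = _get_httpx_client({"timeout": httpx.Timeout(30.0)})
    assert c is not a  # different params -> separate cached client

One caveat visible in the code: the cache key is built by string-concatenating key_value pairs, so two different params dicts that stringify the same way would share a client, and the same params in a different insertion order would create a duplicate client; for the small fixed set of internal callers here that is harmless.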
@@ -10,10 +10,10 @@ from typing import Callable, Optional, List, Union, Tuple, Literal
from litellm.utils import (
    ModelResponse,
    Usage,
-    map_finish_reason,
    CustomStreamWrapper,
    EmbeddingResponse,
)
+from litellm.litellm_core_utils.core_helpers import map_finish_reason
import litellm
from .prompt_templates.factory import prompt_factory, custom_prompt
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
@@ -289,7 +289,7 @@ class DatabricksChatCompletion(BaseLLM):
        response: Union[requests.Response, httpx.Response],
        model_response: ModelResponse,
        stream: bool,
-       logging_obj: litellm.utils.Logging,
+       logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
        optional_params: dict,
        api_key: str,
        data: Union[dict, str],
@@ -12,11 +12,11 @@ from typing import Callable, Optional, List, Literal, Union
from litellm.utils import (
    ModelResponse,
    Usage,
-    map_finish_reason,
    CustomStreamWrapper,
    Message,
    Choices,
)
+from litellm.litellm_core_utils.core_helpers import map_finish_reason
import litellm
from .prompt_templates.factory import prompt_factory, custom_prompt
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
@@ -198,7 +198,7 @@ class PredibaseChatCompletion(BaseLLM):
        response: Union[requests.Response, httpx.Response],
        model_response: ModelResponse,
        stream: bool,
-       logging_obj: litellm.utils.Logging,
+       logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
        optional_params: dict,
        api_key: str,
        data: Union[dict, str],
@@ -4,7 +4,6 @@ from enum import Enum
import requests, copy  # type: ignore
import time
from typing import Callable, Optional, List
-from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper
import litellm
from .prompt_templates.factory import prompt_factory, custom_prompt
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
@@ -5,7 +5,8 @@ import requests  # type: ignore
import time
from typing import Callable, Optional, Union, List, Literal, Any
from pydantic import BaseModel
-from litellm.utils import ModelResponse, Usage, CustomStreamWrapper, map_finish_reason
+from litellm.utils import ModelResponse, Usage, CustomStreamWrapper
+from litellm.litellm_core_utils.core_helpers import map_finish_reason
import litellm, uuid
import httpx, inspect  # type: ignore
from litellm.types.llms.vertex_ai import *
@@ -6,7 +6,8 @@ from enum import Enum
import requests, copy  # type: ignore
import time, uuid
from typing import Callable, Optional, List
-from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper
+from litellm.utils import ModelResponse, Usage, CustomStreamWrapper
+from litellm.litellm_core_utils.core_helpers import map_finish_reason
import litellm
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from .prompt_templates.factory import (
@@ -8,7 +8,10 @@ from enum import Enum
import requests  # type: ignore
import time
from typing import Callable, Optional, Union, List, Any, Tuple
-from litellm.utils import ModelResponse, Usage, CustomStreamWrapper, map_finish_reason
+import litellm.litellm_core_utils
+import litellm.litellm_core_utils.litellm_logging
+from litellm.utils import ModelResponse, Usage, CustomStreamWrapper
+from litellm.litellm_core_utils.core_helpers import map_finish_reason
import litellm, uuid
import httpx, inspect  # type: ignore
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
@@ -320,7 +323,7 @@ class VertexLLM(BaseLLM):
        model: str,
        response: httpx.Response,
        model_response: ModelResponse,
-       logging_obj: litellm.utils.Logging,
+       logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
        optional_params: dict,
        api_key: str,
        data: Union[dict, str],
@@ -368,7 +368,9 @@ async def acompletion(
        return response
    except Exception as e:
        verbose_logger.error(
-           "litellm.acompletion(): Exception occured - {}".format(str(e))
+           "litellm.acompletion(): Exception occured - {}\n{}".format(
+               str(e), traceback.format_exc()
+           )
        )
        verbose_logger.debug(traceback.format_exc())
        custom_llm_provider = custom_llm_provider or "openai"
@@ -399,6 +401,7 @@ def mock_completion(
    stream: Optional[bool] = False,
    mock_response: Union[str, Exception] = "This is a mock request",
    logging=None,
+   custom_llm_provider=None,
    **kwargs,
):
    """
@@ -436,7 +439,7 @@ def mock_completion(
        raise litellm.APIError(
            status_code=getattr(mock_response, "status_code", 500),  # type: ignore
            message=getattr(mock_response, "text", str(mock_response)),
-           llm_provider=getattr(mock_response, "llm_provider", "openai"),  # type: ignore
+           llm_provider=getattr(mock_response, "llm_provider", custom_llm_provider or "openai"),  # type: ignore
            model=model,  # type: ignore
            request=httpx.Request(method="POST", url="https://api.openai.com/v1/"),
        )
@@ -905,6 +908,7 @@ def completion(
            logging=logging,
            acompletion=acompletion,
            mock_delay=kwargs.get("mock_delay", None),
+           custom_llm_provider=custom_llm_provider,
        )
    if custom_llm_provider == "azure":
        # azure configs
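The mock_completion change threads custom_llm_provider through so a mocked exception is raised under the provider the caller actually targeted instead of always "openai". A sketch of how that surfaces (assuming, per the hunk above, that an Exception-typed mock_response is re-raised as litellm.APIError):

    import litellm

    try:
        litellm.mock_completion(
            model="bedrock/anthropic.claude-3-sonnet",
            messages=[{"role": "user", "content": "hi"}],
            mock_response=Exception("simulated outage"),
            custom_llm_provider="bedrock",
        )
    except litellm.APIError as e:
        assert e.llm_provider == "bedrock"  # previously defaulted to "openai"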
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -0,0 +1 @@
|
|||
"use strict";(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[665],{30953:function(e,t,r){r.d(t,{GH$:function(){return n}});var l=r(64090);let n=e=>{let{color:t="currentColor",size:r=24,className:n,...s}=e;return l.createElement("svg",{viewBox:"0 0 24 24",xmlns:"http://www.w3.org/2000/svg",width:r,height:r,fill:t,...s,className:"remixicon "+(n||"")},l.createElement("path",{d:"M12 22C6.47715 22 2 17.5228 2 12C2 6.47715 6.47715 2 12 2C17.5228 2 22 6.47715 22 12C22 17.5228 17.5228 22 12 22ZM12 20C16.4183 20 20 16.4183 20 12C20 7.58172 16.4183 4 12 4C7.58172 4 4 7.58172 4 12C4 16.4183 7.58172 20 12 20ZM11.0026 16L6.75999 11.7574L8.17421 10.3431L11.0026 13.1716L16.6595 7.51472L18.0737 8.92893L11.0026 16Z"}))}}}]);
|
|
@ -1 +0,0 @@
|
|||
"use strict";(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[665],{30953:function(e,t,r){r.d(t,{GH$:function(){return n}});var l=r(2265);let n=e=>{let{color:t="currentColor",size:r=24,className:n,...s}=e;return l.createElement("svg",{viewBox:"0 0 24 24",xmlns:"http://www.w3.org/2000/svg",width:r,height:r,fill:t,...s,className:"remixicon "+(n||"")},l.createElement("path",{d:"M12 22C6.47715 22 2 17.5228 2 12C2 6.47715 6.47715 2 12 2C17.5228 2 22 6.47715 22 12C22 17.5228 17.5228 22 12 22ZM12 20C16.4183 20 20 16.4183 20 12C20 7.58172 16.4183 4 12 4C7.58172 4 4 7.58172 4 12C4 16.4183 7.58172 20 12 20ZM11.0026 16L6.75999 11.7574L8.17421 10.3431L11.0026 13.1716L16.6595 7.51472L18.0737 8.92893L11.0026 16Z"}))}}}]);
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -1 +1 @@
|
|||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[165],{83155:function(e,t,n){(window.__NEXT_P=window.__NEXT_P||[]).push(["/_not-found",function(){return n(84032)}])},84032:function(e,t,n){"use strict";Object.defineProperty(t,"__esModule",{value:!0}),Object.defineProperty(t,"default",{enumerable:!0,get:function(){return i}}),n(86921);let o=n(57437);n(2265);let r={error:{fontFamily:'system-ui,"Segoe UI",Roboto,Helvetica,Arial,sans-serif,"Apple Color Emoji","Segoe UI Emoji"',height:"100vh",textAlign:"center",display:"flex",flexDirection:"column",alignItems:"center",justifyContent:"center"},desc:{display:"inline-block"},h1:{display:"inline-block",margin:"0 20px 0 0",padding:"0 23px 0 0",fontSize:24,fontWeight:500,verticalAlign:"top",lineHeight:"49px"},h2:{fontSize:14,fontWeight:400,lineHeight:"49px",margin:0}};function i(){return(0,o.jsxs)(o.Fragment,{children:[(0,o.jsx)("title",{children:"404: This page could not be found."}),(0,o.jsx)("div",{style:r.error,children:(0,o.jsxs)("div",{children:[(0,o.jsx)("style",{dangerouslySetInnerHTML:{__html:"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}),(0,o.jsx)("h1",{className:"next-error-h1",style:r.h1,children:"404"}),(0,o.jsx)("div",{style:r.desc,children:(0,o.jsx)("h2",{style:r.h2,children:"This page could not be found."})})]})})]})}("function"==typeof t.default||"object"==typeof t.default&&null!==t.default)&&void 0===t.default.__esModule&&(Object.defineProperty(t.default,"__esModule",{value:!0}),Object.assign(t.default,t),e.exports=t.default)}},function(e){e.O(0,[971,69,744],function(){return e(e.s=83155)}),_N_E=e.O()}]);
|
||||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[165],{83155:function(e,t,n){(window.__NEXT_P=window.__NEXT_P||[]).push(["/_not-found",function(){return n(84032)}])},84032:function(e,t,n){"use strict";Object.defineProperty(t,"__esModule",{value:!0}),Object.defineProperty(t,"default",{enumerable:!0,get:function(){return i}}),n(86921);let o=n(3827);n(64090);let r={error:{fontFamily:'system-ui,"Segoe UI",Roboto,Helvetica,Arial,sans-serif,"Apple Color Emoji","Segoe UI Emoji"',height:"100vh",textAlign:"center",display:"flex",flexDirection:"column",alignItems:"center",justifyContent:"center"},desc:{display:"inline-block"},h1:{display:"inline-block",margin:"0 20px 0 0",padding:"0 23px 0 0",fontSize:24,fontWeight:500,verticalAlign:"top",lineHeight:"49px"},h2:{fontSize:14,fontWeight:400,lineHeight:"49px",margin:0}};function i(){return(0,o.jsxs)(o.Fragment,{children:[(0,o.jsx)("title",{children:"404: This page could not be found."}),(0,o.jsx)("div",{style:r.error,children:(0,o.jsxs)("div",{children:[(0,o.jsx)("style",{dangerouslySetInnerHTML:{__html:"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}),(0,o.jsx)("h1",{className:"next-error-h1",style:r.h1,children:"404"}),(0,o.jsx)("div",{style:r.desc,children:(0,o.jsx)("h2",{style:r.h2,children:"This page could not be found."})})]})})]})}("function"==typeof t.default||"object"==typeof t.default&&null!==t.default)&&void 0===t.default.__esModule&&(Object.defineProperty(t.default,"__esModule",{value:!0}),Object.assign(t.default,t),e.exports=t.default)}},function(e){e.O(0,[971,69,744],function(){return e(e.s=83155)}),_N_E=e.O()}]);
|
|
@ -1 +1 @@
|
|||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{11837:function(n,e,t){Promise.resolve().then(t.t.bind(t,99646,23)),Promise.resolve().then(t.t.bind(t,63385,23))},63385:function(){},99646:function(n){n.exports={style:{fontFamily:"'__Inter_12bbc4', '__Inter_Fallback_12bbc4'",fontStyle:"normal"},className:"__className_12bbc4"}}},function(n){n.O(0,[971,69,744],function(){return n(n.s=11837)}),_N_E=n.O()}]);
|
||||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{87421:function(n,e,t){Promise.resolve().then(t.t.bind(t,99646,23)),Promise.resolve().then(t.t.bind(t,63385,23))},63385:function(){},99646:function(n){n.exports={style:{fontFamily:"'__Inter_12bbc4', '__Inter_Fallback_12bbc4'",fontStyle:"normal"},className:"__className_12bbc4"}}},function(n){n.O(0,[971,69,744],function(){return n(n.s=87421)}),_N_E=n.O()}]);
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -1 +0,0 @@
|
|||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[461],{20723:function(e,s,l){Promise.resolve().then(l.bind(l,667))},667:function(e,s,l){"use strict";l.r(s),l.d(s,{default:function(){return _}});var t=l(57437),a=l(2265),r=l(47907),n=l(2179),i=l(18190),o=l(13810),u=l(10384),c=l(46453),d=l(71801),m=l(52273),h=l(42440),x=l(30953),j=l(777),p=l(37963),f=l(60620),g=l(1861);function _(){let[e]=f.Z.useForm(),s=(0,r.useSearchParams)();s.get("token");let l=s.get("id"),[_,Z]=(0,a.useState)(null),[w,b]=(0,a.useState)(""),[N,S]=(0,a.useState)(""),[k,y]=(0,a.useState)(null),[v,E]=(0,a.useState)(""),[F,I]=(0,a.useState)("");return(0,a.useEffect)(()=>{l&&(0,j.W_)(l).then(e=>{let s=e.login_url;console.log("login_url:",s),E(s);let l=e.token,t=(0,p.o)(l);I(l),console.log("decoded:",t),Z(t.key),console.log("decoded user email:",t.user_email),S(t.user_email),y(t.user_id)})},[l]),(0,t.jsx)("div",{className:"mx-auto max-w-md mt-10",children:(0,t.jsxs)(o.Z,{children:[(0,t.jsx)(h.Z,{className:"text-sm mb-5 text-center",children:"\uD83D\uDE85 LiteLLM"}),(0,t.jsx)(h.Z,{className:"text-xl",children:"Sign up"}),(0,t.jsx)(d.Z,{children:"Claim your user account to login to Admin UI."}),(0,t.jsx)(i.Z,{className:"mt-4",title:"SSO",icon:x.GH$,color:"sky",children:(0,t.jsxs)(c.Z,{numItems:2,className:"flex justify-between items-center",children:[(0,t.jsx)(u.Z,{children:"SSO is under the Enterprise Tirer."}),(0,t.jsx)(u.Z,{children:(0,t.jsx)(n.Z,{variant:"primary",className:"mb-2",children:(0,t.jsx)("a",{href:"https://forms.gle/W3U4PZpJGFHWtHyA9",target:"_blank",children:"Get Free Trial"})})})]})}),(0,t.jsxs)(f.Z,{className:"mt-10 mb-5 mx-auto",layout:"vertical",onFinish:e=>{console.log("in handle submit. accessToken:",_,"token:",F,"formValues:",e),_&&F&&(e.user_email=N,k&&l&&(0,j.m_)(_,l,k,e.password).then(e=>{var s;let l="/ui/";console.log("redirecting to:",l+="?userID="+((null===(s=e.data)||void 0===s?void 0:s.user_id)||e.user_id)+"&token="+F),window.location.href=l}))},children:[(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(f.Z.Item,{label:"Email Address",name:"user_email",children:(0,t.jsx)(m.Z,{type:"email",disabled:!0,value:N,defaultValue:N,className:"max-w-md"})}),(0,t.jsx)(f.Z.Item,{label:"Password",name:"password",rules:[{required:!0,message:"password required to sign up"}],help:"Create a password for your account",children:(0,t.jsx)(m.Z,{placeholder:"",type:"password",className:"max-w-md"})})]}),(0,t.jsx)("div",{className:"mt-10",children:(0,t.jsx)(g.ZP,{htmlType:"submit",children:"Sign Up"})})]})]})})}}},function(e){e.O(0,[665,505,684,777,971,69,744],function(){return e(e.s=20723)}),_N_E=e.O()}]);
|
|
@ -0,0 +1 @@
|
|||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[461],{61994:function(e,s,l){Promise.resolve().then(l.bind(l,667))},667:function(e,s,l){"use strict";l.r(s),l.d(s,{default:function(){return _}});var t=l(3827),a=l(64090),r=l(47907),n=l(16450),i=l(18190),o=l(13810),u=l(10384),c=l(46453),d=l(71801),m=l(52273),h=l(42440),x=l(30953),j=l(777),p=l(37963),f=l(60620),g=l(1861);function _(){let[e]=f.Z.useForm(),s=(0,r.useSearchParams)();s.get("token");let l=s.get("id"),[_,Z]=(0,a.useState)(null),[w,b]=(0,a.useState)(""),[N,S]=(0,a.useState)(""),[k,y]=(0,a.useState)(null),[v,E]=(0,a.useState)(""),[F,I]=(0,a.useState)("");return(0,a.useEffect)(()=>{l&&(0,j.W_)(l).then(e=>{let s=e.login_url;console.log("login_url:",s),E(s);let l=e.token,t=(0,p.o)(l);I(l),console.log("decoded:",t),Z(t.key),console.log("decoded user email:",t.user_email),S(t.user_email),y(t.user_id)})},[l]),(0,t.jsx)("div",{className:"mx-auto max-w-md mt-10",children:(0,t.jsxs)(o.Z,{children:[(0,t.jsx)(h.Z,{className:"text-sm mb-5 text-center",children:"\uD83D\uDE85 LiteLLM"}),(0,t.jsx)(h.Z,{className:"text-xl",children:"Sign up"}),(0,t.jsx)(d.Z,{children:"Claim your user account to login to Admin UI."}),(0,t.jsx)(i.Z,{className:"mt-4",title:"SSO",icon:x.GH$,color:"sky",children:(0,t.jsxs)(c.Z,{numItems:2,className:"flex justify-between items-center",children:[(0,t.jsx)(u.Z,{children:"SSO is under the Enterprise Tirer."}),(0,t.jsx)(u.Z,{children:(0,t.jsx)(n.Z,{variant:"primary",className:"mb-2",children:(0,t.jsx)("a",{href:"https://forms.gle/W3U4PZpJGFHWtHyA9",target:"_blank",children:"Get Free Trial"})})})]})}),(0,t.jsxs)(f.Z,{className:"mt-10 mb-5 mx-auto",layout:"vertical",onFinish:e=>{console.log("in handle submit. accessToken:",_,"token:",F,"formValues:",e),_&&F&&(e.user_email=N,k&&l&&(0,j.m_)(_,l,k,e.password).then(e=>{var s;let l="/ui/";console.log("redirecting to:",l+="?userID="+((null===(s=e.data)||void 0===s?void 0:s.user_id)||e.user_id)+"&token="+F),window.location.href=l}))},children:[(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(f.Z.Item,{label:"Email Address",name:"user_email",children:(0,t.jsx)(m.Z,{type:"email",disabled:!0,value:N,defaultValue:N,className:"max-w-md"})}),(0,t.jsx)(f.Z.Item,{label:"Password",name:"password",rules:[{required:!0,message:"password required to sign up"}],help:"Create a password for your account",children:(0,t.jsx)(m.Z,{placeholder:"",type:"password",className:"max-w-md"})})]}),(0,t.jsx)("div",{className:"mt-10",children:(0,t.jsx)(g.ZP,{htmlType:"submit",children:"Sign Up"})})]})]})})}}},function(e){e.O(0,[665,294,684,777,971,69,744],function(){return e(e.s=61994)}),_N_E=e.O()}]);
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -1 +1 @@
|
|||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{70377:function(e,n,t){Promise.resolve().then(t.t.bind(t,47690,23)),Promise.resolve().then(t.t.bind(t,48955,23)),Promise.resolve().then(t.t.bind(t,5613,23)),Promise.resolve().then(t.t.bind(t,11902,23)),Promise.resolve().then(t.t.bind(t,31778,23)),Promise.resolve().then(t.t.bind(t,77831,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,69],function(){return n(35317),n(70377)}),_N_E=e.O()}]);
|
||||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{32028:function(e,n,t){Promise.resolve().then(t.t.bind(t,47690,23)),Promise.resolve().then(t.t.bind(t,48955,23)),Promise.resolve().then(t.t.bind(t,5613,23)),Promise.resolve().then(t.t.bind(t,11902,23)),Promise.resolve().then(t.t.bind(t,31778,23)),Promise.resolve().then(t.t.bind(t,77831,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,69],function(){return n(35317),n(32028)}),_N_E=e.O()}]);
|
|
@ -1 +1 @@
|
|||
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/63f65dbb14efd996.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var 
n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();
|
||||
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/b65d5698d1a1958d.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var 
n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -1 +1 @@
|
|||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-887c75b16b85d4b4.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f593049e31b05aeb.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-8316d07d1f41e39f.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-096338c8e1915716.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-887c75b16b85d4b4.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/63f65dbb14efd996.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[68101,[\"936\",\"static/chunks/2f6dbc85-cac2949a76539886.js\",\"505\",\"static/chunks/505-5ff3c318fddfa35c.js\",\"131\",\"static/chunks/131-cb6bfe24e23e121b.js\",\"684\",\"static/chunks/684-16b194c83a169f6d.js\",\"759\",\"static/chunks/759-c0083d8a782d300e.js\",\"777\",\"static/chunks/777-71fb78fdb4897cc3.js\",\"931\",\"static/chunks/app/page-8028473f1a04553d.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/63f65dbb14efd996.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"sTvd1VbHSi_TBr1KiIpul\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
||||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-6f7793f21bbb2fbe.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-6f7793f21bbb2fbe.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/b65d5698d1a1958d.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[68101,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"294\",\"static/chunks/294-0e35509d5ca95267.js\",\"131\",\"static/chunks/131-6a03368053f9d26d.js\",\"684\",\"static/chunks/684-bb2d2f93d92acb0b.js\",\"759\",\"static/chunks/759-83a8bdddfe32b5d9.js\",\"777\",\"static/chunks/777-71fb78fdb4897cc3.js\",\"931\",\"static/chunks/app/page-626098dc8320c801.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/b65d5698d1a1958d.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"S9_6IC27HNWjJtr-LNaAO\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
|
@ -1,7 +1,7 @@
|
|||
2:I[77831,[],""]
|
||||
3:I[68101,["936","static/chunks/2f6dbc85-cac2949a76539886.js","505","static/chunks/505-5ff3c318fddfa35c.js","131","static/chunks/131-cb6bfe24e23e121b.js","684","static/chunks/684-16b194c83a169f6d.js","759","static/chunks/759-c0083d8a782d300e.js","777","static/chunks/777-71fb78fdb4897cc3.js","931","static/chunks/app/page-8028473f1a04553d.js"],""]
|
||||
3:I[68101,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-83a8bdddfe32b5d9.js","777","static/chunks/777-71fb78fdb4897cc3.js","931","static/chunks/app/page-626098dc8320c801.js"],""]
|
||||
4:I[5613,[],""]
|
||||
5:I[31778,[],""]
|
||||
0:["sTvd1VbHSi_TBr1KiIpul",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/63f65dbb14efd996.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
0:["S9_6IC27HNWjJtr-LNaAO",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/b65d5698d1a1958d.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||
1:null
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -1,7 +1,7 @@
|
|||
2:I[77831,[],""]
|
||||
3:I[87494,["505","static/chunks/505-5ff3c318fddfa35c.js","131","static/chunks/131-cb6bfe24e23e121b.js","777","static/chunks/777-71fb78fdb4897cc3.js","418","static/chunks/app/model_hub/page-a1942d43573c82c3.js"],""]
|
||||
3:I[87494,["294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","777","static/chunks/777-71fb78fdb4897cc3.js","418","static/chunks/app/model_hub/page-4cb65c32467214b5.js"],""]
|
||||
4:I[5613,[],""]
|
||||
5:I[31778,[],""]
|
||||
0:["sTvd1VbHSi_TBr1KiIpul",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/63f65dbb14efd996.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
0:["S9_6IC27HNWjJtr-LNaAO",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/b65d5698d1a1958d.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||
1:null
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -1,7 +1,7 @@
|
|||
2:I[77831,[],""]
|
||||
3:I[667,["665","static/chunks/3014691f-b24e8254c7593934.js","505","static/chunks/505-5ff3c318fddfa35c.js","684","static/chunks/684-16b194c83a169f6d.js","777","static/chunks/777-71fb78fdb4897cc3.js","461","static/chunks/app/onboarding/page-49a30e653b6ae929.js"],""]
|
||||
3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-71fb78fdb4897cc3.js","461","static/chunks/app/onboarding/page-664c7288e11fff5a.js"],""]
|
||||
4:I[5613,[],""]
|
||||
5:I[31778,[],""]
|
||||
0:["sTvd1VbHSi_TBr1KiIpul",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/63f65dbb14efd996.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
0:["S9_6IC27HNWjJtr-LNaAO",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/b65d5698d1a1958d.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||
1:null
|
||||
|
|
|
@ -79,8 +79,8 @@ litellm_settings:
|
|||
failure_callback: ["langfuse"]
|
||||
cache: true
|
||||
|
||||
# general_settings:
|
||||
# alerting: ["email"]
|
||||
general_settings:
|
||||
alerting: ["slack"]
|
||||
# key_management_system: "aws_kms"
|
||||
# key_management_settings:
|
||||
# hosted_keys: ["LITELLM_MASTER_KEY"]
|
||||
|
|
|
@ -1358,10 +1358,11 @@ class CallInfo(LiteLLMBase):
|
|||
|
||||
spend: float
|
||||
max_budget: Optional[float] = None
|
||||
token: str = Field(description="Hashed value of that key")
|
||||
token: Optional[str] = Field(default=None, description="Hashed value of that key")
|
||||
customer_id: Optional[str] = None
|
||||
user_id: Optional[str] = None
|
||||
team_id: Optional[str] = None
|
||||
team_alias: Optional[str] = None
|
||||
user_email: Optional[str] = None
|
||||
key_alias: Optional[str] = None
|
||||
projected_exceeded_date: Optional[str] = None
|
||||
|
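The hunk above loosens `CallInfo.token` from a required field to `Optional[str]` with a `None` default, so alert payloads that have no associated key still validate. A minimal sketch of the effect, using a trimmed stand-in model (the real class lives in `litellm/proxy/_types.py`; `LiteLLMBase` is assumed to be a pydantic `BaseModel`, as the `Field(...)` usage suggests):

```python
from typing import Optional

from pydantic import BaseModel, Field


class CallInfo(BaseModel):
    """Trimmed stand-in for litellm.proxy._types.CallInfo."""

    spend: float
    max_budget: Optional[float] = None
    token: Optional[str] = Field(default=None, description="Hashed value of that key")
    user_id: Optional[str] = None


# Before this change, omitting `token` raised a pydantic ValidationError.
# Now a key-less event (e.g. a user-level budget alert) constructs cleanly:
info = CallInfo(spend=12.5, max_budget=100.0, user_id="user-123")
print(info.token)  # None
```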
@ -1574,3 +1575,44 @@ class ManagementEndpointLoggingPayload(LiteLLMBase):
|
|||
exception: Optional[Any] = None
|
||||
start_time: Optional[datetime] = None
|
||||
end_time: Optional[datetime] = None
|
||||
|
||||
|
||||
class ProxyException(Exception):
|
||||
# NOTE: DO NOT MODIFY THIS
|
||||
# This is used to map exactly to OPENAI Exceptions
|
||||
def __init__(
|
||||
self,
|
||||
message: str,
|
||||
type: str,
|
||||
param: Optional[str],
|
||||
code: Optional[int],
|
||||
):
|
||||
self.message = message
|
||||
self.type = type
|
||||
self.param = param
|
||||
self.code = code
|
||||
|
||||
# rules for proxyExceptions
|
||||
# Litellm router.py returns "No healthy deployment available" when there are no deployments available
|
||||
# Should map to 429 errors https://github.com/BerriAI/litellm/issues/2487
|
||||
if (
|
||||
"No healthy deployment available" in self.message
|
||||
or "No deployments available" in self.message
|
||||
):
|
||||
self.code = 429
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
"""Converts the ProxyException instance to a dictionary."""
|
||||
return {
|
||||
"message": self.message,
|
||||
"type": self.type,
|
||||
"param": self.param,
|
||||
"code": self.code,
|
||||
}
|
||||
|
||||
|
||||
class CommonProxyErrors(enum.Enum):
|
||||
db_not_connected_error = "DB not connected"
|
||||
no_llm_router = "No models configured on proxy"
|
||||
not_allowed_access = "Admin-only endpoint. Not allowed to access this."
|
||||
not_premium_user = "You must be a LiteLLM Enterprise user to use this feature. If you have a license please set `LITELLM_LICENSE` in your env. If you want to obtain a license meet with us here: https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat"
|
||||
|
|
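Because `__init__` rewrites `self.code` whenever the router's "no deployments" message appears, callers get the 429 remapping for free. A quick sketch, assuming the class is importable from `litellm.proxy._types` (as the health-endpoint imports later in this diff do):

```python
from litellm.proxy._types import ProxyException

# Router "no deployments" failures are remapped to HTTP 429 so clients
# back off and retry instead of treating them as hard 5xx errors.
exc = ProxyException(
    message="No deployments available for selected model",
    type="router_error",
    param=None,
    code=500,
)
print(exc.code)       # 429 - overridden by the constructor
print(exc.to_dict())  # {"message": ..., "type": "router_error", "param": None, "code": 429}
```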
1237
litellm/proxy/auth/user_api_key_auth.py
Normal file
File diff suppressed because it is too large
194
litellm/proxy/caching_routes.py
Normal file
|
@ -0,0 +1,194 @@
|
|||
from typing import Optional
|
||||
from fastapi import Depends, Request, APIRouter
|
||||
from fastapi import HTTPException
|
||||
import copy
|
||||
import litellm
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
|
||||
|
||||
|
||||
router = APIRouter(
|
||||
prefix="/cache",
|
||||
tags=["caching"],
|
||||
)
|
||||
|
||||
|
||||
@router.get(
|
||||
"/ping",
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
)
|
||||
async def cache_ping():
|
||||
"""
|
||||
Endpoint for checking if cache can be pinged
|
||||
"""
|
||||
try:
|
||||
litellm_cache_params = {}
|
||||
specific_cache_params = {}
|
||||
|
||||
if litellm.cache is None:
|
||||
raise HTTPException(
|
||||
status_code=503, detail="Cache not initialized. litellm.cache is None"
|
||||
)
|
||||
|
||||
for k, v in vars(litellm.cache).items():
|
||||
try:
|
||||
if k == "cache":
|
||||
continue
|
||||
litellm_cache_params[k] = str(copy.deepcopy(v))
|
||||
except Exception:
|
||||
litellm_cache_params[k] = "<unable to copy or convert>"
|
||||
for k, v in vars(litellm.cache.cache).items():
|
||||
try:
|
||||
specific_cache_params[k] = str(v)
|
||||
except Exception:
|
||||
specific_cache_params[k] = "<unable to copy or convert>"
|
||||
if litellm.cache.type == "redis":
|
||||
# ping the redis cache
|
||||
ping_response = await litellm.cache.ping()
|
||||
verbose_proxy_logger.debug(
|
||||
"/cache/ping: ping_response: " + str(ping_response)
|
||||
)
|
||||
# making a set cache call
|
||||
# add cache does not return anything
|
||||
await litellm.cache.async_add_cache(
|
||||
result="test_key",
|
||||
model="test-model",
|
||||
messages=[{"role": "user", "content": "test from litellm"}],
|
||||
)
|
||||
verbose_proxy_logger.debug("/cache/ping: done with set_cache()")
|
||||
return {
|
||||
"status": "healthy",
|
||||
"cache_type": litellm.cache.type,
|
||||
"ping_response": True,
|
||||
"set_cache_response": "success",
|
||||
"litellm_cache_params": litellm_cache_params,
|
||||
"redis_cache_params": specific_cache_params,
|
||||
}
|
||||
else:
|
||||
return {
|
||||
"status": "healthy",
|
||||
"cache_type": litellm.cache.type,
|
||||
"litellm_cache_params": litellm_cache_params,
|
||||
}
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=503,
|
||||
detail=f"Service Unhealthy ({str(e)}).Cache parameters: {litellm_cache_params}.specific_cache_params: {specific_cache_params}",
|
||||
)
|
||||
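Client-side, a successful ping returns the cache type plus the stringified cache config (and, for redis, the result of the round-trip `async_add_cache` test). A sketch using the proxy address and master key from the docstrings in this file (`http://0.0.0.0:4000` / `sk-1234`, both placeholders):

```python
import requests

BASE_URL = "http://0.0.0.0:4000"               # placeholder proxy address
HEADERS = {"Authorization": "Bearer sk-1234"}  # placeholder master key

resp = requests.get(f"{BASE_URL}/cache/ping", headers=HEADERS)
resp.raise_for_status()  # 503 if litellm.cache is None or the ping fails
health = resp.json()

print(health["status"])                # "healthy"
print(health["cache_type"])            # e.g. "redis"
print(health["litellm_cache_params"])  # stringified cache configuration
```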
|
||||
|
||||
@router.post(
|
||||
"/delete",
|
||||
tags=["caching"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
)
|
||||
async def cache_delete(request: Request):
|
||||
"""
|
||||
Endpoint for deleting a key from the cache. All responses from litellm proxy have `x-litellm-cache-key` in the headers
|
||||
|
||||
Parameters:
|
||||
- **keys**: *Optional[List[str]]* - A list of keys to delete from the cache. Example {"keys": ["key1", "key2"]}
|
||||
|
||||
```shell
|
||||
curl -X POST "http://0.0.0.0:4000/cache/delete" \
|
||||
-H "Authorization: Bearer sk-1234" \
|
||||
-d '{"keys": ["key1", "key2"]}'
|
||||
```
|
||||
|
||||
"""
|
||||
try:
|
||||
if litellm.cache is None:
|
||||
raise HTTPException(
|
||||
status_code=503, detail="Cache not initialized. litellm.cache is None"
|
||||
)
|
||||
|
||||
request_data = await request.json()
|
||||
keys = request_data.get("keys", None)
|
||||
|
||||
if litellm.cache.type == "redis":
|
||||
await litellm.cache.delete_cache_keys(keys=keys)
|
||||
return {
|
||||
"status": "success",
|
||||
}
|
||||
else:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Cache type {litellm.cache.type} does not support deleting a key. only `redis` is supported",
|
||||
)
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Cache Delete Failed({str(e)})",
|
||||
)
|
||||
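Since every proxy response carries its cache key in the `x-litellm-cache-key` header (per the docstring above), a client can evict exactly the entry it just created. A sketch with the same placeholder URL and key; the model name is illustrative:

```python
import requests

BASE_URL = "http://0.0.0.0:4000"
HEADERS = {"Authorization": "Bearer sk-1234"}

# 1. Make a normal chat request and capture its cache key from the headers.
chat = requests.post(
    f"{BASE_URL}/chat/completions",
    headers=HEADERS,
    json={"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "hi"}]},
)
cache_key = chat.headers.get("x-litellm-cache-key")

# 2. Evict that entry. Deletion is redis-only; other cache types get HTTP 500.
if cache_key:
    deleted = requests.post(
        f"{BASE_URL}/cache/delete", headers=HEADERS, json={"keys": [cache_key]}
    )
    print(deleted.json())  # {"status": "success"}
```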
|
||||
|
||||
@router.get(
|
||||
"/redis/info",
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
)
|
||||
async def cache_redis_info():
|
||||
"""
|
||||
Endpoint for getting /redis/info
|
||||
"""
|
||||
try:
|
||||
if litellm.cache is None:
|
||||
raise HTTPException(
|
||||
status_code=503, detail="Cache not initialized. litellm.cache is None"
|
||||
)
|
||||
if litellm.cache.type == "redis":
|
||||
client_list = litellm.cache.cache.client_list()
|
||||
redis_info = litellm.cache.cache.info()
|
||||
num_clients = len(client_list)
|
||||
return {
|
||||
"num_clients": num_clients,
|
||||
"clients": client_list,
|
||||
"info": redis_info,
|
||||
}
|
||||
else:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Cache type {litellm.cache.type} does not support flushing",
|
||||
)
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=503,
|
||||
detail=f"Service Unhealthy ({str(e)})",
|
||||
)
|
||||
|
||||
|
||||
@router.post(
|
||||
"/flushall",
|
||||
tags=["caching"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
)
|
||||
async def cache_flushall():
|
||||
"""
|
||||
A function to flush all items from the cache. (All cached items are deleted by this call.)
|
||||
Raises HTTPException if the cache is not initialized or if the cache type does not support flushing.
|
||||
Returns a dictionary with the status of the operation.
|
||||
|
||||
Usage:
|
||||
```
|
||||
curl -X POST http://0.0.0.0:4000/cache/flushall -H "Authorization: Bearer sk-1234"
|
||||
```
|
||||
"""
|
||||
try:
|
||||
if litellm.cache is None:
|
||||
raise HTTPException(
|
||||
status_code=503, detail="Cache not initialized. litellm.cache is None"
|
||||
)
|
||||
if litellm.cache.type == "redis":
|
||||
litellm.cache.cache.flushall()
|
||||
return {
|
||||
"status": "success",
|
||||
}
|
||||
else:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Cache type {litellm.cache.type} does not support flushing",
|
||||
)
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=503,
|
||||
detail=f"Service Unhealthy ({str(e)})",
|
||||
)
|
|
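Rounding out the router, `/cache/redis/info` and `/cache/flushall` are both redis-only admin calls: the first surfaces connected clients plus the raw `INFO` payload, the second drops every cached entry. A combined sketch (same placeholder URL/key; flushall is destructive, so it is gated behind a prompt here):

```python
import requests

BASE_URL = "http://0.0.0.0:4000"
HEADERS = {"Authorization": "Bearer sk-1234"}

# Inspect the redis backend behind the cache.
info = requests.get(f"{BASE_URL}/cache/redis/info", headers=HEADERS).json()
print(info["num_clients"], "redis clients connected")

# Destructive: deletes ALL cached entries.
if input("Flush the entire cache? [y/N] ").lower() == "y":
    print(requests.post(f"{BASE_URL}/cache/flushall", headers=HEADERS).json())
```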
@ -1,91 +0,0 @@
|
|||
from datetime import datetime
|
||||
from functools import wraps
|
||||
from litellm.proxy._types import UserAPIKeyAuth, ManagementEndpointLoggingPayload
|
||||
from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
|
||||
from fastapi import Request
|
||||
|
||||
|
||||
def management_endpoint_wrapper(func):
|
||||
"""
|
||||
This wrapper does the following:
|
||||
|
||||
1. Log I/O, Exceptions to OTEL
|
||||
2. Create an Audit log for success calls
|
||||
"""
|
||||
|
||||
@wraps(func)
|
||||
async def wrapper(*args, **kwargs):
|
||||
start_time = datetime.now()
|
||||
|
||||
try:
|
||||
result = await func(*args, **kwargs)
|
||||
end_time = datetime.now()
|
||||
|
||||
if kwargs is None:
|
||||
kwargs = {}
|
||||
user_api_key_dict: UserAPIKeyAuth = (
|
||||
kwargs.get("user_api_key_dict") or UserAPIKeyAuth()
|
||||
)
|
||||
parent_otel_span = user_api_key_dict.parent_otel_span
|
||||
if parent_otel_span is not None:
|
||||
from litellm.proxy.proxy_server import open_telemetry_logger
|
||||
|
||||
if open_telemetry_logger is not None:
|
||||
_http_request: Request = kwargs.get("http_request")
|
||||
if _http_request:
|
||||
_route = _http_request.url.path
|
||||
_request_body: dict = await _read_request_body(
|
||||
request=_http_request
|
||||
)
|
||||
_response = dict(result) if result is not None else None
|
||||
|
||||
logging_payload = ManagementEndpointLoggingPayload(
|
||||
route=_route,
|
||||
request_data=_request_body,
|
||||
response=_response,
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
)
|
||||
|
||||
await open_telemetry_logger.async_management_endpoint_success_hook(
|
||||
logging_payload=logging_payload,
|
||||
parent_otel_span=parent_otel_span,
|
||||
)
|
||||
|
||||
return result
|
||||
except Exception as e:
|
||||
end_time = datetime.now()
|
||||
|
||||
if kwargs is None:
|
||||
kwargs = {}
|
||||
user_api_key_dict: UserAPIKeyAuth = (
|
||||
kwargs.get("user_api_key_dict") or UserAPIKeyAuth()
|
||||
)
|
||||
parent_otel_span = user_api_key_dict.parent_otel_span
|
||||
if parent_otel_span is not None:
|
||||
from litellm.proxy.proxy_server import open_telemetry_logger
|
||||
|
||||
if open_telemetry_logger is not None:
|
||||
_http_request: Request = kwargs.get("http_request")
|
||||
if _http_request:
|
||||
_route = _http_request.url.path
|
||||
_request_body: dict = await _read_request_body(
|
||||
request=_http_request
|
||||
)
|
||||
logging_payload = ManagementEndpointLoggingPayload(
|
||||
route=_route,
|
||||
request_data=_request_body,
|
||||
response=None,
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
exception=e,
|
||||
)
|
||||
|
||||
await open_telemetry_logger.async_management_endpoint_failure_hook(
|
||||
logging_payload=logging_payload,
|
||||
parent_otel_span=parent_otel_span,
|
||||
)
|
||||
|
||||
raise e
|
||||
|
||||
return wrapper
|
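The deleted wrapper is an instance of a generic pattern: time the awaited call, then emit success or failure telemetry before returning or re-raising. A stripped-down sketch of that pattern with plain `logging` standing in for the OTEL hooks (the names below are illustrative, not litellm APIs):

```python
import logging
from datetime import datetime
from functools import wraps

logger = logging.getLogger("management_endpoints")


def timed_endpoint(func):
    """Time an async endpoint and log its outcome (sketch of the pattern)."""

    @wraps(func)
    async def wrapper(*args, **kwargs):
        start_time = datetime.now()
        try:
            result = await func(*args, **kwargs)
            # success path: report the elapsed time, then hand the result back
            logger.info("%s succeeded in %s", func.__name__, datetime.now() - start_time)
            return result
        except Exception:
            # failure path: log the exception with timing, then re-raise
            logger.exception("%s failed after %s", func.__name__, datetime.now() - start_time)
            raise

    return wrapper
```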
478
litellm/proxy/health_endpoints/_health_endpoints.py
Normal file
|
@ -0,0 +1,478 @@
|
|||
from typing import Optional, Literal
|
||||
import litellm
|
||||
import os
|
||||
import asyncio
|
||||
import fastapi
|
||||
import traceback
|
||||
from datetime import datetime, timedelta
|
||||
from fastapi import Depends, Request, APIRouter, Header, status
|
||||
from litellm.proxy.health_check import perform_health_check
|
||||
from fastapi import HTTPException
|
||||
import copy
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
|
||||
from litellm.proxy._types import (
|
||||
UserAPIKeyAuth,
|
||||
ProxyException,
|
||||
WebhookEvent,
|
||||
CallInfo,
|
||||
)
|
||||
|
||||
#### Health ENDPOINTS ####
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get(
|
||||
"/test",
|
||||
tags=["health"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
)
|
||||
async def test_endpoint(request: Request):
|
||||
"""
|
||||
[DEPRECATED] use `/health/liveliness` instead.
|
||||
|
||||
A test endpoint that pings the proxy server to check if it's healthy.
|
||||
|
||||
Parameters:
|
||||
request (Request): The incoming request.
|
||||
|
||||
Returns:
|
||||
dict: A dictionary containing the route of the request URL.
|
||||
"""
|
||||
# ping the proxy server to check if it's healthy
|
||||
return {"route": request.url.path}
|
||||
|
||||
|
||||
@router.get(
|
||||
"/health/services",
|
||||
tags=["health"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def health_services_endpoint(
|
||||
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
|
||||
service: Literal[
|
||||
"slack_budget_alerts", "langfuse", "slack", "openmeter", "webhook", "email"
|
||||
] = fastapi.Query(description="Specify the service being hit."),
|
||||
):
|
||||
"""
|
||||
Hidden endpoint.
|
||||
|
||||
Used by the UI to let users check if slack alerting is working as expected.
|
||||
"""
|
||||
try:
|
||||
from litellm.proxy.proxy_server import (
|
||||
proxy_logging_obj,
|
||||
prisma_client,
|
||||
general_settings,
|
||||
)
|
||||
|
||||
if service is None:
|
||||
raise HTTPException(
|
||||
status_code=400, detail={"error": "Service must be specified."}
|
||||
)
|
||||
if service not in [
|
||||
"slack_budget_alerts",
|
||||
"email",
|
||||
"langfuse",
|
||||
"slack",
|
||||
"openmeter",
|
||||
"webhook",
|
||||
]:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail={
|
||||
"error": f"Service must be in list. Service={service}. List={['slack_budget_alerts']}"
|
||||
},
|
||||
)
|
||||
|
||||
if service == "openmeter":
|
||||
_ = await litellm.acompletion(
|
||||
model="openai/litellm-mock-response-model",
|
||||
messages=[{"role": "user", "content": "Hey, how's it going?"}],
|
||||
user="litellm:/health/services",
|
||||
mock_response="This is a mock response",
|
||||
)
|
||||
return {
|
||||
"status": "success",
|
||||
"message": "Mock LLM request made - check openmeter.",
|
||||
}
|
||||
|
||||
if service == "langfuse":
|
||||
from litellm.integrations.langfuse import LangFuseLogger
|
||||
|
||||
langfuse_logger = LangFuseLogger()
|
||||
langfuse_logger.Langfuse.auth_check()
|
||||
_ = litellm.completion(
|
||||
model="openai/litellm-mock-response-model",
|
||||
messages=[{"role": "user", "content": "Hey, how's it going?"}],
|
||||
user="litellm:/health/services",
|
||||
mock_response="This is a mock response",
|
||||
)
|
||||
return {
|
||||
"status": "success",
|
||||
"message": "Mock LLM request made - check langfuse.",
|
||||
}
|
||||
|
||||
if service == "webhook":
|
||||
user_info = CallInfo(
|
||||
token=user_api_key_dict.token or "",
|
||||
spend=1,
|
||||
max_budget=0,
|
||||
user_id=user_api_key_dict.user_id,
|
||||
key_alias=user_api_key_dict.key_alias,
|
||||
team_id=user_api_key_dict.team_id,
|
||||
)
|
||||
await proxy_logging_obj.budget_alerts(
|
||||
type="user_budget",
|
||||
user_info=user_info,
|
||||
)
|
||||
|
||||
if service == "slack" or service == "slack_budget_alerts":
|
||||
if "slack" in general_settings.get("alerting", []):
|
||||
# test_message = f"""\n🚨 `ProjectedLimitExceededError` 💸\n\n`Key Alias:` litellm-ui-test-alert \n`Expected Day of Error`: 28th March \n`Current Spend`: $100.00 \n`Projected Spend at end of month`: $1000.00 \n`Soft Limit`: $700"""
|
||||
# check if user has opted into unique_alert_webhooks
|
||||
if (
|
||||
proxy_logging_obj.slack_alerting_instance.alert_to_webhook_url
|
||||
is not None
|
||||
):
|
||||
for (
|
||||
alert_type
|
||||
) in proxy_logging_obj.slack_alerting_instance.alert_to_webhook_url:
|
||||
"""
|
||||
"llm_exceptions",
|
||||
"llm_too_slow",
|
||||
"llm_requests_hanging",
|
||||
"budget_alerts",
|
||||
"db_exceptions",
|
||||
"""
|
||||
# only test alert if it's in active alert types
|
||||
if (
|
||||
proxy_logging_obj.slack_alerting_instance.alert_types
|
||||
is not None
|
||||
and alert_type
|
||||
not in proxy_logging_obj.slack_alerting_instance.alert_types
|
||||
):
|
||||
continue
|
||||
test_message = "default test message"
|
||||
if alert_type == "llm_exceptions":
|
||||
test_message = f"LLM Exception test alert"
|
||||
elif alert_type == "llm_too_slow":
|
||||
test_message = f"LLM Too Slow test alert"
|
||||
elif alert_type == "llm_requests_hanging":
|
||||
test_message = f"LLM Requests Hanging test alert"
|
||||
elif alert_type == "budget_alerts":
|
||||
test_message = f"Budget Alert test alert"
|
||||
elif alert_type == "db_exceptions":
|
||||
test_message = f"DB Exception test alert"
|
||||
elif alert_type == "outage_alerts":
|
||||
test_message = f"Outage Alert Exception test alert"
|
||||
elif alert_type == "daily_reports":
|
||||
test_message = f"Daily Reports test alert"
|
||||
|
||||
await proxy_logging_obj.alerting_handler(
|
||||
message=test_message, level="Low", alert_type=alert_type
|
||||
)
|
||||
else:
|
||||
await proxy_logging_obj.alerting_handler(
|
||||
message="This is a test slack alert message",
|
||||
level="Low",
|
||||
alert_type="budget_alerts",
|
||||
)
|
||||
|
||||
if prisma_client is not None:
|
||||
asyncio.create_task(
|
||||
proxy_logging_obj.slack_alerting_instance.send_monthly_spend_report()
|
||||
)
|
||||
asyncio.create_task(
|
||||
proxy_logging_obj.slack_alerting_instance.send_weekly_spend_report()
|
||||
)
|
||||
|
||||
alert_types = (
|
||||
proxy_logging_obj.slack_alerting_instance.alert_types or []
|
||||
)
|
||||
alert_types = list(alert_types)
|
||||
return {
|
||||
"status": "success",
|
||||
"alert_types": alert_types,
|
||||
"message": "Mock Slack Alert sent, verify Slack Alert Received on your channel",
|
||||
}
|
||||
else:
|
||||
raise HTTPException(
|
||||
status_code=422,
|
||||
detail={
|
||||
"error": '"{}" not in proxy config: general_settings. Unable to test this.'.format(
|
||||
service
|
||||
)
|
||||
},
|
||||
)
|
||||
if service == "email":
|
||||
webhook_event = WebhookEvent(
|
||||
event="key_created",
|
||||
event_group="key",
|
||||
event_message="Test Email Alert",
|
||||
token=user_api_key_dict.token or "",
|
||||
key_alias="Email Test key (This is only a test alert key. DO NOT USE THIS IN PRODUCTION.)",
|
||||
spend=0,
|
||||
max_budget=0,
|
||||
user_id=user_api_key_dict.user_id,
|
||||
user_email=os.getenv("TEST_EMAIL_ADDRESS"),
|
||||
team_id=user_api_key_dict.team_id,
|
||||
)
|
||||
|
||||
# use create task - this can take 10 seconds. don't keep ui users waiting for notification to check their email
|
||||
asyncio.create_task(
|
||||
proxy_logging_obj.slack_alerting_instance.send_key_created_or_user_invited_email(
|
||||
webhook_event=webhook_event
|
||||
)
|
||||
)
|
||||
|
||||
return {
|
||||
"status": "success",
|
||||
"message": "Mock Email Alert sent, verify Email Alert Received",
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
verbose_proxy_logger.error(
|
||||
"litellm.proxy.proxy_server.health_services_endpoint(): Exception occured - {}".format(
|
||||
str(e)
|
||||
)
|
||||
)
|
||||
verbose_proxy_logger.debug(traceback.format_exc())
|
||||
if isinstance(e, HTTPException):
|
||||
raise ProxyException(
|
||||
message=getattr(e, "detail", f"Authentication Error({str(e)})"),
|
||||
type="auth_error",
|
||||
param=getattr(e, "param", "None"),
|
||||
code=getattr(e, "status_code", status.HTTP_500_INTERNAL_SERVER_ERROR),
|
||||
)
|
||||
elif isinstance(e, ProxyException):
|
||||
raise e
|
||||
raise ProxyException(
|
||||
message="Authentication Error, " + str(e),
|
||||
type="auth_error",
|
||||
param=getattr(e, "param", "None"),
|
||||
code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
)
|
||||
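A sketch of how the UI (or an admin script) exercises this endpoint, with the usual placeholder URL and master key; `service` must be one of the literals in the signature above:

```python
import requests

BASE_URL = "http://0.0.0.0:4000"
HEADERS = {"Authorization": "Bearer sk-1234"}

# Fire a test slack alert. Requires "slack" under general_settings.alerting;
# otherwise the endpoint responds with HTTP 422.
resp = requests.get(
    f"{BASE_URL}/health/services", headers=HEADERS, params={"service": "slack"}
)
print(resp.status_code, resp.json())
# e.g. 200 {"status": "success", "alert_types": [...],
#           "message": "Mock Slack Alert sent, verify Slack Alert Received on your channel"}
```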
|
||||
|
||||
@router.get("/health", tags=["health"], dependencies=[Depends(user_api_key_auth)])
|
||||
async def health_endpoint(
|
||||
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
|
||||
model: Optional[str] = fastapi.Query(
|
||||
None, description="Specify the model name (optional)"
|
||||
),
|
||||
):
|
||||
"""
|
||||
🚨 USE `/health/liveliness` to health check the proxy 🚨
|
||||
|
||||
See more 👉 https://docs.litellm.ai/docs/proxy/health
|
||||
|
||||
|
||||
Check the health of all the endpoints in config.yaml
|
||||
|
||||
To run health checks in the background, add this to config.yaml:
|
||||
```
|
||||
general_settings:
|
||||
# ... other settings
|
||||
background_health_checks: True
|
||||
```
|
||||
else, the health checks will be run on models when /health is called.
|
||||
"""
|
||||
from litellm.proxy.proxy_server import (
|
||||
health_check_results,
|
||||
use_background_health_checks,
|
||||
user_model,
|
||||
llm_model_list,
|
||||
)
|
||||
|
||||
try:
|
||||
if llm_model_list is None:
|
||||
# if no router set, check if user set a model using litellm --model ollama/llama2
|
||||
if user_model is not None:
|
||||
healthy_endpoints, unhealthy_endpoints = await perform_health_check(
|
||||
model_list=[], cli_model=user_model
|
||||
)
|
||||
return {
|
||||
"healthy_endpoints": healthy_endpoints,
|
||||
"unhealthy_endpoints": unhealthy_endpoints,
|
||||
"healthy_count": len(healthy_endpoints),
|
||||
"unhealthy_count": len(unhealthy_endpoints),
|
||||
}
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail={"error": "Model list not initialized"},
|
||||
)
|
||||
_llm_model_list = copy.deepcopy(llm_model_list)
|
||||
### FILTER MODELS FOR ONLY THOSE USER HAS ACCESS TO ###
|
||||
if len(user_api_key_dict.models) > 0:
|
||||
allowed_model_names = user_api_key_dict.models
|
||||
else:
|
||||
allowed_model_names = []
|
||||
if use_background_health_checks:
|
||||
return health_check_results
|
||||
else:
|
||||
healthy_endpoints, unhealthy_endpoints = await perform_health_check(
|
||||
_llm_model_list, model
|
||||
)
|
||||
|
||||
return {
|
||||
"healthy_endpoints": healthy_endpoints,
|
||||
"unhealthy_endpoints": unhealthy_endpoints,
|
||||
"healthy_count": len(healthy_endpoints),
|
||||
"unhealthy_count": len(unhealthy_endpoints),
|
||||
}
|
||||
except Exception as e:
|
||||
verbose_proxy_logger.error(
|
||||
"litellm.proxy.proxy_server.py::health_endpoint(): Exception occured - {}".format(
|
||||
str(e)
|
||||
)
|
||||
)
|
||||
verbose_proxy_logger.debug(traceback.format_exc())
|
||||
raise e
|
||||
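From the client's side, `/health` returns the two endpoint lists plus their counts (or the cached results, when background health checks are enabled). A sketch with the placeholder URL and key; `model` is optional and narrows the check to one deployment:

```python
import requests

BASE_URL = "http://0.0.0.0:4000"
HEADERS = {"Authorization": "Bearer sk-1234"}

resp = requests.get(
    f"{BASE_URL}/health",
    headers=HEADERS,
    params={"model": "gpt-3.5-turbo"},  # optional; omit to check every deployment
)
report = resp.json()
print(f"{report['healthy_count']} healthy / {report['unhealthy_count']} unhealthy")
for endpoint in report["unhealthy_endpoints"]:
    print("unhealthy:", endpoint)
```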
|
||||
|
||||
db_health_cache = {"status": "unknown", "last_updated": datetime.now()}
|
||||
|
||||
|
||||
def _db_health_readiness_check():
|
||||
from litellm.proxy.proxy_server import prisma_client
|
||||
|
||||
global db_health_cache
|
||||
|
||||
# Note - Intentionally don't try/except this so it raises an exception when it fails
|
||||
|
||||
# if timedelta is less than 2 minutes return DB Status
|
||||
time_diff = datetime.now() - db_health_cache["last_updated"]
|
||||
if db_health_cache["status"] != "unknown" and time_diff < timedelta(minutes=2):
|
||||
return db_health_cache
|
||||
prisma_client.health_check()
|
||||
db_health_cache = {"status": "connected", "last_updated": datetime.now()}
|
||||
return db_health_cache
|
||||
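The helper above is a small time-boxed cache: database pings are reused for two minutes so `/health/readiness` does not probe the DB on every request. The same pattern, generalized into a self-contained sketch:

```python
from datetime import datetime, timedelta
from typing import Any, Callable, Dict

_health_cache: Dict[str, Any] = {"status": "unknown", "last_updated": datetime.now()}


def cached_health_check(
    check: Callable[[], None], ttl: timedelta = timedelta(minutes=2)
) -> Dict[str, Any]:
    """Run `check()` at most once per `ttl`; reuse the last result otherwise."""
    global _health_cache
    age = datetime.now() - _health_cache["last_updated"]
    if _health_cache["status"] != "unknown" and age < ttl:
        return _health_cache  # still fresh - skip the expensive probe
    check()  # raises on failure, just like prisma_client.health_check()
    _health_cache = {"status": "connected", "last_updated": datetime.now()}
    return _health_cache
```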
|
||||
|
||||
@router.get(
|
||||
"/active/callbacks",
|
||||
tags=["health"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
)
|
||||
async def active_callbacks():
|
||||
"""
|
||||
Returns a list of active callbacks on litellm.callbacks, litellm.input_callback, litellm.failure_callback, litellm.success_callback
|
||||
"""
|
||||
from litellm.proxy.proxy_server import proxy_logging_obj, general_settings
|
||||
|
||||
_alerting = str(general_settings.get("alerting"))
|
||||
# get success callbacks
|
||||
|
||||
litellm_callbacks = [str(x) for x in litellm.callbacks]
|
||||
litellm_input_callbacks = [str(x) for x in litellm.input_callback]
|
||||
litellm_failure_callbacks = [str(x) for x in litellm.failure_callback]
|
||||
litellm_success_callbacks = [str(x) for x in litellm.success_callback]
|
||||
litellm_async_success_callbacks = [str(x) for x in litellm._async_success_callback]
|
||||
litellm_async_failure_callbacks = [str(x) for x in litellm._async_failure_callback]
|
||||
litellm_async_input_callbacks = [str(x) for x in litellm._async_input_callback]
|
||||
|
||||
all_litellm_callbacks = (
|
||||
litellm_callbacks
|
||||
+ litellm_input_callbacks
|
||||
+ litellm_failure_callbacks
|
||||
+ litellm_success_callbacks
|
||||
+ litellm_async_success_callbacks
|
||||
+ litellm_async_failure_callbacks
|
||||
+ litellm_async_input_callbacks
|
||||
)
|
||||
|
||||
alerting = proxy_logging_obj.alerting
|
||||
_num_alerting = 0
|
||||
if alerting and isinstance(alerting, list):
|
||||
_num_alerting = len(alerting)
|
||||
|
||||
return {
|
||||
"alerting": _alerting,
|
||||
"litellm.callbacks": litellm_callbacks,
|
||||
"litellm.input_callback": litellm_input_callbacks,
|
||||
"litellm.failure_callback": litellm_failure_callbacks,
|
||||
"litellm.success_callback": litellm_success_callbacks,
|
||||
"litellm._async_success_callback": litellm_async_success_callbacks,
|
||||
"litellm._async_failure_callback": litellm_async_failure_callbacks,
|
||||
"litellm._async_input_callback": litellm_async_input_callbacks,
|
||||
"all_litellm_callbacks": all_litellm_callbacks,
|
||||
"num_callbacks": len(all_litellm_callbacks),
|
||||
"num_alerting": _num_alerting,
|
||||
}
|
||||
|
||||
|
||||
@router.get(
|
||||
"/health/readiness",
|
||||
tags=["health"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
)
|
||||
async def health_readiness():
|
||||
"""
|
||||
Unprotected endpoint for checking if worker can receive requests
|
||||
"""
|
||||
from litellm.proxy.proxy_server import proxy_logging_obj, prisma_client, version
|
||||
|
||||
try:
|
||||
# get success callback
|
||||
success_callback_names = []
|
||||
|
||||
try:
|
||||
# this was returning a JSON of the values in some of the callbacks
|
||||
# all we need is the callback name, hence we do str(callback)
|
||||
success_callback_names = [str(x) for x in litellm.success_callback]
|
||||
except Exception:
|
||||
# don't let this block the /health/readiness response, if we can't convert to str -> return litellm.success_callback
|
||||
success_callback_names = litellm.success_callback
|
||||
|
||||
# check Cache
|
||||
cache_type = None
|
||||
if litellm.cache is not None:
|
||||
from litellm.caching import RedisSemanticCache
|
||||
|
||||
cache_type = litellm.cache.type
|
||||
|
||||
if isinstance(litellm.cache.cache, RedisSemanticCache):
|
||||
# ping the cache
|
||||
# TODO: @ishaan-jaff - we should probably not ping the cache on every /health/readiness check
|
||||
try:
|
||||
index_info = await litellm.cache.cache._index_info()
|
||||
except Exception as e:
|
||||
index_info = "index does not exist - error: " + str(e)
|
||||
cache_type = {"type": cache_type, "index_info": index_info}
|
||||
|
||||
# check DB
|
||||
if prisma_client is not None: # if db passed in, check if it's connected
|
||||
db_health_status = _db_health_readiness_check()
|
||||
return {
|
||||
"status": "healthy",
|
||||
"db": "connected",
|
||||
"cache": cache_type,
|
||||
"litellm_version": version,
|
||||
"success_callbacks": success_callback_names,
|
||||
**db_health_status,
|
||||
}
|
||||
else:
|
||||
return {
|
||||
"status": "healthy",
|
||||
"db": "Not connected",
|
||||
"cache": cache_type,
|
||||
"litellm_version": version,
|
||||
"success_callbacks": success_callback_names,
|
||||
}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=503, detail=f"Service Unhealthy ({str(e)})")
|
||||
|
||||
|
||||
@router.get(
|
||||
"/health/liveliness",
|
||||
tags=["health"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
)
|
||||
async def health_liveliness():
|
||||
"""
|
||||
Unprotected endpoint for checking if worker is alive
|
||||
"""
|
||||
return "I'm alive!"
|
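Together the two probes support a simple "wait until the worker is up" loop, sketched below with the usual placeholder URL and key:

```python
import time

import requests

BASE_URL = "http://0.0.0.0:4000"
HEADERS = {"Authorization": "Bearer sk-1234"}

for _ in range(30):
    try:
        # liveliness: is the process serving requests at all?
        requests.get(f"{BASE_URL}/health/liveliness", headers=HEADERS, timeout=2).raise_for_status()
        # readiness: are the DB, cache, and callbacks wired up?
        ready = requests.get(f"{BASE_URL}/health/readiness", headers=HEADERS, timeout=2)
        if ready.ok and ready.json().get("status") == "healthy":
            print("proxy ready - db:", ready.json().get("db"), "cache:", ready.json().get("cache"))
            break
    except requests.RequestException:
        pass  # proxy not up yet
    time.sleep(1)
```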
926
litellm/proxy/management_endpoints/key_management_endpoints.py
Normal file
|
@ -0,0 +1,926 @@
|
|||
"""
|
||||
KEY MANAGEMENT
|
||||
|
||||
All /key management endpoints
|
||||
|
||||
/key/generate
|
||||
/key/info
|
||||
/key/update
|
||||
/key/delete
|
||||
"""
|
||||
|
||||
import copy
|
||||
import json
|
||||
import uuid
|
||||
import re
|
||||
import traceback
|
||||
import asyncio
|
||||
import secrets
|
||||
from typing import Optional, List
|
||||
import fastapi
|
||||
from fastapi import Depends, Request, APIRouter, Header, status
|
||||
from fastapi import HTTPException
|
||||
import litellm
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
|
||||
from litellm.proxy._types import *
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.post(
|
||||
"/key/generate",
|
||||
tags=["key management"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
response_model=GenerateKeyResponse,
|
||||
)
|
||||
async def generate_key_fn(
|
||||
data: GenerateKeyRequest,
|
||||
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
|
||||
litellm_changed_by: Optional[str] = Header(
|
||||
None,
|
||||
description="The litellm-changed-by header enables tracking of actions performed by authorized users on behalf of other users, providing an audit trail for accountability",
|
||||
),
|
||||
):
|
||||
"""
|
||||
Generate an API key based on the provided data.
|
||||
|
||||
Docs: https://docs.litellm.ai/docs/proxy/virtual_keys
|
||||
|
||||
Parameters:
|
||||
- duration: Optional[str] - Specify the length of time the token is valid for. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d").
|
||||
- key_alias: Optional[str] - User defined key alias
|
||||
- team_id: Optional[str] - The team id of the key
|
||||
- user_id: Optional[str] - The user id of the key
|
||||
- models: Optional[list] - Model_name's a user is allowed to call. (if empty, key is allowed to call all models)
|
||||
- aliases: Optional[dict] - Any alias mappings, on top of anything in the config.yaml model list. - https://docs.litellm.ai/docs/proxy/virtual_keys#managing-auth---upgradedowngrade-models
|
||||
- config: Optional[dict] - any key-specific configs, overrides config in config.yaml
|
||||
- spend: Optional[int] - Amount spent by key. Default is 0. Will be updated by proxy whenever key is used. https://docs.litellm.ai/docs/proxy/virtual_keys#managing-auth---tracking-spend
|
||||
- send_invite_email: Optional[bool] - Whether to send an invite email to the user_id, with the generate key
|
||||
- max_budget: Optional[float] - Specify max budget for a given key.
|
||||
- max_parallel_requests: Optional[int] - Rate limit a user based on the number of parallel requests. Raises 429 error, if user's parallel requests > x.
|
||||
- metadata: Optional[dict] - Metadata for key, store information for key. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }
|
||||
- permissions: Optional[dict] - key-specific permissions. Currently just used for turning off pii masking (if connected). Example - {"pii": false}
|
||||
- model_max_budget: Optional[dict] - key-specific model budget in USD. Example - {"text-davinci-002": 0.5, "gpt-3.5-turbo": 0.5}. IF null or {} then no model specific budget.
|
||||
|
||||
Examples:
|
||||
|
||||
1. Allow users to turn on/off pii masking
|
||||
|
||||
```bash
|
||||
curl --location 'http://0.0.0.0:8000/key/generate' \
|
||||
--header 'Authorization: Bearer sk-1234' \
|
||||
--header 'Content-Type: application/json' \
|
||||
--data '{
|
||||
"permissions": {"allow_pii_controls": true}
|
||||
}'
|
||||
```
|
||||
|
||||
Returns:
|
||||
- key: (str) The generated api key
|
||||
- expires: (datetime) Datetime object for when key expires.
|
||||
- user_id: (str) Unique user id - used for tracking spend across multiple keys for same user id.
|
||||
"""
    try:
        from litellm.proxy.proxy_server import (
            user_custom_key_generate,
            prisma_client,
            litellm_proxy_admin_name,
            general_settings,
            proxy_logging_obj,
            create_audit_log_for_update,
        )

        verbose_proxy_logger.debug("entered /key/generate")

        if user_custom_key_generate is not None:
            result = await user_custom_key_generate(data)
            decision = result.get("decision", True)
            message = result.get("message", "Authentication Failed - Custom Auth Rule")
            if not decision:
                raise HTTPException(
                    status_code=status.HTTP_403_FORBIDDEN, detail=message
                )
        # check if user set default key/generate params on config.yaml
        if litellm.default_key_generate_params is not None:
            for elem in data:
                key, value = elem
                if value is None and key in [
                    "max_budget",
                    "user_id",
                    "team_id",
                    "max_parallel_requests",
                    "tpm_limit",
                    "rpm_limit",
                    "budget_duration",
                ]:
                    setattr(
                        data, key, litellm.default_key_generate_params.get(key, None)
                    )
                elif key == "models" and value == []:
                    setattr(data, key, litellm.default_key_generate_params.get(key, []))
                elif key == "metadata" and value == {}:
                    setattr(data, key, litellm.default_key_generate_params.get(key, {}))

        # check if user set upperbound key/generate params on config.yaml
        if litellm.upperbound_key_generate_params is not None:
            for elem in data:
                # if key in litellm.upperbound_key_generate_params, use the min of value and litellm.upperbound_key_generate_params[key]
                key, value = elem
                if (
                    value is not None
                    and getattr(litellm.upperbound_key_generate_params, key, None)
                    is not None
                ):
                    # if value is float/int
                    if key in [
                        "max_budget",
                        "max_parallel_requests",
                        "tpm_limit",
                        "rpm_limit",
                    ]:
                        if value > getattr(litellm.upperbound_key_generate_params, key):
                            raise HTTPException(
                                status_code=400,
                                detail={
                                    "error": f"{key} is over max limit set in config - user_value={value}; max_value={getattr(litellm.upperbound_key_generate_params, key)}"
                                },
                            )
                    elif key == "budget_duration":
                        # budget durations are strings like "30s", "30m", "30h", "30d"
                        # compare the duration in seconds and max duration in seconds
                        upperbound_budget_duration = _duration_in_seconds(
                            duration=getattr(
                                litellm.upperbound_key_generate_params, key
                            )
                        )
                        user_set_budget_duration = _duration_in_seconds(duration=value)
                        if user_set_budget_duration > upperbound_budget_duration:
                            raise HTTPException(
                                status_code=400,
                                detail={
                                    "error": f"Budget duration is over max limit set in config - user_value={user_set_budget_duration}; max_value={upperbound_budget_duration}"
                                },
                            )

        # TODO: @ishaan-jaff: Migrate all budget tracking to use LiteLLM_BudgetTable
        _budget_id = None
        if prisma_client is not None and data.soft_budget is not None:
            # create the Budget Row for the LiteLLM Verification Token
            budget_row = LiteLLM_BudgetTable(
                soft_budget=data.soft_budget,
                model_max_budget=data.model_max_budget or {},
            )
            new_budget = prisma_client.jsonify_object(
                budget_row.json(exclude_none=True)
            )

            _budget = await prisma_client.db.litellm_budgettable.create(
                data={
                    **new_budget,  # type: ignore
                    "created_by": user_api_key_dict.user_id or litellm_proxy_admin_name,
                    "updated_by": user_api_key_dict.user_id or litellm_proxy_admin_name,
                }
            )
            _budget_id = getattr(_budget, "budget_id", None)
        data_json = data.json()  # type: ignore
        # if we get max_budget passed to /key/generate, then use it as key_max_budget. Since generate_key_helper_fn is used to make new users
        if "max_budget" in data_json:
            data_json["key_max_budget"] = data_json.pop("max_budget", None)
        if _budget_id is not None:
            data_json["budget_id"] = _budget_id

        if "budget_duration" in data_json:
            data_json["key_budget_duration"] = data_json.pop("budget_duration", None)

        response = await generate_key_helper_fn(
            request_type="key", **data_json, table_name="key"
        )

        response["soft_budget"] = data.soft_budget  # include the user-input soft budget in the response

        if data.send_invite_email is True:
            if "email" not in general_settings.get("alerting", []):
                raise ValueError(
                    "Email alerting not setup on config.yaml. Please set `alerting=['email']`.\nDocs: https://docs.litellm.ai/docs/proxy/email"
                )
            event = WebhookEvent(
                event="key_created",
                event_group="key",
                event_message="API Key Created",
                token=response.get("token", ""),
                spend=response.get("spend", 0.0),
                max_budget=response.get("max_budget", 0.0),
                user_id=response.get("user_id", None),
                team_id=response.get("team_id", "Default Team"),
                key_alias=response.get("key_alias", None),
            )

            # If user configured email alerting - send an Email letting their end-user know the key was created
            asyncio.create_task(
                proxy_logging_obj.slack_alerting_instance.send_key_created_or_user_invited_email(
                    webhook_event=event,
                )
            )

        # Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
        if litellm.store_audit_logs is True:
            _updated_values = json.dumps(response, default=str)
            asyncio.create_task(
                create_audit_log_for_update(
                    request_data=LiteLLM_AuditLogs(
                        id=str(uuid.uuid4()),
                        updated_at=datetime.now(timezone.utc),
                        changed_by=litellm_changed_by
                        or user_api_key_dict.user_id
                        or litellm_proxy_admin_name,
                        changed_by_api_key=user_api_key_dict.api_key,
                        table_name=LitellmTableNames.KEY_TABLE_NAME,
                        object_id=response.get("token_id", ""),
                        action="created",
                        updated_values=_updated_values,
                        before_value=None,
                    )
                )
            )

        return GenerateKeyResponse(**response)
    except Exception as e:
        verbose_proxy_logger.error(
            "litellm.proxy.proxy_server.generate_key_fn(): Exception occurred - {}".format(
                str(e)
            )
        )
        verbose_proxy_logger.debug(traceback.format_exc())
        if isinstance(e, HTTPException):
            raise ProxyException(
                message=getattr(e, "detail", f"Authentication Error({str(e)})"),
                type="auth_error",
                param=getattr(e, "param", "None"),
                code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
            )
        elif isinstance(e, ProxyException):
            raise e
        raise ProxyException(
            message="Authentication Error, " + str(e),
            type="auth_error",
            param=getattr(e, "param", "None"),
            code=status.HTTP_400_BAD_REQUEST,
        )
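
# Hedged usage sketch (illustrative, not part of the route above): generating a
# key with an expiry, a budget, and a model allow-list. Values are examples only.
#
#   curl --location 'http://0.0.0.0:8000/key/generate' \
#   --header 'Authorization: Bearer sk-1234' \
#   --header 'Content-Type: application/json' \
#   --data '{"duration": "30d", "max_budget": 10.0, "models": ["gpt-3.5-turbo"]}'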


@router.post(
    "/key/update", tags=["key management"], dependencies=[Depends(user_api_key_auth)]
)
async def update_key_fn(
    request: Request,
    data: UpdateKeyRequest,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
    litellm_changed_by: Optional[str] = Header(
        None,
        description="The litellm-changed-by header enables tracking of actions performed by authorized users on behalf of other users, providing an audit trail for accountability",
    ),
):
    """
    Update an existing key.
    """
    from litellm.proxy.proxy_server import (
        user_custom_key_generate,
        prisma_client,
        litellm_proxy_admin_name,
        general_settings,
        proxy_logging_obj,
        create_audit_log_for_update,
        user_api_key_cache,
    )

    try:
        data_json: dict = data.json()
        key = data_json.pop("key")
        # get the row from db
        if prisma_client is None:
            raise Exception("Not connected to DB!")

        existing_key_row = await prisma_client.get_data(
            token=data.key, table_name="key", query_type="find_unique"
        )

        if existing_key_row is None:
            raise HTTPException(
                status_code=404,
                detail={"error": f"Key not found, passed key={data.key}"},
            )

        # update based on the remaining, non-default values passed in
        non_default_values = {}
        for k, v in data_json.items():
            if v is not None and v not in (
                [],
                {},
                0,
            ):  # models default to [], spend defaults to 0, we should not reset these values
                non_default_values[k] = v

        if "duration" in non_default_values:
            duration = non_default_values.pop("duration")
            duration_s = _duration_in_seconds(duration=duration)
            expires = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
            non_default_values["expires"] = expires

        response = await prisma_client.update_data(
            token=key, data={**non_default_values, "token": key}
        )

        # Delete - key from cache, since it's been updated!
        # key updated - a new model could have been added to this key. it should not block requests after this is done
        user_api_key_cache.delete_cache(key)
        hashed_token = hash_token(key)
        user_api_key_cache.delete_cache(hashed_token)

        # Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
        if litellm.store_audit_logs is True:
            _updated_values = json.dumps(data_json, default=str)

            _before_value = existing_key_row.json(exclude_none=True)
            _before_value = json.dumps(_before_value, default=str)

            asyncio.create_task(
                create_audit_log_for_update(
                    request_data=LiteLLM_AuditLogs(
                        id=str(uuid.uuid4()),
                        updated_at=datetime.now(timezone.utc),
                        changed_by=litellm_changed_by
                        or user_api_key_dict.user_id
                        or litellm_proxy_admin_name,
                        changed_by_api_key=user_api_key_dict.api_key,
                        table_name=LitellmTableNames.KEY_TABLE_NAME,
                        object_id=data.key,
                        action="updated",
                        updated_values=_updated_values,
                        before_value=_before_value,
                    )
                )
            )

        return {"key": key, **response["data"]}
    except Exception as e:
        if isinstance(e, HTTPException):
            raise ProxyException(
                message=getattr(e, "detail", f"Authentication Error({str(e)})"),
                type="auth_error",
                param=getattr(e, "param", "None"),
                code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
            )
        elif isinstance(e, ProxyException):
            raise e
        raise ProxyException(
            message="Authentication Error, " + str(e),
            type="auth_error",
            param=getattr(e, "param", "None"),
            code=status.HTTP_400_BAD_REQUEST,
        )
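
# Hedged usage sketch (illustrative): updating a key's budget and allowed
# models; non-default values replace the stored ones. The key below is a placeholder.
#
#   curl --location 'http://0.0.0.0:8000/key/update' \
#   --header 'Authorization: Bearer sk-1234' \
#   --header 'Content-Type: application/json' \
#   --data '{"key": "sk-example", "max_budget": 25.0, "models": ["gpt-4"]}'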


@router.post(
    "/key/delete", tags=["key management"], dependencies=[Depends(user_api_key_auth)]
)
async def delete_key_fn(
    data: KeyRequest,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
    litellm_changed_by: Optional[str] = Header(
        None,
        description="The litellm-changed-by header enables tracking of actions performed by authorized users on behalf of other users, providing an audit trail for accountability",
    ),
):
    """
    Delete a key from the key management system.

    Parameters:
    - keys (List[str]): A list of keys or hashed keys to delete. Example {"keys": ["sk-QWrxEynunsNpV1zT48HIrw", "837e17519f44683334df5291321d97b8bf1098cd490e49e215f6fea935aa28be"]}

    Returns:
    - deleted_keys (List[str]): A list of deleted keys. Example {"deleted_keys": ["sk-QWrxEynunsNpV1zT48HIrw", "837e17519f44683334df5291321d97b8bf1098cd490e49e215f6fea935aa28be"]}

    Raises:
        HTTPException: If an error occurs during key deletion.
    """
    try:
        from litellm.proxy.proxy_server import (
            user_custom_key_generate,
            prisma_client,
            litellm_proxy_admin_name,
            general_settings,
            proxy_logging_obj,
            create_audit_log_for_update,
            user_api_key_cache,
        )

        keys = data.keys
        if len(keys) == 0:
            raise ProxyException(
                message=f"No keys provided, passed in: keys={keys}",
                type="auth_error",
                param="keys",
                code=status.HTTP_400_BAD_REQUEST,
            )

        ## only allow user to delete keys they own
        user_id = user_api_key_dict.user_id
        verbose_proxy_logger.debug(
            f"user_api_key_dict.user_role: {user_api_key_dict.user_role}"
        )
        if (
            user_api_key_dict.user_role is not None
            and user_api_key_dict.user_role == LitellmUserRoles.PROXY_ADMIN
        ):
            user_id = None  # unless they're admin

        # Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
        # we do this after validation, so audit logs are only inserted once validation passes
        if litellm.store_audit_logs is True:
            # make an audit log for each key deleted
            for key in data.keys:
                key_row = await prisma_client.get_data(  # type: ignore
                    token=key, table_name="key", query_type="find_unique"
                )

                key_row = key_row.json(exclude_none=True)
                _key_row = json.dumps(key_row, default=str)

                asyncio.create_task(
                    create_audit_log_for_update(
                        request_data=LiteLLM_AuditLogs(
                            id=str(uuid.uuid4()),
                            updated_at=datetime.now(timezone.utc),
                            changed_by=litellm_changed_by
                            or user_api_key_dict.user_id
                            or litellm_proxy_admin_name,
                            changed_by_api_key=user_api_key_dict.api_key,
                            table_name=LitellmTableNames.KEY_TABLE_NAME,
                            object_id=key,
                            action="deleted",
                            updated_values="{}",
                            before_value=_key_row,
                        )
                    )
                )

        number_deleted_keys = await delete_verification_token(
            tokens=keys, user_id=user_id
        )
        verbose_proxy_logger.debug(
            f"/key/delete - deleted_keys={number_deleted_keys['deleted_keys']}"
        )

        try:
            assert len(keys) == number_deleted_keys["deleted_keys"]
        except Exception:
            raise HTTPException(
                status_code=400,
                detail={
                    "error": f"Not all keys passed in were deleted. This probably means you don't have access to delete all the keys passed in. Keys passed in={len(keys)}, Deleted keys={number_deleted_keys['deleted_keys']}"
                },
            )

        for key in keys:
            user_api_key_cache.delete_cache(key)
            # remove hash token from cache
            hashed_token = hash_token(key)
            user_api_key_cache.delete_cache(hashed_token)

        verbose_proxy_logger.debug(
            f"/keys/delete - cache after delete: {user_api_key_cache.in_memory_cache.cache_dict}"
        )

        return {"deleted_keys": keys}
    except Exception as e:
        if isinstance(e, HTTPException):
            raise ProxyException(
                message=getattr(e, "detail", f"Authentication Error({str(e)})"),
                type="auth_error",
                param=getattr(e, "param", "None"),
                code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
            )
        elif isinstance(e, ProxyException):
            raise e
        raise ProxyException(
            message="Authentication Error, " + str(e),
            type="auth_error",
            param=getattr(e, "param", "None"),
            code=status.HTTP_400_BAD_REQUEST,
        )
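
# Hedged usage sketch (illustrative): deleting keys by value or by hash, as
# described in the docstring above.
#
#   curl --location 'http://0.0.0.0:8000/key/delete' \
#   --header 'Authorization: Bearer sk-1234' \
#   --header 'Content-Type: application/json' \
#   --data '{"keys": ["sk-QWrxEynunsNpV1zT48HIrw"]}'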


@router.post(
    "/v2/key/info", tags=["key management"], dependencies=[Depends(user_api_key_auth)]
)
async def info_key_fn_v2(
    data: Optional[KeyRequest] = None,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    Retrieve information about a list of keys.

    **New endpoint**. Currently admin only.
    Parameters:
        keys: Optional[list] = body parameter representing the key(s) in the request
        user_api_key_dict: UserAPIKeyAuth = Dependency representing the user's API key
    Returns:
        Dict containing the key and its associated information

    Example Curl:
    ```
    curl -X POST "http://0.0.0.0:8000/v2/key/info" \
    -H "Authorization: Bearer sk-1234" \
    -d '{"keys": ["sk-1", "sk-2", "sk-3"]}'
    ```
    """
    from litellm.proxy.proxy_server import (
        user_custom_key_generate,
        prisma_client,
        litellm_proxy_admin_name,
        general_settings,
        proxy_logging_obj,
        create_audit_log_for_update,
    )

    try:
        if prisma_client is None:
            raise Exception(
                "Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
            )
        if data is None:
            raise HTTPException(
                status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
                detail={"message": "Malformed request. No keys passed in."},
            )

        key_info = await prisma_client.get_data(
            token=data.keys, table_name="key", query_type="find_all"
        )
        filtered_key_info = []
        for k in key_info:
            try:
                k = k.model_dump()  # noqa
            except Exception:
                # if using pydantic v1
                k = k.dict()
            filtered_key_info.append(k)
        return {"key": data.keys, "info": filtered_key_info}

    except Exception as e:
        if isinstance(e, HTTPException):
            raise ProxyException(
                message=getattr(e, "detail", f"Authentication Error({str(e)})"),
                type="auth_error",
                param=getattr(e, "param", "None"),
                code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
            )
        elif isinstance(e, ProxyException):
            raise e
        raise ProxyException(
            message="Authentication Error, " + str(e),
            type="auth_error",
            param=getattr(e, "param", "None"),
            code=status.HTTP_400_BAD_REQUEST,
        )


@router.get(
    "/key/info", tags=["key management"], dependencies=[Depends(user_api_key_auth)]
)
async def info_key_fn(
    key: Optional[str] = fastapi.Query(
        default=None, description="Key in the request parameters"
    ),
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    Retrieve information about a key.
    Parameters:
        key: Optional[str] = Query parameter representing the key in the request
        user_api_key_dict: UserAPIKeyAuth = Dependency representing the user's API key
    Returns:
        Dict containing the key and its associated information

    Example Curl:
    ```
    curl -X GET "http://0.0.0.0:8000/key/info?key=sk-02Wr4IAlN3NvPXvL5JVvDA" \
    -H "Authorization: Bearer sk-1234"
    ```

    Example Curl - if no key is passed, it will use the Key Passed in Authorization Header
    ```
    curl -X GET "http://0.0.0.0:8000/key/info" \
    -H "Authorization: Bearer sk-02Wr4IAlN3NvPXvL5JVvDA"
    ```
    """
    from litellm.proxy.proxy_server import (
        user_custom_key_generate,
        prisma_client,
        litellm_proxy_admin_name,
        general_settings,
        proxy_logging_obj,
        create_audit_log_for_update,
    )

    try:
        if prisma_client is None:
            raise Exception(
                "Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
            )
        if key is None:
            key = user_api_key_dict.api_key
        key_info = await prisma_client.get_data(token=key)
        ## REMOVE HASHED TOKEN INFO BEFORE RETURNING ##
        try:
            key_info = key_info.model_dump()  # noqa
        except Exception:
            # if using pydantic v1
            key_info = key_info.dict()
        key_info.pop("token")
        return {"key": key, "info": key_info}
    except Exception as e:
        if isinstance(e, HTTPException):
            raise ProxyException(
                message=getattr(e, "detail", f"Authentication Error({str(e)})"),
                type="auth_error",
                param=getattr(e, "param", "None"),
                code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
            )
        elif isinstance(e, ProxyException):
            raise e
        raise ProxyException(
            message="Authentication Error, " + str(e),
            type="auth_error",
            param=getattr(e, "param", "None"),
            code=status.HTTP_400_BAD_REQUEST,
        )


def _duration_in_seconds(duration: str):
    """Convert a duration string like "30s", "30m", "30h", "30d" into seconds."""
    match = re.match(r"(\d+)([smhd]?)", duration)
    if not match:
        raise ValueError("Invalid duration format")

    value, unit = match.groups()
    value = int(value)

    if unit == "s":
        return value
    elif unit == "m":
        return value * 60
    elif unit == "h":
        return value * 3600
    elif unit == "d":
        return value * 86400
    else:
        raise ValueError("Unsupported duration unit")


async def generate_key_helper_fn(
    request_type: Literal[
        "user", "key"
    ],  # identifies if this request is from /user/new or /key/generate
    duration: Optional[str],
    models: list,
    aliases: dict,
    config: dict,
    spend: float,
    key_max_budget: Optional[float] = None,  # key_max_budget is used to Budget Per key
    key_budget_duration: Optional[str] = None,
    budget_id: Optional[str] = None,  # budget id <-> LiteLLM_BudgetTable
    soft_budget: Optional[
        float
    ] = None,  # soft_budget is used to set soft Budgets Per user
    max_budget: Optional[float] = None,  # max_budget is used to Budget Per user
    budget_duration: Optional[str] = None,  # budget_duration is used to reset the Budget Per user
    token: Optional[str] = None,
    user_id: Optional[str] = None,
    team_id: Optional[str] = None,
    user_email: Optional[str] = None,
    user_role: Optional[str] = None,
    max_parallel_requests: Optional[int] = None,
    metadata: Optional[dict] = {},
    tpm_limit: Optional[int] = None,
    rpm_limit: Optional[int] = None,
    query_type: Literal["insert_data", "update_data"] = "insert_data",
    update_key_values: Optional[dict] = None,
    key_alias: Optional[str] = None,
    allowed_cache_controls: Optional[list] = [],
    permissions: Optional[dict] = {},
    model_max_budget: Optional[dict] = {},
    teams: Optional[list] = None,
    organization_id: Optional[str] = None,
    table_name: Optional[Literal["key", "user"]] = None,
    send_invite_email: Optional[bool] = None,
):
    from litellm.proxy.proxy_server import (
        prisma_client,
        custom_db_client,
        litellm_proxy_budget_name,
        premium_user,
    )

    if prisma_client is None and custom_db_client is None:
        raise Exception(
            "Connect Proxy to database to generate keys - https://docs.litellm.ai/docs/proxy/virtual_keys"
        )

    if token is None:
        token = f"sk-{secrets.token_urlsafe(16)}"

    if duration is None:  # allow tokens that never expire
        expires = None
    else:
        duration_s = _duration_in_seconds(duration=duration)
        expires = datetime.now(timezone.utc) + timedelta(seconds=duration_s)

    if key_budget_duration is None:  # one-time budget
        key_reset_at = None
    else:
        duration_s = _duration_in_seconds(duration=key_budget_duration)
        key_reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)

    if budget_duration is None:  # one-time budget
        reset_at = None
    else:
        duration_s = _duration_in_seconds(duration=budget_duration)
        reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)

    aliases_json = json.dumps(aliases)
    config_json = json.dumps(config)
    permissions_json = json.dumps(permissions)
    metadata_json = json.dumps(metadata)
    model_max_budget_json = json.dumps(model_max_budget)

    try:
        # Create a new verification token (you may want to enhance this logic based on your needs)
        user_data = {
            "max_budget": max_budget,
            "user_email": user_email,
            "user_id": user_id,
            "team_id": team_id,
            "organization_id": organization_id,
            "user_role": user_role,
            "spend": spend,
            "models": models,
            "max_parallel_requests": max_parallel_requests,
            "tpm_limit": tpm_limit,
            "rpm_limit": rpm_limit,
            "budget_duration": budget_duration,
            "budget_reset_at": reset_at,
            "allowed_cache_controls": allowed_cache_controls,
        }
        if teams is not None:
            user_data["teams"] = teams
        key_data = {
            "token": token,
            "key_alias": key_alias,
            "expires": expires,
            "models": models,
            "aliases": aliases_json,
            "config": config_json,
            "spend": spend,
            "max_budget": key_max_budget,
            "user_id": user_id,
            "team_id": team_id,
            "max_parallel_requests": max_parallel_requests,
            "metadata": metadata_json,
            "tpm_limit": tpm_limit,
            "rpm_limit": rpm_limit,
            "budget_duration": key_budget_duration,
            "budget_reset_at": key_reset_at,
            "allowed_cache_controls": allowed_cache_controls,
            "permissions": permissions_json,
            "model_max_budget": model_max_budget_json,
            "budget_id": budget_id,
        }

        # allow user to disable storing the abbreviated key name (shown in UI, to help figure out which key spent how much)
        if litellm.get_secret("DISABLE_KEY_NAME", False) is not True:
            key_data["key_name"] = f"sk-...{token[-4:]}"
        saved_token = copy.deepcopy(key_data)
        if isinstance(saved_token["aliases"], str):
            saved_token["aliases"] = json.loads(saved_token["aliases"])
        if isinstance(saved_token["config"], str):
            saved_token["config"] = json.loads(saved_token["config"])
        if isinstance(saved_token["metadata"], str):
            saved_token["metadata"] = json.loads(saved_token["metadata"])
        if isinstance(saved_token["permissions"], str):
            if (
                "get_spend_routes" in saved_token["permissions"]
                and premium_user is not True
            ):
                raise ValueError(
                    "get_spend_routes permission is only available for LiteLLM Enterprise users"
                )

            saved_token["permissions"] = json.loads(saved_token["permissions"])
        if isinstance(saved_token["model_max_budget"], str):
            saved_token["model_max_budget"] = json.loads(
                saved_token["model_max_budget"]
            )

        if saved_token.get("expires", None) is not None and isinstance(
            saved_token["expires"], datetime
        ):
            saved_token["expires"] = saved_token["expires"].isoformat()
        if prisma_client is not None:
            if (
                table_name is None or table_name == "user"
            ):  # do not auto-create users for `/key/generate`
                ## CREATE USER (If necessary)
                if query_type == "insert_data":
                    user_row = await prisma_client.insert_data(
                        data=user_data, table_name="user"
                    )
                    ## use default user model list if no key-specific model list provided
                    if len(user_row.models) > 0 and len(key_data["models"]) == 0:  # type: ignore
                        key_data["models"] = user_row.models
                elif query_type == "update_data":
                    user_row = await prisma_client.update_data(
                        data=user_data,
                        table_name="user",
                        update_key_values=update_key_values,
                    )
            if user_id == litellm_proxy_budget_name or (
                table_name is not None and table_name == "user"
            ):
                # do not create a key for litellm_proxy_budget_name or if table name is set to just 'user'
                # we only need to ensure this exists in the user table
                # the LiteLLM_VerificationToken table will increase in size if we don't do this check
                return user_data

            ## CREATE KEY
            verbose_proxy_logger.debug("prisma_client: Creating Key= %s", key_data)
            create_key_response = await prisma_client.insert_data(
                data=key_data, table_name="key"
            )
            key_data["token_id"] = getattr(create_key_response, "token", None)
    except Exception as e:
        verbose_proxy_logger.error(
            "litellm.proxy.proxy_server.generate_key_helper_fn(): Exception occurred - {}".format(
                str(e)
            )
        )
        verbose_proxy_logger.debug(traceback.format_exc())
        if isinstance(e, HTTPException):
            raise e
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail={"error": "Internal Server Error."},
        )

    # Add budget related info in key_data - this ensures it's returned
    key_data["budget_id"] = budget_id

    if request_type == "user":
        # if this is a /user/new request, update the key_data with user_data fields
        key_data.update(user_data)
    return key_data
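
# Hedged usage sketch (illustrative): a direct call to the helper, as
# /key/generate does internally. Argument values below are examples only.
#
#   response = await generate_key_helper_fn(
#       request_type="key",
#       duration="30d",
#       models=[],
#       aliases={},
#       config={},
#       spend=0.0,
#       key_max_budget=10.0,
#       table_name="key",
#   )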


async def delete_verification_token(tokens: List, user_id: Optional[str] = None):
    from litellm.proxy.proxy_server import prisma_client, litellm_proxy_admin_name

    try:
        if prisma_client:
            # check if admin making request - don't filter by user-id
            if user_id == litellm_proxy_admin_name:
                deleted_tokens = await prisma_client.delete_data(tokens=tokens)
            else:
                deleted_tokens = await prisma_client.delete_data(
                    tokens=tokens, user_id=user_id
                )
            _num_deleted_tokens = deleted_tokens.get("deleted_keys", 0)
            if _num_deleted_tokens != len(tokens):
                raise Exception(
                    "Failed to delete all tokens. Tried to delete tokens that don't belong to user: "
                    + str(user_id)
                )
        else:
            raise Exception("DB not connected. prisma_client is None")
    except Exception as e:
        verbose_proxy_logger.error(
            "litellm.proxy.proxy_server.delete_verification_token(): Exception occurred - {}".format(
                str(e)
            )
        )
        verbose_proxy_logger.debug(traceback.format_exc())
        raise e
    return deleted_tokens
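
# Hedged usage sketch (illustrative): non-admin callers only delete keys they
# own; passing user_id=None (the admin path in delete_key_fn) skips the
# ownership filter.
#
#   result = await delete_verification_token(tokens=["sk-example"], user_id=None)
#   # expected shape: {"deleted_keys": <count>} (inferred from the checks above)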
899
litellm/proxy/management_endpoints/team_endpoints.py
Normal file

@@ -0,0 +1,899 @@
from typing import Optional, List
import fastapi
from fastapi import Depends, Request, APIRouter, Header, status
from fastapi import HTTPException
import copy
import json
import uuid
import litellm
import asyncio
from datetime import datetime, timedelta, timezone
from litellm._logging import verbose_proxy_logger
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
from litellm.proxy._types import (
    UserAPIKeyAuth,
    LiteLLM_TeamTable,
    LiteLLM_ModelTable,
    LitellmUserRoles,
    NewTeamRequest,
    TeamMemberAddRequest,
    UpdateTeamRequest,
    BlockTeamRequest,
    DeleteTeamRequest,
    Member,
    LitellmTableNames,
    LiteLLM_AuditLogs,
    TeamMemberDeleteRequest,
    ProxyException,
    CommonProxyErrors,
)
from litellm.proxy.management_helpers.utils import (
    add_new_member,
    management_endpoint_wrapper,
)

router = APIRouter()
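
# Hedged sketch (illustrative): how this router is typically mounted on the
# main FastAPI app. `app` and the import alias are assumptions for illustration.
#
#   from fastapi import FastAPI
#   from litellm.proxy.management_endpoints.team_endpoints import router as team_router
#
#   app = FastAPI()
#   app.include_router(team_router)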


#### TEAM MANAGEMENT ####
@router.post(
    "/team/new",
    tags=["team management"],
    dependencies=[Depends(user_api_key_auth)],
    response_model=LiteLLM_TeamTable,
)
@management_endpoint_wrapper
async def new_team(
    data: NewTeamRequest,
    http_request: Request,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
    litellm_changed_by: Optional[str] = Header(
        None,
        description="The litellm-changed-by header enables tracking of actions performed by authorized users on behalf of other users, providing an audit trail for accountability",
    ),
):
    """
    Allow users to create a new team. Apply user permissions to their team.

    👉 [Detailed Doc on setting team budgets](https://docs.litellm.ai/docs/proxy/team_budgets)

    Parameters:
    - team_alias: Optional[str] - User defined team alias
    - team_id: Optional[str] - The team id of the user. If none passed, we'll generate it.
    - members_with_roles: List[{"role": "admin" or "user", "user_id": "<user-id>"}] - A list of users and their roles in the team. Get user_id when making a new user via `/user/new`.
    - metadata: Optional[dict] - Metadata for team, store information for team. Example metadata = {"extra_info": "some info"}
    - tpm_limit: Optional[int] - The TPM (Tokens Per Minute) limit for this team - all keys with this team_id will have at max this TPM limit
    - rpm_limit: Optional[int] - The RPM (Requests Per Minute) limit for this team - all keys associated with this team_id will have at max this RPM limit
    - max_budget: Optional[float] - The maximum budget allocated to the team - all keys for this team_id will have at max this max_budget
    - budget_duration: Optional[str] - The duration of the budget for the team. Doc [here](https://docs.litellm.ai/docs/proxy/team_budgets)
    - models: Optional[list] - A list of models associated with the team - all keys for this team_id will have at most, these models. If empty, assumes all models are allowed.
    - blocked: bool - Flag indicating if the team is blocked or not - will stop all calls from keys with this team_id.

    Returns:
    - team_id: (str) Unique team id - used for tracking spend across multiple keys for same team id.

    _deprecated_params:
    - admins: list - A list of user_id's for the admin role
    - users: list - A list of user_id's for the user role

    Example Request:
    ```
    curl --location 'http://0.0.0.0:4000/team/new' \
    --header 'Authorization: Bearer sk-1234' \
    --header 'Content-Type: application/json' \
    --data '{
      "team_alias": "my-new-team_2",
      "members_with_roles": [{"role": "admin", "user_id": "user-1234"},
        {"role": "user", "user_id": "user-2434"}]
    }'
    ```

    ```
    curl --location 'http://0.0.0.0:4000/team/new' \
    --header 'Authorization: Bearer sk-1234' \
    --header 'Content-Type: application/json' \
    --data '{
      "team_alias": "QA Prod Bot",
      "max_budget": 0.000000001,
      "budget_duration": "1d"
    }'
    ```
    """
    from litellm.proxy.proxy_server import (
        prisma_client,
        litellm_proxy_admin_name,
        create_audit_log_for_update,
        _duration_in_seconds,
    )

    if prisma_client is None:
        raise HTTPException(status_code=500, detail={"error": "No db connected"})

    if data.team_id is None:
        data.team_id = str(uuid.uuid4())
    else:
        # Check if team_id exists already
        _existing_team_id = await prisma_client.get_data(
            team_id=data.team_id, table_name="team", query_type="find_unique"
        )
        if _existing_team_id is not None:
            raise HTTPException(
                status_code=400,
                detail={
                    "error": f"Team id = {data.team_id} already exists. Please use a different team id."
                },
            )

    if (
        user_api_key_dict.user_role is None
        or user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN
    ):  # don't restrict proxy admin
        if (
            data.tpm_limit is not None
            and user_api_key_dict.tpm_limit is not None
            and data.tpm_limit > user_api_key_dict.tpm_limit
        ):
            raise HTTPException(
                status_code=400,
                detail={
                    "error": f"tpm limit higher than user max. User tpm limit={user_api_key_dict.tpm_limit}. User role={user_api_key_dict.user_role}"
                },
            )

        if (
            data.rpm_limit is not None
            and user_api_key_dict.rpm_limit is not None
            and data.rpm_limit > user_api_key_dict.rpm_limit
        ):
            raise HTTPException(
                status_code=400,
                detail={
                    "error": f"rpm limit higher than user max. User rpm limit={user_api_key_dict.rpm_limit}. User role={user_api_key_dict.user_role}"
                },
            )

        if (
            data.max_budget is not None
            and user_api_key_dict.max_budget is not None
            and data.max_budget > user_api_key_dict.max_budget
        ):
            raise HTTPException(
                status_code=400,
                detail={
                    "error": f"max budget higher than user max. User max budget={user_api_key_dict.max_budget}. User role={user_api_key_dict.user_role}"
                },
            )

        if data.models is not None and len(user_api_key_dict.models) > 0:
            for m in data.models:
                if m not in user_api_key_dict.models:
                    raise HTTPException(
                        status_code=400,
                        detail={
                            "error": f"Model not in allowed user models. User allowed models={user_api_key_dict.models}. User id={user_api_key_dict.user_id}"
                        },
                    )

    if user_api_key_dict.user_id is not None:
        creating_user_in_list = False
        for member in data.members_with_roles:
            if member.user_id == user_api_key_dict.user_id:
                creating_user_in_list = True

        if not creating_user_in_list:
            data.members_with_roles.append(
                Member(role="admin", user_id=user_api_key_dict.user_id)
            )

    ## ADD TO MODEL TABLE
    _model_id = None
    if data.model_aliases is not None and isinstance(data.model_aliases, dict):
        litellm_modeltable = LiteLLM_ModelTable(
            model_aliases=json.dumps(data.model_aliases),
            created_by=user_api_key_dict.user_id or litellm_proxy_admin_name,
            updated_by=user_api_key_dict.user_id or litellm_proxy_admin_name,
        )
        model_dict = await prisma_client.db.litellm_modeltable.create(
            {**litellm_modeltable.json(exclude_none=True)}  # type: ignore
        )  # type: ignore

        _model_id = model_dict.id

    ## ADD TO TEAM TABLE
    complete_team_data = LiteLLM_TeamTable(
        **data.json(),
        model_id=_model_id,
    )

    # If budget_duration is set, set `budget_reset_at`
    if complete_team_data.budget_duration is not None:
        duration_s = _duration_in_seconds(duration=complete_team_data.budget_duration)
        reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
        complete_team_data.budget_reset_at = reset_at

    team_row = await prisma_client.insert_data(
        data=complete_team_data.json(exclude_none=True), table_name="team"
    )

    ## ADD TEAM ID TO USER TABLE ##
    for user in complete_team_data.members_with_roles:
        ## add team id to user row ##
        await prisma_client.update_data(
            user_id=user.user_id,
            data={"user_id": user.user_id, "teams": [team_row.team_id]},
            update_key_values_custom_query={
                "teams": {
                    "push": [team_row.team_id],
                }
            },
        )

    # Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
    if litellm.store_audit_logs is True:
        _updated_values = complete_team_data.json(exclude_none=True)

        _updated_values = json.dumps(_updated_values, default=str)

        asyncio.create_task(
            create_audit_log_for_update(
                request_data=LiteLLM_AuditLogs(
                    id=str(uuid.uuid4()),
                    updated_at=datetime.now(timezone.utc),
                    changed_by=litellm_changed_by
                    or user_api_key_dict.user_id
                    or litellm_proxy_admin_name,
                    changed_by_api_key=user_api_key_dict.api_key,
                    table_name=LitellmTableNames.TEAM_TABLE_NAME,
                    object_id=data.team_id,
                    action="created",
                    updated_values=_updated_values,
                    before_value=None,
                )
            )
        )

    try:
        return team_row.model_dump()
    except Exception:
        # if using pydantic v1
        return team_row.dict()


@router.post(
    "/team/update", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
@management_endpoint_wrapper
async def update_team(
    data: UpdateTeamRequest,
    http_request: Request,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
    litellm_changed_by: Optional[str] = Header(
        None,
        description="The litellm-changed-by header enables tracking of actions performed by authorized users on behalf of other users, providing an audit trail for accountability",
    ),
):
    """
    Use `/team/member_add` AND `/team/member_delete` to add/remove team members.

    You can now update team budget / rate limits via /team/update

    Parameters:
    - team_id: str - The team id of the user. Required param.
    - team_alias: Optional[str] - User defined team alias
    - metadata: Optional[dict] - Metadata for team, store information for team. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }
    - tpm_limit: Optional[int] - The TPM (Tokens Per Minute) limit for this team - all keys with this team_id will have at max this TPM limit
    - rpm_limit: Optional[int] - The RPM (Requests Per Minute) limit for this team - all keys associated with this team_id will have at max this RPM limit
    - max_budget: Optional[float] - The maximum budget allocated to the team - all keys for this team_id will have at max this max_budget
    - budget_duration: Optional[str] - The duration of the budget for the team. Doc [here](https://docs.litellm.ai/docs/proxy/team_budgets)
    - models: Optional[list] - A list of models associated with the team - all keys for this team_id will have at most, these models. If empty, assumes all models are allowed.
    - blocked: bool - Flag indicating if the team is blocked or not - will stop all calls from keys with this team_id.

    Example - update team TPM Limit

    ```
    curl --location 'http://0.0.0.0:8000/team/update' \
    --header 'Authorization: Bearer sk-1234' \
    --header 'Content-Type: application/json' \
    --data-raw '{
        "team_id": "litellm-test-client-id-new",
        "tpm_limit": 100
    }'
    ```

    Example - Update Team `max_budget` budget
    ```
    curl --location 'http://0.0.0.0:8000/team/update' \
    --header 'Authorization: Bearer sk-1234' \
    --header 'Content-Type: application/json' \
    --data-raw '{
        "team_id": "litellm-test-client-id-new",
        "max_budget": 10
    }'
    ```
    """
    from litellm.proxy.proxy_server import (
        prisma_client,
        litellm_proxy_admin_name,
        create_audit_log_for_update,
        _duration_in_seconds,
    )

    if prisma_client is None:
        raise HTTPException(status_code=500, detail={"error": "No db connected"})

    if data.team_id is None:
        raise HTTPException(status_code=400, detail={"error": "No team id passed in"})
    verbose_proxy_logger.debug("/team/update - %s", data)

    existing_team_row = await prisma_client.get_data(
        team_id=data.team_id, table_name="team", query_type="find_unique"
    )
    if existing_team_row is None:
        raise HTTPException(
            status_code=404,
            detail={"error": f"Team not found, passed team_id={data.team_id}"},
        )

    updated_kv = data.json(exclude_none=True)

    # Check budget_duration and budget_reset_at
    if data.budget_duration is not None:
        duration_s = _duration_in_seconds(duration=data.budget_duration)
        reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)

        # set the budget_reset_at in DB
        updated_kv["budget_reset_at"] = reset_at

    team_row = await prisma_client.update_data(
        update_key_values=updated_kv,
        data=updated_kv,
        table_name="team",
        team_id=data.team_id,
    )

    # Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
    if litellm.store_audit_logs is True:
        _before_value = existing_team_row.json(exclude_none=True)
        _before_value = json.dumps(_before_value, default=str)
        _after_value: str = json.dumps(updated_kv, default=str)

        asyncio.create_task(
            create_audit_log_for_update(
                request_data=LiteLLM_AuditLogs(
                    id=str(uuid.uuid4()),
                    updated_at=datetime.now(timezone.utc),
                    changed_by=litellm_changed_by
                    or user_api_key_dict.user_id
                    or litellm_proxy_admin_name,
                    changed_by_api_key=user_api_key_dict.api_key,
                    table_name=LitellmTableNames.TEAM_TABLE_NAME,
                    object_id=data.team_id,
                    action="updated",
                    updated_values=_after_value,
                    before_value=_before_value,
                )
            )
        )

    return team_row


@router.post(
    "/team/member_add",
    tags=["team management"],
    dependencies=[Depends(user_api_key_auth)],
)
@management_endpoint_wrapper
async def team_member_add(
    data: TeamMemberAddRequest,
    http_request: Request,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    [BETA]

    Add new members (either via user_email or user_id) to a team

    If user doesn't exist, new user row will also be added to User Table

    ```
    curl -X POST 'http://0.0.0.0:4000/team/member_add' \
    -H 'Authorization: Bearer sk-1234' \
    -H 'Content-Type: application/json' \
    -d '{"team_id": "45e3e396-ee08-4a61-a88e-16b3ce7e0849", "member": {"role": "user", "user_id": "krrish247652@berri.ai"}}'
    ```
    """
    from litellm.proxy.proxy_server import (
        prisma_client,
        litellm_proxy_admin_name,
        create_audit_log_for_update,
        _duration_in_seconds,
    )

    if prisma_client is None:
        raise HTTPException(status_code=500, detail={"error": "No db connected"})

    if data.team_id is None:
        raise HTTPException(status_code=400, detail={"error": "No team id passed in"})

    if data.member is None:
        raise HTTPException(
            status_code=400, detail={"error": "No member/members passed in"}
        )

    existing_team_row = await prisma_client.db.litellm_teamtable.find_unique(
        where={"team_id": data.team_id}
    )
    if existing_team_row is None:
        raise HTTPException(
            status_code=404,
            detail={
                "error": f"Team not found for team_id={getattr(data, 'team_id', None)}"
            },
        )

    complete_team_data = LiteLLM_TeamTable(**existing_team_row.model_dump())

    if isinstance(data.member, Member):
        # add to team db
        new_member = data.member

        complete_team_data.members_with_roles.append(new_member)

    elif isinstance(data.member, List):
        # add to team db
        new_members = data.member

        complete_team_data.members_with_roles.extend(new_members)

    # ADD MEMBER TO TEAM
    _db_team_members = [m.model_dump() for m in complete_team_data.members_with_roles]
    updated_team = await prisma_client.db.litellm_teamtable.update(
        where={"team_id": data.team_id},
        data={"members_with_roles": json.dumps(_db_team_members)},  # type: ignore
    )

    if isinstance(data.member, Member):
        await add_new_member(
            new_member=data.member,
            max_budget_in_team=data.max_budget_in_team,
            prisma_client=prisma_client,
            user_api_key_dict=user_api_key_dict,
            litellm_proxy_admin_name=litellm_proxy_admin_name,
            team_id=data.team_id,
        )
    elif isinstance(data.member, List):
        # collect the coroutines, then run them concurrently
        tasks: List = []
        for m in data.member:
            tasks.append(
                add_new_member(
                    new_member=m,
                    max_budget_in_team=data.max_budget_in_team,
                    prisma_client=prisma_client,
                    user_api_key_dict=user_api_key_dict,
                    litellm_proxy_admin_name=litellm_proxy_admin_name,
                    team_id=data.team_id,
                )
            )
        await asyncio.gather(*tasks)

    return updated_team
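
# Hedged usage sketch (illustrative): the `member` field also accepts a list,
# handled by the `isinstance(data.member, List)` branch above.
#
#   curl -X POST 'http://0.0.0.0:4000/team/member_add' \
#   -H 'Authorization: Bearer sk-1234' \
#   -H 'Content-Type: application/json' \
#   -d '{"team_id": "45e3e396-ee08-4a61-a88e-16b3ce7e0849", "member": [{"role": "user", "user_id": "krrish247652@berri.ai"}, {"role": "user", "user_id": "another-user@berri.ai"}]}'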


@router.post(
    "/team/member_delete",
    tags=["team management"],
    dependencies=[Depends(user_api_key_auth)],
)
@management_endpoint_wrapper
async def team_member_delete(
    data: TeamMemberDeleteRequest,
    http_request: Request,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    [BETA]

    delete members (either via user_email or user_id) from a team

    If user doesn't exist, an exception will be raised
    ```
    curl -X POST 'http://0.0.0.0:8000/team/member_delete' \
    -H 'Authorization: Bearer sk-1234' \
    -H 'Content-Type: application/json' \
    -d '{
        "team_id": "45e3e396-ee08-4a61-a88e-16b3ce7e0849",
        "user_id": "krrish247652@berri.ai"
    }'
    ```
    """
    from litellm.proxy.proxy_server import (
        prisma_client,
        litellm_proxy_admin_name,
        create_audit_log_for_update,
        _duration_in_seconds,
    )

    if prisma_client is None:
        raise HTTPException(status_code=500, detail={"error": "No db connected"})

    if data.team_id is None:
        raise HTTPException(status_code=400, detail={"error": "No team id passed in"})

    if data.user_id is None and data.user_email is None:
        raise HTTPException(
            status_code=400,
            detail={"error": "Either user_id or user_email needs to be passed in"},
        )

    _existing_team_row = await prisma_client.db.litellm_teamtable.find_unique(
        where={"team_id": data.team_id}
    )

    if _existing_team_row is None:
        raise HTTPException(
            status_code=400,
            detail={"error": "Team id={} does not exist in db".format(data.team_id)},
        )
    existing_team_row = LiteLLM_TeamTable(**_existing_team_row.model_dump())

    ## DELETE MEMBER FROM TEAM
    new_team_members: List[Member] = []
    for m in existing_team_row.members_with_roles:
        if (
            data.user_id is not None
            and m.user_id is not None
            and data.user_id == m.user_id
        ):
            continue
        elif (
            data.user_email is not None
            and m.user_email is not None
            and data.user_email == m.user_email
        ):
            continue
        new_team_members.append(m)
    existing_team_row.members_with_roles = new_team_members

    _db_new_team_members: List[dict] = [m.model_dump() for m in new_team_members]

    _ = await prisma_client.db.litellm_teamtable.update(
        where={
            "team_id": data.team_id,
        },
        data={"members_with_roles": json.dumps(_db_new_team_members)},  # type: ignore
    )

    ## DELETE TEAM ID from USER ROW, IF EXISTS ##
    # get user row
    key_val = {}
    if data.user_id is not None:
        key_val["user_id"] = data.user_id
    elif data.user_email is not None:
        key_val["user_email"] = data.user_email
    existing_user_rows = await prisma_client.db.litellm_usertable.find_many(
        where=key_val  # type: ignore
    )

    if existing_user_rows is not None and (
        isinstance(existing_user_rows, list) and len(existing_user_rows) > 0
    ):
        for existing_user in existing_user_rows:
            team_list = []
            if data.team_id in existing_user.teams:
                team_list = existing_user.teams
                team_list.remove(data.team_id)
                await prisma_client.db.litellm_usertable.update(
                    where={
                        "user_id": existing_user.user_id,
                    },
                    data={"teams": {"set": team_list}},
                )

    return existing_team_row


@router.post(
    "/team/delete", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
@management_endpoint_wrapper
async def delete_team(
    data: DeleteTeamRequest,
    http_request: Request,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
    litellm_changed_by: Optional[str] = Header(
        None,
        description="The litellm-changed-by header enables tracking of actions performed by authorized users on behalf of other users, providing an audit trail for accountability",
    ),
):
    """
    delete team and associated team keys

    ```
    curl --location 'http://0.0.0.0:8000/team/delete' \
    --header 'Authorization: Bearer sk-1234' \
    --header 'Content-Type: application/json' \
    --data-raw '{
        "team_ids": ["45e3e396-ee08-4a61-a88e-16b3ce7e0849"]
    }'
    ```
    """
    from litellm.proxy.proxy_server import (
        prisma_client,
        litellm_proxy_admin_name,
        create_audit_log_for_update,
        _duration_in_seconds,
    )

    if prisma_client is None:
        raise HTTPException(status_code=500, detail={"error": "No db connected"})

    if data.team_ids is None:
        raise HTTPException(status_code=400, detail={"error": "No team ids passed in"})

    # check that all teams passed exist
    for team_id in data.team_ids:
        team_row = await prisma_client.get_data(  # type: ignore
            team_id=team_id, table_name="team", query_type="find_unique"
        )
        if team_row is None:
            raise HTTPException(
                status_code=404,
                detail={"error": f"Team not found, passed team_id={team_id}"},
            )

    # Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
    # we do this after the first for loop, since first for loop is for validation. we only want this inserted after validation passes
    if litellm.store_audit_logs is True:
        # make an audit log for each team deleted
        for team_id in data.team_ids:
            team_row = await prisma_client.get_data(  # type: ignore
                team_id=team_id, table_name="team", query_type="find_unique"
            )

            _team_row = team_row.json(exclude_none=True)

            asyncio.create_task(
                create_audit_log_for_update(
                    request_data=LiteLLM_AuditLogs(
                        id=str(uuid.uuid4()),
                        updated_at=datetime.now(timezone.utc),
                        changed_by=litellm_changed_by
                        or user_api_key_dict.user_id
                        or litellm_proxy_admin_name,
                        changed_by_api_key=user_api_key_dict.api_key,
                        table_name=LitellmTableNames.TEAM_TABLE_NAME,
                        object_id=team_id,
                        action="deleted",
                        updated_values="{}",
                        before_value=_team_row,
                    )
                )
            )

    # End of Audit logging

    ## DELETE ASSOCIATED KEYS
    await prisma_client.delete_data(team_id_list=data.team_ids, table_name="key")
    ## DELETE TEAMS
    deleted_teams = await prisma_client.delete_data(
        team_id_list=data.team_ids, table_name="team"
    )
    return deleted_teams


@router.get(
    "/team/info", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
@management_endpoint_wrapper
async def team_info(
    http_request: Request,
    team_id: str = fastapi.Query(
        default=None, description="Team ID in the request parameters"
    ),
):
    """
    get info on team + related keys

    ```
    curl --location 'http://localhost:4000/team/info?team_id=your_team_id' \
    --header 'Authorization: Bearer sk-1234'
    ```
    """
    from litellm.proxy.proxy_server import (
        prisma_client,
        litellm_proxy_admin_name,
        create_audit_log_for_update,
        _duration_in_seconds,
    )

    try:
        if prisma_client is None:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail={
                    "error": "Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
                },
            )
        if team_id is None:
            raise HTTPException(
                status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
                detail={"message": "Malformed request. No team id passed in."},
            )

        team_info = await prisma_client.get_data(
            team_id=team_id, table_name="team", query_type="find_unique"
        )
        if team_info is None:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail={"message": f"Team not found, passed team id: {team_id}."},
            )

        ## GET ALL KEYS ##
        keys = await prisma_client.get_data(
            team_id=team_id,
            table_name="key",
            query_type="find_all",
            expires=datetime.now(),
        )

        ## REMOVE HASHED TOKEN INFO before returning ##
        keys_in_team = []
        for key in keys:
            try:
                key = key.model_dump()  # noqa
            except Exception:
                # if using pydantic v1
                key = key.dict()
            key.pop("token", None)
            keys_in_team.append(key)
        return {"team_id": team_id, "team_info": team_info, "keys": keys_in_team}

    except Exception as e:
        if isinstance(e, HTTPException):
            raise ProxyException(
                message=getattr(e, "detail", f"Authentication Error({str(e)})"),
                type="auth_error",
                param=getattr(e, "param", "None"),
                code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
            )
        elif isinstance(e, ProxyException):
            raise e
        raise ProxyException(
            message="Authentication Error, " + str(e),
            type="auth_error",
            param=getattr(e, "param", "None"),
            code=status.HTTP_400_BAD_REQUEST,
        )
|
||||
|
||||
|
||||
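A client-side sketch of calling the endpoint above; the proxy URL and master key are placeholders, not values mandated by this diff:

```python
import requests

# Fetch a team plus its keys from the /team/info route defined above.
resp = requests.get(
    "http://localhost:4000/team/info",            # placeholder proxy base URL
    params={"team_id": "<team-id>"},              # query param read by fastapi.Query
    headers={"Authorization": "Bearer sk-1234"},  # placeholder master key
)
resp.raise_for_status()
body = resp.json()
print(body["team_info"], len(body["keys"]))  # response shape per the return above
```
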
@router.post(
    "/team/block", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
@management_endpoint_wrapper
async def block_team(
    data: BlockTeamRequest,
    http_request: Request,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    Blocks all calls from keys with this team id.
    """
    from litellm.proxy.proxy_server import (
        prisma_client,
        litellm_proxy_admin_name,
        create_audit_log_for_update,
        _duration_in_seconds,
    )

    if prisma_client is None:
        raise Exception("No DB Connected.")

    record = await prisma_client.db.litellm_teamtable.update(
        where={"team_id": data.team_id}, data={"blocked": True}  # type: ignore
    )

    return record


@router.post(
    "/team/unblock", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
@management_endpoint_wrapper
async def unblock_team(
    data: BlockTeamRequest,
    http_request: Request,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    Unblocks all calls from keys with this team id.
    """
    from litellm.proxy.proxy_server import (
        prisma_client,
        litellm_proxy_admin_name,
        create_audit_log_for_update,
        _duration_in_seconds,
    )

    if prisma_client is None:
        raise Exception("No DB Connected.")

    record = await prisma_client.db.litellm_teamtable.update(
        where={"team_id": data.team_id}, data={"blocked": False}  # type: ignore
    )

    return record

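A hedged sketch of driving the two endpoints above; the base URL and key are placeholders, and `team_id` is the field `BlockTeamRequest` carries per the handlers:

```python
import requests

BASE = "http://localhost:4000"                 # placeholder proxy URL
HEADERS = {"Authorization": "Bearer sk-1234"}  # placeholder master key

# Block every key under a team, then lift the block again.
requests.post(f"{BASE}/team/block", json={"team_id": "<team-id>"}, headers=HEADERS)
requests.post(f"{BASE}/team/unblock", json={"team_id": "<team-id>"}, headers=HEADERS)
```
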
@router.get(
    "/team/list", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
@management_endpoint_wrapper
async def list_team(
    http_request: Request,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    [Admin-only] List all available teams

    ```
    curl --location --request GET 'http://0.0.0.0:4000/team/list' \
    --header 'Authorization: Bearer sk-1234'
    ```
    """
    from litellm.proxy.proxy_server import (
        prisma_client,
        litellm_proxy_admin_name,
        create_audit_log_for_update,
        _duration_in_seconds,
    )

    if user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN:
        raise HTTPException(
            status_code=401,
            detail={
                "error": "Admin-only endpoint. Your user role={}".format(
                    user_api_key_dict.user_role
                )
            },
        )

    if prisma_client is None:
        raise HTTPException(
            status_code=400,
            detail={"error": CommonProxyErrors.db_not_connected_error.value},
        )

    response = await prisma_client.db.litellm_teamtable.find_many()

    return response

@@ -1,5 +1,11 @@
# What is this?
## Helper utils for the management endpoints (keys/users/teams)
from datetime import datetime
from functools import wraps
from litellm.proxy._types import UserAPIKeyAuth, ManagementEndpointLoggingPayload
from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
from litellm._logging import verbose_logger
from fastapi import Request

from litellm.proxy._types import LiteLLM_TeamTable, Member, UserAPIKeyAuth
from litellm.proxy.utils import PrismaClient

@@ -61,3 +67,110 @@ async def add_new_member(
            "budget_id": _budget_id,
        }
    )


def management_endpoint_wrapper(func):
    """
    This wrapper does the following:

    1. Logs I/O and exceptions to OTEL
    2. Creates an audit log for successful calls
    """

    @wraps(func)
    async def wrapper(*args, **kwargs):
        start_time = datetime.now()

        try:
            result = await func(*args, **kwargs)
            end_time = datetime.now()
            try:
                if kwargs is None:
                    kwargs = {}
                user_api_key_dict: UserAPIKeyAuth = (
                    kwargs.get("user_api_key_dict") or UserAPIKeyAuth()
                )
                _http_request: Request = kwargs.get("http_request")
                parent_otel_span = user_api_key_dict.parent_otel_span
                if parent_otel_span is not None:
                    from litellm.proxy.proxy_server import open_telemetry_logger

                    if open_telemetry_logger is not None:
                        if _http_request:
                            _route = _http_request.url.path
                            _request_body: dict = await _read_request_body(
                                request=_http_request
                            )
                            _response = dict(result) if result is not None else None

                            logging_payload = ManagementEndpointLoggingPayload(
                                route=_route,
                                request_data=_request_body,
                                response=_response,
                                start_time=start_time,
                                end_time=end_time,
                            )

                            await open_telemetry_logger.async_management_endpoint_success_hook(
                                logging_payload=logging_payload,
                                parent_otel_span=parent_otel_span,
                            )

                if _http_request:
                    _route = _http_request.url.path
                    # Flush the user_api_key cache if this was an update/delete call to /key, /team, or /user
                    if _route in [
                        "/key/update",
                        "/key/delete",
                        "/team/update",
                        "/team/delete",
                        "/user/update",
                        "/user/delete",
                        "/customer/update",
                        "/customer/delete",
                    ]:
                        from litellm.proxy.proxy_server import user_api_key_cache

                        user_api_key_cache.flush_cache()
            except Exception as e:
                # Non-blocking exception - logging must never fail the request
                verbose_logger.debug("Error in management endpoint wrapper: %s", str(e))
                pass

            return result
        except Exception as e:
            end_time = datetime.now()

            if kwargs is None:
                kwargs = {}
            user_api_key_dict: UserAPIKeyAuth = (
                kwargs.get("user_api_key_dict") or UserAPIKeyAuth()
            )
            parent_otel_span = user_api_key_dict.parent_otel_span
            if parent_otel_span is not None:
                from litellm.proxy.proxy_server import open_telemetry_logger

                if open_telemetry_logger is not None:
                    _http_request: Request = kwargs.get("http_request")
                    if _http_request:
                        _route = _http_request.url.path
                        _request_body: dict = await _read_request_body(
                            request=_http_request
                        )
                        logging_payload = ManagementEndpointLoggingPayload(
                            route=_route,
                            request_data=_request_body,
                            response=None,
                            start_time=start_time,
                            end_time=end_time,
                            exception=e,
                        )

                        await open_telemetry_logger.async_management_endpoint_failure_hook(
                            logging_payload=logging_payload,
                            parent_otel_span=parent_otel_span,
                        )

            raise e

    return wrapper

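The team endpoints earlier in this diff are all wrapped with this decorator. A minimal sketch of how it attaches to a route; the route name and body here are illustrative only:

```python
from fastapi import APIRouter, Request

router = APIRouter()


@router.get("/example/info")  # hypothetical route, for illustration only
@management_endpoint_wrapper
async def example_info(http_request: Request):
    # The wrapper times this call, emits an OTEL success/failure event when a
    # parent span exists, and flushes the user_api_key cache on update/delete routes.
    return {"status": "ok"}
```
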
@@ -20,6 +20,7 @@ model_list:

general_settings:
  master_key: sk-1234
  alerting: ["slack", "email"]

litellm_settings:
  success_callback: ["prometheus"]

File diff suppressed because it is too large
File diff suppressed because it is too large

@@ -12,6 +12,8 @@ import litellm
import backoff
import traceback
from pydantic import BaseModel
import litellm.litellm_core_utils
import litellm.litellm_core_utils.litellm_logging
from litellm.proxy._types import (
    UserAPIKeyAuth,
    DynamoDBArgs,

@@ -266,7 +268,9 @@ class ProxyLogging:
                + litellm.failure_callback
            )
        )
        litellm.utils.set_callbacks(callback_list=callback_list)
        litellm.litellm_core_utils.litellm_logging.set_callbacks(
            callback_list=callback_list
        )

    # The actual implementation of the function
    async def pre_call_hook(

@@ -331,7 +335,9 @@ class ProxyLogging:
            return data
        except Exception as e:
            if "litellm_logging_obj" in data:
                logging_obj: litellm.utils.Logging = data["litellm_logging_obj"]
                logging_obj: litellm.litellm_core_utils.litellm_logging.Logging = data[
                    "litellm_logging_obj"
                ]

                ## ASYNC FAILURE HANDLER ##
                error_message = ""

@@ -455,6 +461,7 @@ class ProxyLogging:
        formatted_message += f"\n\nProxy URL: `{_proxy_base_url}`"

        extra_kwargs = {}
        alerting_metadata = {}
        if request_data is not None:
            _url = self.slack_alerting_instance._add_langfuse_trace_id_to_alert(
                request_data=request_data

@@ -462,7 +469,12 @@ class ProxyLogging:
            if _url is not None:
                extra_kwargs["🪢 Langfuse Trace"] = _url
                formatted_message += "\n\n🪢 Langfuse Trace: {}".format(_url)

            if (
                "metadata" in request_data
                and request_data["metadata"].get("alerting_metadata", None) is not None
                and isinstance(request_data["metadata"]["alerting_metadata"], dict)
            ):
                alerting_metadata = request_data["metadata"]["alerting_metadata"]
        for client in self.alerting:
            if client == "slack":
                await self.slack_alerting_instance.send_alert(

@@ -470,6 +482,7 @@ class ProxyLogging:
                    level=level,
                    alert_type=alert_type,
                    user_info=None,
                    alerting_metadata=alerting_metadata,
                    **extra_kwargs,
                )
            elif client == "sentry":

@@ -510,7 +523,7 @@ class ProxyLogging:
            )

            if hasattr(self, "service_logging_obj"):
                self.service_logging_obj.async_service_failure_hook(
                await self.service_logging_obj.async_service_failure_hook(
                    service=ServiceTypes.DB,
                    duration=duration,
                    error=error_message,

@@ -1960,6 +1973,9 @@ async def send_email(receiver_email, subject, html):
    email_message["From"] = sender_email
    email_message["To"] = receiver_email
    email_message["Subject"] = subject
    verbose_proxy_logger.debug(
        "sending email from %s to %s", sender_email, receiver_email
    )

    # Attach the body to the email
    email_message.attach(MIMEText(html, "html"))
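Per the `alerting_metadata` branch above, callers can attach custom key/value pairs that are forwarded into the alert. A hedged sketch of the request shape it reads (the exact surrounding payload is an assumption; it mirrors the test added later in this diff):

```python
# Request data carrying alerting_metadata; the ProxyLogging hook above
# copies this dict into the Slack alert it sends.
request_data = {
    "model": "gpt-3.5-turbo",
    "metadata": {"alerting_metadata": {"hello": "world"}},
}
```
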
@@ -2115,6 +2131,16 @@ def _extract_from_regex(duration: str) -> Tuple[int, str]:
    return value, unit


def get_last_day_of_month(year, month):
    # Handle December case
    if month == 12:
        return 31
    # First day of the following month, minus one day, is the last day of this month
    next_month = datetime(year=year, month=month + 1, day=1)
    last_day_of_month = (next_month - timedelta(days=1)).day
    return last_day_of_month


def _duration_in_seconds(duration: str) -> int:
    """
    Parameters:
@@ -2141,12 +2167,28 @@ def _duration_in_seconds(duration: str) -> int:
        now = time.time()
        current_time = datetime.fromtimestamp(now)

        # Calculate the first day of the next month
        if current_time.month == 12:
            next_month = datetime(year=current_time.year + 1, month=1, day=1)
            target_year = current_time.year + 1
            target_month = 1
        else:
            target_year = current_time.year
            target_month = current_time.month + value

        # Determine the day to set for next month
        target_day = current_time.day
        last_day_of_target_month = get_last_day_of_month(target_year, target_month)

        if target_day > last_day_of_target_month:
            target_day = last_day_of_target_month

        next_month = datetime(
            year=current_time.year, month=current_time.month + value, day=1
            year=target_year,
            month=target_month,
            day=target_day,
            hour=current_time.hour,
            minute=current_time.minute,
            second=current_time.second,
            microsecond=current_time.microsecond,
        )

        # Calculate the duration until the first day of the next month
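A worked check of the day-clamping above; values are computed from the logic as written, and the snippet is an illustration, not part of the diff:

```python
from datetime import datetime, timedelta

# Last day of a month = first day of the next month minus one day,
# matching get_last_day_of_month above.
last_day_feb_2024 = (datetime(2024, 3, 1) - timedelta(days=1)).day
assert last_day_feb_2024 == 29  # leap year

# With value=1 and current date Jan 31, 2024: target_day (31) exceeds the
# last day of February (29), so the clamp sets target_day = 29.
target_day = min(31, last_day_feb_2024)
assert target_day == 29
```
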
@@ -2718,47 +2760,6 @@ def _is_valid_team_configs(team_id=None, team_config=None, request_data=None):
    return


def _is_user_proxy_admin(user_id_information: Optional[list]):
    if user_id_information is None:
        return False

    if len(user_id_information) == 0 or user_id_information[0] is None:
        return False

    _user = user_id_information[0]
    if (
        _user.get("user_role", None) is not None
        and _user.get("user_role") == LitellmUserRoles.PROXY_ADMIN.value
    ):
        return True

    # if user_id_information contains litellm-proxy-budget
    # get first user_id that is not litellm-proxy-budget
    for user in user_id_information:
        if user.get("user_id") != "litellm-proxy-budget":
            _user = user
            break

    if (
        _user.get("user_role", None) is not None
        and _user.get("user_role") == LitellmUserRoles.PROXY_ADMIN.value
    ):
        return True

    return False


def _get_user_role(user_id_information: Optional[list]):
    if user_id_information is None:
        return None

    if len(user_id_information) == 0 or user_id_information[0] is None:
        return None

    _user = user_id_information[0]
    return _user.get("user_role")


def encrypt_value(value: str, master_key: str):
    import hashlib
    import nacl.secret

@@ -66,6 +66,7 @@ from litellm.types.llms.openai import (
)
from litellm.scheduler import Scheduler, FlowItem
from typing import Iterable
from litellm.router_utils.handle_error import send_llm_exception_alert


class Router:

@@ -108,6 +109,7 @@ class Router:
        ] = None,  # generic fallbacks, works across all deployments
        fallbacks: List = [],
        context_window_fallbacks: List = [],
        content_policy_fallbacks: List = [],
        model_group_alias: Optional[dict] = {},
        enable_pre_call_checks: bool = False,
        retry_after: int = 0,  # min time to wait before retrying a failed request

@@ -311,6 +313,12 @@ class Router:
        self.context_window_fallbacks = (
            context_window_fallbacks or litellm.context_window_fallbacks
        )

        _content_policy_fallbacks = (
            content_policy_fallbacks or litellm.content_policy_fallbacks
        )
        self.validate_fallbacks(fallback_param=_content_policy_fallbacks)
        self.content_policy_fallbacks = _content_policy_fallbacks
        self.total_calls: defaultdict = defaultdict(
            int
        )  # dict to store total calls made to each model
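The new `content_policy_fallbacks` parameter uses the same shape as the existing fallback lists (the `[{"gpt-3.5-turbo": ["gpt-4"]}]` format noted in the comments further down). A hedged construction sketch; the model names are placeholders:

```python
import litellm

# Route content-policy violations on one model group to another.
router = litellm.Router(
    model_list=[
        {"model_name": "gpt-3.5-turbo", "litellm_params": {"model": "gpt-3.5-turbo"}},
        {"model_name": "my-fallback-model", "litellm_params": {"model": "gpt-4"}},
    ],
    content_policy_fallbacks=[{"gpt-3.5-turbo": ["my-fallback-model"]}],
)
```
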
@@ -576,6 +584,14 @@ class Router:

            return response
        except Exception as e:
            asyncio.create_task(
                send_llm_exception_alert(
                    litellm_router_instance=self,
                    request_kwargs=kwargs,
                    error_traceback_str=traceback.format_exc(),
                    original_exception=e,
                )
            )
            raise e

    async def _acompletion(

@@ -1097,6 +1113,14 @@ class Router:

            return response
        except Exception as e:
            asyncio.create_task(
                send_llm_exception_alert(
                    litellm_router_instance=self,
                    request_kwargs=kwargs,
                    error_traceback_str=traceback.format_exc(),
                    original_exception=e,
                )
            )
            raise e

    async def _aimage_generation(self, prompt: str, model: str, **kwargs):

@@ -1221,6 +1245,14 @@ class Router:

            return response
        except Exception as e:
            asyncio.create_task(
                send_llm_exception_alert(
                    litellm_router_instance=self,
                    request_kwargs=kwargs,
                    error_traceback_str=traceback.format_exc(),
                    original_exception=e,
                )
            )
            raise e

    async def _atranscription(self, file: BinaryIO, model: str, **kwargs):

@@ -1387,6 +1419,14 @@ class Router:

            return response
        except Exception as e:
            asyncio.create_task(
                send_llm_exception_alert(
                    litellm_router_instance=self,
                    request_kwargs=kwargs,
                    error_traceback_str=traceback.format_exc(),
                    original_exception=e,
                )
            )
            raise e

    async def amoderation(self, model: str, input: str, **kwargs):

@@ -1402,6 +1442,14 @@ class Router:

            return response
        except Exception as e:
            asyncio.create_task(
                send_llm_exception_alert(
                    litellm_router_instance=self,
                    request_kwargs=kwargs,
                    error_traceback_str=traceback.format_exc(),
                    original_exception=e,
                )
            )
            raise e

    async def _amoderation(self, model: str, input: str, **kwargs):

@@ -1546,6 +1594,14 @@ class Router:

            return response
        except Exception as e:
            asyncio.create_task(
                send_llm_exception_alert(
                    litellm_router_instance=self,
                    request_kwargs=kwargs,
                    error_traceback_str=traceback.format_exc(),
                    original_exception=e,
                )
            )
            raise e

    async def _atext_completion(self, model: str, prompt: str, **kwargs):

@@ -1741,6 +1797,14 @@ class Router:
            response = await self.async_function_with_fallbacks(**kwargs)
            return response
        except Exception as e:
            asyncio.create_task(
                send_llm_exception_alert(
                    litellm_router_instance=self,
                    request_kwargs=kwargs,
                    error_traceback_str=traceback.format_exc(),
                    original_exception=e,
                )
            )
            raise e

    async def _aembedding(self, input: Union[str, List], model: str, **kwargs):

@@ -1998,6 +2062,9 @@ class Router:
        context_window_fallbacks = kwargs.get(
            "context_window_fallbacks", self.context_window_fallbacks
        )
        content_policy_fallbacks = kwargs.get(
            "content_policy_fallbacks", self.content_policy_fallbacks
        )
        try:
            if mock_testing_fallbacks is not None and mock_testing_fallbacks == True:
                raise Exception(

@@ -2016,7 +2083,10 @@ class Router:
                if (
                    hasattr(e, "status_code")
                    and e.status_code == 400  # type: ignore
                    and not isinstance(e, litellm.ContextWindowExceededError)
                    and not (
                        isinstance(e, litellm.ContextWindowExceededError)
                        or isinstance(e, litellm.ContentPolicyViolationError)
                    )
                ):  # don't retry a malformed request
                    raise e
                if (

@@ -2034,6 +2104,39 @@ class Router:
                    if fallback_model_group is None:
                        raise original_exception

                    for mg in fallback_model_group:
                        """
                        Iterate through the model groups and try calling that deployment
                        """
                        try:
                            kwargs["model"] = mg
                            kwargs.setdefault("metadata", {}).update(
                                {"model_group": mg}
                            )  # update model_group used, if fallbacks are done
                            response = await self.async_function_with_retries(
                                *args, **kwargs
                            )
                            verbose_router_logger.info(
                                "Successful fallback b/w models."
                            )
                            return response
                        except Exception as e:
                            pass
                elif (
                    isinstance(e, litellm.ContentPolicyViolationError)
                    and content_policy_fallbacks is not None
                ):
                    fallback_model_group = None
                    for (
                        item
                    ) in content_policy_fallbacks:  # [{"gpt-3.5-turbo": ["gpt-4"]}]
                        if list(item.keys())[0] == model_group:
                            fallback_model_group = item[model_group]
                            break

                    if fallback_model_group is None:
                        raise original_exception

                    for mg in fallback_model_group:
                        """
                        Iterate through the model groups and try calling that deployment

@@ -2114,6 +2217,9 @@ class Router:
        context_window_fallbacks = kwargs.pop(
            "context_window_fallbacks", self.context_window_fallbacks
        )
        content_policy_fallbacks = kwargs.pop(
            "content_policy_fallbacks", self.content_policy_fallbacks
        )

        num_retries = kwargs.pop("num_retries")

@@ -2141,6 +2247,7 @@ class Router:
                healthy_deployments=_healthy_deployments,
                context_window_fallbacks=context_window_fallbacks,
                regular_fallbacks=fallbacks,
                content_policy_fallbacks=content_policy_fallbacks,
            )

            # decides how long to sleep before retry

@@ -2206,10 +2313,12 @@ class Router:
        error: Exception,
        healthy_deployments: Optional[List] = None,
        context_window_fallbacks: Optional[List] = None,
        content_policy_fallbacks: Optional[List] = None,
        regular_fallbacks: Optional[List] = None,
    ):
        """
        1. raise an exception for ContextWindowExceededError if context_window_fallbacks is not None
        2. raise an exception for ContentPolicyViolationError if content_policy_fallbacks is not None

        2. raise an exception for RateLimitError if
        - there are no fallbacks

@@ -2219,13 +2328,19 @@ class Router:
        if healthy_deployments is not None and isinstance(healthy_deployments, list):
            _num_healthy_deployments = len(healthy_deployments)

        ### CHECK IF RATE LIMIT / CONTEXT WINDOW ERROR w/ fallbacks available / Bad Request Error
        ### CHECK IF RATE LIMIT / CONTEXT WINDOW ERROR / CONTENT POLICY VIOLATION ERROR w/ fallbacks available / Bad Request Error
        if (
            isinstance(error, litellm.ContextWindowExceededError)
            and context_window_fallbacks is not None
        ):
            raise error

        if (
            isinstance(error, litellm.ContentPolicyViolationError)
            and content_policy_fallbacks is not None
        ):
            raise error

        # Error we should only retry if there are other deployments
        if isinstance(error, openai.RateLimitError):
            if (

@@ -2256,6 +2371,9 @@ class Router:
        context_window_fallbacks = kwargs.get(
            "context_window_fallbacks", self.context_window_fallbacks
        )
        content_policy_fallbacks = kwargs.get(
            "content_policy_fallbacks", self.content_policy_fallbacks
        )
        try:
            if mock_testing_fallbacks is not None and mock_testing_fallbacks == True:
                raise Exception(

@@ -2271,7 +2389,10 @@ class Router:
                if (
                    hasattr(e, "status_code")
                    and e.status_code == 400  # type: ignore
                    and not isinstance(e, litellm.ContextWindowExceededError)
                    and not (
                        isinstance(e, litellm.ContextWindowExceededError)
                        or isinstance(e, litellm.ContentPolicyViolationError)
                    )
                ):  # don't retry a malformed request
                    raise e

@@ -2294,6 +2415,37 @@ class Router:
                    if fallback_model_group is None:
                        raise original_exception

                    for mg in fallback_model_group:
                        """
                        Iterate through the model groups and try calling that deployment
                        """
                        try:
                            ## LOGGING
                            kwargs = self.log_retry(kwargs=kwargs, e=original_exception)
                            kwargs["model"] = mg
                            kwargs.setdefault("metadata", {}).update(
                                {"model_group": mg}
                            )  # update model_group used, if fallbacks are done
                            response = self.function_with_fallbacks(*args, **kwargs)
                            return response
                        except Exception as e:
                            pass
                elif (
                    isinstance(e, litellm.ContentPolicyViolationError)
                    and content_policy_fallbacks is not None
                ):
                    fallback_model_group = None

                    for (
                        item
                    ) in content_policy_fallbacks:  # [{"gpt-3.5-turbo": ["gpt-4"]}]
                        if list(item.keys())[0] == model_group:
                            fallback_model_group = item[model_group]
                            break

                    if fallback_model_group is None:
                        raise original_exception

                    for mg in fallback_model_group:
                        """
                        Iterate through the model groups and try calling that deployment

@@ -2400,6 +2552,9 @@ class Router:
        context_window_fallbacks = kwargs.pop(
            "context_window_fallbacks", self.context_window_fallbacks
        )
        content_policy_fallbacks = kwargs.pop(
            "content_policy_fallbacks", self.content_policy_fallbacks
        )

        try:
            # if the function call is successful, no exception will be raised and we'll break out of the loop

@@ -2419,6 +2574,7 @@ class Router:
                healthy_deployments=_healthy_deployments,
                context_window_fallbacks=context_window_fallbacks,
                regular_fallbacks=fallbacks,
                content_policy_fallbacks=content_policy_fallbacks,
            )

            # decides how long to sleep before retry

@@ -4570,6 +4726,8 @@ class Router:
                default_webhook_url=router_alerting_config.webhook_url,
            )

            self.slack_alerting_logger = _slack_alerting_logger

            litellm.callbacks.append(_slack_alerting_logger)
            litellm.success_callback.append(
                _slack_alerting_logger.response_taking_too_long_callback

litellm/router_utils/handle_error.py (new file, 53 lines)
@@ -0,0 +1,53 @@
import asyncio
import traceback
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
    from litellm.router import Router as _Router

    LitellmRouter = _Router
else:
    LitellmRouter = Any


async def send_llm_exception_alert(
    litellm_router_instance: LitellmRouter,
    request_kwargs: dict,
    error_traceback_str: str,
    original_exception,
):
    """
    Sends a Slack / MS Teams alert for the LLM API call failure.

    Parameters:
    litellm_router_instance (_Router): The LitellmRouter instance.
    original_exception (Any): The original exception that occurred.

    Returns:
    None
    """
    if litellm_router_instance is None:
        return

    if not hasattr(litellm_router_instance, "slack_alerting_logger"):
        return

    if litellm_router_instance.slack_alerting_logger is None:
        return

    if "proxy_server_request" in request_kwargs:
        # Do not send an alert for requests coming from the litellm proxy server;
        # the proxy is already instrumented to send LLM API call failures
        return

    litellm_debug_info = getattr(original_exception, "litellm_debug_info", None)
    exception_str = str(original_exception)
    if litellm_debug_info is not None:
        exception_str += litellm_debug_info
    exception_str += f"\n\n{error_traceback_str[:2000]}"

    await litellm_router_instance.slack_alerting_logger.send_alert(
        message=f"LLM API call failed: `{exception_str}`",
        level="High",
        alert_type="llm_exceptions",
    )

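The Router hunks above schedule this helper as a fire-and-forget task. A usage sketch mirroring that pattern, assuming a running event loop and a configured Router instance (`router` is a placeholder):

```python
import asyncio
import traceback

# Inside an except block, mirror the Router's fire-and-forget usage:
# the alert is sent in the background while the original error propagates.
try:
    raise RuntimeError("simulated LLM API failure")
except Exception as e:
    asyncio.create_task(
        send_llm_exception_alert(
            litellm_router_instance=router,  # placeholder Router instance
            request_kwargs={},
            error_traceback_str=traceback.format_exc(),
            original_exception=e,
        )
    )
    raise
```
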
@@ -25,6 +25,9 @@ import pytest
from litellm.router import AlertingConfig, Router
from litellm.proxy._types import CallInfo
from openai import APIError
from litellm.router import AlertingConfig
import litellm
import os


@pytest.mark.parametrize(

@@ -159,6 +162,29 @@ async def test_response_taking_too_long_callback(slack_alerting):
        mock_send_alert.assert_awaited_once()


@pytest.mark.asyncio
async def test_alerting_metadata(slack_alerting):
    """
    Test alerting_metadata is propagated correctly for response taking too long
    """
    start_time = datetime.now()
    end_time = start_time + timedelta(seconds=301)
    kwargs = {
        "model": "test_model",
        "messages": "test_messages",
        "litellm_params": {"metadata": {"alerting_metadata": {"hello": "world"}}},
    }
    with patch.object(slack_alerting, "send_alert", new=AsyncMock()) as mock_send_alert:

        ## RESPONSE TAKING TOO LONG
        await slack_alerting.response_taking_too_long_callback(
            kwargs, None, start_time, end_time
        )
        mock_send_alert.assert_awaited_once()

        assert "hello" in mock_send_alert.call_args[1]["alerting_metadata"]


# Test for budget crossed
@pytest.mark.asyncio
async def test_budget_alerts_crossed(slack_alerting):

@@ -204,7 +230,9 @@ async def test_send_alert(slack_alerting):
        slack_alerting.async_http_handler, "post", new=AsyncMock()
    ) as mock_post:
        mock_post.return_value.status_code = 200
        await slack_alerting.send_alert("Test message", "Low", "budget_alerts")
        await slack_alerting.send_alert(
            "Test message", "Low", "budget_alerts", alerting_metadata={}
        )
        mock_post.assert_awaited_once()


@@ -263,7 +291,7 @@ async def test_daily_reports_completion(slack_alerting):
        await asyncio.sleep(3)
        response_val = await slack_alerting.send_daily_reports(router=router)

        assert response_val == True
        assert response_val is True

        mock_send_alert.assert_awaited_once()

@@ -288,7 +316,7 @@ async def test_daily_reports_completion(slack_alerting):
        await asyncio.sleep(3)
        response_val = await slack_alerting.send_daily_reports(router=router)

        assert response_val == True
        assert response_val is True

        mock_send_alert.assert_awaited()

@@ -743,3 +771,37 @@ async def test_region_outage_alerting_called(
        mock_send_alert.assert_called_once()
    else:
        mock_send_alert.assert_not_called()


@pytest.mark.asyncio
@pytest.mark.skip(reason="test only needs to run locally")
async def test_alerting():
    router = litellm.Router(
        model_list=[
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": "gpt-3.5-turbo",
                    "api_key": "bad_key",
                },
            }
        ],
        debug_level="DEBUG",
        set_verbose=True,
        alerting_config=AlertingConfig(
            alerting_threshold=10,  # threshold for slow / hanging llm responses (in seconds). Defaults to 300 seconds
            webhook_url=os.getenv(
                "SLACK_WEBHOOK_URL"
            ),  # webhook you want to send alerts to
        ),
    )
    try:
        await router.acompletion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "Hey, how's it going?"}],
        )

    except Exception:
        pass
    finally:
        await asyncio.sleep(3)

@@ -29,19 +29,22 @@ import pytest, logging, asyncio
import litellm, asyncio
from litellm.proxy.proxy_server import (
    new_user,
    generate_key_fn,
    user_api_key_auth,
    user_update,
    user_info,
    block_user,
)
from litellm.proxy.management_endpoints.key_management_endpoints import (
    delete_key_fn,
    info_key_fn,
    update_key_fn,
    generate_key_fn,
    generate_key_helper_fn,
)
from litellm.proxy.spend_reporting_endpoints.spend_management_endpoints import (
    spend_user_fn,
    spend_key_fn,
    view_spend_logs,
    user_info,
    block_user,
)
from litellm.proxy.utils import PrismaClient, ProxyLogging, hash_token
from litellm._logging import verbose_proxy_logger

Some files were not shown because too many files have changed in this diff.