Merge branch 'BerriAI:main' into main

Hannes Burrichter, 2024-06-16 15:15:00 +02:00, committed by GitHub
commit 1b644fa0fc
142 changed files with 19096 additions and 13214 deletions


@ -25,6 +25,11 @@ jobs:
     if: github.repository == 'BerriAI/litellm'
     runs-on: ubuntu-latest
     steps:
+      -
+        name: Checkout
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.inputs.commit_hash }}
       -
         name: Set up QEMU
         uses: docker/setup-qemu-action@v3
@ -41,12 +46,14 @@ jobs:
         name: Build and push
         uses: docker/build-push-action@v5
         with:
+          context: .
           push: true
           tags: litellm/litellm:${{ github.event.inputs.tag || 'latest' }}
       -
         name: Build and push litellm-database image
         uses: docker/build-push-action@v5
         with:
+          context: .
           push: true
           file: Dockerfile.database
           tags: litellm/litellm-database:${{ github.event.inputs.tag || 'latest' }}
@ -54,6 +61,7 @@ jobs:
         name: Build and push litellm-spend-logs image
         uses: docker/build-push-action@v5
         with:
+          context: .
           push: true
           file: ./litellm-js/spend-logs/Dockerfile
           tags: litellm/litellm-spend_logs:${{ github.event.inputs.tag || 'latest' }}
@ -68,6 +76,8 @@ jobs:
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.inputs.commit_hash }}
       # Uses the `docker/login-action` action to log in to the Container registry registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here.
       - name: Log in to the Container registry
         uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
@ -92,7 +102,7 @@ jobs:
       - name: Build and push Docker image
         uses: docker/build-push-action@4976231911ebf5f32aad765192d35f942aa48cb8
         with:
-          context: https://github.com/BerriAI/litellm.git#${{ github.event.inputs.commit_hash}}
+          context: .
           push: true
           tags: ${{ steps.meta.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }}, ${{ steps.meta.outputs.tags }}-${{ github.event.inputs.release_type }} # if a tag is provided, use that, otherwise use the release tag, and if neither is available, use 'latest'
           labels: ${{ steps.meta.outputs.labels }}
@ -106,6 +116,8 @@ jobs:
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.inputs.commit_hash }}
       - name: Log in to the Container registry
         uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
@ -128,7 +140,7 @@ jobs:
       - name: Build and push Database Docker image
         uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
         with:
-          context: https://github.com/BerriAI/litellm.git#${{ github.event.inputs.commit_hash}}
+          context: .
           file: Dockerfile.database
           push: true
           tags: ${{ steps.meta-database.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }}, ${{ steps.meta-database.outputs.tags }}-${{ github.event.inputs.release_type }}
@ -143,6 +155,8 @@ jobs:
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.inputs.commit_hash }}
       - name: Log in to the Container registry
         uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
@ -165,7 +179,7 @@ jobs:
       - name: Build and push Database Docker image
         uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
         with:
-          context: https://github.com/BerriAI/litellm.git#${{ github.event.inputs.commit_hash}}
+          context: .
           file: ./litellm-js/spend-logs/Dockerfile
           push: true
           tags: ${{ steps.meta-spend-logs.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }}, ${{ steps.meta-spend-logs.outputs.tags }}-${{ github.event.inputs.release_type }}
@ -176,6 +190,8 @@ jobs:
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.inputs.commit_hash }}
       - name: Log in to the Container registry
         uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1


@ -1,4 +1,19 @@
 repos:
+- repo: local
+  hooks:
+    # - id: mypy
+    #   name: mypy
+    #   entry: python3 -m mypy --ignore-missing-imports
+    #   language: system
+    #   types: [python]
+    #   files: ^litellm/
+    - id: isort
+      name: isort
+      entry: isort
+      language: system
+      types: [python]
+      files: litellm/.*\.py
+      exclude: ^litellm/__init__.py$
 - repo: https://github.com/psf/black
   rev: 24.2.0
   hooks:
@ -16,11 +31,10 @@ repos:
       name: Check if files match
       entry: python3 ci_cd/check_files_match.py
       language: system
-- repo: local
-  hooks:
-    - id: mypy
-      name: mypy
-      entry: python3 -m mypy --ignore-missing-imports
-      language: system
-      types: [python]
-      files: ^litellm/
+    # - id: check-file-length
+    #   name: Check file length
+    #   entry: python check_file_length.py
+    #   args: ["10000"] # set your desired maximum number of lines
+    #   language: python
+    #   files: litellm/.*\.py
+    #   exclude: ^litellm/tests/

check_file_length.py (new file, 28 lines)

@ -0,0 +1,28 @@
import sys
def check_file_length(max_lines, filenames):
bad_files = []
for filename in filenames:
with open(filename, "r") as file:
lines = file.readlines()
if len(lines) > max_lines:
bad_files.append((filename, len(lines)))
return bad_files
if __name__ == "__main__":
max_lines = int(sys.argv[1])
filenames = sys.argv[2:]
bad_files = check_file_length(max_lines, filenames)
if bad_files:
bad_files.sort(
key=lambda x: x[1], reverse=True
) # Sort files by length in descending order
for filename, length in bad_files:
print(f"{filename}: {length} lines")
sys.exit(1)
else:
sys.exit(0)
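A minimal usage sketch for the new script above; the file paths are illustrative, and the function can also be imported directly when experimenting from the repo root:

```python
# Illustrative invocation (pre-commit would normally pass the limit followed by the staged files):
#   python check_file_length.py 10000 litellm/utils.py litellm/main.py

from check_file_length import check_file_length  # assumes the repo root is on sys.path

too_long = check_file_length(max_lines=10000, filenames=["litellm/utils.py"])
for filename, length in too_long:
    print(f"{filename} has {length} lines (limit is 10000)")
```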


@ -162,7 +162,7 @@ def completion(
 - `function`: *object* - Required.
-- `tool_choice`: *string or object (optional)* - Controls which (if any) function is called by the model. none means the model will not call a function and instead generates a message. auto means the model can pick between generating a message or calling a function. Specifying a particular function via {"type: "function", "function": {"name": "my_function"}} forces the model to call that function.
+- `tool_choice`: *string or object (optional)* - Controls which (if any) function is called by the model. none means the model will not call a function and instead generates a message. auto means the model can pick between generating a message or calling a function. Specifying a particular function via `{"type: "function", "function": {"name": "my_function"}}` forces the model to call that function.
 - `none` is the default when no functions are present. `auto` is the default if functions are present.


@ -1,90 +0,0 @@
import Image from '@theme/IdealImage';
import QueryParamReader from '../../src/components/queryParamReader.js'
# [Beta] Monitor Logs in Production
:::note
This is in beta. Expect frequent updates, as we improve based on your feedback.
:::
LiteLLM provides an integration to let you monitor logs in production.
👉 Jump to our sample LiteLLM Dashboard: https://admin.litellm.ai/
<Image img={require('../../img/alt_dashboard.png')} alt="Dashboard" />
## Debug your first logs
<a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/liteLLM_OpenAI.ipynb">
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>
### 1. Get your LiteLLM Token
Go to [admin.litellm.ai](https://admin.litellm.ai/) and copy the code snippet with your unique token
<Image img={require('../../img/hosted_debugger_usage_page.png')} alt="Usage" />
### 2. Set up your environment
**Add it to your .env**
```python
import os
os.env["LITELLM_TOKEN"] = "e24c4c06-d027-4c30-9e78-18bc3a50aebb" # replace with your unique token
```
**Turn on LiteLLM Client**
```python
import litellm
litellm.client = True
```
### 3. Make a normal `completion()` call
```python
import litellm
from litellm import completion
import os
# set env variables
os.environ["LITELLM_TOKEN"] = "e24c4c06-d027-4c30-9e78-18bc3a50aebb" # replace with your unique token
os.environ["OPENAI_API_KEY"] = "openai key"
litellm.use_client = True # enable logging dashboard
messages = [{ "content": "Hello, how are you?","role": "user"}]
# openai call
response = completion(model="gpt-3.5-turbo", messages=messages)
```
Your `completion()` call prints a link to your session dashboard (https://admin.litellm.ai/<your_unique_token>)
In the above case it would be: [`admin.litellm.ai/e24c4c06-d027-4c30-9e78-18bc3a50aebb`](https://admin.litellm.ai/e24c4c06-d027-4c30-9e78-18bc3a50aebb)
Click on your personal dashboard link. Here's how you can find it 👇
<Image img={require('../../img/dash_output.png')} alt="Dashboard" />
[👋 Tell us if you need better privacy controls](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version?month=2023-08)
### 3. Review request log
Oh! Looks like our request was made successfully. Let's click on it and see exactly what got sent to the LLM provider.
Ah! So we can see that this request was made to a **Baseten** (see litellm_params > custom_llm_provider) for a model with ID - **7qQNLDB** (see model). The message sent was - `"Hey, how's it going?"` and the response received was - `"As an AI language model, I don't have feelings or emotions, but I can assist you with your queries. How can I assist you today?"`
<Image img={require('../../img/dashboard_log.png')} alt="Dashboard Log Row" />
:::info
🎉 Congratulations! You've successfully debugged your first log!
:::


@ -1,3 +1,5 @@
+import Image from '@theme/IdealImage';
 # Promptlayer Tutorial
 Promptlayer is a platform for prompt engineers. Log OpenAI requests. Search usage history. Track performance. Visually manage prompt templates.


@ -1,3 +1,6 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
 # OpenAI (Text Completion)
 LiteLLM supports OpenAI text completion models


@ -208,7 +208,7 @@ print(response)
 Instead of using the `custom_llm_provider` arg to specify which provider you're using (e.g. together ai), you can just pass the provider name as part of the model name, and LiteLLM will parse it out.
-Expected format: <custom_llm_provider>/<model_name>
+Expected format: `<custom_llm_provider>/<model_name>`
 e.g. completion(model="together_ai/togethercomputer/Llama-2-7B-32K-Instruct", ...)
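A short, hedged sketch of the provider-prefix format described above, assuming the relevant provider key (e.g. TOGETHERAI_API_KEY) is already set in the environment:

```python
from litellm import completion

# The "together_ai/" prefix tells LiteLLM which provider to route to;
# no custom_llm_provider argument is needed.
response = completion(
    model="together_ai/togethercomputer/Llama-2-7B-32K-Instruct",
    messages=[{"role": "user", "content": "Hello, how are you?"}],
)
print(response)
```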


@ -32,40 +32,32 @@ Get a slack webhook url from https://api.slack.com/messaging/webhooks
 You can also use Discord Webhooks, see [here](#using-discord-webhooks)
-### Step 2: Update config.yaml
-- Set `SLACK_WEBHOOK_URL` in your proxy env to enable Slack alerts.
-- Just for testing purposes, let's save a bad key to our proxy.
+Set `SLACK_WEBHOOK_URL` in your proxy env to enable Slack alerts.
+```bash
+export SLACK_WEBHOOK_URL="https://hooks.slack.com/services/<>/<>/<>"
+```
+### Step 2: Setup Proxy
 ```yaml
-model_list:
-  model_name: "azure-model"
-  litellm_params:
-    model: "azure/gpt-35-turbo"
-    api_key: "my-bad-key" # 👈 bad key
 general_settings:
   alerting: ["slack"]
   alerting_threshold: 300 # sends alerts if requests hang for 5min+ and responses take 5min+
-environment_variables:
-  SLACK_WEBHOOK_URL: "https://hooks.slack.com/services/<>/<>/<>"
-  SLACK_DAILY_REPORT_FREQUENCY: "86400" # 24 hours; Optional: defaults to 12 hours
 ```
-Start proxy
+### Step 3: Start proxy
 ```bash
 $ litellm --config /path/to/config.yaml
 ```
-## Testing Alerting is Setup Correctly
-Make a GET request to `/health/services`, expect to see a test slack alert in your provided webhook slack channel
-```shell
-curl -X GET 'http://localhost:4000/health/services?service=slack' \
+### Step 3: Test it!
+```bash
+curl -X GET 'http://0.0.0.0:4000/health/services?service=slack' \
 -H 'Authorization: Bearer sk-1234'
 ```
@ -84,7 +76,34 @@ litellm_settings:
```
## Advanced - Add Metadata to alerts
Add alerting metadata to proxy calls for debugging.
```python
import openai
client = openai.OpenAI(
api_key="anything",
base_url="http://0.0.0.0:4000"
)
# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(
model="gpt-3.5-turbo",
messages = [],
extra_body={
"metadata": {
"alerting_metadata": {
"hello": "world"
}
}
}
)
```
**Expected Response**
<Image img={require('../../img/alerting_metadata.png')}/>
## Advanced - Opting into specific alert types


@ -669,7 +669,7 @@ Once the stack is created, get the DatabaseURL of the Database resource, copy th
 #### 3. Connect to the EC2 Instance and deploy litellm on the EC2 container
 From the EC2 console, connect to the instance created by the stack (e.g., using SSH).
-Run the following command, replacing <database_url> with the value you copied in step 2
+Run the following command, replacing `<database_url>` with the value you copied in step 2
 ```shell
 docker run --name litellm-proxy \


@ -5,6 +5,7 @@ import Image from '@theme/IdealImage';
 Send an Email to your users when:
 - A Proxy API Key is created for them
 - Their API Key crosses its Budget
+- All Team members of a LiteLLM Team -> when the team crosses its budget
 <Image img={require('../../img/email_notifs.png')} style={{ width: '500px' }}/>


@ -1,3 +1,6 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
 # Model Management
 Add new models + Get model info without restarting proxy.


@ -1,3 +1,5 @@
+import Image from '@theme/IdealImage';
 # LiteLLM Proxy Performance
 ### Throughput - 30% Increase


@ -409,6 +409,28 @@ print(response)
</Tabs>
### Content Policy Fallbacks
Fallback across providers (e.g. from Azure OpenAI to Anthropic) if you hit content policy violation errors.
```yaml
model_list:
- model_name: gpt-3.5-turbo-small
litellm_params:
model: azure/chatgpt-v-2
api_base: os.environ/AZURE_API_BASE
api_key: os.environ/AZURE_API_KEY
api_version: "2023-07-01-preview"
- model_name: claude-opus
litellm_params:
model: claude-3-opus-20240229
api_key: os.environ/ANTHROPIC_API_KEY
litellm_settings:
content_policy_fallbacks: [{"gpt-3.5-turbo-small": ["claude-opus"]}]
```
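A hedged sketch of exercising the fallback config above through a locally running proxy, using the same OpenAI-client pattern shown elsewhere in these docs; the key and base URL are placeholders:

```python
import openai

# Placeholder key/base URL for a local LiteLLM proxy started with the config above.
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

# If the "gpt-3.5-turbo-small" deployment raises a content policy violation,
# the proxy should retry against "claude-opus" per content_policy_fallbacks.
response = client.chat.completions.create(
    model="gpt-3.5-turbo-small",
    messages=[{"role": "user", "content": "Hello!"}],
)
print(response.choices[0].message.content)
```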
### EU-Region Filtering (Pre-Call Checks)
**Before call is made** check if a call is within model context window with **`enable_pre_call_checks: true`**.


@ -46,7 +46,7 @@ Possible values for `budget_duration`
 | `budget_duration="1m"` | every 1 min |
 | `budget_duration="1h"` | every 1 hour |
 | `budget_duration="1d"` | every 1 day |
-| `budget_duration="1mo"` | start of every month |
+| `budget_duration="1mo"` | every 1 month |
 ### 2. Create a key for the `team`


@ -413,6 +413,52 @@ curl 'http://0.0.0.0:4000/key/generate' \
</TabItem>
</Tabs>
### Reset Budgets
Reset budgets across keys/internal users/teams/customers
`budget_duration`: Budget is reset at the end of specified duration. If not set, budget is never reset. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d").
<Tabs>
<TabItem value="users" label="Internal Users">
```bash
curl 'http://0.0.0.0:4000/user/new' \
--header 'Authorization: Bearer <your-master-key>' \
--header 'Content-Type: application/json' \
--data-raw '{
"max_budget": 10,
"budget_duration": "10s" # 👈 KEY CHANGE
}'
```
</TabItem>
<TabItem value="keys" label="Keys">
```bash
curl 'http://0.0.0.0:4000/key/generate' \
--header 'Authorization: Bearer <your-master-key>' \
--header 'Content-Type: application/json' \
--data-raw '{
"max_budget": 10,
"budget_duration": "10s" # 👈 KEY CHANGE
}'
```
</TabItem>
<TabItem value="teams" label="Teams">
```bash
curl 'http://0.0.0.0:4000/team/new' \
--header 'Authorization: Bearer <your-master-key>' \
--header 'Content-Type: application/json' \
--data-raw '{
"max_budget": 10,
"budget_duration": "10s" # 👈 KEY CHANGE
}'
```
</TabItem>
</Tabs>
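For reference, the same `/team/new` call sketched with Python's `requests` library; the master key is a placeholder, and note that `budget_duration` is sent as a JSON string:

```python
import requests

# Placeholder base URL and master key for a local LiteLLM proxy.
resp = requests.post(
    "http://0.0.0.0:4000/team/new",
    headers={
        "Authorization": "Bearer <your-master-key>",
        "Content-Type": "application/json",
    },
    json={"max_budget": 10, "budget_duration": "10s"},  # budget resets every 10 seconds
)
print(resp.json())
```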
## Set Rate Limits
You can set:


@ -790,84 +790,204 @@ If the error is a context window exceeded error, fall back to a larger model gro
 Fallbacks are done in-order - ["gpt-3.5-turbo, "gpt-4", "gpt-4-32k"], will do 'gpt-3.5-turbo' first, then 'gpt-4', etc.
-You can also set 'default_fallbacks', in case a specific model group is misconfigured / bad.
+You can also set `default_fallbacks`, in case a specific model group is misconfigured / bad.
There are 3 types of fallbacks:
- `content_policy_fallbacks`: For litellm.ContentPolicyViolationError - LiteLLM maps content policy violation errors across providers [**See Code**](https://github.com/BerriAI/litellm/blob/89a43c872a1e3084519fb9de159bf52f5447c6c4/litellm/utils.py#L8495C27-L8495C54)
- `context_window_fallbacks`: For litellm.ContextWindowExceededErrors - LiteLLM maps context window error messages across providers [**See Code**](https://github.com/BerriAI/litellm/blob/89a43c872a1e3084519fb9de159bf52f5447c6c4/litellm/utils.py#L8469)
- `fallbacks`: For all remaining errors - e.g. litellm.RateLimitError
**Content Policy Violation Fallback**
Key change:
```python
content_policy_fallbacks=[{"claude-2": ["my-fallback-model"]}]
```
<Tabs>
<TabItem value="sdk" label="SDK">
 ```python
 from litellm import Router
-model_list=[
-    { # list of model deployments
-        "model_name": "azure/gpt-3.5-turbo", # openai model name
-        "litellm_params": { # params for litellm completion/embedding call
-            "model": "azure/chatgpt-v-2",
-            "api_key": "bad-key",
-            "api_version": os.getenv("AZURE_API_VERSION"),
-            "api_base": os.getenv("AZURE_API_BASE")
-        },
-        "tpm": 240000,
-        "rpm": 1800
-    },
-    { # list of model deployments
-        "model_name": "azure/gpt-3.5-turbo-context-fallback", # openai model name
-        "litellm_params": { # params for litellm completion/embedding call
-            "model": "azure/chatgpt-v-2",
-            "api_key": "bad-key",
-            "api_version": os.getenv("AZURE_API_VERSION"),
-            "api_base": os.getenv("AZURE_API_BASE")
-        },
-        "tpm": 240000,
-        "rpm": 1800
-    },
-    {
-        "model_name": "azure/gpt-3.5-turbo", # openai model name
-        "litellm_params": { # params for litellm completion/embedding call
-            "model": "azure/chatgpt-functioncalling",
-            "api_key": "bad-key",
-            "api_version": os.getenv("AZURE_API_VERSION"),
-            "api_base": os.getenv("AZURE_API_BASE")
-        },
-        "tpm": 240000,
-        "rpm": 1800
-    },
-    {
-        "model_name": "gpt-3.5-turbo", # openai model name
-        "litellm_params": { # params for litellm completion/embedding call
-            "model": "gpt-3.5-turbo",
-            "api_key": os.getenv("OPENAI_API_KEY"),
-        },
-        "tpm": 1000000,
-        "rpm": 9000
-    },
-    {
-        "model_name": "gpt-3.5-turbo-16k", # openai model name
-        "litellm_params": { # params for litellm completion/embedding call
-            "model": "gpt-3.5-turbo-16k",
-            "api_key": os.getenv("OPENAI_API_KEY"),
-        },
-        "tpm": 1000000,
-        "rpm": 9000
-    }
-]
-router = Router(model_list=model_list,
-                fallbacks=[{"azure/gpt-3.5-turbo": ["gpt-3.5-turbo"]}],
-                default_fallbacks=["gpt-3.5-turbo-16k"],
-                context_window_fallbacks=[{"azure/gpt-3.5-turbo-context-fallback": ["gpt-3.5-turbo-16k"]}, {"gpt-3.5-turbo": ["gpt-3.5-turbo-16k"]}],
-                set_verbose=True)
-user_message = "Hello, whats the weather in San Francisco??"
-messages = [{"content": user_message, "role": "user"}]
-# normal fallback call
-response = router.completion(model="azure/gpt-3.5-turbo", messages=messages)
-# context window fallback call
-response = router.completion(model="azure/gpt-3.5-turbo-context-fallback", messages=messages)
-print(f"response: {response}")
+router = Router(
+    model_list=[
+        {
+            "model_name": "claude-2",
+            "litellm_params": {
+                "model": "claude-2",
+                "api_key": "",
+                "mock_response": Exception("content filtering policy"),
+            },
+        },
+        {
+            "model_name": "my-fallback-model",
+            "litellm_params": {
+                "model": "claude-2",
+                "api_key": "",
+                "mock_response": "This works!",
+            },
+        },
+    ],
+    content_policy_fallbacks=[{"claude-2": ["my-fallback-model"]}], # 👈 KEY CHANGE
+    # fallbacks=[..], # [OPTIONAL]
+    # context_window_fallbacks=[..], # [OPTIONAL]
+)
+response = router.completion(
+    model="claude-2",
+    messages=[{"role": "user", "content": "Hey, how's it going?"}],
+)
 ```
</TabItem>
<TabItem value="proxy" label="PROXY">
In your proxy config.yaml just add this line 👇
```yaml
router_settings:
content_policy_fallbacks=[{"claude-2": ["my-fallback-model"]}]
```
Start proxy
```bash
litellm --config /path/to/config.yaml
# RUNNING on http://0.0.0.0:4000
```
</TabItem>
</Tabs>
**Context Window Exceeded Fallback**
Key change:
```python
context_window_fallbacks=[{"claude-2": ["my-fallback-model"]}]
```
<Tabs>
<TabItem value="sdk" label="SDK">
```python
from litellm import Router
router = Router(
model_list=[
{
"model_name": "claude-2",
"litellm_params": {
"model": "claude-2",
"api_key": "",
"mock_response": Exception("prompt is too long"),
},
},
{
"model_name": "my-fallback-model",
"litellm_params": {
"model": "claude-2",
"api_key": "",
"mock_response": "This works!",
},
},
],
context_window_fallbacks=[{"claude-2": ["my-fallback-model"]}], # 👈 KEY CHANGE
# fallbacks=[..], # [OPTIONAL]
# content_policy_fallbacks=[..], # [OPTIONAL]
)
response = router.completion(
model="claude-2",
messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">
In your proxy config.yaml just add this line 👇
```yaml
router_settings:
context_window_fallbacks=[{"claude-2": ["my-fallback-model"]}]
```
Start proxy
```bash
litellm --config /path/to/config.yaml
# RUNNING on http://0.0.0.0:4000
```
</TabItem>
</Tabs>
**Regular Fallbacks**
Key change:
```python
fallbacks=[{"claude-2": ["my-fallback-model"]}]
```
<Tabs>
<TabItem value="sdk" label="SDK">
```python
from litellm import Router
router = Router(
model_list=[
{
"model_name": "claude-2",
"litellm_params": {
"model": "claude-2",
"api_key": "",
"mock_response": Exception("this is a rate limit error"),
},
},
{
"model_name": "my-fallback-model",
"litellm_params": {
"model": "claude-2",
"api_key": "",
"mock_response": "This works!",
},
},
],
fallbacks=[{"claude-2": ["my-fallback-model"]}], # 👈 KEY CHANGE
# context_window_fallbacks=[..], # [OPTIONAL]
# content_policy_fallbacks=[..], # [OPTIONAL]
)
response = router.completion(
model="claude-2",
messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">
In your proxy config.yaml just add this line 👇
```yaml
router_settings:
fallbacks=[{"claude-2": ["my-fallback-model"]}]
```
Start proxy
```bash
litellm --config /path/to/config.yaml
# RUNNING on http://0.0.0.0:4000
```
</TabItem>
</Tabs>
### Caching


@ -23,9 +23,13 @@ https://api.together.xyz/playground/chat?model=togethercomputer%2Fllama-2-70b-ch
 model_name = "together_ai/togethercomputer/llama-2-70b-chat"
 response = completion(model=model_name, messages=messages)
 print(response)
+```
 ```
 {'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant', 'content': "\n\nI'm not able to provide real-time weather information. However, I can suggest"}}], 'created': 1691629657.9288375, 'model': 'togethercomputer/llama-2-70b-chat', 'usage': {'prompt_tokens': 9, 'completion_tokens': 17, 'total_tokens': 26}}
+```
 LiteLLM handles the prompt formatting for Together AI's Llama2 models as well, converting your message to the

Binary file not shown (new image, 207 KiB).

File diff suppressed because it is too large.


@ -14,17 +14,17 @@
   "write-heading-ids": "docusaurus write-heading-ids"
 },
 "dependencies": {
-  "@docusaurus/core": "2.4.1",
-  "@docusaurus/plugin-google-gtag": "^2.4.1",
-  "@docusaurus/plugin-ideal-image": "^2.4.1",
-  "@docusaurus/preset-classic": "2.4.1",
+  "@docusaurus/core": "3.1.1",
+  "@docusaurus/plugin-google-gtag": "3.1.1",
+  "@docusaurus/plugin-ideal-image": "3.1.1",
+  "@docusaurus/preset-classic": "3.1.1",
   "@mdx-js/react": "^1.6.22",
   "clsx": "^1.2.1",
   "docusaurus": "^1.14.7",
   "docusaurus-lunr-search": "^2.4.1",
   "prism-react-renderer": "^1.3.5",
-  "react": "^17.0.2",
-  "react-dom": "^17.0.2",
+  "react": "^18.1.0",
+  "react-dom": "^18.1.0",
   "sharp": "^0.32.6",
   "uuid": "^9.0.1"
 },

File diff suppressed because it is too large.


@ -122,236 +122,6 @@ async def ui_get_spend_by_tags(
return {"spend_per_tag": ui_tags}
async def view_spend_logs_from_clickhouse(
api_key=None, user_id=None, request_id=None, start_date=None, end_date=None
):
verbose_logger.debug("Reading logs from Clickhouse")
import os
# if user has setup clickhouse
# TODO: Move this to be a helper function
# querying clickhouse for this data
import clickhouse_connect
from datetime import datetime
port = os.getenv("CLICKHOUSE_PORT")
if port is not None and isinstance(port, str):
port = int(port)
client = clickhouse_connect.get_client(
host=os.getenv("CLICKHOUSE_HOST"),
port=port,
username=os.getenv("CLICKHOUSE_USERNAME", ""),
password=os.getenv("CLICKHOUSE_PASSWORD", ""),
)
if (
start_date is not None
and isinstance(start_date, str)
and end_date is not None
and isinstance(end_date, str)
):
# Convert the date strings to datetime objects
start_date_obj = datetime.strptime(start_date, "%Y-%m-%d")
end_date_obj = datetime.strptime(end_date, "%Y-%m-%d")
# get top spend per day
response = client.query(
f"""
SELECT
toDate(startTime) AS day,
sum(spend) AS total_spend
FROM
spend_logs
WHERE
toDate(startTime) BETWEEN toDate('2024-02-01') AND toDate('2024-02-29')
GROUP BY
day
ORDER BY
total_spend
"""
)
results = []
result_rows = list(response.result_rows)
for response in result_rows:
current_row = {}
current_row["users"] = {"example": 0.0}
current_row["models"] = {}
current_row["spend"] = float(response[1])
current_row["startTime"] = str(response[0])
# stubbed api_key
current_row[""] = 0.0 # type: ignore
results.append(current_row)
return results
else:
# check if spend logs exist, if it does then return last 10 logs, sorted in descending order of startTime
response = client.query(
"""
SELECT
*
FROM
default.spend_logs
ORDER BY
startTime DESC
LIMIT
10
"""
)
# get size of spend logs
num_rows = client.query("SELECT count(*) FROM default.spend_logs")
num_rows = num_rows.result_rows[0][0]
# safely access num_rows.result_rows[0][0]
if num_rows is None:
num_rows = 0
raw_rows = list(response.result_rows)
response_data = {
"logs": raw_rows,
"log_count": num_rows,
}
return response_data
def _create_clickhouse_material_views(client=None, table_names=[]):
# Create Materialized Views if they don't exist
# Materialized Views send new inserted rows to the aggregate tables
verbose_logger.debug("Clickhouse: Creating Materialized Views")
if "daily_aggregated_spend_per_model_mv" not in table_names:
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_model_mv")
client.command(
"""
CREATE MATERIALIZED VIEW daily_aggregated_spend_per_model_mv
TO daily_aggregated_spend_per_model
AS
SELECT
toDate(startTime) as day,
sumState(spend) AS DailySpend,
model as model
FROM spend_logs
GROUP BY
day, model
"""
)
if "daily_aggregated_spend_per_api_key_mv" not in table_names:
verbose_logger.debug(
"Clickhouse: Creating daily_aggregated_spend_per_api_key_mv"
)
client.command(
"""
CREATE MATERIALIZED VIEW daily_aggregated_spend_per_api_key_mv
TO daily_aggregated_spend_per_api_key
AS
SELECT
toDate(startTime) as day,
sumState(spend) AS DailySpend,
api_key as api_key
FROM spend_logs
GROUP BY
day, api_key
"""
)
if "daily_aggregated_spend_per_user_mv" not in table_names:
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_user_mv")
client.command(
"""
CREATE MATERIALIZED VIEW daily_aggregated_spend_per_user_mv
TO daily_aggregated_spend_per_user
AS
SELECT
toDate(startTime) as day,
sumState(spend) AS DailySpend,
user as user
FROM spend_logs
GROUP BY
day, user
"""
)
if "daily_aggregated_spend_mv" not in table_names:
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_mv")
client.command(
"""
CREATE MATERIALIZED VIEW daily_aggregated_spend_mv
TO daily_aggregated_spend
AS
SELECT
toDate(startTime) as day,
sumState(spend) AS DailySpend
FROM spend_logs
GROUP BY
day
"""
)
def _create_clickhouse_aggregate_tables(client=None, table_names=[]):
# Basic Logging works without this - this is only used for low latency reporting apis
verbose_logger.debug("Clickhouse: Creating Aggregate Tables")
# Create Aggregeate Tables if they don't exist
if "daily_aggregated_spend_per_model" not in table_names:
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_model")
client.command(
"""
CREATE TABLE daily_aggregated_spend_per_model
(
`day` Date,
`DailySpend` AggregateFunction(sum, Float64),
`model` String
)
ENGINE = SummingMergeTree()
ORDER BY (day, model);
"""
)
if "daily_aggregated_spend_per_api_key" not in table_names:
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_api_key")
client.command(
"""
CREATE TABLE daily_aggregated_spend_per_api_key
(
`day` Date,
`DailySpend` AggregateFunction(sum, Float64),
`api_key` String
)
ENGINE = SummingMergeTree()
ORDER BY (day, api_key);
"""
)
if "daily_aggregated_spend_per_user" not in table_names:
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_user")
client.command(
"""
CREATE TABLE daily_aggregated_spend_per_user
(
`day` Date,
`DailySpend` AggregateFunction(sum, Float64),
`user` String
)
ENGINE = SummingMergeTree()
ORDER BY (day, user);
"""
)
if "daily_aggregated_spend" not in table_names:
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend")
client.command(
"""
CREATE TABLE daily_aggregated_spend
(
`day` Date,
`DailySpend` AggregateFunction(sum, Float64),
)
ENGINE = SummingMergeTree()
ORDER BY (day);
"""
)
return
def _forecast_daily_cost(data: list):
import requests  # type: ignore
from datetime import datetime, timedelta


@ -240,6 +240,7 @@ num_retries: Optional[int] = None # per model endpoint
 default_fallbacks: Optional[List] = None
 fallbacks: Optional[List] = None
 context_window_fallbacks: Optional[List] = None
+content_policy_fallbacks: Optional[List] = None
 allowed_fails: int = 0
 num_retries_per_request: Optional[int] = (
     None  # for the request overall (incl. fallbacks + model retries)
@ -714,6 +715,7 @@ openai_image_generation_models = ["dall-e-2", "dall-e-3"]
 from .timeout import timeout
 from .cost_calculator import completion_cost
+from litellm.litellm_core_utils.litellm_logging import Logging
 from .utils import (
     client,
     exception_type,
@ -722,12 +724,10 @@ from .utils import (
     token_counter,
     create_pretrained_tokenizer,
     create_tokenizer,
-    cost_per_token,
     supports_function_calling,
     supports_parallel_function_calling,
     supports_vision,
     get_litellm_params,
-    Logging,
     acreate,
     get_model_list,
     get_max_tokens,
@ -747,9 +747,10 @@ from .utils import (
     get_first_chars_messages,
     ModelResponse,
     ImageResponse,
-    ImageObject,
     get_provider_fields,
 )
+from .types.utils import ImageObject
 from .llms.huggingface_restapi import HuggingfaceConfig
 from .llms.anthropic import AnthropicConfig
 from .llms.databricks import DatabricksConfig, DatabricksEmbeddingConfig
@ -826,4 +827,4 @@ from .router import Router
 from .assistants.main import *
 from .batches.main import *
 from .scheduler import *
-from .cost_calculator import response_cost_calculator
+from .cost_calculator import response_cost_calculator, cost_per_token


@ -3,10 +3,17 @@ from logging import Formatter
 import traceback
 set_verbose = False
+if set_verbose is True:
+    logging.warning(
+        "`litellm.set_verbose` is deprecated. Please set `os.environ['LITELLM_LOG'] = 'DEBUG'` for debug logs."
+    )
 json_logs = bool(os.getenv("JSON_LOGS", False))
 # Create a handler for the logger (you may need to adapt this based on your needs)
+log_level = os.getenv("LITELLM_LOG", "ERROR")
+numeric_level: str = getattr(logging, log_level.upper())
 handler = logging.StreamHandler()
-handler.setLevel(logging.DEBUG)
+handler.setLevel(numeric_level)
 class JsonFormatter(Formatter):
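A small sketch of how the new `LITELLM_LOG` lookup above is meant to be used, per the deprecation notice it prints; the value here is illustrative:

```python
import os

# Set the level before importing litellm so the stream handler picks it up
# (defaults to ERROR when LITELLM_LOG is unset).
os.environ["LITELLM_LOG"] = "DEBUG"

import litellm  # noqa: E402 - imported after the env var on purpose
```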


@ -1,6 +1,6 @@
 # What is this?
 ## File for 'response_cost' calculation in Logging
-from typing import Optional, Union, Literal, List
+from typing import Optional, Union, Literal, List, Tuple
 import litellm._logging
 from litellm.utils import (
     ModelResponse,
@ -9,7 +9,6 @@ from litellm.utils import (
     TranscriptionResponse,
     TextCompletionResponse,
     CallTypes,
-    cost_per_token,
     print_verbose,
     CostPerToken,
     token_counter,
@ -18,6 +17,224 @@ import litellm
 from litellm import verbose_logger
def _cost_per_token_custom_pricing_helper(
prompt_tokens=0,
completion_tokens=0,
response_time_ms=None,
### CUSTOM PRICING ###
custom_cost_per_token: Optional[CostPerToken] = None,
custom_cost_per_second: Optional[float] = None,
) -> Optional[Tuple[float, float]]:
"""Internal helper function for calculating cost, if custom pricing given"""
if custom_cost_per_token is None and custom_cost_per_second is None:
return None
if custom_cost_per_token is not None:
input_cost = custom_cost_per_token["input_cost_per_token"] * prompt_tokens
output_cost = custom_cost_per_token["output_cost_per_token"] * completion_tokens
return input_cost, output_cost
elif custom_cost_per_second is not None:
output_cost = custom_cost_per_second * response_time_ms / 1000 # type: ignore
return 0, output_cost
return None
def cost_per_token(
model: str = "",
prompt_tokens=0,
completion_tokens=0,
response_time_ms=None,
custom_llm_provider=None,
region_name=None,
### CUSTOM PRICING ###
custom_cost_per_token: Optional[CostPerToken] = None,
custom_cost_per_second: Optional[float] = None,
) -> Tuple[float, float]:
"""
Calculates the cost per token for a given model, prompt tokens, and completion tokens.
Parameters:
model (str): The name of the model to use. Default is ""
prompt_tokens (int): The number of tokens in the prompt.
completion_tokens (int): The number of tokens in the completion.
response_time (float): The amount of time, in milliseconds, it took the call to complete.
custom_llm_provider (str): The llm provider to whom the call was made (see init.py for full list)
custom_cost_per_token: Optional[CostPerToken]: the cost per input + output token for the llm api call.
custom_cost_per_second: Optional[float]: the cost per second for the llm api call.
Returns:
tuple: A tuple containing the cost in USD dollars for prompt tokens and completion tokens, respectively.
"""
if model is None:
raise Exception("Invalid arg. Model cannot be none.")
## CUSTOM PRICING ##
response_cost = _cost_per_token_custom_pricing_helper(
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
response_time_ms=response_time_ms,
custom_cost_per_second=custom_cost_per_second,
custom_cost_per_token=custom_cost_per_token,
)
if response_cost is not None:
return response_cost[0], response_cost[1]
# given
prompt_tokens_cost_usd_dollar: float = 0
completion_tokens_cost_usd_dollar: float = 0
model_cost_ref = litellm.model_cost
model_with_provider = model
if custom_llm_provider is not None:
model_with_provider = custom_llm_provider + "/" + model
if region_name is not None:
model_with_provider_and_region = (
f"{custom_llm_provider}/{region_name}/{model}"
)
if (
model_with_provider_and_region in model_cost_ref
): # use region based pricing, if it's available
model_with_provider = model_with_provider_and_region
model_without_prefix = model
model_parts = model.split("/")
if len(model_parts) > 1:
model_without_prefix = model_parts[1]
else:
model_without_prefix = model
"""
Code block that formats model to lookup in litellm.model_cost
Option1. model = "bedrock/ap-northeast-1/anthropic.claude-instant-v1". This is the most accurate since it is region based. Should always be option 1
Option2. model = "openai/gpt-4" - model = provider/model
Option3. model = "anthropic.claude-3" - model = model
"""
if (
model_with_provider in model_cost_ref
): # Option 2. use model with provider, model = "openai/gpt-4"
model = model_with_provider
elif model in model_cost_ref: # Option 1. use model passed, model="gpt-4"
model = model
elif (
model_without_prefix in model_cost_ref
): # Option 3. if user passed model="bedrock/anthropic.claude-3", use model="anthropic.claude-3"
model = model_without_prefix
# see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
print_verbose(f"Looking up model={model} in model_cost_map")
if model in model_cost_ref:
print_verbose(f"Success: model={model} in model_cost_map")
print_verbose(
f"prompt_tokens={prompt_tokens}; completion_tokens={completion_tokens}"
)
if (
model_cost_ref[model].get("input_cost_per_token", None) is not None
and model_cost_ref[model].get("output_cost_per_token", None) is not None
):
## COST PER TOKEN ##
prompt_tokens_cost_usd_dollar = (
model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
)
completion_tokens_cost_usd_dollar = (
model_cost_ref[model]["output_cost_per_token"] * completion_tokens
)
elif (
model_cost_ref[model].get("output_cost_per_second", None) is not None
and response_time_ms is not None
):
print_verbose(
f"For model={model} - output_cost_per_second: {model_cost_ref[model].get('output_cost_per_second')}; response time: {response_time_ms}"
)
## COST PER SECOND ##
prompt_tokens_cost_usd_dollar = 0
completion_tokens_cost_usd_dollar = (
model_cost_ref[model]["output_cost_per_second"]
* response_time_ms
/ 1000
)
elif (
model_cost_ref[model].get("input_cost_per_second", None) is not None
and response_time_ms is not None
):
print_verbose(
f"For model={model} - input_cost_per_second: {model_cost_ref[model].get('input_cost_per_second')}; response time: {response_time_ms}"
)
## COST PER SECOND ##
prompt_tokens_cost_usd_dollar = (
model_cost_ref[model]["input_cost_per_second"] * response_time_ms / 1000
)
completion_tokens_cost_usd_dollar = 0.0
print_verbose(
f"Returned custom cost for model={model} - prompt_tokens_cost_usd_dollar: {prompt_tokens_cost_usd_dollar}, completion_tokens_cost_usd_dollar: {completion_tokens_cost_usd_dollar}"
)
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
elif "ft:gpt-3.5-turbo" in model:
print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM")
# fuzzy match ft:gpt-3.5-turbo:abcd-id-cool-litellm
prompt_tokens_cost_usd_dollar = (
model_cost_ref["ft:gpt-3.5-turbo"]["input_cost_per_token"] * prompt_tokens
)
completion_tokens_cost_usd_dollar = (
model_cost_ref["ft:gpt-3.5-turbo"]["output_cost_per_token"]
* completion_tokens
)
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
elif "ft:davinci-002" in model:
print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM")
# fuzzy match ft:davinci-002:abcd-id-cool-litellm
prompt_tokens_cost_usd_dollar = (
model_cost_ref["ft:davinci-002"]["input_cost_per_token"] * prompt_tokens
)
completion_tokens_cost_usd_dollar = (
model_cost_ref["ft:davinci-002"]["output_cost_per_token"]
* completion_tokens
)
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
elif "ft:babbage-002" in model:
print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM")
# fuzzy match ft:babbage-002:abcd-id-cool-litellm
prompt_tokens_cost_usd_dollar = (
model_cost_ref["ft:babbage-002"]["input_cost_per_token"] * prompt_tokens
)
completion_tokens_cost_usd_dollar = (
model_cost_ref["ft:babbage-002"]["output_cost_per_token"]
* completion_tokens
)
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
elif model in litellm.azure_llms:
verbose_logger.debug(f"Cost Tracking: {model} is an Azure LLM")
model = litellm.azure_llms[model]
verbose_logger.debug(
f"applying cost={model_cost_ref[model]['input_cost_per_token']} for prompt_tokens={prompt_tokens}"
)
prompt_tokens_cost_usd_dollar = (
model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
)
verbose_logger.debug(
f"applying cost={model_cost_ref[model]['output_cost_per_token']} for completion_tokens={completion_tokens}"
)
completion_tokens_cost_usd_dollar = (
model_cost_ref[model]["output_cost_per_token"] * completion_tokens
)
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
elif model in litellm.azure_embedding_models:
verbose_logger.debug(f"Cost Tracking: {model} is an Azure Embedding Model")
model = litellm.azure_embedding_models[model]
prompt_tokens_cost_usd_dollar = (
model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
)
completion_tokens_cost_usd_dollar = (
model_cost_ref[model]["output_cost_per_token"] * completion_tokens
)
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
else:
# if model is not in model_prices_and_context_window.json. Raise an exception-let users know
error_str = f"Model not in model_prices_and_context_window.json. You passed model={model}. Register pricing for model - https://docs.litellm.ai/docs/proxy/custom_pricing\n"
raise litellm.exceptions.NotFoundError( # type: ignore
message=error_str,
model=model,
llm_provider="",
)
# Extract the number of billion parameters from the model name
# only used for together_computer LLMs
def get_model_params_and_category(model_name) -> str:
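A minimal usage sketch for the relocated `cost_per_token` helper above (re-exported from the top-level package per the `__init__.py` change earlier in this diff); the token counts are made up:

```python
import litellm

# Returns (prompt_cost_usd, completion_cost_usd) for a model present in litellm.model_cost;
# unknown models raise litellm.exceptions.NotFoundError.
prompt_cost, completion_cost = litellm.cost_per_token(
    model="gpt-3.5-turbo",
    prompt_tokens=100,
    completion_tokens=50,
)
print(prompt_cost, completion_cost)
```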


@ -324,7 +324,7 @@ class ContextWindowExceededError(BadRequestError): # type: ignore
         message,
         model,
         llm_provider,
-        response: httpx.Response,
+        response: Optional[httpx.Response] = None,
         litellm_debug_info: Optional[str] = None,
     ):
         self.status_code = 400
@ -332,11 +332,13 @@ class ContextWindowExceededError(BadRequestError): # type: ignore
         self.model = model
         self.llm_provider = llm_provider
         self.litellm_debug_info = litellm_debug_info
+        request = httpx.Request(method="POST", url="https://api.openai.com/v1")
+        self.response = response or httpx.Response(status_code=400, request=request)
         super().__init__(
             message=self.message,
             model=self.model,  # type: ignore
             llm_provider=self.llm_provider,  # type: ignore
-            response=response,
+            response=self.response,
             litellm_debug_info=self.litellm_debug_info,
         )  # Call the base class constructor with the parameters it needs
@ -407,7 +409,7 @@ class ContentPolicyViolationError(BadRequestError): # type: ignore
         message,
         model,
         llm_provider,
-        response: httpx.Response,
+        response: Optional[httpx.Response] = None,
         litellm_debug_info: Optional[str] = None,
     ):
         self.status_code = 400
@ -415,11 +417,13 @@ class ContentPolicyViolationError(BadRequestError): # type: ignore
         self.model = model
         self.llm_provider = llm_provider
         self.litellm_debug_info = litellm_debug_info
+        request = httpx.Request(method="POST", url="https://api.openai.com/v1")
+        self.response = response or httpx.Response(status_code=500, request=request)
         super().__init__(
             message=self.message,
             model=self.model,  # type: ignore
             llm_provider=self.llm_provider,  # type: ignore
-            response=response,
+            response=self.response,
             litellm_debug_info=self.litellm_debug_info,
         )  # Call the base class constructor with the parameters it needs
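A small sketch of what the change above enables: these errors can now be constructed (e.g. in tests or mocked fallbacks) without an `httpx.Response`, since a default one is attached; values are illustrative and assume the exception classes remain importable from the top-level `litellm` package:

```python
import litellm

# No `response=` argument required any more; a default httpx.Response is synthesized.
err = litellm.ContextWindowExceededError(
    message="prompt is too long",
    model="claude-2",
    llm_provider="anthropic",
)
print(err.status_code, err.response.status_code)  # 400 400
```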


@ -226,14 +226,6 @@ def _start_clickhouse():
response = client.query("DESCRIBE default.spend_logs")
verbose_logger.debug(f"spend logs schema ={response.result_rows}")
# RUN Enterprise Clickhouse Setup
# TLDR: For Enterprise - we create views / aggregate tables for low latency reporting APIs
from litellm.proxy.enterprise.utils import _create_clickhouse_aggregate_tables
from litellm.proxy.enterprise.utils import _create_clickhouse_material_views
_create_clickhouse_aggregate_tables(client=client, table_names=table_names)
_create_clickhouse_material_views(client=client, table_names=table_names)
class ClickhouseLogger:
# Class variables or attributes


@ -0,0 +1,136 @@
"""
Functions for sending Email Alerts
"""
import os
from typing import Optional, List
from litellm.proxy._types import WebhookEvent
import asyncio
from litellm._logging import verbose_logger, verbose_proxy_logger
# we use this for the email header, please send a test email if you change this. verify it looks good on email
LITELLM_LOGO_URL = "https://litellm-listing.s3.amazonaws.com/litellm_logo.png"
LITELLM_SUPPORT_CONTACT = "support@berri.ai"
async def get_all_team_member_emails(team_id: Optional[str] = None) -> list:
verbose_logger.debug(
"Email Alerting: Getting all team members for team_id=%s", team_id
)
if team_id is None:
return []
from litellm.proxy.proxy_server import premium_user, prisma_client
if prisma_client is None:
raise Exception("Not connected to DB!")
team_row = await prisma_client.db.litellm_teamtable.find_unique(
where={
"team_id": team_id,
}
)
if team_row is None:
return []
_team_members = team_row.members_with_roles
verbose_logger.debug(
"Email Alerting: Got team members for team_id=%s Team Members: %s",
team_id,
_team_members,
)
_team_member_user_ids: List[str] = []
for member in _team_members:
if member and isinstance(member, dict) and member.get("user_id") is not None:
_team_member_user_ids.append(member.get("user_id"))
sql_query = """
SELECT user_email
FROM "LiteLLM_UserTable"
WHERE user_id = ANY($1::TEXT[]);
"""
_result = await prisma_client.db.query_raw(sql_query, _team_member_user_ids)
verbose_logger.debug("Email Alerting: Got all Emails for team, emails=%s", _result)
if _result is None:
return []
emails = []
for user in _result:
if user and isinstance(user, dict) and user.get("user_email", None) is not None:
emails.append(user.get("user_email"))
return emails
async def send_team_budget_alert(webhook_event: WebhookEvent) -> bool:
"""
Send an Email Alert to All Team Members when the Team Budget is crossed
Returns -> True if sent, False if not.
"""
from litellm.proxy.utils import send_email
from litellm.proxy.proxy_server import premium_user, prisma_client
_team_id = webhook_event.team_id
team_alias = webhook_event.team_alias
verbose_logger.debug(
"Email Alerting: Sending Team Budget Alert for team=%s", team_alias
)
email_logo_url = os.getenv("SMTP_SENDER_LOGO", os.getenv("EMAIL_LOGO_URL", None))
email_support_contact = os.getenv("EMAIL_SUPPORT_CONTACT", None)
# await self._check_if_using_premium_email_feature(
# premium_user, email_logo_url, email_support_contact
# )
if email_logo_url is None:
email_logo_url = LITELLM_LOGO_URL
if email_support_contact is None:
email_support_contact = LITELLM_SUPPORT_CONTACT
recipient_emails = await get_all_team_member_emails(_team_id)
recipient_emails_str: str = ",".join(recipient_emails)
verbose_logger.debug(
"Email Alerting: Sending team budget alert to %s", recipient_emails_str
)
event_name = webhook_event.event_message
max_budget = webhook_event.max_budget
email_html_content = "Alert from LiteLLM Server"
if recipient_emails_str is None:
verbose_proxy_logger.error(
"Email Alerting: Trying to send email alert to no recipient, got recipient_emails=%s",
recipient_emails_str,
)
email_html_content = f"""
<img src="{email_logo_url}" alt="LiteLLM Logo" width="150" height="50" /> <br/><br/><br/>
Budget Crossed for Team <b> {team_alias} </b> <br/> <br/>
Your Teams LLM API usage has crossed it's <b> budget of ${max_budget} </b>, current spend is <b>${webhook_event.spend}</b><br /> <br />
API requests will be rejected until either (a) you increase your budget or (b) your budget gets reset <br /> <br />
If you have any questions, please send an email to {email_support_contact} <br /> <br />
Best, <br />
The LiteLLM team <br />
"""
email_event = {
"to": recipient_emails_str,
"subject": f"LiteLLM {event_name} for Team {team_alias}",
"html": email_html_content,
}
await send_email(
receiver_email=email_event["to"],
subject=email_event["subject"],
html=email_event["html"],
)
return False


@ -330,6 +330,7 @@ class SlackAlerting(CustomLogger):
messages = "Message not logged. litellm.redact_messages_in_exceptions=True" messages = "Message not logged. litellm.redact_messages_in_exceptions=True"
request_info = f"\nRequest Model: `{model}`\nAPI Base: `{api_base}`\nMessages: `{messages}`" request_info = f"\nRequest Model: `{model}`\nAPI Base: `{api_base}`\nMessages: `{messages}`"
slow_message = f"`Responses are slow - {round(time_difference_float,2)}s response time > Alerting threshold: {self.alerting_threshold}s`" slow_message = f"`Responses are slow - {round(time_difference_float,2)}s response time > Alerting threshold: {self.alerting_threshold}s`"
alerting_metadata: dict = {}
if time_difference_float > self.alerting_threshold: if time_difference_float > self.alerting_threshold:
# add deployment latencies to alert # add deployment latencies to alert
if ( if (
@ -337,7 +338,7 @@ class SlackAlerting(CustomLogger):
and "litellm_params" in kwargs and "litellm_params" in kwargs
and "metadata" in kwargs["litellm_params"] and "metadata" in kwargs["litellm_params"]
): ):
_metadata = kwargs["litellm_params"]["metadata"] _metadata: dict = kwargs["litellm_params"]["metadata"]
request_info = litellm.utils._add_key_name_and_team_to_alert( request_info = litellm.utils._add_key_name_and_team_to_alert(
request_info=request_info, metadata=_metadata request_info=request_info, metadata=_metadata
) )
@ -349,10 +350,14 @@ class SlackAlerting(CustomLogger):
request_info += ( request_info += (
f"\nAvailable Deployment Latencies\n{_deployment_latency_map}" f"\nAvailable Deployment Latencies\n{_deployment_latency_map}"
) )
if "alerting_metadata" in _metadata:
alerting_metadata = _metadata["alerting_metadata"]
await self.send_alert( await self.send_alert(
message=slow_message + request_info, message=slow_message + request_info,
level="Low", level="Low",
alert_type="llm_too_slow", alert_type="llm_too_slow",
alerting_metadata=alerting_metadata,
) )
async def async_update_daily_reports( async def async_update_daily_reports(
@ -540,7 +545,12 @@ class SlackAlerting(CustomLogger):
message += f"\n\nNext Run is at: `{time.time() + self.alerting_args.daily_report_frequency}`s" message += f"\n\nNext Run is at: `{time.time() + self.alerting_args.daily_report_frequency}`s"
# send alert # send alert
-await self.send_alert(message=message, level="Low", alert_type="daily_reports")
+await self.send_alert(
+    message=message,
+    level="Low",
+    alert_type="daily_reports",
+    alerting_metadata={},
+)
return True return True
@ -582,6 +592,7 @@ class SlackAlerting(CustomLogger):
await asyncio.sleep( await asyncio.sleep(
self.alerting_threshold self.alerting_threshold
) # Set it to 5 minutes - i'd imagine this might be different for streaming, non-streaming, non-completion (embedding + img) requests ) # Set it to 5 minutes - i'd imagine this might be different for streaming, non-streaming, non-completion (embedding + img) requests
alerting_metadata: dict = {}
if ( if (
request_data is not None request_data is not None
and request_data.get("litellm_status", "") != "success" and request_data.get("litellm_status", "") != "success"
@ -606,7 +617,7 @@ class SlackAlerting(CustomLogger):
): ):
# In hanging requests sometime it has not made it to the point where the deployment is passed to the `request_data`` # In hanging requests sometime it has not made it to the point where the deployment is passed to the `request_data``
# in that case we fallback to the api base set in the request metadata # in that case we fallback to the api base set in the request metadata
_metadata = request_data["metadata"] _metadata: dict = request_data["metadata"]
_api_base = _metadata.get("api_base", "") _api_base = _metadata.get("api_base", "")
request_info = litellm.utils._add_key_name_and_team_to_alert( request_info = litellm.utils._add_key_name_and_team_to_alert(
@ -615,6 +626,9 @@ class SlackAlerting(CustomLogger):
if _api_base is None: if _api_base is None:
_api_base = "" _api_base = ""
if "alerting_metadata" in _metadata:
alerting_metadata = _metadata["alerting_metadata"]
request_info += f"\nAPI Base: `{_api_base}`" request_info += f"\nAPI Base: `{_api_base}`"
# only alert hanging responses if they have not been marked as success # only alert hanging responses if they have not been marked as success
alerting_message = ( alerting_message = (
@ -640,6 +654,7 @@ class SlackAlerting(CustomLogger):
message=alerting_message + request_info, message=alerting_message + request_info,
level="Medium", level="Medium",
alert_type="llm_requests_hanging", alert_type="llm_requests_hanging",
alerting_metadata=alerting_metadata,
) )
async def failed_tracking_alert(self, error_message: str): async def failed_tracking_alert(self, error_message: str):
@ -650,7 +665,10 @@ class SlackAlerting(CustomLogger):
result = await _cache.async_get_cache(key=_cache_key) result = await _cache.async_get_cache(key=_cache_key)
if result is None: if result is None:
await self.send_alert( await self.send_alert(
message=message, level="High", alert_type="budget_alerts" message=message,
level="High",
alert_type="budget_alerts",
alerting_metadata={},
) )
await _cache.async_set_cache( await _cache.async_set_cache(
key=_cache_key, key=_cache_key,
@ -680,7 +698,7 @@ class SlackAlerting(CustomLogger):
return return
if "budget_alerts" not in self.alert_types: if "budget_alerts" not in self.alert_types:
return return
_id: str = "default_id" # used for caching _id: Optional[str] = "default_id" # used for caching
user_info_json = user_info.model_dump(exclude_none=True) user_info_json = user_info.model_dump(exclude_none=True)
for k, v in user_info_json.items(): for k, v in user_info_json.items():
user_info_str = "\n{}: {}\n".format(k, v) user_info_str = "\n{}: {}\n".format(k, v)
@ -751,6 +769,7 @@ class SlackAlerting(CustomLogger):
level="High", level="High",
alert_type="budget_alerts", alert_type="budget_alerts",
user_info=webhook_event, user_info=webhook_event,
alerting_metadata={},
) )
await _cache.async_set_cache( await _cache.async_set_cache(
key=_cache_key, key=_cache_key,
@ -769,7 +788,13 @@ class SlackAlerting(CustomLogger):
response_cost: Optional[float], response_cost: Optional[float],
max_budget: Optional[float], max_budget: Optional[float],
): ):
-if end_user_id is not None and token is not None and response_cost is not None:
+if (
+    self.alerting is not None
+    and "webhook" in self.alerting
+    and end_user_id is not None
+    and token is not None
+    and response_cost is not None
+):
# log customer spend # log customer spend
event = WebhookEvent( event = WebhookEvent(
spend=response_cost, spend=response_cost,
@ -941,7 +966,10 @@ class SlackAlerting(CustomLogger):
) )
# send minor alert # send minor alert
await self.send_alert( await self.send_alert(
message=msg, level="Medium", alert_type="outage_alerts" message=msg,
level="Medium",
alert_type="outage_alerts",
alerting_metadata={},
) )
# set to true # set to true
outage_value["minor_alert_sent"] = True outage_value["minor_alert_sent"] = True
@ -963,7 +991,12 @@ class SlackAlerting(CustomLogger):
) )
# send minor alert # send minor alert
await self.send_alert(message=msg, level="High", alert_type="outage_alerts") await self.send_alert(
message=msg,
level="High",
alert_type="outage_alerts",
alerting_metadata={},
)
# set to true # set to true
outage_value["major_alert_sent"] = True outage_value["major_alert_sent"] = True
@ -1062,7 +1095,10 @@ class SlackAlerting(CustomLogger):
) )
# send minor alert # send minor alert
await self.send_alert( await self.send_alert(
message=msg, level="Medium", alert_type="outage_alerts" message=msg,
level="Medium",
alert_type="outage_alerts",
alerting_metadata={},
) )
# set to true # set to true
outage_value["minor_alert_sent"] = True outage_value["minor_alert_sent"] = True
@ -1081,7 +1117,10 @@ class SlackAlerting(CustomLogger):
) )
# send minor alert # send minor alert
await self.send_alert( await self.send_alert(
message=msg, level="High", alert_type="outage_alerts" message=msg,
level="High",
alert_type="outage_alerts",
alerting_metadata={},
) )
# set to true # set to true
outage_value["major_alert_sent"] = True outage_value["major_alert_sent"] = True
@ -1143,7 +1182,10 @@ Model Info:
""" """
alert_val = self.send_alert( alert_val = self.send_alert(
message=message, level="Low", alert_type="new_model_added" message=message,
level="Low",
alert_type="new_model_added",
alerting_metadata={},
) )
if alert_val is not None and asyncio.iscoroutine(alert_val): if alert_val is not None and asyncio.iscoroutine(alert_val):
@ -1159,6 +1201,9 @@ Model Info:
Currently only implemented for budget alerts Currently only implemented for budget alerts
Returns -> True if sent, False if not. Returns -> True if sent, False if not.
Raises Exception
- if WEBHOOK_URL is not set
""" """
webhook_url = os.getenv("WEBHOOK_URL", None) webhook_url = os.getenv("WEBHOOK_URL", None)
@ -1297,7 +1342,9 @@ Model Info:
verbose_proxy_logger.error("Error sending email alert %s", str(e)) verbose_proxy_logger.error("Error sending email alert %s", str(e))
return False return False
-async def send_email_alert_using_smtp(self, webhook_event: WebhookEvent) -> bool:
+async def send_email_alert_using_smtp(
+    self, webhook_event: WebhookEvent, alert_type: str
+) -> bool:
""" """
Sends structured Email alert to an SMTP server Sends structured Email alert to an SMTP server
@ -1306,7 +1353,6 @@ Model Info:
Returns -> True if sent, False if not. Returns -> True if sent, False if not.
""" """
from litellm.proxy.utils import send_email from litellm.proxy.utils import send_email
from litellm.proxy.proxy_server import premium_user, prisma_client from litellm.proxy.proxy_server import premium_user, prisma_client
email_logo_url = os.getenv( email_logo_url = os.getenv(
@ -1360,6 +1406,10 @@ Model Info:
subject=email_event["subject"], subject=email_event["subject"],
html=email_event["html"], html=email_event["html"],
) )
if webhook_event.event_group == "team":
from litellm.integrations.email_alerting import send_team_budget_alert
await send_team_budget_alert(webhook_event=webhook_event)
return False return False
@ -1368,6 +1418,7 @@ Model Info:
message: str, message: str,
level: Literal["Low", "Medium", "High"], level: Literal["Low", "Medium", "High"],
alert_type: Literal[AlertType], alert_type: Literal[AlertType],
alerting_metadata: dict,
user_info: Optional[WebhookEvent] = None, user_info: Optional[WebhookEvent] = None,
**kwargs, **kwargs,
): ):
@ -1401,7 +1452,9 @@ Model Info:
and user_info is not None and user_info is not None
): ):
# only send budget alerts over Email # only send budget alerts over Email
-await self.send_email_alert_using_smtp(webhook_event=user_info)
+await self.send_email_alert_using_smtp(
+    webhook_event=user_info, alert_type=alert_type
+)
if "slack" not in self.alerting: if "slack" not in self.alerting:
return return
@ -1425,6 +1478,9 @@ Model Info:
if kwargs: if kwargs:
for key, value in kwargs.items(): for key, value in kwargs.items():
formatted_message += f"\n\n{key}: `{value}`\n\n" formatted_message += f"\n\n{key}: `{value}`\n\n"
if alerting_metadata:
for key, value in alerting_metadata.items():
formatted_message += f"\n\n*Alerting Metadata*: \n{key}: `{value}`\n\n"
if _proxy_base_url is not None: if _proxy_base_url is not None:
formatted_message += f"\n\nProxy URL: `{_proxy_base_url}`" formatted_message += f"\n\nProxy URL: `{_proxy_base_url}`"
@ -1440,7 +1496,7 @@ Model Info:
slack_webhook_url = os.getenv("SLACK_WEBHOOK_URL", None) slack_webhook_url = os.getenv("SLACK_WEBHOOK_URL", None)
if slack_webhook_url is None: if slack_webhook_url is None:
raise Exception("Missing SLACK_WEBHOOK_URL from environment") raise ValueError("Missing SLACK_WEBHOOK_URL from environment")
payload = {"text": formatted_message} payload = {"text": formatted_message}
headers = {"Content-type": "application/json"} headers = {"Content-type": "application/json"}
@ -1453,7 +1509,7 @@ Model Info:
pass pass
else: else:
verbose_proxy_logger.debug( verbose_proxy_logger.debug(
"Error sending slack alert. Error=", response.text "Error sending slack alert. Error={}".format(response.text)
) )
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time): async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
@ -1622,6 +1678,7 @@ Model Info:
message=_weekly_spend_message, message=_weekly_spend_message,
level="Low", level="Low",
alert_type="spend_reports", alert_type="spend_reports",
alerting_metadata={},
) )
except Exception as e: except Exception as e:
verbose_proxy_logger.error("Error sending weekly spend report", e) verbose_proxy_logger.error("Error sending weekly spend report", e)
@ -1673,6 +1730,7 @@ Model Info:
message=_spend_message, message=_spend_message,
level="Low", level="Low",
alert_type="spend_reports", alert_type="spend_reports",
alerting_metadata={},
) )
except Exception as e: except Exception as e:
verbose_proxy_logger.error("Error sending weekly spend report", e) verbose_proxy_logger.error("Error sending weekly spend report", e)

View file

@ -0,0 +1,41 @@
# What is this?
## Helper utilities for the model response objects
def map_finish_reason(
finish_reason: str,
): # openai supports 5 stop sequences - 'stop', 'length', 'function_call', 'content_filter', 'null'
# anthropic mapping
if finish_reason == "stop_sequence":
return "stop"
# cohere mapping - https://docs.cohere.com/reference/generate
elif finish_reason == "COMPLETE":
return "stop"
elif finish_reason == "MAX_TOKENS": # cohere + vertex ai
return "length"
elif finish_reason == "ERROR_TOXIC":
return "content_filter"
elif (
finish_reason == "ERROR"
): # openai currently doesn't support an 'error' finish reason
return "stop"
# huggingface mapping https://huggingface.github.io/text-generation-inference/#/Text%20Generation%20Inference/generate_stream
elif finish_reason == "eos_token" or finish_reason == "stop_sequence":
return "stop"
elif (
finish_reason == "FINISH_REASON_UNSPECIFIED" or finish_reason == "STOP"
): # vertex ai - got from running `print(dir(response_obj.candidates[0].finish_reason))`: ['FINISH_REASON_UNSPECIFIED', 'MAX_TOKENS', 'OTHER', 'RECITATION', 'SAFETY', 'STOP',]
return "stop"
elif finish_reason == "SAFETY": # vertex ai
return "content_filter"
elif finish_reason == "STOP": # vertex ai
return "stop"
elif finish_reason == "end_turn" or finish_reason == "stop_sequence": # anthropic
return "stop"
elif finish_reason == "max_tokens": # anthropic
return "length"
elif finish_reason == "tool_use": # anthropic
return "tool_calls"
elif finish_reason == "content_filtered":
return "content_filter"
return finish_reason
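
As a quick illustration, the new helper normalizes provider-specific finish reasons onto OpenAI's vocabulary ("stop", "length", "content_filter", "tool_calls"), passing unknown values through unchanged. A small usage sketch, assuming the module path used by the import updates elsewhere in this diff:

```python
from litellm.litellm_core_utils.core_helpers import map_finish_reason

print(map_finish_reason("MAX_TOKENS"))    # -> "length"         (Cohere / Vertex AI)
print(map_finish_reason("end_turn"))      # -> "stop"           (Anthropic)
print(map_finish_reason("tool_use"))      # -> "tool_calls"     (Anthropic)
print(map_finish_reason("SAFETY"))        # -> "content_filter" (Vertex AI)
print(map_finish_reason("weird_reason"))  # -> "weird_reason"   (passed through)
```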

File diff suppressed because it is too large

View file

@ -12,7 +12,9 @@ from typing import TYPE_CHECKING, Any
import litellm import litellm
if TYPE_CHECKING: if TYPE_CHECKING:
-    from litellm.utils import Logging as _LiteLLMLoggingObject
+    from litellm.litellm_core_utils.litellm_logging import (
+        Logging as _LiteLLMLoggingObject,
+    )
LiteLLMLoggingObject = _LiteLLMLoggingObject LiteLLMLoggingObject = _LiteLLMLoggingObject
else: else:
@ -30,7 +32,6 @@ def redact_message_input_output_from_logging(
if litellm.turn_off_message_logging is not True: if litellm.turn_off_message_logging is not True:
return result return result
_result = copy.deepcopy(result)
# remove messages, prompts, input, response from logging # remove messages, prompts, input, response from logging
litellm_logging_obj.model_call_details["messages"] = [ litellm_logging_obj.model_call_details["messages"] = [
{"role": "user", "content": "redacted-by-litellm"} {"role": "user", "content": "redacted-by-litellm"}
@ -53,8 +54,10 @@ def redact_message_input_output_from_logging(
elif isinstance(choice, litellm.utils.StreamingChoices): elif isinstance(choice, litellm.utils.StreamingChoices):
choice.delta.content = "redacted-by-litellm" choice.delta.content = "redacted-by-litellm"
else: else:
-if _result is not None:
-    if isinstance(_result, litellm.ModelResponse):
+if result is not None:
+    if isinstance(result, litellm.ModelResponse):
+        # only deep copy litellm.ModelResponse
+        _result = copy.deepcopy(result)
if hasattr(_result, "choices") and _result.choices is not None: if hasattr(_result, "choices") and _result.choices is not None:
for choice in _result.choices: for choice in _result.choices:
if isinstance(choice, litellm.Choices): if isinstance(choice, litellm.Choices):
@ -63,3 +66,6 @@ def redact_message_input_output_from_logging(
choice.delta.content = "redacted-by-litellm" choice.delta.content = "redacted-by-litellm"
return _result return _result
# by default return result
return result
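
The behavioral change above is that only `litellm.ModelResponse` results are deep-copied before redaction; any other result type is now returned as-is instead of being copied up front. Redaction remains gated on the same module-level flag, roughly as in this hedged sketch:

```python
import litellm

# Redaction is opt-in: when this flag is True, logging callbacks see
# "redacted-by-litellm" in place of prompts and completions, while the caller
# still receives the unredacted response object.
litellm.turn_off_message_logging = True

response = litellm.completion(
    model="gpt-3.5-turbo",                         # any configured model
    messages=[{"role": "user", "content": "hi"}],
    mock_response="hello",                         # avoids a real API call
)
print(response.choices[0].message.content)         # -> "hello"
```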

View file

@ -5,10 +5,16 @@ import requests, copy # type: ignore
import time import time
from functools import partial from functools import partial
from typing import Callable, Optional, List, Union from typing import Callable, Optional, List, Union
-from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper
+import litellm.litellm_core_utils
+from litellm.utils import ModelResponse, Usage, CustomStreamWrapper
+from litellm.litellm_core_utils.core_helpers import map_finish_reason
import litellm import litellm
from .prompt_templates.factory import prompt_factory, custom_prompt from .prompt_templates.factory import prompt_factory, custom_prompt
-from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
+from litellm.llms.custom_httpx.http_handler import (
+    AsyncHTTPHandler,
+    _get_async_httpx_client,
+    _get_httpx_client,
+)
from .base import BaseLLM from .base import BaseLLM
import httpx # type: ignore import httpx # type: ignore
from litellm.types.llms.anthropic import AnthropicMessagesToolChoice from litellm.types.llms.anthropic import AnthropicMessagesToolChoice
@ -171,7 +177,7 @@ async def make_call(
logging_obj, logging_obj,
): ):
if client is None: if client is None:
client = AsyncHTTPHandler() # Create a new client if none provided client = _get_async_httpx_client() # Create a new client if none provided
response = await client.post(api_base, headers=headers, data=data, stream=True) response = await client.post(api_base, headers=headers, data=data, stream=True)
@ -201,7 +207,7 @@ class AnthropicChatCompletion(BaseLLM):
response: Union[requests.Response, httpx.Response], response: Union[requests.Response, httpx.Response],
model_response: ModelResponse, model_response: ModelResponse,
stream: bool, stream: bool,
logging_obj: litellm.utils.Logging, logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
optional_params: dict, optional_params: dict,
api_key: str, api_key: str,
data: Union[dict, str], data: Union[dict, str],
@ -316,7 +322,7 @@ class AnthropicChatCompletion(BaseLLM):
response: Union[requests.Response, httpx.Response], response: Union[requests.Response, httpx.Response],
model_response: ModelResponse, model_response: ModelResponse,
stream: bool, stream: bool,
logging_obj: litellm.utils.Logging, logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
optional_params: dict, optional_params: dict,
api_key: str, api_key: str,
data: Union[dict, str], data: Union[dict, str],
@ -463,9 +469,7 @@ class AnthropicChatCompletion(BaseLLM):
logger_fn=None, logger_fn=None,
headers={}, headers={},
) -> Union[ModelResponse, CustomStreamWrapper]: ) -> Union[ModelResponse, CustomStreamWrapper]:
-async_handler = AsyncHTTPHandler(
-    timeout=httpx.Timeout(timeout=600.0, connect=5.0)
-)
+async_handler = _get_async_httpx_client()
response = await async_handler.post(api_base, headers=headers, json=data) response = await async_handler.post(api_base, headers=headers, json=data)
if stream and _is_function_call: if stream and _is_function_call:
return self.process_streaming_response( return self.process_streaming_response(

View file

@ -2,7 +2,7 @@
import litellm import litellm
import httpx, requests import httpx, requests
from typing import Optional, Union from typing import Optional, Union
from litellm.utils import Logging from litellm.litellm_core_utils.litellm_logging import Logging
class BaseLLM: class BaseLLM:

View file

@ -5,12 +5,10 @@ import time, uuid
from typing import Callable, Optional, Any, Union, List from typing import Callable, Optional, Any, Union, List
import litellm import litellm
from litellm.utils import ( from litellm.utils import (
ModelResponse,
get_secret, get_secret,
Usage,
ImageResponse,
map_finish_reason,
) )
from litellm.litellm_core_utils.core_helpers import map_finish_reason
from litellm.types.utils import ImageResponse, ModelResponse, Usage
from .prompt_templates.factory import ( from .prompt_templates.factory import (
prompt_factory, prompt_factory,
custom_prompt, custom_prompt,
@ -633,7 +631,11 @@ def init_bedrock_client(
config = boto3.session.Config() config = boto3.session.Config()
### CHECK STS ### ### CHECK STS ###
-if aws_web_identity_token is not None and aws_role_name is not None and aws_session_name is not None:
+if (
+    aws_web_identity_token is not None
+    and aws_role_name is not None
+    and aws_session_name is not None
+):
oidc_token = get_secret(aws_web_identity_token) oidc_token = get_secret(aws_web_identity_token)
if oidc_token is None: if oidc_token is None:
@ -642,9 +644,7 @@ def init_bedrock_client(
status_code=401, status_code=401,
) )
-sts_client = boto3.client(
-    "sts"
-)
+sts_client = boto3.client("sts")
# https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html # https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts/client/assume_role_with_web_identity.html # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts/client/assume_role_with_web_identity.html

View file

@ -22,13 +22,12 @@ from typing import (
from litellm.utils import ( from litellm.utils import (
ModelResponse, ModelResponse,
Usage, Usage,
map_finish_reason,
CustomStreamWrapper, CustomStreamWrapper,
Message,
Choices,
get_secret, get_secret,
Logging,
) )
from litellm.litellm_core_utils.core_helpers import map_finish_reason
from litellm.litellm_core_utils.litellm_logging import Logging
from litellm.types.utils import Message, Choices
import litellm, uuid import litellm, uuid
from .prompt_templates.factory import ( from .prompt_templates.factory import (
prompt_factory, prompt_factory,
@ -41,7 +40,12 @@ from .prompt_templates.factory import (
_bedrock_converse_messages_pt, _bedrock_converse_messages_pt,
_bedrock_tools_pt, _bedrock_tools_pt,
) )
-from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
+from litellm.llms.custom_httpx.http_handler import (
+    AsyncHTTPHandler,
+    HTTPHandler,
+    _get_async_httpx_client,
+    _get_httpx_client,
+)
from .base import BaseLLM from .base import BaseLLM
import httpx # type: ignore import httpx # type: ignore
from .bedrock import BedrockError, convert_messages_to_prompt, ModelResponseIterator from .bedrock import BedrockError, convert_messages_to_prompt, ModelResponseIterator
@ -57,6 +61,7 @@ from litellm.caching import DualCache
iam_cache = DualCache() iam_cache = DualCache()
class AmazonCohereChatConfig: class AmazonCohereChatConfig:
""" """
Reference - https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-cohere-command-r-plus.html Reference - https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-cohere-command-r-plus.html
@ -167,7 +172,7 @@ async def make_call(
logging_obj, logging_obj,
): ):
if client is None: if client is None:
client = AsyncHTTPHandler() # Create a new client if none provided client = _get_async_httpx_client() # Create a new client if none provided
response = await client.post(api_base, headers=headers, data=data, stream=True) response = await client.post(api_base, headers=headers, data=data, stream=True)
@ -198,7 +203,7 @@ def make_sync_call(
logging_obj, logging_obj,
): ):
if client is None: if client is None:
client = HTTPHandler() # Create a new client if none provided client = _get_httpx_client() # Create a new client if none provided
response = client.post(api_base, headers=headers, data=data, stream=True) response = client.post(api_base, headers=headers, data=data, stream=True)
@ -327,13 +332,19 @@ class BedrockLLM(BaseLLM):
) = params_to_check ) = params_to_check
### CHECK STS ### ### CHECK STS ###
-if aws_web_identity_token is not None and aws_role_name is not None and aws_session_name is not None:
-    iam_creds_cache_key = json.dumps({
+if (
+    aws_web_identity_token is not None
+    and aws_role_name is not None
+    and aws_session_name is not None
+):
+    iam_creds_cache_key = json.dumps(
+        {
            "aws_web_identity_token": aws_web_identity_token,
            "aws_role_name": aws_role_name,
            "aws_session_name": aws_session_name,
            "aws_region_name": aws_region_name,
-    })
+        }
+    )
iam_creds_dict = iam_cache.get_cache(iam_creds_cache_key) iam_creds_dict = iam_cache.get_cache(iam_creds_cache_key)
if iam_creds_dict is None: if iam_creds_dict is None:
@ -348,7 +359,7 @@ class BedrockLLM(BaseLLM):
sts_client = boto3.client( sts_client = boto3.client(
"sts", "sts",
region_name=aws_region_name, region_name=aws_region_name,
endpoint_url=f"https://sts.{aws_region_name}.amazonaws.com" endpoint_url=f"https://sts.{aws_region_name}.amazonaws.com",
) )
# https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html # https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html
@ -362,12 +373,18 @@ class BedrockLLM(BaseLLM):
iam_creds_dict = { iam_creds_dict = {
"aws_access_key_id": sts_response["Credentials"]["AccessKeyId"], "aws_access_key_id": sts_response["Credentials"]["AccessKeyId"],
"aws_secret_access_key": sts_response["Credentials"]["SecretAccessKey"], "aws_secret_access_key": sts_response["Credentials"][
"SecretAccessKey"
],
"aws_session_token": sts_response["Credentials"]["SessionToken"], "aws_session_token": sts_response["Credentials"]["SessionToken"],
"region_name": aws_region_name, "region_name": aws_region_name,
} }
-iam_cache.set_cache(key=iam_creds_cache_key, value=json.dumps(iam_creds_dict), ttl=3600 - 60)
+iam_cache.set_cache(
+    key=iam_creds_cache_key,
+    value=json.dumps(iam_creds_dict),
+    ttl=3600 - 60,
+)
session = boto3.Session(**iam_creds_dict) session = boto3.Session(**iam_creds_dict)
@ -976,7 +993,7 @@ class BedrockLLM(BaseLLM):
if isinstance(timeout, float) or isinstance(timeout, int): if isinstance(timeout, float) or isinstance(timeout, int):
timeout = httpx.Timeout(timeout) timeout = httpx.Timeout(timeout)
_params["timeout"] = timeout _params["timeout"] = timeout
self.client = HTTPHandler(**_params) # type: ignore self.client = _get_httpx_client(_params) # type: ignore
else: else:
self.client = client self.client = client
if (stream is not None and stream == True) and provider != "ai21": if (stream is not None and stream == True) and provider != "ai21":
@ -1058,7 +1075,7 @@ class BedrockLLM(BaseLLM):
if isinstance(timeout, float) or isinstance(timeout, int): if isinstance(timeout, float) or isinstance(timeout, int):
timeout = httpx.Timeout(timeout) timeout = httpx.Timeout(timeout)
_params["timeout"] = timeout _params["timeout"] = timeout
client = AsyncHTTPHandler(**_params) # type: ignore client = _get_async_httpx_client(_params) # type: ignore
else: else:
client = client # type: ignore client = client # type: ignore
@ -1433,13 +1450,19 @@ class BedrockConverseLLM(BaseLLM):
) = params_to_check ) = params_to_check
### CHECK STS ### ### CHECK STS ###
-if aws_web_identity_token is not None and aws_role_name is not None and aws_session_name is not None:
-    iam_creds_cache_key = json.dumps({
+if (
+    aws_web_identity_token is not None
+    and aws_role_name is not None
+    and aws_session_name is not None
+):
+    iam_creds_cache_key = json.dumps(
+        {
            "aws_web_identity_token": aws_web_identity_token,
            "aws_role_name": aws_role_name,
            "aws_session_name": aws_session_name,
            "aws_region_name": aws_region_name,
-    })
+        }
+    )
iam_creds_dict = iam_cache.get_cache(iam_creds_cache_key) iam_creds_dict = iam_cache.get_cache(iam_creds_cache_key)
if iam_creds_dict is None: if iam_creds_dict is None:
@ -1454,7 +1477,7 @@ class BedrockConverseLLM(BaseLLM):
sts_client = boto3.client( sts_client = boto3.client(
"sts", "sts",
region_name=aws_region_name, region_name=aws_region_name,
endpoint_url=f"https://sts.{aws_region_name}.amazonaws.com" endpoint_url=f"https://sts.{aws_region_name}.amazonaws.com",
) )
# https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html # https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html
@ -1468,12 +1491,18 @@ class BedrockConverseLLM(BaseLLM):
iam_creds_dict = { iam_creds_dict = {
"aws_access_key_id": sts_response["Credentials"]["AccessKeyId"], "aws_access_key_id": sts_response["Credentials"]["AccessKeyId"],
"aws_secret_access_key": sts_response["Credentials"]["SecretAccessKey"], "aws_secret_access_key": sts_response["Credentials"][
"SecretAccessKey"
],
"aws_session_token": sts_response["Credentials"]["SessionToken"], "aws_session_token": sts_response["Credentials"]["SessionToken"],
"region_name": aws_region_name, "region_name": aws_region_name,
} }
-iam_cache.set_cache(key=iam_creds_cache_key, value=json.dumps(iam_creds_dict), ttl=3600 - 60)
+iam_cache.set_cache(
+    key=iam_creds_cache_key,
+    value=json.dumps(iam_creds_dict),
+    ttl=3600 - 60,
+)
session = boto3.Session(**iam_creds_dict) session = boto3.Session(**iam_creds_dict)
@ -1575,7 +1604,7 @@ class BedrockConverseLLM(BaseLLM):
if isinstance(timeout, float) or isinstance(timeout, int): if isinstance(timeout, float) or isinstance(timeout, int):
timeout = httpx.Timeout(timeout) timeout = httpx.Timeout(timeout)
_params["timeout"] = timeout _params["timeout"] = timeout
client = AsyncHTTPHandler(**_params) # type: ignore client = _get_async_httpx_client(_params) # type: ignore
else: else:
client = client # type: ignore client = client # type: ignore
@ -1847,7 +1876,7 @@ class BedrockConverseLLM(BaseLLM):
if isinstance(timeout, float) or isinstance(timeout, int): if isinstance(timeout, float) or isinstance(timeout, int):
timeout = httpx.Timeout(timeout) timeout = httpx.Timeout(timeout)
_params["timeout"] = timeout _params["timeout"] = timeout
client = HTTPHandler(**_params) # type: ignore client = _get_httpx_client(_params) # type: ignore
else: else:
client = client client = client
try: try:
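
Most of the hunks above are reformatting, but the pattern they wrap is worth spelling out: assumed-role credentials from the web-identity (OIDC) flow are cached in `iam_cache` (a `DualCache`) under a key derived from the STS parameters, with a TTL just under the one-hour session lifetime. A simplified sketch of that pattern; any name not shown in the diff is illustrative:

```python
import json
import boto3
from litellm.caching import DualCache

iam_cache = DualCache()

def get_web_identity_credentials(
    oidc_token: str,        # already-resolved web identity token (illustrative)
    aws_role_name: str,     # role ARN to assume
    aws_session_name: str,
    aws_region_name: str,
) -> dict:
    cache_key = json.dumps(
        {
            "aws_role_name": aws_role_name,
            "aws_session_name": aws_session_name,
            "aws_region_name": aws_region_name,
        }
    )
    cached = iam_cache.get_cache(cache_key)
    if cached is not None:
        return json.loads(cached)

    sts_client = boto3.client(
        "sts",
        region_name=aws_region_name,
        endpoint_url=f"https://sts.{aws_region_name}.amazonaws.com",
    )
    sts_response = sts_client.assume_role_with_web_identity(
        RoleArn=aws_role_name,
        RoleSessionName=aws_session_name,
        WebIdentityToken=oidc_token,
    )
    creds = {
        "aws_access_key_id": sts_response["Credentials"]["AccessKeyId"],
        "aws_secret_access_key": sts_response["Credentials"]["SecretAccessKey"],
        "aws_session_token": sts_response["Credentials"]["SessionToken"],
        "region_name": aws_region_name,
    }
    # Cache slightly under the 1h STS session so entries never outlive the
    # credentials they hold (ttl=3600 - 60, as in the diff).
    iam_cache.set_cache(key=cache_key, value=json.dumps(creds), ttl=3600 - 60)
    return creds
```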

View file

@ -219,3 +219,60 @@ class HTTPHandler:
self.close() self.close()
except Exception: except Exception:
pass pass
def _get_async_httpx_client(params: Optional[dict] = None) -> AsyncHTTPHandler:
"""
Retrieves the async HTTP client from the cache
If not present, creates a new client
Caches the new client and returns it.
"""
_params_key_name = ""
if params is not None:
for key, value in params.items():
try:
_params_key_name += f"{key}_{value}"
except Exception:
pass
_cache_key_name = "async_httpx_client" + _params_key_name
if _cache_key_name in litellm.in_memory_llm_clients_cache:
return litellm.in_memory_llm_clients_cache[_cache_key_name]
if params is not None:
_new_client = AsyncHTTPHandler(**params)
else:
_new_client = AsyncHTTPHandler(
timeout=httpx.Timeout(timeout=600.0, connect=5.0)
)
litellm.in_memory_llm_clients_cache[_cache_key_name] = _new_client
return _new_client
def _get_httpx_client(params: Optional[dict] = None) -> HTTPHandler:
"""
Retrieves the HTTP client from the cache
If not present, creates a new client
Caches the new client and returns it.
"""
_params_key_name = ""
if params is not None:
for key, value in params.items():
try:
_params_key_name += f"{key}_{value}"
except Exception:
pass
_cache_key_name = "httpx_client" + _params_key_name
if _cache_key_name in litellm.in_memory_llm_clients_cache:
return litellm.in_memory_llm_clients_cache[_cache_key_name]
if params is not None:
_new_client = HTTPHandler(**params)
else:
_new_client = HTTPHandler(timeout=httpx.Timeout(timeout=600.0, connect=5.0))
litellm.in_memory_llm_clients_cache[_cache_key_name] = _new_client
return _new_client
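
The point of these helpers is that handler instances are memoized in `litellm.in_memory_llm_clients_cache`, keyed by their construction params, so hot paths reuse one pooled httpx client instead of building a new handler per request (this is what the Anthropic and Bedrock call sites above switch to). A small usage sketch:

```python
import httpx
from litellm.llms.custom_httpx.http_handler import (
    _get_async_httpx_client,
    _get_httpx_client,
)

# Same params -> same cached handler (and therefore the same connection pool).
client_a = _get_httpx_client()
client_b = _get_httpx_client()
assert client_a is client_b

# Different params produce, and cache, a separately keyed handler.
default_async = _get_async_httpx_client()
short_timeout_async = _get_async_httpx_client(
    params={"timeout": httpx.Timeout(timeout=30.0, connect=5.0)}
)
assert default_async is not short_timeout_async
```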

View file

@ -10,10 +10,10 @@ from typing import Callable, Optional, List, Union, Tuple, Literal
from litellm.utils import ( from litellm.utils import (
ModelResponse, ModelResponse,
Usage, Usage,
map_finish_reason,
CustomStreamWrapper, CustomStreamWrapper,
EmbeddingResponse, EmbeddingResponse,
) )
from litellm.litellm_core_utils.core_helpers import map_finish_reason
import litellm import litellm
from .prompt_templates.factory import prompt_factory, custom_prompt from .prompt_templates.factory import prompt_factory, custom_prompt
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
@ -289,7 +289,7 @@ class DatabricksChatCompletion(BaseLLM):
response: Union[requests.Response, httpx.Response], response: Union[requests.Response, httpx.Response],
model_response: ModelResponse, model_response: ModelResponse,
stream: bool, stream: bool,
logging_obj: litellm.utils.Logging, logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
optional_params: dict, optional_params: dict,
api_key: str, api_key: str,
data: Union[dict, str], data: Union[dict, str],

View file

@ -12,11 +12,11 @@ from typing import Callable, Optional, List, Literal, Union
from litellm.utils import ( from litellm.utils import (
ModelResponse, ModelResponse,
Usage, Usage,
map_finish_reason,
CustomStreamWrapper, CustomStreamWrapper,
Message, Message,
Choices, Choices,
) )
from litellm.litellm_core_utils.core_helpers import map_finish_reason
import litellm import litellm
from .prompt_templates.factory import prompt_factory, custom_prompt from .prompt_templates.factory import prompt_factory, custom_prompt
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
@ -198,7 +198,7 @@ class PredibaseChatCompletion(BaseLLM):
response: Union[requests.Response, httpx.Response], response: Union[requests.Response, httpx.Response],
model_response: ModelResponse, model_response: ModelResponse,
stream: bool, stream: bool,
logging_obj: litellm.utils.Logging, logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
optional_params: dict, optional_params: dict,
api_key: str, api_key: str,
data: Union[dict, str], data: Union[dict, str],

View file

@ -4,7 +4,6 @@ from enum import Enum
import requests, copy # type: ignore import requests, copy # type: ignore
import time import time
from typing import Callable, Optional, List from typing import Callable, Optional, List
from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper
import litellm import litellm
from .prompt_templates.factory import prompt_factory, custom_prompt from .prompt_templates.factory import prompt_factory, custom_prompt
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler

View file

@ -5,7 +5,8 @@ import requests # type: ignore
import time import time
from typing import Callable, Optional, Union, List, Literal, Any from typing import Callable, Optional, Union, List, Literal, Any
from pydantic import BaseModel from pydantic import BaseModel
-from litellm.utils import ModelResponse, Usage, CustomStreamWrapper, map_finish_reason
+from litellm.utils import ModelResponse, Usage, CustomStreamWrapper
+from litellm.litellm_core_utils.core_helpers import map_finish_reason
import litellm, uuid import litellm, uuid
import httpx, inspect # type: ignore import httpx, inspect # type: ignore
from litellm.types.llms.vertex_ai import * from litellm.types.llms.vertex_ai import *

View file

@ -6,7 +6,8 @@ from enum import Enum
import requests, copy # type: ignore import requests, copy # type: ignore
import time, uuid import time, uuid
from typing import Callable, Optional, List from typing import Callable, Optional, List
-from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper
+from litellm.utils import ModelResponse, Usage, CustomStreamWrapper
+from litellm.litellm_core_utils.core_helpers import map_finish_reason
import litellm import litellm
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from .prompt_templates.factory import ( from .prompt_templates.factory import (

View file

@ -8,7 +8,10 @@ from enum import Enum
import requests # type: ignore import requests # type: ignore
import time import time
from typing import Callable, Optional, Union, List, Any, Tuple from typing import Callable, Optional, Union, List, Any, Tuple
-from litellm.utils import ModelResponse, Usage, CustomStreamWrapper, map_finish_reason
+import litellm.litellm_core_utils
+import litellm.litellm_core_utils.litellm_logging
+from litellm.utils import ModelResponse, Usage, CustomStreamWrapper
+from litellm.litellm_core_utils.core_helpers import map_finish_reason
import litellm, uuid import litellm, uuid
import httpx, inspect # type: ignore import httpx, inspect # type: ignore
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
@ -320,7 +323,7 @@ class VertexLLM(BaseLLM):
model: str, model: str,
response: httpx.Response, response: httpx.Response,
model_response: ModelResponse, model_response: ModelResponse,
logging_obj: litellm.utils.Logging, logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
optional_params: dict, optional_params: dict,
api_key: str, api_key: str,
data: Union[dict, str], data: Union[dict, str],

View file

@ -368,7 +368,9 @@ async def acompletion(
return response return response
except Exception as e: except Exception as e:
verbose_logger.error( verbose_logger.error(
"litellm.acompletion(): Exception occured - {}".format(str(e)) "litellm.acompletion(): Exception occured - {}\n{}".format(
str(e), traceback.format_exc()
)
) )
verbose_logger.debug(traceback.format_exc()) verbose_logger.debug(traceback.format_exc())
custom_llm_provider = custom_llm_provider or "openai" custom_llm_provider = custom_llm_provider or "openai"
@ -399,6 +401,7 @@ def mock_completion(
stream: Optional[bool] = False, stream: Optional[bool] = False,
mock_response: Union[str, Exception] = "This is a mock request", mock_response: Union[str, Exception] = "This is a mock request",
logging=None, logging=None,
custom_llm_provider=None,
**kwargs, **kwargs,
): ):
""" """
@ -436,7 +439,7 @@ def mock_completion(
raise litellm.APIError( raise litellm.APIError(
status_code=getattr(mock_response, "status_code", 500), # type: ignore status_code=getattr(mock_response, "status_code", 500), # type: ignore
message=getattr(mock_response, "text", str(mock_response)), message=getattr(mock_response, "text", str(mock_response)),
llm_provider=getattr(mock_response, "llm_provider", "openai"), # type: ignore llm_provider=getattr(mock_response, "llm_provider", custom_llm_provider or "openai"), # type: ignore
model=model, # type: ignore model=model, # type: ignore
request=httpx.Request(method="POST", url="https://api.openai.com/v1/"), request=httpx.Request(method="POST", url="https://api.openai.com/v1/"),
) )
@ -905,6 +908,7 @@ def completion(
logging=logging, logging=logging,
acompletion=acompletion, acompletion=acompletion,
mock_delay=kwargs.get("mock_delay", None), mock_delay=kwargs.get("mock_delay", None),
custom_llm_provider=custom_llm_provider,
) )
if custom_llm_provider == "azure": if custom_llm_provider == "azure":
# azure configs # azure configs
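
One functional change in this file: `mock_completion` now receives the resolved `custom_llm_provider`, so mocked error responses report the caller's actual provider instead of always defaulting to `openai`. A hedged example of exercising the mock path through the public API (the model name is illustrative):

```python
import litellm

# Mocked success: no network call is made, the canned text is returned.
resp = litellm.completion(
    model="anthropic/claude-3-haiku-20240307",
    messages=[{"role": "user", "content": "ping"}],
    mock_response="pong",
)
print(resp.choices[0].message.content)  # -> "pong"

# Mocked failure: passing an Exception surfaces as a litellm.APIError whose
# llm_provider now reflects the resolved provider ("anthropic"), not "openai".
try:
    litellm.completion(
        model="anthropic/claude-3-haiku-20240307",
        messages=[{"role": "user", "content": "ping"}],
        mock_response=Exception("simulated outage"),
    )
except Exception as err:
    print(getattr(err, "llm_provider", None))
```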

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1 @@
"use strict";(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[665],{30953:function(e,t,r){r.d(t,{GH$:function(){return n}});var l=r(64090);let n=e=>{let{color:t="currentColor",size:r=24,className:n,...s}=e;return l.createElement("svg",{viewBox:"0 0 24 24",xmlns:"http://www.w3.org/2000/svg",width:r,height:r,fill:t,...s,className:"remixicon "+(n||"")},l.createElement("path",{d:"M12 22C6.47715 22 2 17.5228 2 12C2 6.47715 6.47715 2 12 2C17.5228 2 22 6.47715 22 12C22 17.5228 17.5228 22 12 22ZM12 20C16.4183 20 20 16.4183 20 12C20 7.58172 16.4183 4 12 4C7.58172 4 4 7.58172 4 12C4 16.4183 7.58172 20 12 20ZM11.0026 16L6.75999 11.7574L8.17421 10.3431L11.0026 13.1716L16.6595 7.51472L18.0737 8.92893L11.0026 16Z"}))}}}]);

View file

@ -1 +0,0 @@
"use strict";(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[665],{30953:function(e,t,r){r.d(t,{GH$:function(){return n}});var l=r(2265);let n=e=>{let{color:t="currentColor",size:r=24,className:n,...s}=e;return l.createElement("svg",{viewBox:"0 0 24 24",xmlns:"http://www.w3.org/2000/svg",width:r,height:r,fill:t,...s,className:"remixicon "+(n||"")},l.createElement("path",{d:"M12 22C6.47715 22 2 17.5228 2 12C2 6.47715 6.47715 2 12 2C17.5228 2 22 6.47715 22 12C22 17.5228 17.5228 22 12 22ZM12 20C16.4183 20 20 16.4183 20 12C20 7.58172 16.4183 4 12 4C7.58172 4 4 7.58172 4 12C4 16.4183 7.58172 20 12 20ZM11.0026 16L6.75999 11.7574L8.17421 10.3431L11.0026 13.1716L16.6595 7.51472L18.0737 8.92893L11.0026 16Z"}))}}}]);

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[165],{83155:function(e,t,n){(window.__NEXT_P=window.__NEXT_P||[]).push(["/_not-found",function(){return n(84032)}])},84032:function(e,t,n){"use strict";Object.defineProperty(t,"__esModule",{value:!0}),Object.defineProperty(t,"default",{enumerable:!0,get:function(){return i}}),n(86921);let o=n(57437);n(2265);let r={error:{fontFamily:'system-ui,"Segoe UI",Roboto,Helvetica,Arial,sans-serif,"Apple Color Emoji","Segoe UI Emoji"',height:"100vh",textAlign:"center",display:"flex",flexDirection:"column",alignItems:"center",justifyContent:"center"},desc:{display:"inline-block"},h1:{display:"inline-block",margin:"0 20px 0 0",padding:"0 23px 0 0",fontSize:24,fontWeight:500,verticalAlign:"top",lineHeight:"49px"},h2:{fontSize:14,fontWeight:400,lineHeight:"49px",margin:0}};function i(){return(0,o.jsxs)(o.Fragment,{children:[(0,o.jsx)("title",{children:"404: This page could not be found."}),(0,o.jsx)("div",{style:r.error,children:(0,o.jsxs)("div",{children:[(0,o.jsx)("style",{dangerouslySetInnerHTML:{__html:"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}),(0,o.jsx)("h1",{className:"next-error-h1",style:r.h1,children:"404"}),(0,o.jsx)("div",{style:r.desc,children:(0,o.jsx)("h2",{style:r.h2,children:"This page could not be found."})})]})})]})}("function"==typeof t.default||"object"==typeof t.default&&null!==t.default)&&void 0===t.default.__esModule&&(Object.defineProperty(t.default,"__esModule",{value:!0}),Object.assign(t.default,t),e.exports=t.default)}},function(e){e.O(0,[971,69,744],function(){return e(e.s=83155)}),_N_E=e.O()}]); (self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[165],{83155:function(e,t,n){(window.__NEXT_P=window.__NEXT_P||[]).push(["/_not-found",function(){return n(84032)}])},84032:function(e,t,n){"use strict";Object.defineProperty(t,"__esModule",{value:!0}),Object.defineProperty(t,"default",{enumerable:!0,get:function(){return i}}),n(86921);let o=n(3827);n(64090);let r={error:{fontFamily:'system-ui,"Segoe UI",Roboto,Helvetica,Arial,sans-serif,"Apple Color Emoji","Segoe UI Emoji"',height:"100vh",textAlign:"center",display:"flex",flexDirection:"column",alignItems:"center",justifyContent:"center"},desc:{display:"inline-block"},h1:{display:"inline-block",margin:"0 20px 0 0",padding:"0 23px 0 0",fontSize:24,fontWeight:500,verticalAlign:"top",lineHeight:"49px"},h2:{fontSize:14,fontWeight:400,lineHeight:"49px",margin:0}};function i(){return(0,o.jsxs)(o.Fragment,{children:[(0,o.jsx)("title",{children:"404: This page could not be found."}),(0,o.jsx)("div",{style:r.error,children:(0,o.jsxs)("div",{children:[(0,o.jsx)("style",{dangerouslySetInnerHTML:{__html:"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}),(0,o.jsx)("h1",{className:"next-error-h1",style:r.h1,children:"404"}),(0,o.jsx)("div",{style:r.desc,children:(0,o.jsx)("h2",{style:r.h2,children:"This page could not be found."})})]})})]})}("function"==typeof t.default||"object"==typeof t.default&&null!==t.default)&&void 0===t.default.__esModule&&(Object.defineProperty(t.default,"__esModule",{value:!0}),Object.assign(t.default,t),e.exports=t.default)}},function(e){e.O(0,[971,69,744],function(){return e(e.s=83155)}),_N_E=e.O()}]);

View file

@ -1 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{11837:function(n,e,t){Promise.resolve().then(t.t.bind(t,99646,23)),Promise.resolve().then(t.t.bind(t,63385,23))},63385:function(){},99646:function(n){n.exports={style:{fontFamily:"'__Inter_12bbc4', '__Inter_Fallback_12bbc4'",fontStyle:"normal"},className:"__className_12bbc4"}}},function(n){n.O(0,[971,69,744],function(){return n(n.s=11837)}),_N_E=n.O()}]); (self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{87421:function(n,e,t){Promise.resolve().then(t.t.bind(t,99646,23)),Promise.resolve().then(t.t.bind(t,63385,23))},63385:function(){},99646:function(n){n.exports={style:{fontFamily:"'__Inter_12bbc4', '__Inter_Fallback_12bbc4'",fontStyle:"normal"},className:"__className_12bbc4"}}},function(n){n.O(0,[971,69,744],function(){return n(n.s=87421)}),_N_E=n.O()}]);

File diff suppressed because one or more lines are too long

View file

@ -1 +0,0 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[461],{20723:function(e,s,l){Promise.resolve().then(l.bind(l,667))},667:function(e,s,l){"use strict";l.r(s),l.d(s,{default:function(){return _}});var t=l(57437),a=l(2265),r=l(47907),n=l(2179),i=l(18190),o=l(13810),u=l(10384),c=l(46453),d=l(71801),m=l(52273),h=l(42440),x=l(30953),j=l(777),p=l(37963),f=l(60620),g=l(1861);function _(){let[e]=f.Z.useForm(),s=(0,r.useSearchParams)();s.get("token");let l=s.get("id"),[_,Z]=(0,a.useState)(null),[w,b]=(0,a.useState)(""),[N,S]=(0,a.useState)(""),[k,y]=(0,a.useState)(null),[v,E]=(0,a.useState)(""),[F,I]=(0,a.useState)("");return(0,a.useEffect)(()=>{l&&(0,j.W_)(l).then(e=>{let s=e.login_url;console.log("login_url:",s),E(s);let l=e.token,t=(0,p.o)(l);I(l),console.log("decoded:",t),Z(t.key),console.log("decoded user email:",t.user_email),S(t.user_email),y(t.user_id)})},[l]),(0,t.jsx)("div",{className:"mx-auto max-w-md mt-10",children:(0,t.jsxs)(o.Z,{children:[(0,t.jsx)(h.Z,{className:"text-sm mb-5 text-center",children:"\uD83D\uDE85 LiteLLM"}),(0,t.jsx)(h.Z,{className:"text-xl",children:"Sign up"}),(0,t.jsx)(d.Z,{children:"Claim your user account to login to Admin UI."}),(0,t.jsx)(i.Z,{className:"mt-4",title:"SSO",icon:x.GH$,color:"sky",children:(0,t.jsxs)(c.Z,{numItems:2,className:"flex justify-between items-center",children:[(0,t.jsx)(u.Z,{children:"SSO is under the Enterprise Tirer."}),(0,t.jsx)(u.Z,{children:(0,t.jsx)(n.Z,{variant:"primary",className:"mb-2",children:(0,t.jsx)("a",{href:"https://forms.gle/W3U4PZpJGFHWtHyA9",target:"_blank",children:"Get Free Trial"})})})]})}),(0,t.jsxs)(f.Z,{className:"mt-10 mb-5 mx-auto",layout:"vertical",onFinish:e=>{console.log("in handle submit. accessToken:",_,"token:",F,"formValues:",e),_&&F&&(e.user_email=N,k&&l&&(0,j.m_)(_,l,k,e.password).then(e=>{var s;let l="/ui/";console.log("redirecting to:",l+="?userID="+((null===(s=e.data)||void 0===s?void 0:s.user_id)||e.user_id)+"&token="+F),window.location.href=l}))},children:[(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(f.Z.Item,{label:"Email Address",name:"user_email",children:(0,t.jsx)(m.Z,{type:"email",disabled:!0,value:N,defaultValue:N,className:"max-w-md"})}),(0,t.jsx)(f.Z.Item,{label:"Password",name:"password",rules:[{required:!0,message:"password required to sign up"}],help:"Create a password for your account",children:(0,t.jsx)(m.Z,{placeholder:"",type:"password",className:"max-w-md"})})]}),(0,t.jsx)("div",{className:"mt-10",children:(0,t.jsx)(g.ZP,{htmlType:"submit",children:"Sign Up"})})]})]})})}}},function(e){e.O(0,[665,505,684,777,971,69,744],function(){return e(e.s=20723)}),_N_E=e.O()}]);

View file

@ -0,0 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[461],{61994:function(e,s,l){Promise.resolve().then(l.bind(l,667))},667:function(e,s,l){"use strict";l.r(s),l.d(s,{default:function(){return _}});var t=l(3827),a=l(64090),r=l(47907),n=l(16450),i=l(18190),o=l(13810),u=l(10384),c=l(46453),d=l(71801),m=l(52273),h=l(42440),x=l(30953),j=l(777),p=l(37963),f=l(60620),g=l(1861);function _(){let[e]=f.Z.useForm(),s=(0,r.useSearchParams)();s.get("token");let l=s.get("id"),[_,Z]=(0,a.useState)(null),[w,b]=(0,a.useState)(""),[N,S]=(0,a.useState)(""),[k,y]=(0,a.useState)(null),[v,E]=(0,a.useState)(""),[F,I]=(0,a.useState)("");return(0,a.useEffect)(()=>{l&&(0,j.W_)(l).then(e=>{let s=e.login_url;console.log("login_url:",s),E(s);let l=e.token,t=(0,p.o)(l);I(l),console.log("decoded:",t),Z(t.key),console.log("decoded user email:",t.user_email),S(t.user_email),y(t.user_id)})},[l]),(0,t.jsx)("div",{className:"mx-auto max-w-md mt-10",children:(0,t.jsxs)(o.Z,{children:[(0,t.jsx)(h.Z,{className:"text-sm mb-5 text-center",children:"\uD83D\uDE85 LiteLLM"}),(0,t.jsx)(h.Z,{className:"text-xl",children:"Sign up"}),(0,t.jsx)(d.Z,{children:"Claim your user account to login to Admin UI."}),(0,t.jsx)(i.Z,{className:"mt-4",title:"SSO",icon:x.GH$,color:"sky",children:(0,t.jsxs)(c.Z,{numItems:2,className:"flex justify-between items-center",children:[(0,t.jsx)(u.Z,{children:"SSO is under the Enterprise Tirer."}),(0,t.jsx)(u.Z,{children:(0,t.jsx)(n.Z,{variant:"primary",className:"mb-2",children:(0,t.jsx)("a",{href:"https://forms.gle/W3U4PZpJGFHWtHyA9",target:"_blank",children:"Get Free Trial"})})})]})}),(0,t.jsxs)(f.Z,{className:"mt-10 mb-5 mx-auto",layout:"vertical",onFinish:e=>{console.log("in handle submit. accessToken:",_,"token:",F,"formValues:",e),_&&F&&(e.user_email=N,k&&l&&(0,j.m_)(_,l,k,e.password).then(e=>{var s;let l="/ui/";console.log("redirecting to:",l+="?userID="+((null===(s=e.data)||void 0===s?void 0:s.user_id)||e.user_id)+"&token="+F),window.location.href=l}))},children:[(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(f.Z.Item,{label:"Email Address",name:"user_email",children:(0,t.jsx)(m.Z,{type:"email",disabled:!0,value:N,defaultValue:N,className:"max-w-md"})}),(0,t.jsx)(f.Z.Item,{label:"Password",name:"password",rules:[{required:!0,message:"password required to sign up"}],help:"Create a password for your account",children:(0,t.jsx)(m.Z,{placeholder:"",type:"password",className:"max-w-md"})})]}),(0,t.jsx)("div",{className:"mt-10",children:(0,t.jsx)(g.ZP,{htmlType:"submit",children:"Sign Up"})})]})]})})}}},function(e){e.O(0,[665,294,684,777,971,69,744],function(){return e(e.s=61994)}),_N_E=e.O()}]);

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{70377:function(e,n,t){Promise.resolve().then(t.t.bind(t,47690,23)),Promise.resolve().then(t.t.bind(t,48955,23)),Promise.resolve().then(t.t.bind(t,5613,23)),Promise.resolve().then(t.t.bind(t,11902,23)),Promise.resolve().then(t.t.bind(t,31778,23)),Promise.resolve().then(t.t.bind(t,77831,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,69],function(){return n(35317),n(70377)}),_N_E=e.O()}]); (self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{32028:function(e,n,t){Promise.resolve().then(t.t.bind(t,47690,23)),Promise.resolve().then(t.t.bind(t,48955,23)),Promise.resolve().then(t.t.bind(t,5613,23)),Promise.resolve().then(t.t.bind(t,11902,23)),Promise.resolve().then(t.t.bind(t,31778,23)),Promise.resolve().then(t.t.bind(t,77831,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,69],function(){return n(35317),n(32028)}),_N_E=e.O()}]);

View file

@ -1 +1 @@
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/63f65dbb14efd996.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var 
n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}(); !function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/b65d5698d1a1958d.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var 
o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-887c75b16b85d4b4.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f593049e31b05aeb.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-8316d07d1f41e39f.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-096338c8e1915716.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-887c75b16b85d4b4.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/63f65dbb14efd996.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[68101,[\"936\",\"static/chunks/2f6dbc85-cac2949a76539886.js\",\"505\",\"static/chunks/505-5ff3c318fddfa35c.js\",\"131\",\"static/chunks/131-cb6bfe24e23e121b.js\",\"684\",\"static/chunks/684-16b194c83a169f6d.js\",\"759\",\"static/chunks/759-c0083d8a782d300e.js\",\"777\",\"static/chunks/777-71fb78fdb4897cc3.js\",\"931\",\"static/chunks/app/page-8028473f1a04553d.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/63f65dbb14efd996.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"sTvd1VbHSi_TBr1KiIpul\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid 
rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html> <!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-6f7793f21bbb2fbe.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-6f7793f21bbb2fbe.js" crossorigin="" 
async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/b65d5698d1a1958d.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[68101,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"294\",\"static/chunks/294-0e35509d5ca95267.js\",\"131\",\"static/chunks/131-6a03368053f9d26d.js\",\"684\",\"static/chunks/684-bb2d2f93d92acb0b.js\",\"759\",\"static/chunks/759-83a8bdddfe32b5d9.js\",\"777\",\"static/chunks/777-71fb78fdb4897cc3.js\",\"931\",\"static/chunks/app/page-626098dc8320c801.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/b65d5698d1a1958d.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"S9_6IC27HNWjJtr-LNaAO\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, 
initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

View file

@ -1,7 +1,7 @@
2:I[77831,[],""] 2:I[77831,[],""]
3:I[68101,["936","static/chunks/2f6dbc85-cac2949a76539886.js","505","static/chunks/505-5ff3c318fddfa35c.js","131","static/chunks/131-cb6bfe24e23e121b.js","684","static/chunks/684-16b194c83a169f6d.js","759","static/chunks/759-c0083d8a782d300e.js","777","static/chunks/777-71fb78fdb4897cc3.js","931","static/chunks/app/page-8028473f1a04553d.js"],""] 3:I[68101,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-83a8bdddfe32b5d9.js","777","static/chunks/777-71fb78fdb4897cc3.js","931","static/chunks/app/page-626098dc8320c801.js"],""]
4:I[5613,[],""] 4:I[5613,[],""]
5:I[31778,[],""] 5:I[31778,[],""]
0:["sTvd1VbHSi_TBr1KiIpul",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/63f65dbb14efd996.css","precedence":"next","crossOrigin":""}]],"$L6"]]]] 0:["S9_6IC27HNWjJtr-LNaAO",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 
0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/b65d5698d1a1958d.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null

File diff suppressed because one or more lines are too long

View file

@ -1,7 +1,7 @@
2:I[77831,[],""] 2:I[77831,[],""]
3:I[87494,["505","static/chunks/505-5ff3c318fddfa35c.js","131","static/chunks/131-cb6bfe24e23e121b.js","777","static/chunks/777-71fb78fdb4897cc3.js","418","static/chunks/app/model_hub/page-a1942d43573c82c3.js"],""] 3:I[87494,["294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","777","static/chunks/777-71fb78fdb4897cc3.js","418","static/chunks/app/model_hub/page-4cb65c32467214b5.js"],""]
4:I[5613,[],""] 4:I[5613,[],""]
5:I[31778,[],""] 5:I[31778,[],""]
0:["sTvd1VbHSi_TBr1KiIpul",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/63f65dbb14efd996.css","precedence":"next","crossOrigin":""}]],"$L6"]]]] 
0:["S9_6IC27HNWjJtr-LNaAO",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/b65d5698d1a1958d.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null

File diff suppressed because one or more lines are too long

View file

@ -1,7 +1,7 @@
2:I[77831,[],""] 2:I[77831,[],""]
3:I[667,["665","static/chunks/3014691f-b24e8254c7593934.js","505","static/chunks/505-5ff3c318fddfa35c.js","684","static/chunks/684-16b194c83a169f6d.js","777","static/chunks/777-71fb78fdb4897cc3.js","461","static/chunks/app/onboarding/page-49a30e653b6ae929.js"],""] 3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-71fb78fdb4897cc3.js","461","static/chunks/app/onboarding/page-664c7288e11fff5a.js"],""]
4:I[5613,[],""] 4:I[5613,[],""]
5:I[31778,[],""] 5:I[31778,[],""]
0:["sTvd1VbHSi_TBr1KiIpul",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/63f65dbb14efd996.css","precedence":"next","crossOrigin":""}]],"$L6"]]]] 
0:["S9_6IC27HNWjJtr-LNaAO",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/b65d5698d1a1958d.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null

View file

@ -79,8 +79,8 @@ litellm_settings:
failure_callback: ["langfuse"] failure_callback: ["langfuse"]
cache: true cache: true
# general_settings: general_settings:
# alerting: ["email"] alerting: ["slack"]
# key_management_system: "aws_kms" # key_management_system: "aws_kms"
# key_management_settings: # key_management_settings:
# hosted_keys: ["LITELLM_MASTER_KEY"] # hosted_keys: ["LITELLM_MASTER_KEY"]
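Once the proxy loads this config, the un-commented block above is parsed into its `general_settings` dict. A small illustrative sketch of the resulting shape (nothing beyond the hunk above is implied; the key-management lines stay commented out):

```python
# Illustrative only: the dict shape the YAML hunk above produces once loaded.
general_settings = {
    "alerting": ["slack"],  # switched from the commented-out ["email"] example
    # "key_management_system": "aws_kms",
    # "key_management_settings": {"hosted_keys": ["LITELLM_MASTER_KEY"]},
}

assert "slack" in general_settings.get("alerting", [])
```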

View file

@ -1358,10 +1358,11 @@ class CallInfo(LiteLLMBase):
spend: float spend: float
max_budget: Optional[float] = None max_budget: Optional[float] = None
token: str = Field(description="Hashed value of that key") token: Optional[str] = Field(default=None, description="Hashed value of that key")
customer_id: Optional[str] = None customer_id: Optional[str] = None
user_id: Optional[str] = None user_id: Optional[str] = None
team_id: Optional[str] = None team_id: Optional[str] = None
team_alias: Optional[str] = None
user_email: Optional[str] = None user_email: Optional[str] = None
key_alias: Optional[str] = None key_alias: Optional[str] = None
projected_exceeded_date: Optional[str] = None projected_exceeded_date: Optional[str] = None
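Since `token` is now optional and `team_alias` has been added, a `CallInfo` can be built without a hashed key. A minimal sketch (assuming `litellm` is installed and `CallInfo` is imported from `litellm.proxy._types`, as in the hunk above; all field values are illustrative):

```python
# Sketch only - field values are made up; CallInfo comes from the hunk above.
from litellm.proxy._types import CallInfo

info = CallInfo(
    spend=12.5,
    max_budget=100.0,
    user_id="user-123",
    team_id="team-abc",
    team_alias="research-team",  # new field added in this change
)

print(info.token)       # None - no longer a required field
print(info.team_alias)  # research-team
```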
@ -1574,3 +1575,44 @@ class ManagementEndpointLoggingPayload(LiteLLMBase):
exception: Optional[Any] = None exception: Optional[Any] = None
start_time: Optional[datetime] = None start_time: Optional[datetime] = None
end_time: Optional[datetime] = None end_time: Optional[datetime] = None
class ProxyException(Exception):
# NOTE: DO NOT MODIFY THIS
# This is used to map exactly to OPENAI Exceptions
def __init__(
self,
message: str,
type: str,
param: Optional[str],
code: Optional[int],
):
self.message = message
self.type = type
self.param = param
self.code = code
# rules for proxyExceptions
# Litellm router.py returns "No healthy deployment available" when there are no deployments available
# Should map to 429 errors https://github.com/BerriAI/litellm/issues/2487
if (
"No healthy deployment available" in self.message
or "No deployments available" in self.message
):
self.code = 429
def to_dict(self) -> dict:
"""Converts the ProxyException instance to a dictionary."""
return {
"message": self.message,
"type": self.type,
"param": self.param,
"code": self.code,
}
class CommonProxyErrors(enum.Enum):
db_not_connected_error = "DB not connected"
no_llm_router = "No models configured on proxy"
not_allowed_access = "Admin-only endpoint. Not allowed to access this."
not_premium_user = "You must be a LiteLLM Enterprise user to use this feature. If you have a license please set `LITELLM_LICENSE` in your env. If you want to obtain a license meet with us here: https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat"
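A short sketch of the new exception behaviour (assuming `ProxyException` is importable from `litellm.proxy._types`, where this hunk adds it; the message text below is illustrative): router "no deployments" errors are remapped to HTTP 429, and `to_dict()` yields an OpenAI-style error payload.

```python
# Sketch only - illustrative message; ProxyException is added to
# litellm.proxy._types by the hunk above.
from litellm.proxy._types import ProxyException

exc = ProxyException(
    message="No deployments available for selected model",
    type="router_error",
    param=None,
    code=500,
)

# The constructor remaps "No deployments available" / "No healthy deployment
# available" messages to 429 (see https://github.com/BerriAI/litellm/issues/2487).
assert exc.code == 429
print(exc.to_dict())
# -> {'message': 'No deployments available for selected model',
#     'type': 'router_error', 'param': None, 'code': 429}
```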

File diff suppressed because it is too large

View file

@ -0,0 +1,194 @@
from typing import Optional
from fastapi import Depends, Request, APIRouter
from fastapi import HTTPException
import copy
import litellm
from litellm._logging import verbose_proxy_logger
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
router = APIRouter(
prefix="/cache",
tags=["caching"],
)
@router.get(
"/ping",
dependencies=[Depends(user_api_key_auth)],
)
async def cache_ping():
"""
Endpoint for checking if cache can be pinged
"""
try:
litellm_cache_params = {}
specific_cache_params = {}
if litellm.cache is None:
raise HTTPException(
status_code=503, detail="Cache not initialized. litellm.cache is None"
)
for k, v in vars(litellm.cache).items():
try:
if k == "cache":
continue
litellm_cache_params[k] = str(copy.deepcopy(v))
except Exception:
litellm_cache_params[k] = "<unable to copy or convert>"
for k, v in vars(litellm.cache.cache).items():
try:
specific_cache_params[k] = str(v)
except Exception:
specific_cache_params[k] = "<unable to copy or convert>"
if litellm.cache.type == "redis":
# ping the redis cache
ping_response = await litellm.cache.ping()
verbose_proxy_logger.debug(
"/cache/ping: ping_response: " + str(ping_response)
)
# making a set cache call
# add cache does not return anything
await litellm.cache.async_add_cache(
result="test_key",
model="test-model",
messages=[{"role": "user", "content": "test from litellm"}],
)
verbose_proxy_logger.debug("/cache/ping: done with set_cache()")
return {
"status": "healthy",
"cache_type": litellm.cache.type,
"ping_response": True,
"set_cache_response": "success",
"litellm_cache_params": litellm_cache_params,
"redis_cache_params": specific_cache_params,
}
else:
return {
"status": "healthy",
"cache_type": litellm.cache.type,
"litellm_cache_params": litellm_cache_params,
}
except Exception as e:
raise HTTPException(
status_code=503,
detail=f"Service Unhealthy ({str(e)}).Cache parameters: {litellm_cache_params}.specific_cache_params: {specific_cache_params}",
)
@router.post(
"/delete",
tags=["caching"],
dependencies=[Depends(user_api_key_auth)],
)
async def cache_delete(request: Request):
"""
Endpoint for deleting a key from the cache. All responses from litellm proxy have `x-litellm-cache-key` in the headers
Parameters:
- **keys**: *Optional[List[str]]* - A list of keys to delete from the cache. Example {"keys": ["key1", "key2"]}
```shell
curl -X POST "http://0.0.0.0:4000/cache/delete" \
-H "Authorization: Bearer sk-1234" \
-d '{"keys": ["key1", "key2"]}'
```
"""
try:
if litellm.cache is None:
raise HTTPException(
status_code=503, detail="Cache not initialized. litellm.cache is None"
)
request_data = await request.json()
keys = request_data.get("keys", None)
if litellm.cache.type == "redis":
await litellm.cache.delete_cache_keys(keys=keys)
return {
"status": "success",
}
else:
raise HTTPException(
status_code=500,
detail=f"Cache type {litellm.cache.type} does not support deleting a key. only `redis` is supported",
)
except Exception as e:
raise HTTPException(
status_code=500,
detail=f"Cache Delete Failed({str(e)})",
)
@router.get(
"/redis/info",
dependencies=[Depends(user_api_key_auth)],
)
async def cache_redis_info():
"""
Endpoint for getting /redis/info
"""
try:
if litellm.cache is None:
raise HTTPException(
status_code=503, detail="Cache not initialized. litellm.cache is None"
)
if litellm.cache.type == "redis":
client_list = litellm.cache.cache.client_list()
redis_info = litellm.cache.cache.info()
num_clients = len(client_list)
return {
"num_clients": num_clients,
"clients": client_list,
"info": redis_info,
}
else:
raise HTTPException(
status_code=500,
detail=f"Cache type {litellm.cache.type} does not support flushing",
)
except Exception as e:
raise HTTPException(
status_code=503,
detail=f"Service Unhealthy ({str(e)})",
)
@router.post(
"/flushall",
tags=["caching"],
dependencies=[Depends(user_api_key_auth)],
)
async def cache_flushall():
"""
Flush all items from the cache (every cached item will be deleted).
Raises HTTPException if the cache is not initialized or if the cache type does not support flushing.
Returns a dictionary with the status of the operation.
Usage:
```
curl -X POST http://0.0.0.0:4000/cache/flushall -H "Authorization: Bearer sk-1234"
```
"""
try:
if litellm.cache is None:
raise HTTPException(
status_code=503, detail="Cache not initialized. litellm.cache is None"
)
if litellm.cache.type == "redis":
litellm.cache.cache.flushall()
return {
"status": "success",
}
else:
raise HTTPException(
status_code=500,
detail=f"Cache type {litellm.cache.type} does not support flushing",
)
except Exception as e:
raise HTTPException(
status_code=503,
detail=f"Service Unhealthy ({str(e)})",
)
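For reference, a minimal client-side sketch that exercises the cache endpoints defined above (assumptions: a LiteLLM proxy running at `http://0.0.0.0:4000` with master key `sk-1234`, a Redis cache configured, and the third-party `requests` package installed; the cache key names are placeholders):

```python
# Illustrative client calls only - base URL, key, and cache keys are assumptions.
import requests

BASE = "http://0.0.0.0:4000"
HEADERS = {"Authorization": "Bearer sk-1234"}

# Health-check the configured cache (works for redis and non-redis caches).
print(requests.get(f"{BASE}/cache/ping", headers=HEADERS).json())

# Delete specific cache keys (redis only, per the handler above).
resp = requests.post(
    f"{BASE}/cache/delete",
    headers=HEADERS,
    json={"keys": ["key1", "key2"]},  # placeholder keys
)
print(resp.json())

# Inspect redis client/server info, then flush everything (redis only).
print(requests.get(f"{BASE}/cache/redis/info", headers=HEADERS).json())
print(requests.post(f"{BASE}/cache/flushall", headers=HEADERS).json())
```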

View file

@ -1,91 +0,0 @@
from datetime import datetime
from functools import wraps
from litellm.proxy._types import UserAPIKeyAuth, ManagementEndpointLoggingPayload
from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
from fastapi import Request
def management_endpoint_wrapper(func):
"""
This wrapper does the following:
1. Log I/O, Exceptions to OTEL
2. Create an Audit log for success calls
"""
@wraps(func)
async def wrapper(*args, **kwargs):
start_time = datetime.now()
try:
result = await func(*args, **kwargs)
end_time = datetime.now()
if kwargs is None:
kwargs = {}
user_api_key_dict: UserAPIKeyAuth = (
kwargs.get("user_api_key_dict") or UserAPIKeyAuth()
)
parent_otel_span = user_api_key_dict.parent_otel_span
if parent_otel_span is not None:
from litellm.proxy.proxy_server import open_telemetry_logger
if open_telemetry_logger is not None:
_http_request: Request = kwargs.get("http_request")
if _http_request:
_route = _http_request.url.path
_request_body: dict = await _read_request_body(
request=_http_request
)
_response = dict(result) if result is not None else None
logging_payload = ManagementEndpointLoggingPayload(
route=_route,
request_data=_request_body,
response=_response,
start_time=start_time,
end_time=end_time,
)
await open_telemetry_logger.async_management_endpoint_success_hook(
logging_payload=logging_payload,
parent_otel_span=parent_otel_span,
)
return result
except Exception as e:
end_time = datetime.now()
if kwargs is None:
kwargs = {}
user_api_key_dict: UserAPIKeyAuth = (
kwargs.get("user_api_key_dict") or UserAPIKeyAuth()
)
parent_otel_span = user_api_key_dict.parent_otel_span
if parent_otel_span is not None:
from litellm.proxy.proxy_server import open_telemetry_logger
if open_telemetry_logger is not None:
_http_request: Request = kwargs.get("http_request")
if _http_request:
_route = _http_request.url.path
_request_body: dict = await _read_request_body(
request=_http_request
)
logging_payload = ManagementEndpointLoggingPayload(
route=_route,
request_data=_request_body,
response=None,
start_time=start_time,
end_time=end_time,
exception=e,
)
await open_telemetry_logger.async_management_endpoint_failure_hook(
logging_payload=logging_payload,
parent_otel_span=parent_otel_span,
)
raise e
return wrapper
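The helper removed above is a generic "time the call, then log success or failure" decorator for management endpoints. The self-contained sketch below shows the same wrap-and-log pattern in miniature; every name in it is local to the example and none of it is LiteLLM code:

```python
# Self-contained illustration of the wrap-and-log decorator pattern used by the
# removed management_endpoint_wrapper; all names here are local to this sketch.
import asyncio
from datetime import datetime
from functools import wraps


def log_endpoint(func):
    @wraps(func)
    async def wrapper(*args, **kwargs):
        start_time = datetime.now()
        try:
            result = await func(*args, **kwargs)
            print(f"{func.__name__} succeeded in {datetime.now() - start_time}")
            return result
        except Exception as e:
            print(f"{func.__name__} failed after {datetime.now() - start_time}: {e}")
            raise

    return wrapper


@log_endpoint
async def update_team(team_id: str) -> dict:
    # stand-in for a management endpoint body
    return {"team_id": team_id, "status": "updated"}


print(asyncio.run(update_team("team-abc")))
```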

View file

@ -0,0 +1,478 @@
from typing import Optional, Literal
import litellm
import os
import asyncio
import fastapi
import traceback
from datetime import datetime, timedelta
from fastapi import Depends, Request, APIRouter, Header, status
from litellm.proxy.health_check import perform_health_check
from fastapi import HTTPException
import copy
from litellm._logging import verbose_proxy_logger
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
from litellm.proxy._types import (
UserAPIKeyAuth,
ProxyException,
WebhookEvent,
CallInfo,
)
#### Health ENDPOINTS ####
router = APIRouter()
@router.get(
"/test",
tags=["health"],
dependencies=[Depends(user_api_key_auth)],
)
async def test_endpoint(request: Request):
"""
[DEPRECATED] use `/health/liveliness` instead.
A test endpoint that pings the proxy server to check if it's healthy.
Parameters:
request (Request): The incoming request.
Returns:
dict: A dictionary containing the route of the request URL.
"""
# ping the proxy server to check if its healthy
return {"route": request.url.path}
@router.get(
"/health/services",
tags=["health"],
dependencies=[Depends(user_api_key_auth)],
include_in_schema=False,
)
async def health_services_endpoint(
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
service: Literal[
"slack_budget_alerts", "langfuse", "slack", "openmeter", "webhook", "email"
] = fastapi.Query(description="Specify the service being hit."),
):
"""
Hidden endpoint.
Used by the UI to let the user check whether Slack alerting is working as expected.
"""
try:
from litellm.proxy.proxy_server import (
proxy_logging_obj,
prisma_client,
general_settings,
)
if service is None:
raise HTTPException(
status_code=400, detail={"error": "Service must be specified."}
)
if service not in [
"slack_budget_alerts",
"email",
"langfuse",
"slack",
"openmeter",
"webhook",
]:
raise HTTPException(
status_code=400,
detail={
"error": f"Service must be in list. Service={service}. List={['slack_budget_alerts']}"
},
)
if service == "openmeter":
_ = await litellm.acompletion(
model="openai/litellm-mock-response-model",
messages=[{"role": "user", "content": "Hey, how's it going?"}],
user="litellm:/health/services",
mock_response="This is a mock response",
)
return {
"status": "success",
"message": "Mock LLM request made - check openmeter.",
}
if service == "langfuse":
from litellm.integrations.langfuse import LangFuseLogger
langfuse_logger = LangFuseLogger()
langfuse_logger.Langfuse.auth_check()
_ = litellm.completion(
model="openai/litellm-mock-response-model",
messages=[{"role": "user", "content": "Hey, how's it going?"}],
user="litellm:/health/services",
mock_response="This is a mock response",
)
return {
"status": "success",
"message": "Mock LLM request made - check langfuse.",
}
if service == "webhook":
user_info = CallInfo(
token=user_api_key_dict.token or "",
spend=1,
max_budget=0,
user_id=user_api_key_dict.user_id,
key_alias=user_api_key_dict.key_alias,
team_id=user_api_key_dict.team_id,
)
await proxy_logging_obj.budget_alerts(
type="user_budget",
user_info=user_info,
)
if service == "slack" or service == "slack_budget_alerts":
if "slack" in general_settings.get("alerting", []):
# test_message = f"""\n🚨 `ProjectedLimitExceededError` 💸\n\n`Key Alias:` litellm-ui-test-alert \n`Expected Day of Error`: 28th March \n`Current Spend`: $100.00 \n`Projected Spend at end of month`: $1000.00 \n`Soft Limit`: $700"""
# check if user has opted into unique_alert_webhooks
if (
proxy_logging_obj.slack_alerting_instance.alert_to_webhook_url
is not None
):
for (
alert_type
) in proxy_logging_obj.slack_alerting_instance.alert_to_webhook_url:
"""
"llm_exceptions",
"llm_too_slow",
"llm_requests_hanging",
"budget_alerts",
"db_exceptions",
"""
# only test alert if it's in active alert types
if (
proxy_logging_obj.slack_alerting_instance.alert_types
is not None
and alert_type
not in proxy_logging_obj.slack_alerting_instance.alert_types
):
continue
test_message = "default test message"
if alert_type == "llm_exceptions":
test_message = f"LLM Exception test alert"
elif alert_type == "llm_too_slow":
test_message = f"LLM Too Slow test alert"
elif alert_type == "llm_requests_hanging":
test_message = f"LLM Requests Hanging test alert"
elif alert_type == "budget_alerts":
test_message = f"Budget Alert test alert"
elif alert_type == "db_exceptions":
test_message = f"DB Exception test alert"
elif alert_type == "outage_alerts":
test_message = f"Outage Alert Exception test alert"
elif alert_type == "daily_reports":
test_message = f"Daily Reports test alert"
await proxy_logging_obj.alerting_handler(
message=test_message, level="Low", alert_type=alert_type
)
else:
await proxy_logging_obj.alerting_handler(
message="This is a test slack alert message",
level="Low",
alert_type="budget_alerts",
)
if prisma_client is not None:
asyncio.create_task(
proxy_logging_obj.slack_alerting_instance.send_monthly_spend_report()
)
asyncio.create_task(
proxy_logging_obj.slack_alerting_instance.send_weekly_spend_report()
)
alert_types = (
proxy_logging_obj.slack_alerting_instance.alert_types or []
)
alert_types = list(alert_types)
return {
"status": "success",
"alert_types": alert_types,
"message": "Mock Slack Alert sent, verify Slack Alert Received on your channel",
}
else:
raise HTTPException(
status_code=422,
detail={
"error": '"{}" not in proxy config: general_settings. Unable to test this.'.format(
service
)
},
)
if service == "email":
webhook_event = WebhookEvent(
event="key_created",
event_group="key",
event_message="Test Email Alert",
token=user_api_key_dict.token or "",
key_alias="Email Test key (This is only a test alert key. DO NOT USE THIS IN PRODUCTION.)",
spend=0,
max_budget=0,
user_id=user_api_key_dict.user_id,
user_email=os.getenv("TEST_EMAIL_ADDRESS"),
team_id=user_api_key_dict.team_id,
)
# use create task - this can take 10 seconds. don't keep ui users waiting for notification to check their email
asyncio.create_task(
proxy_logging_obj.slack_alerting_instance.send_key_created_or_user_invited_email(
webhook_event=webhook_event
)
)
return {
"status": "success",
"message": "Mock Email Alert sent, verify Email Alert Received",
}
except Exception as e:
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.health_services_endpoint(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"),
type="auth_error",
param=getattr(e, "param", "None"),
code=getattr(e, "status_code", status.HTTP_500_INTERNAL_SERVER_ERROR),
)
elif isinstance(e, ProxyException):
raise e
raise ProxyException(
message="Authentication Error, " + str(e),
type="auth_error",
param=getattr(e, "param", "None"),
code=status.HTTP_500_INTERNAL_SERVER_ERROR,
)
@router.get("/health", tags=["health"], dependencies=[Depends(user_api_key_auth)])
async def health_endpoint(
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
model: Optional[str] = fastapi.Query(
None, description="Specify the model name (optional)"
),
):
"""
🚨 USE `/health/liveliness` to health check the proxy 🚨
See more 👉 https://docs.litellm.ai/docs/proxy/health
Check the health of all the endpoints in config.yaml
To run health checks in the background, add this to config.yaml:
```
general_settings:
# ... other settings
background_health_checks: True
```
else, the health checks will be run on models when /health is called.
"""
from litellm.proxy.proxy_server import (
health_check_results,
use_background_health_checks,
user_model,
llm_model_list,
)
try:
if llm_model_list is None:
# if no router set, check if user set a model using litellm --model ollama/llama2
if user_model is not None:
healthy_endpoints, unhealthy_endpoints = await perform_health_check(
model_list=[], cli_model=user_model
)
return {
"healthy_endpoints": healthy_endpoints,
"unhealthy_endpoints": unhealthy_endpoints,
"healthy_count": len(healthy_endpoints),
"unhealthy_count": len(unhealthy_endpoints),
}
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail={"error": "Model list not initialized"},
)
_llm_model_list = copy.deepcopy(llm_model_list)
### FILTER MODELS FOR ONLY THOSE USER HAS ACCESS TO ###
if len(user_api_key_dict.models) > 0:
allowed_model_names = user_api_key_dict.models
else:
allowed_model_names = [] #
if use_background_health_checks:
return health_check_results
else:
healthy_endpoints, unhealthy_endpoints = await perform_health_check(
_llm_model_list, model
)
return {
"healthy_endpoints": healthy_endpoints,
"unhealthy_endpoints": unhealthy_endpoints,
"healthy_count": len(healthy_endpoints),
"unhealthy_count": len(unhealthy_endpoints),
}
except Exception as e:
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.py::health_endpoint(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
raise e
db_health_cache = {"status": "unknown", "last_updated": datetime.now()}
def _db_health_readiness_check():
from litellm.proxy.proxy_server import prisma_client
global db_health_cache
# Note - Intentionally don't try/except this so it raises an exception when it fails
# if timedelta is less than 2 minutes return DB Status
time_diff = datetime.now() - db_health_cache["last_updated"]
if db_health_cache["status"] != "unknown" and time_diff < timedelta(minutes=2):
return db_health_cache
prisma_client.health_check()
db_health_cache = {"status": "connected", "last_updated": datetime.now()}
return db_health_cache
@router.get(
"/active/callbacks",
tags=["health"],
dependencies=[Depends(user_api_key_auth)],
)
async def active_callbacks():
"""
Returns a list of active callbacks on litellm.callbacks, litellm.input_callback, litellm.failure_callback, litellm.success_callback
"""
from litellm.proxy.proxy_server import proxy_logging_obj, general_settings
_alerting = str(general_settings.get("alerting"))
# get success callbacks
litellm_callbacks = [str(x) for x in litellm.callbacks]
litellm_input_callbacks = [str(x) for x in litellm.input_callback]
litellm_failure_callbacks = [str(x) for x in litellm.failure_callback]
litellm_success_callbacks = [str(x) for x in litellm.success_callback]
litellm_async_success_callbacks = [str(x) for x in litellm._async_success_callback]
litellm_async_failure_callbacks = [str(x) for x in litellm._async_failure_callback]
litellm_async_input_callbacks = [str(x) for x in litellm._async_input_callback]
all_litellm_callbacks = (
litellm_callbacks
+ litellm_input_callbacks
+ litellm_failure_callbacks
+ litellm_success_callbacks
+ litellm_async_success_callbacks
+ litellm_async_failure_callbacks
+ litellm_async_input_callbacks
)
alerting = proxy_logging_obj.alerting
_num_alerting = 0
if alerting and isinstance(alerting, list):
_num_alerting = len(alerting)
return {
"alerting": _alerting,
"litellm.callbacks": litellm_callbacks,
"litellm.input_callback": litellm_input_callbacks,
"litellm.failure_callback": litellm_failure_callbacks,
"litellm.success_callback": litellm_success_callbacks,
"litellm._async_success_callback": litellm_async_success_callbacks,
"litellm._async_failure_callback": litellm_async_failure_callbacks,
"litellm._async_input_callback": litellm_async_input_callbacks,
"all_litellm_callbacks": all_litellm_callbacks,
"num_callbacks": len(all_litellm_callbacks),
"num_alerting": _num_alerting,
}
@router.get(
"/health/readiness",
tags=["health"],
dependencies=[Depends(user_api_key_auth)],
)
async def health_readiness():
"""
Endpoint for checking whether the worker can receive requests (readiness probe).
"""
from litellm.proxy.proxy_server import proxy_logging_obj, prisma_client, version
try:
# get success callback
success_callback_names = []
try:
# this was returning a JSON of the values in some of the callbacks
# all we need is the callback name, hence we do str(callback)
success_callback_names = [str(x) for x in litellm.success_callback]
except:
# don't let this block the /health/readiness response, if we can't convert to str -> return litellm.success_callback
success_callback_names = litellm.success_callback
# check Cache
cache_type = None
if litellm.cache is not None:
from litellm.caching import RedisSemanticCache
cache_type = litellm.cache.type
if isinstance(litellm.cache.cache, RedisSemanticCache):
# ping the cache
# TODO: @ishaan-jaff - we should probably not ping the cache on every /health/readiness check
try:
index_info = await litellm.cache.cache._index_info()
except Exception as e:
index_info = "index does not exist - error: " + str(e)
cache_type = {"type": cache_type, "index_info": index_info}
# check DB
if prisma_client is not None: # if db passed in, check if it's connected
db_health_status = _db_health_readiness_check()
return {
"status": "healthy",
"db": "connected",
"cache": cache_type,
"litellm_version": version,
"success_callbacks": success_callback_names,
**db_health_status,
}
else:
return {
"status": "healthy",
"db": "Not connected",
"cache": cache_type,
"litellm_version": version,
"success_callbacks": success_callback_names,
}
except Exception as e:
raise HTTPException(status_code=503, detail=f"Service Unhealthy ({str(e)})")
@router.get(
"/health/liveliness",
tags=["health"],
dependencies=[Depends(user_api_key_auth)],
)
async def health_liveliness():
"""
Endpoint for checking whether the worker is alive (liveness probe).
"""
return "I'm alive!"

View file

@ -0,0 +1,926 @@
"""
KEY MANAGEMENT
All /key management endpoints
/key/generate
/key/info
/key/update
/key/delete
"""
import copy
import json
import uuid
import re
import traceback
import asyncio
import secrets
from typing import Optional, List
import fastapi
from fastapi import Depends, Request, APIRouter, Header, status
from fastapi import HTTPException
import litellm
from datetime import datetime, timedelta, timezone
from litellm._logging import verbose_proxy_logger
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
from litellm.proxy._types import *
router = APIRouter()
@router.post(
"/key/generate",
tags=["key management"],
dependencies=[Depends(user_api_key_auth)],
response_model=GenerateKeyResponse,
)
async def generate_key_fn(
data: GenerateKeyRequest,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
litellm_changed_by: Optional[str] = Header(
None,
description="The litellm-changed-by header enables tracking of actions performed by authorized users on behalf of other users, providing an audit trail for accountability",
),
):
"""
Generate an API key based on the provided data.
Docs: https://docs.litellm.ai/docs/proxy/virtual_keys
Parameters:
- duration: Optional[str] - Specify the length of time the token is valid for. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d").
- key_alias: Optional[str] - User defined key alias
- team_id: Optional[str] - The team id of the key
- user_id: Optional[str] - The user id of the key
- models: Optional[list] - Model_name's a user is allowed to call. (if empty, key is allowed to call all models)
- aliases: Optional[dict] - Any alias mappings, on top of anything in the config.yaml model list. - https://docs.litellm.ai/docs/proxy/virtual_keys#managing-auth---upgradedowngrade-models
- config: Optional[dict] - any key-specific configs, overrides config in config.yaml
- spend: Optional[int] - Amount spent by key. Default is 0. Will be updated by proxy whenever key is used. https://docs.litellm.ai/docs/proxy/virtual_keys#managing-auth---tracking-spend
- send_invite_email: Optional[bool] - Whether to send an invite email to the user_id, with the generate key
- max_budget: Optional[float] - Specify max budget for a given key.
- max_parallel_requests: Optional[int] - Rate limit a user based on the number of parallel requests. Raises 429 error, if user's parallel requests > x.
- metadata: Optional[dict] - Metadata for key, store information for key. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }
- permissions: Optional[dict] - key-specific permissions. Currently just used for turning off pii masking (if connected). Example - {"pii": false}
- model_max_budget: Optional[dict] - key-specific model budget in USD. Example - {"text-davinci-002": 0.5, "gpt-3.5-turbo": 0.5}. IF null or {} then no model specific budget.
Examples:
1. Allow users to turn on/off pii masking
```bash
curl --location 'http://0.0.0.0:8000/key/generate' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{
"permissions": {"allow_pii_controls": true}
}'
```
Returns:
- key: (str) The generated api key
- expires: (datetime) Datetime object for when key expires.
- user_id: (str) Unique user id - used for tracking spend across multiple keys for same user id.
"""
try:
from litellm.proxy.proxy_server import (
user_custom_key_generate,
prisma_client,
litellm_proxy_admin_name,
general_settings,
proxy_logging_obj,
create_audit_log_for_update,
)
verbose_proxy_logger.debug("entered /key/generate")
if user_custom_key_generate is not None:
result = await user_custom_key_generate(data)
decision = result.get("decision", True)
message = result.get("message", "Authentication Failed - Custom Auth Rule")
if not decision:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN, detail=message
)
# check if user set default key/generate params on config.yaml
if litellm.default_key_generate_params is not None:
for elem in data:
key, value = elem
if value is None and key in [
"max_budget",
"user_id",
"team_id",
"max_parallel_requests",
"tpm_limit",
"rpm_limit",
"budget_duration",
]:
setattr(
data, key, litellm.default_key_generate_params.get(key, None)
)
elif key == "models" and value == []:
setattr(data, key, litellm.default_key_generate_params.get(key, []))
elif key == "metadata" and value == {}:
setattr(data, key, litellm.default_key_generate_params.get(key, {}))
# check if user set default key/generate params on config.yaml
if litellm.upperbound_key_generate_params is not None:
for elem in data:
# if key in litellm.upperbound_key_generate_params, use the min of value and litellm.upperbound_key_generate_params[key]
key, value = elem
if (
value is not None
and getattr(litellm.upperbound_key_generate_params, key, None)
is not None
):
# if value is float/int
if key in [
"max_budget",
"max_parallel_requests",
"tpm_limit",
"rpm_limit",
]:
if value > getattr(litellm.upperbound_key_generate_params, key):
raise HTTPException(
status_code=400,
detail={
"error": f"{key} is over max limit set in config - user_value={value}; max_value={getattr(litellm.upperbound_key_generate_params, key)}"
},
)
elif key == "budget_duration":
# budget durations are strings like 30s, 30m, 30h, 30d
# compare the duration in seconds and max duration in seconds
upperbound_budget_duration = _duration_in_seconds(
duration=getattr(
litellm.upperbound_key_generate_params, key
)
)
user_set_budget_duration = _duration_in_seconds(duration=value)
if user_set_budget_duration > upperbound_budget_duration:
raise HTTPException(
status_code=400,
detail={
"error": f"Budget duration is over max limit set in config - user_value={user_set_budget_duration}; max_value={upperbound_budget_duration}"
},
)
# TODO: @ishaan-jaff: Migrate all budget tracking to use LiteLLM_BudgetTable
_budget_id = None
if prisma_client is not None and data.soft_budget is not None:
# create the Budget Row for the LiteLLM Verification Token
budget_row = LiteLLM_BudgetTable(
soft_budget=data.soft_budget,
model_max_budget=data.model_max_budget or {},
)
new_budget = prisma_client.jsonify_object(
budget_row.json(exclude_none=True)
)
_budget = await prisma_client.db.litellm_budgettable.create(
data={
**new_budget, # type: ignore
"created_by": user_api_key_dict.user_id or litellm_proxy_admin_name,
"updated_by": user_api_key_dict.user_id or litellm_proxy_admin_name,
}
)
_budget_id = getattr(_budget, "budget_id", None)
data_json = data.json() # type: ignore
# if max_budget is passed to /key/generate, use it as key_max_budget, since generate_key_helper_fn is also used to create new users
if "max_budget" in data_json:
data_json["key_max_budget"] = data_json.pop("max_budget", None)
if _budget_id is not None:
data_json["budget_id"] = _budget_id
if "budget_duration" in data_json:
data_json["key_budget_duration"] = data_json.pop("budget_duration", None)
response = await generate_key_helper_fn(
request_type="key", **data_json, table_name="key"
)
response["soft_budget"] = (
data.soft_budget
) # include the user-input soft budget in the response
if data.send_invite_email is True:
if "email" not in general_settings.get("alerting", []):
raise ValueError(
"Email alerting not setup on config.yaml. Please set `alerting=['email']. \nDocs: https://docs.litellm.ai/docs/proxy/email`"
)
event = WebhookEvent(
event="key_created",
event_group="key",
event_message=f"API Key Created",
token=response.get("token", ""),
spend=response.get("spend", 0.0),
max_budget=response.get("max_budget", 0.0),
user_id=response.get("user_id", None),
team_id=response.get("team_id", "Default Team"),
key_alias=response.get("key_alias", None),
)
# If user configured email alerting - send an Email letting their end-user know the key was created
asyncio.create_task(
proxy_logging_obj.slack_alerting_instance.send_key_created_or_user_invited_email(
webhook_event=event,
)
)
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
if litellm.store_audit_logs is True:
_updated_values = json.dumps(response, default=str)
asyncio.create_task(
create_audit_log_for_update(
request_data=LiteLLM_AuditLogs(
id=str(uuid.uuid4()),
updated_at=datetime.now(timezone.utc),
changed_by=litellm_changed_by
or user_api_key_dict.user_id
or litellm_proxy_admin_name,
changed_by_api_key=user_api_key_dict.api_key,
table_name=LitellmTableNames.KEY_TABLE_NAME,
object_id=response.get("token_id", ""),
action="created",
updated_values=_updated_values,
before_value=None,
)
)
)
return GenerateKeyResponse(**response)
except Exception as e:
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.generate_key_fn(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"),
type="auth_error",
param=getattr(e, "param", "None"),
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
)
elif isinstance(e, ProxyException):
raise e
raise ProxyException(
message="Authentication Error, " + str(e),
type="auth_error",
param=getattr(e, "param", "None"),
code=status.HTTP_400_BAD_REQUEST,
)
@router.post(
"/key/update", tags=["key management"], dependencies=[Depends(user_api_key_auth)]
)
async def update_key_fn(
request: Request,
data: UpdateKeyRequest,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
litellm_changed_by: Optional[str] = Header(
None,
description="The litellm-changed-by header enables tracking of actions performed by authorized users on behalf of other users, providing an audit trail for accountability",
),
):
"""
Update an existing key
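Example (illustrative request - the key value and fields shown are placeholders):
```bash
curl --location 'http://0.0.0.0:8000/key/update' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{
"key": "sk-1234",
"models": ["gpt-3.5-turbo"],
"metadata": {"team": "core-infra"}
}'
```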
"""
from litellm.proxy.proxy_server import (
user_custom_key_generate,
prisma_client,
litellm_proxy_admin_name,
general_settings,
proxy_logging_obj,
create_audit_log_for_update,
user_api_key_cache,
)
try:
data_json: dict = data.json()
key = data_json.pop("key")
# get the row from db
if prisma_client is None:
raise Exception("Not connected to DB!")
existing_key_row = await prisma_client.get_data(
token=data.key, table_name="key", query_type="find_unique"
)
if existing_key_row is None:
raise HTTPException(
status_code=404,
detail={"error": f"Team not found, passed team_id={data.team_id}"},
)
# get non default values for key
non_default_values = {}
for k, v in data_json.items():
if v is not None and v not in (
[],
{},
0,
): # models default to [], spend defaults to 0, we should not reset these values
non_default_values[k] = v
if "duration" in non_default_values:
duration = non_default_values.pop("duration")
duration_s = _duration_in_seconds(duration=duration)
expires = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
non_default_values["expires"] = expires
response = await prisma_client.update_data(
token=key, data={**non_default_values, "token": key}
)
# Delete - key from cache, since it's been updated!
# key updated - a new model could have been added to this key. it should not block requests after this is done
user_api_key_cache.delete_cache(key)
hashed_token = hash_token(key)
user_api_key_cache.delete_cache(hashed_token)
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
if litellm.store_audit_logs is True:
_updated_values = json.dumps(data_json, default=str)
_before_value = existing_key_row.json(exclude_none=True)
_before_value = json.dumps(_before_value, default=str)
asyncio.create_task(
create_audit_log_for_update(
request_data=LiteLLM_AuditLogs(
id=str(uuid.uuid4()),
updated_at=datetime.now(timezone.utc),
changed_by=litellm_changed_by
or user_api_key_dict.user_id
or litellm_proxy_admin_name,
changed_by_api_key=user_api_key_dict.api_key,
table_name=LitellmTableNames.KEY_TABLE_NAME,
object_id=data.key,
action="updated",
updated_values=_updated_values,
before_value=_before_value,
)
)
)
return {"key": key, **response["data"]}
# update based on remaining passed in values
except Exception as e:
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"),
type="auth_error",
param=getattr(e, "param", "None"),
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
)
elif isinstance(e, ProxyException):
raise e
raise ProxyException(
message="Authentication Error, " + str(e),
type="auth_error",
param=getattr(e, "param", "None"),
code=status.HTTP_400_BAD_REQUEST,
)
@router.post(
"/key/delete", tags=["key management"], dependencies=[Depends(user_api_key_auth)]
)
async def delete_key_fn(
data: KeyRequest,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
litellm_changed_by: Optional[str] = Header(
None,
description="The litellm-changed-by header enables tracking of actions performed by authorized users on behalf of other users, providing an audit trail for accountability",
),
):
"""
Delete a key from the key management system.
Parameters:
- keys (List[str]): A list of keys or hashed keys to delete. Example {"keys": ["sk-QWrxEynunsNpV1zT48HIrw", "837e17519f44683334df5291321d97b8bf1098cd490e49e215f6fea935aa28be"]}
Returns:
- deleted_keys (List[str]): A list of deleted keys. Example {"deleted_keys": ["sk-QWrxEynunsNpV1zT48HIrw", "837e17519f44683334df5291321d97b8bf1098cd490e49e215f6fea935aa28be"]}
Raises:
HTTPException: If an error occurs during key deletion.
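Example (illustrative request, using the `keys` parameter documented above):
```bash
curl --location 'http://0.0.0.0:8000/key/delete' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{
"keys": ["sk-QWrxEynunsNpV1zT48HIrw"]
}'
```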
"""
try:
from litellm.proxy.proxy_server import (
user_custom_key_generate,
prisma_client,
litellm_proxy_admin_name,
general_settings,
proxy_logging_obj,
create_audit_log_for_update,
user_api_key_cache,
)
keys = data.keys
if len(keys) == 0:
raise ProxyException(
message=f"No keys provided, passed in: keys={keys}",
type="auth_error",
param="keys",
code=status.HTTP_400_BAD_REQUEST,
)
## only allow user to delete keys they own
user_id = user_api_key_dict.user_id
verbose_proxy_logger.debug(
f"user_api_key_dict.user_role: {user_api_key_dict.user_role}"
)
if (
user_api_key_dict.user_role is not None
and user_api_key_dict.user_role == LitellmUserRoles.PROXY_ADMIN
):
user_id = None # unless they're admin
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
# we do this after the first for loop, since first for loop is for validation. we only want this inserted after validation passes
if litellm.store_audit_logs is True:
# make an audit log for each key deleted
for key in data.keys:
key_row = await prisma_client.get_data( # type: ignore
token=key, table_name="key", query_type="find_unique"
)
key_row = key_row.json(exclude_none=True)
_key_row = json.dumps(key_row, default=str)
asyncio.create_task(
create_audit_log_for_update(
request_data=LiteLLM_AuditLogs(
id=str(uuid.uuid4()),
updated_at=datetime.now(timezone.utc),
changed_by=litellm_changed_by
or user_api_key_dict.user_id
or litellm_proxy_admin_name,
changed_by_api_key=user_api_key_dict.api_key,
table_name=LitellmTableNames.KEY_TABLE_NAME,
object_id=key,
action="deleted",
updated_values="{}",
before_value=_key_row,
)
)
)
number_deleted_keys = await delete_verification_token(
tokens=keys, user_id=user_id
)
verbose_proxy_logger.debug(
f"/key/delete - deleted_keys={number_deleted_keys['deleted_keys']}"
)
try:
assert len(keys) == number_deleted_keys["deleted_keys"]
except Exception as e:
raise HTTPException(
status_code=400,
detail={
"error": f"Not all keys passed in were deleted. This probably means you don't have access to delete all the keys passed in. Keys passed in={len(keys)}, Deleted keys ={number_deleted_keys['deleted_keys']}"
},
)
for key in keys:
user_api_key_cache.delete_cache(key)
# remove hash token from cache
hashed_token = hash_token(key)
user_api_key_cache.delete_cache(hashed_token)
verbose_proxy_logger.debug(
f"/keys/delete - cache after delete: {user_api_key_cache.in_memory_cache.cache_dict}"
)
return {"deleted_keys": keys}
except Exception as e:
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"),
type="auth_error",
param=getattr(e, "param", "None"),
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
)
elif isinstance(e, ProxyException):
raise e
raise ProxyException(
message="Authentication Error, " + str(e),
type="auth_error",
param=getattr(e, "param", "None"),
code=status.HTTP_400_BAD_REQUEST,
)
@router.post(
"/v2/key/info", tags=["key management"], dependencies=[Depends(user_api_key_auth)]
)
async def info_key_fn_v2(
data: Optional[KeyRequest] = None,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
"""
Retrieve information about a list of keys.
**New endpoint**. Currently admin only.
Parameters:
keys: Optional[list] = body parameter representing the key(s) in the request
user_api_key_dict: UserAPIKeyAuth = Dependency representing the user's API key
Returns:
Dict containing the key and its associated information
Example Curl:
```
curl -X GET "http://0.0.0.0:8000/key/info" \
-H "Authorization: Bearer sk-1234" \
-d {"keys": ["sk-1", "sk-2", "sk-3"]}
```
"""
from litellm.proxy.proxy_server import (
user_custom_key_generate,
prisma_client,
litellm_proxy_admin_name,
general_settings,
proxy_logging_obj,
create_audit_log_for_update,
)
try:
if prisma_client is None:
raise Exception(
f"Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
)
if data is None:
raise HTTPException(
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
detail={"message": "Malformed request. No keys passed in."},
)
key_info = await prisma_client.get_data(
token=data.keys, table_name="key", query_type="find_all"
)
filtered_key_info = []
for k in key_info:
try:
k = k.model_dump() # noqa
except:
# if using pydantic v1
k = k.dict()
filtered_key_info.append(k)
return {"key": data.keys, "info": filtered_key_info}
except Exception as e:
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"),
type="auth_error",
param=getattr(e, "param", "None"),
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
)
elif isinstance(e, ProxyException):
raise e
raise ProxyException(
message="Authentication Error, " + str(e),
type="auth_error",
param=getattr(e, "param", "None"),
code=status.HTTP_400_BAD_REQUEST,
)
@router.get(
"/key/info", tags=["key management"], dependencies=[Depends(user_api_key_auth)]
)
async def info_key_fn(
key: Optional[str] = fastapi.Query(
default=None, description="Key in the request parameters"
),
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
"""
Retrieve information about a key.
Parameters:
key: Optional[str] = Query parameter representing the key in the request
user_api_key_dict: UserAPIKeyAuth = Dependency representing the user's API key
Returns:
Dict containing the key and its associated information
Example Curl:
```
curl -X GET "http://0.0.0.0:8000/key/info?key=sk-02Wr4IAlN3NvPXvL5JVvDA" \
-H "Authorization: Bearer sk-1234"
```
Example Curl - if no key is passed, it will use the Key Passed in Authorization Header
```
curl -X GET "http://0.0.0.0:8000/key/info" \
-H "Authorization: Bearer sk-02Wr4IAlN3NvPXvL5JVvDA"
```
"""
from litellm.proxy.proxy_server import (
user_custom_key_generate,
prisma_client,
litellm_proxy_admin_name,
general_settings,
proxy_logging_obj,
create_audit_log_for_update,
)
try:
if prisma_client is None:
raise Exception(
f"Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
)
if key is None:
key = user_api_key_dict.api_key
key_info = await prisma_client.get_data(token=key)
## REMOVE HASHED TOKEN INFO BEFORE RETURNING ##
try:
key_info = key_info.model_dump() # noqa
except:
# if using pydantic v1
key_info = key_info.dict()
key_info.pop("token")
return {"key": key, "info": key_info}
except Exception as e:
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"),
type="auth_error",
param=getattr(e, "param", "None"),
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
)
elif isinstance(e, ProxyException):
raise e
raise ProxyException(
message="Authentication Error, " + str(e),
type="auth_error",
param=getattr(e, "param", "None"),
code=status.HTTP_400_BAD_REQUEST,
)
def _duration_in_seconds(duration: str):
match = re.match(r"(\d+)([smhd]?)", duration)
if not match:
raise ValueError("Invalid duration format")
value, unit = match.groups()
value = int(value)
if unit == "s":
return value
elif unit == "m":
return value * 60
elif unit == "h":
return value * 3600
elif unit == "d":
return value * 86400
else:
raise ValueError("Unsupported duration unit")
async def generate_key_helper_fn(
request_type: Literal[
"user", "key"
], # identifies if this request is from /user/new or /key/generate
duration: Optional[str],
models: list,
aliases: dict,
config: dict,
spend: float,
key_max_budget: Optional[float] = None, # key_max_budget is used to Budget Per key
key_budget_duration: Optional[str] = None,
budget_id: Optional[float] = None, # budget id <-> LiteLLM_BudgetTable
soft_budget: Optional[
float
] = None, # soft_budget is used to set soft Budgets Per user
max_budget: Optional[float] = None, # max_budget is used to Budget Per user
budget_duration: Optional[str] = None,  # how often the user-level budget resets
token: Optional[str] = None,
user_id: Optional[str] = None,
team_id: Optional[str] = None,
user_email: Optional[str] = None,
user_role: Optional[str] = None,
max_parallel_requests: Optional[int] = None,
metadata: Optional[dict] = {},
tpm_limit: Optional[int] = None,
rpm_limit: Optional[int] = None,
query_type: Literal["insert_data", "update_data"] = "insert_data",
update_key_values: Optional[dict] = None,
key_alias: Optional[str] = None,
allowed_cache_controls: Optional[list] = [],
permissions: Optional[dict] = {},
model_max_budget: Optional[dict] = {},
teams: Optional[list] = None,
organization_id: Optional[str] = None,
table_name: Optional[Literal["key", "user"]] = None,
send_invite_email: Optional[bool] = None,
):
from litellm.proxy.proxy_server import (
prisma_client,
custom_db_client,
litellm_proxy_budget_name,
premium_user,
)
if prisma_client is None and custom_db_client is None:
raise Exception(
f"Connect Proxy to database to generate keys - https://docs.litellm.ai/docs/proxy/virtual_keys "
)
if token is None:
token = f"sk-{secrets.token_urlsafe(16)}"
if duration is None: # allow tokens that never expire
expires = None
else:
duration_s = _duration_in_seconds(duration=duration)
expires = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
if key_budget_duration is None: # one-time budget
key_reset_at = None
else:
duration_s = _duration_in_seconds(duration=key_budget_duration)
key_reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
if budget_duration is None: # one-time budget
reset_at = None
else:
duration_s = _duration_in_seconds(duration=budget_duration)
reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
aliases_json = json.dumps(aliases)
config_json = json.dumps(config)
permissions_json = json.dumps(permissions)
metadata_json = json.dumps(metadata)
model_max_budget_json = json.dumps(model_max_budget)
user_role = user_role
tpm_limit = tpm_limit
rpm_limit = rpm_limit
allowed_cache_controls = allowed_cache_controls
try:
# Create a new verification token (you may want to enhance this logic based on your needs)
user_data = {
"max_budget": max_budget,
"user_email": user_email,
"user_id": user_id,
"team_id": team_id,
"organization_id": organization_id,
"user_role": user_role,
"spend": spend,
"models": models,
"max_parallel_requests": max_parallel_requests,
"tpm_limit": tpm_limit,
"rpm_limit": rpm_limit,
"budget_duration": budget_duration,
"budget_reset_at": reset_at,
"allowed_cache_controls": allowed_cache_controls,
}
if teams is not None:
user_data["teams"] = teams
key_data = {
"token": token,
"key_alias": key_alias,
"expires": expires,
"models": models,
"aliases": aliases_json,
"config": config_json,
"spend": spend,
"max_budget": key_max_budget,
"user_id": user_id,
"team_id": team_id,
"max_parallel_requests": max_parallel_requests,
"metadata": metadata_json,
"tpm_limit": tpm_limit,
"rpm_limit": rpm_limit,
"budget_duration": key_budget_duration,
"budget_reset_at": key_reset_at,
"allowed_cache_controls": allowed_cache_controls,
"permissions": permissions_json,
"model_max_budget": model_max_budget_json,
"budget_id": budget_id,
}
if (
litellm.get_secret("DISABLE_KEY_NAME", False) == True
): # allow user to disable storing abbreviated key name (shown in UI, to help figure out which key spent how much)
pass
else:
key_data["key_name"] = f"sk-...{token[-4:]}"
saved_token = copy.deepcopy(key_data)
if isinstance(saved_token["aliases"], str):
saved_token["aliases"] = json.loads(saved_token["aliases"])
if isinstance(saved_token["config"], str):
saved_token["config"] = json.loads(saved_token["config"])
if isinstance(saved_token["metadata"], str):
saved_token["metadata"] = json.loads(saved_token["metadata"])
if isinstance(saved_token["permissions"], str):
if (
"get_spend_routes" in saved_token["permissions"]
and premium_user != True
):
raise ValueError(
"get_spend_routes permission is only available for LiteLLM Enterprise users"
)
saved_token["permissions"] = json.loads(saved_token["permissions"])
if isinstance(saved_token["model_max_budget"], str):
saved_token["model_max_budget"] = json.loads(
saved_token["model_max_budget"]
)
if saved_token.get("expires", None) is not None and isinstance(
saved_token["expires"], datetime
):
saved_token["expires"] = saved_token["expires"].isoformat()
if prisma_client is not None:
if (
table_name is None or table_name == "user"
): # do not auto-create users for `/key/generate`
## CREATE USER (If necessary)
if query_type == "insert_data":
user_row = await prisma_client.insert_data(
data=user_data, table_name="user"
)
## use default user model list if no key-specific model list provided
if len(user_row.models) > 0 and len(key_data["models"]) == 0: # type: ignore
key_data["models"] = user_row.models
elif query_type == "update_data":
user_row = await prisma_client.update_data(
data=user_data,
table_name="user",
update_key_values=update_key_values,
)
if user_id == litellm_proxy_budget_name or (
table_name is not None and table_name == "user"
):
# do not create a key for litellm_proxy_budget_name or if table name is set to just 'user'
# we only need to ensure this exists in the user table
# the LiteLLM_VerificationToken table will increase in size if we don't do this check
return user_data
## CREATE KEY
verbose_proxy_logger.debug("prisma_client: Creating Key= %s", key_data)
create_key_response = await prisma_client.insert_data(
data=key_data, table_name="key"
)
key_data["token_id"] = getattr(create_key_response, "token", None)
except Exception as e:
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.generate_key_helper_fn(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise e
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail={"error": "Internal Server Error."},
)
# Add budget related info in key_data - this ensures it's returned
key_data["budget_id"] = budget_id
if request_type == "user":
# if this is a /user/new request, update key_data with the user_data fields
key_data.update(user_data)
return key_data
async def delete_verification_token(tokens: List, user_id: Optional[str] = None):
from litellm.proxy.proxy_server import prisma_client, litellm_proxy_admin_name
try:
if prisma_client:
# Assuming 'db' is your Prisma Client instance
# check if admin making request - don't filter by user-id
if user_id == litellm_proxy_admin_name:
deleted_tokens = await prisma_client.delete_data(tokens=tokens)
# else
else:
deleted_tokens = await prisma_client.delete_data(
tokens=tokens, user_id=user_id
)
_num_deleted_tokens = deleted_tokens.get("deleted_keys", 0)
if _num_deleted_tokens != len(tokens):
raise Exception(
"Failed to delete all tokens. Tried to delete tokens that don't belong to user: "
+ str(user_id)
)
else:
raise Exception("DB not connected. prisma_client is None")
except Exception as e:
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.delete_verification_token(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
raise e
return deleted_tokens


@@ -0,0 +1,899 @@
from typing import Optional, List
import fastapi
from fastapi import Depends, Request, APIRouter, Header, status
from fastapi import HTTPException
import copy
import json
import uuid
import litellm
import asyncio
from datetime import datetime, timedelta, timezone
from litellm._logging import verbose_proxy_logger
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
from litellm.proxy._types import (
UserAPIKeyAuth,
LiteLLM_TeamTable,
LiteLLM_ModelTable,
LitellmUserRoles,
NewTeamRequest,
TeamMemberAddRequest,
UpdateTeamRequest,
BlockTeamRequest,
DeleteTeamRequest,
Member,
LitellmTableNames,
LiteLLM_AuditLogs,
TeamMemberDeleteRequest,
ProxyException,
CommonProxyErrors,
)
from litellm.proxy.management_helpers.utils import (
add_new_member,
management_endpoint_wrapper,
)
router = APIRouter()
#### TEAM MANAGEMENT ####
@router.post(
"/team/new",
tags=["team management"],
dependencies=[Depends(user_api_key_auth)],
response_model=LiteLLM_TeamTable,
)
@management_endpoint_wrapper
async def new_team(
data: NewTeamRequest,
http_request: Request,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
litellm_changed_by: Optional[str] = Header(
None,
description="The litellm-changed-by header enables tracking of actions performed by authorized users on behalf of other users, providing an audit trail for accountability",
),
):
"""
Allow users to create a new team. Apply user permissions to their team.
👉 [Detailed Doc on setting team budgets](https://docs.litellm.ai/docs/proxy/team_budgets)
Parameters:
- team_alias: Optional[str] - User defined team alias
- team_id: Optional[str] - The team id of the user. If none passed, we'll generate it.
- members_with_roles: List[{"role": "admin" or "user", "user_id": "<user-id>"}] - A list of users and their roles in the team. Get user_id when making a new user via `/user/new`.
- metadata: Optional[dict] - Metadata for team, store information for team. Example metadata = {"extra_info": "some info"}
- tpm_limit: Optional[int] - The TPM (Tokens Per Minute) limit for this team - all keys with this team_id will have at max this TPM limit
- rpm_limit: Optional[int] - The RPM (Requests Per Minute) limit for this team - all keys associated with this team_id will have at max this RPM limit
- max_budget: Optional[float] - The maximum budget allocated to the team - all keys for this team_id will have at max this max_budget
- budget_duration: Optional[str] - The duration of the budget for the team. Doc [here](https://docs.litellm.ai/docs/proxy/team_budgets)
- models: Optional[list] - A list of models associated with the team - all keys for this team_id will have at most, these models. If empty, assumes all models are allowed.
- blocked: bool - Flag indicating if the team is blocked or not - will stop all calls from keys with this team_id.
Returns:
- team_id: (str) Unique team id - used for tracking spend across multiple keys for same team id.
_deprecated_params:
- admins: list - A list of user_id's for the admin role
- users: list - A list of user_id's for the user role
Example Request:
```
curl --location 'http://0.0.0.0:4000/team/new' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{
"team_alias": "my-new-team_2",
"members_with_roles": [{"role": "admin", "user_id": "user-1234"},
{"role": "user", "user_id": "user-2434"}]
}'
```
```
curl --location 'http://0.0.0.0:4000/team/new' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{
"team_alias": "QA Prod Bot",
"max_budget": 0.000000001,
"budget_duration": "1d"
}'
```
"""
from litellm.proxy.proxy_server import (
prisma_client,
litellm_proxy_admin_name,
create_audit_log_for_update,
_duration_in_seconds,
)
if prisma_client is None:
raise HTTPException(status_code=500, detail={"error": "No db connected"})
if data.team_id is None:
data.team_id = str(uuid.uuid4())
else:
# Check if team_id exists already
_existing_team_id = await prisma_client.get_data(
team_id=data.team_id, table_name="team", query_type="find_unique"
)
if _existing_team_id is not None:
raise HTTPException(
status_code=400,
detail={
"error": f"Team id = {data.team_id} already exists. Please use a different team id."
},
)
if (
user_api_key_dict.user_role is None
or user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN
): # don't restrict proxy admin
if (
data.tpm_limit is not None
and user_api_key_dict.tpm_limit is not None
and data.tpm_limit > user_api_key_dict.tpm_limit
):
raise HTTPException(
status_code=400,
detail={
"error": f"tpm limit higher than user max. User tpm limit={user_api_key_dict.tpm_limit}. User role={user_api_key_dict.user_role}"
},
)
if (
data.rpm_limit is not None
and user_api_key_dict.rpm_limit is not None
and data.rpm_limit > user_api_key_dict.rpm_limit
):
raise HTTPException(
status_code=400,
detail={
"error": f"rpm limit higher than user max. User rpm limit={user_api_key_dict.rpm_limit}. User role={user_api_key_dict.user_role}"
},
)
if (
data.max_budget is not None
and user_api_key_dict.max_budget is not None
and data.max_budget > user_api_key_dict.max_budget
):
raise HTTPException(
status_code=400,
detail={
"error": f"max budget higher than user max. User max budget={user_api_key_dict.max_budget}. User role={user_api_key_dict.user_role}"
},
)
if data.models is not None and len(user_api_key_dict.models) > 0:
for m in data.models:
if m not in user_api_key_dict.models:
raise HTTPException(
status_code=400,
detail={
"error": f"Model not in allowed user models. User allowed models={user_api_key_dict.models}. User id={user_api_key_dict.user_id}"
},
)
if user_api_key_dict.user_id is not None:
creating_user_in_list = False
for member in data.members_with_roles:
if member.user_id == user_api_key_dict.user_id:
creating_user_in_list = True
if creating_user_in_list == False:
data.members_with_roles.append(
Member(role="admin", user_id=user_api_key_dict.user_id)
)
## ADD TO MODEL TABLE
_model_id = None
if data.model_aliases is not None and isinstance(data.model_aliases, dict):
litellm_modeltable = LiteLLM_ModelTable(
model_aliases=json.dumps(data.model_aliases),
created_by=user_api_key_dict.user_id or litellm_proxy_admin_name,
updated_by=user_api_key_dict.user_id or litellm_proxy_admin_name,
)
model_dict = await prisma_client.db.litellm_modeltable.create(
{**litellm_modeltable.json(exclude_none=True)} # type: ignore
) # type: ignore
_model_id = model_dict.id
## ADD TO TEAM TABLE
complete_team_data = LiteLLM_TeamTable(
**data.json(),
model_id=_model_id,
)
# If budget_duration is set, set `budget_reset_at`
if complete_team_data.budget_duration is not None:
duration_s = _duration_in_seconds(duration=complete_team_data.budget_duration)
reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
complete_team_data.budget_reset_at = reset_at
team_row = await prisma_client.insert_data(
data=complete_team_data.json(exclude_none=True), table_name="team"
)
## ADD TEAM ID TO USER TABLE ##
for user in complete_team_data.members_with_roles:
## add team id to user row ##
await prisma_client.update_data(
user_id=user.user_id,
data={"user_id": user.user_id, "teams": [team_row.team_id]},
update_key_values_custom_query={
"teams": {
"push ": [team_row.team_id],
}
},
)
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
if litellm.store_audit_logs is True:
_updated_values = complete_team_data.json(exclude_none=True)
_updated_values = json.dumps(_updated_values, default=str)
asyncio.create_task(
create_audit_log_for_update(
request_data=LiteLLM_AuditLogs(
id=str(uuid.uuid4()),
updated_at=datetime.now(timezone.utc),
changed_by=litellm_changed_by
or user_api_key_dict.user_id
or litellm_proxy_admin_name,
changed_by_api_key=user_api_key_dict.api_key,
table_name=LitellmTableNames.TEAM_TABLE_NAME,
object_id=data.team_id,
action="created",
updated_values=_updated_values,
before_value=None,
)
)
)
try:
return team_row.model_dump()
except Exception as e:
return team_row.dict()
@router.post(
"/team/update", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
@management_endpoint_wrapper
async def update_team(
data: UpdateTeamRequest,
http_request: Request,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
litellm_changed_by: Optional[str] = Header(
None,
description="The litellm-changed-by header enables tracking of actions performed by authorized users on behalf of other users, providing an audit trail for accountability",
),
):
"""
Use `/team/member_add` AND `/team/member_delete` to add/remove team members
You can now update team budget / rate limits via /team/update
Parameters:
- team_id: str - The team id of the user. Required param.
- team_alias: Optional[str] - User defined team alias
- metadata: Optional[dict] - Metadata for team, store information for team. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }
- tpm_limit: Optional[int] - The TPM (Tokens Per Minute) limit for this team - all keys with this team_id will have at max this TPM limit
- rpm_limit: Optional[int] - The RPM (Requests Per Minute) limit for this team - all keys associated with this team_id will have at max this RPM limit
- max_budget: Optional[float] - The maximum budget allocated to the team - all keys for this team_id will have at max this max_budget
- budget_duration: Optional[str] - The duration of the budget for the team. Doc [here](https://docs.litellm.ai/docs/proxy/team_budgets)
- models: Optional[list] - A list of models associated with the team - all keys for this team_id will have at most, these models. If empty, assumes all models are allowed.
- blocked: bool - Flag indicating if the team is blocked or not - will stop all calls from keys with this team_id.
Example - update team TPM Limit
```
curl --location 'http://0.0.0.0:8000/team/update' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data-raw '{
"team_id": "litellm-test-client-id-new",
"tpm_limit": 100
}'
```
Example - Update Team `max_budget`
```
curl --location 'http://0.0.0.0:8000/team/update' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data-raw '{
"team_id": "litellm-test-client-id-new",
"max_budget": 10
}'
```
"""
from litellm.proxy.proxy_server import (
prisma_client,
litellm_proxy_admin_name,
create_audit_log_for_update,
_duration_in_seconds,
)
if prisma_client is None:
raise HTTPException(status_code=500, detail={"error": "No db connected"})
if data.team_id is None:
raise HTTPException(status_code=400, detail={"error": "No team id passed in"})
verbose_proxy_logger.debug("/team/update - %s", data)
existing_team_row = await prisma_client.get_data(
team_id=data.team_id, table_name="team", query_type="find_unique"
)
if existing_team_row is None:
raise HTTPException(
status_code=404,
detail={"error": f"Team not found, passed team_id={data.team_id}"},
)
updated_kv = data.json(exclude_none=True)
# Check budget_duration and budget_reset_at
if data.budget_duration is not None:
duration_s = _duration_in_seconds(duration=data.budget_duration)
reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
# set the budget_reset_at in DB
updated_kv["budget_reset_at"] = reset_at
team_row = await prisma_client.update_data(
update_key_values=updated_kv,
data=updated_kv,
table_name="team",
team_id=data.team_id,
)
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
if litellm.store_audit_logs is True:
_before_value = existing_team_row.json(exclude_none=True)
_before_value = json.dumps(_before_value, default=str)
_after_value: str = json.dumps(updated_kv, default=str)
asyncio.create_task(
create_audit_log_for_update(
request_data=LiteLLM_AuditLogs(
id=str(uuid.uuid4()),
updated_at=datetime.now(timezone.utc),
changed_by=litellm_changed_by
or user_api_key_dict.user_id
or litellm_proxy_admin_name,
changed_by_api_key=user_api_key_dict.api_key,
table_name=LitellmTableNames.TEAM_TABLE_NAME,
object_id=data.team_id,
action="updated",
updated_values=_after_value,
before_value=_before_value,
)
)
)
return team_row
@router.post(
"/team/member_add",
tags=["team management"],
dependencies=[Depends(user_api_key_auth)],
)
@management_endpoint_wrapper
async def team_member_add(
data: TeamMemberAddRequest,
http_request: Request,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
"""
[BETA]
Add new members (either via user_email or user_id) to a team
If the user doesn't exist, a new user row will also be added to the User Table
```
curl -X POST 'http://0.0.0.0:4000/team/member_add' \
-H 'Authorization: Bearer sk-1234' \
-H 'Content-Type: application/json' \
-d '{"team_id": "45e3e396-ee08-4a61-a88e-16b3ce7e0849", "member": {"role": "user", "user_id": "krrish247652@berri.ai"}}'
```
"""
from litellm.proxy.proxy_server import (
prisma_client,
litellm_proxy_admin_name,
create_audit_log_for_update,
_duration_in_seconds,
)
if prisma_client is None:
raise HTTPException(status_code=500, detail={"error": "No db connected"})
if data.team_id is None:
raise HTTPException(status_code=400, detail={"error": "No team id passed in"})
if data.member is None:
raise HTTPException(
status_code=400, detail={"error": "No member/members passed in"}
)
existing_team_row = await prisma_client.db.litellm_teamtable.find_unique(
where={"team_id": data.team_id}
)
if existing_team_row is None:
raise HTTPException(
status_code=404,
detail={
"error": f"Team not found for team_id={getattr(data, 'team_id', None)}"
},
)
complete_team_data = LiteLLM_TeamTable(**existing_team_row.model_dump())
if isinstance(data.member, Member):
# add to team db
new_member = data.member
complete_team_data.members_with_roles.append(new_member)
elif isinstance(data.member, List):
# add to team db
new_members = data.member
complete_team_data.members_with_roles.extend(new_members)
# ADD MEMBER TO TEAM
_db_team_members = [m.model_dump() for m in complete_team_data.members_with_roles]
updated_team = await prisma_client.db.litellm_teamtable.update(
where={"team_id": data.team_id},
data={"members_with_roles": json.dumps(_db_team_members)}, # type: ignore
)
if isinstance(data.member, Member):
await add_new_member(
new_member=data.member,
max_budget_in_team=data.max_budget_in_team,
prisma_client=prisma_client,
user_api_key_dict=user_api_key_dict,
litellm_proxy_admin_name=litellm_proxy_admin_name,
team_id=data.team_id,
)
elif isinstance(data.member, List):
tasks: List = []
for m in data.member:
await add_new_member(
new_member=m,
max_budget_in_team=data.max_budget_in_team,
prisma_client=prisma_client,
user_api_key_dict=user_api_key_dict,
litellm_proxy_admin_name=litellm_proxy_admin_name,
team_id=data.team_id,
)
await asyncio.gather(*tasks)
return updated_team
@router.post(
"/team/member_delete",
tags=["team management"],
dependencies=[Depends(user_api_key_auth)],
)
@management_endpoint_wrapper
async def team_member_delete(
data: TeamMemberDeleteRequest,
http_request: Request,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
"""
[BETA]
delete members (either via user_email or user_id) from a team
If user doesn't exist, an exception will be raised
```
curl -X POST 'http://0.0.0.0:8000/team/member_delete' \
-H 'Authorization: Bearer sk-1234' \
-H 'Content-Type: application/json' \
-d '{
"team_id": "45e3e396-ee08-4a61-a88e-16b3ce7e0849",
"user_id": "krrish247652@berri.ai"
}'
```
"""
from litellm.proxy.proxy_server import (
prisma_client,
litellm_proxy_admin_name,
create_audit_log_for_update,
_duration_in_seconds,
)
if prisma_client is None:
raise HTTPException(status_code=500, detail={"error": "No db connected"})
if data.team_id is None:
raise HTTPException(status_code=400, detail={"error": "No team id passed in"})
if data.user_id is None and data.user_email is None:
raise HTTPException(
status_code=400,
detail={"error": "Either user_id or user_email needs to be passed in"},
)
_existing_team_row = await prisma_client.db.litellm_teamtable.find_unique(
where={"team_id": data.team_id}
)
if _existing_team_row is None:
raise HTTPException(
status_code=400,
detail={"error": "Team id={} does not exist in db".format(data.team_id)},
)
existing_team_row = LiteLLM_TeamTable(**_existing_team_row.model_dump())
## DELETE MEMBER FROM TEAM
new_team_members: List[Member] = []
for m in existing_team_row.members_with_roles:
if (
data.user_id is not None
and m.user_id is not None
and data.user_id == m.user_id
):
continue
elif (
data.user_email is not None
and m.user_email is not None
and data.user_email == m.user_email
):
continue
new_team_members.append(m)
existing_team_row.members_with_roles = new_team_members
_db_new_team_members: List[dict] = [m.model_dump() for m in new_team_members]
_ = await prisma_client.db.litellm_teamtable.update(
where={
"team_id": data.team_id,
},
data={"members_with_roles": json.dumps(_db_new_team_members)}, # type: ignore
)
## DELETE TEAM ID from USER ROW, IF EXISTS ##
# get user row
key_val = {}
if data.user_id is not None:
key_val["user_id"] = data.user_id
elif data.user_email is not None:
key_val["user_email"] = data.user_email
existing_user_rows = await prisma_client.db.litellm_usertable.find_many(
where=key_val # type: ignore
)
if existing_user_rows is not None and (
isinstance(existing_user_rows, list) and len(existing_user_rows) > 0
):
for existing_user in existing_user_rows:
team_list = []
if data.team_id in existing_user.teams:
team_list = existing_user.teams
team_list.remove(data.team_id)
await prisma_client.db.litellm_usertable.update(
where={
"user_id": existing_user.user_id,
},
data={"teams": {"set": team_list}},
)
return existing_team_row
@router.post(
"/team/delete", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
@management_endpoint_wrapper
async def delete_team(
data: DeleteTeamRequest,
http_request: Request,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
litellm_changed_by: Optional[str] = Header(
None,
description="The litellm-changed-by header enables tracking of actions performed by authorized users on behalf of other users, providing an audit trail for accountability",
),
):
"""
delete team and associated team keys
```
curl --location 'http://0.0.0.0:8000/team/delete' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data-raw '{
"team_ids": ["45e3e396-ee08-4a61-a88e-16b3ce7e0849"]
}'
```
"""
from litellm.proxy.proxy_server import (
prisma_client,
litellm_proxy_admin_name,
create_audit_log_for_update,
_duration_in_seconds,
)
if prisma_client is None:
raise HTTPException(status_code=500, detail={"error": "No db connected"})
if data.team_ids is None:
raise HTTPException(status_code=400, detail={"error": "No team id passed in"})
# check that all teams passed exist
for team_id in data.team_ids:
team_row = await prisma_client.get_data( # type: ignore
team_id=team_id, table_name="team", query_type="find_unique"
)
if team_row is None:
raise HTTPException(
status_code=404,
detail={"error": f"Team not found, passed team_id={team_id}"},
)
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
# we do this after the first for loop, since first for loop is for validation. we only want this inserted after validation passes
if litellm.store_audit_logs is True:
# make an audit log for each team deleted
for team_id in data.team_ids:
team_row = await prisma_client.get_data( # type: ignore
team_id=team_id, table_name="team", query_type="find_unique"
)
_team_row = team_row.json(exclude_none=True)
asyncio.create_task(
create_audit_log_for_update(
request_data=LiteLLM_AuditLogs(
id=str(uuid.uuid4()),
updated_at=datetime.now(timezone.utc),
changed_by=litellm_changed_by
or user_api_key_dict.user_id
or litellm_proxy_admin_name,
changed_by_api_key=user_api_key_dict.api_key,
table_name=LitellmTableNames.TEAM_TABLE_NAME,
object_id=team_id,
action="deleted",
updated_values="{}",
before_value=_team_row,
)
)
)
# End of Audit logging
## DELETE ASSOCIATED KEYS
await prisma_client.delete_data(team_id_list=data.team_ids, table_name="key")
## DELETE TEAMS
deleted_teams = await prisma_client.delete_data(
team_id_list=data.team_ids, table_name="team"
)
return deleted_teams
@router.get(
"/team/info", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
@management_endpoint_wrapper
async def team_info(
http_request: Request,
team_id: str = fastapi.Query(
default=None, description="Team ID in the request parameters"
),
):
"""
get info on team + related keys
```
curl --location 'http://localhost:4000/team/info?team_id=<team-id>' \
--header 'Authorization: Bearer sk-1234'
```
"""
from litellm.proxy.proxy_server import (
prisma_client,
litellm_proxy_admin_name,
create_audit_log_for_update,
_duration_in_seconds,
)
try:
if prisma_client is None:
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail={
"error": f"Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
},
)
if team_id is None:
raise HTTPException(
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
detail={"message": "Malformed request. No team id passed in."},
)
team_info = await prisma_client.get_data(
team_id=team_id, table_name="team", query_type="find_unique"
)
if team_info is None:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail={"message": f"Team not found, passed team id: {team_id}."},
)
## GET ALL KEYS ##
keys = await prisma_client.get_data(
team_id=team_id,
table_name="key",
query_type="find_all",
expires=datetime.now(),
)
if team_info is None:
## make sure we still return a total spend ##
spend = 0
for k in keys:
spend += getattr(k, "spend", 0)
team_info = {"spend": spend}
## REMOVE HASHED TOKEN INFO before returning ##
for key in keys:
try:
key = key.model_dump() # noqa
except:
# if using pydantic v1
key = key.dict()
key.pop("token", None)
return {"team_id": team_id, "team_info": team_info, "keys": keys}
except Exception as e:
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"),
type="auth_error",
param=getattr(e, "param", "None"),
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
)
elif isinstance(e, ProxyException):
raise e
raise ProxyException(
message="Authentication Error, " + str(e),
type="auth_error",
param=getattr(e, "param", "None"),
code=status.HTTP_400_BAD_REQUEST,
)
@router.post(
"/team/block", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
@management_endpoint_wrapper
async def block_team(
data: BlockTeamRequest,
http_request: Request,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
"""
Blocks all calls from keys with this team id.
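Example (illustrative request; the team_id is a placeholder):
```
curl --location 'http://0.0.0.0:4000/team/block' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{"team_id": "litellm-test-client-id-new"}'
```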
"""
from litellm.proxy.proxy_server import (
prisma_client,
litellm_proxy_admin_name,
create_audit_log_for_update,
_duration_in_seconds,
)
if prisma_client is None:
raise Exception("No DB Connected.")
record = await prisma_client.db.litellm_teamtable.update(
where={"team_id": data.team_id}, data={"blocked": True} # type: ignore
)
return record
@router.post(
"/team/unblock", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
@management_endpoint_wrapper
async def unblock_team(
data: BlockTeamRequest,
http_request: Request,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
"""
Unblocks all calls from keys with this team id.
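Example (illustrative request; the team_id is a placeholder):
```
curl --location 'http://0.0.0.0:4000/team/unblock' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{"team_id": "litellm-test-client-id-new"}'
```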
"""
from litellm.proxy.proxy_server import (
prisma_client,
litellm_proxy_admin_name,
create_audit_log_for_update,
_duration_in_seconds,
)
if prisma_client is None:
raise Exception("No DB Connected.")
record = await prisma_client.db.litellm_teamtable.update(
where={"team_id": data.team_id}, data={"blocked": False} # type: ignore
)
return record
@router.get(
"/team/list", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
@management_endpoint_wrapper
async def list_team(
http_request: Request,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
"""
[Admin-only] List all available teams
```
curl --location --request GET 'http://0.0.0.0:4000/team/list' \
--header 'Authorization: Bearer sk-1234'
```
"""
from litellm.proxy.proxy_server import (
prisma_client,
litellm_proxy_admin_name,
create_audit_log_for_update,
_duration_in_seconds,
)
if user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN:
raise HTTPException(
status_code=401,
detail={
"error": "Admin-only endpoint. Your user role={}".format(
user_api_key_dict.user_role
)
},
)
if prisma_client is None:
raise HTTPException(
status_code=400,
detail={"error": CommonProxyErrors.db_not_connected_error.value},
)
response = await prisma_client.db.litellm_teamtable.find_many()
return response


@@ -1,5 +1,11 @@
# What is this?
## Helper utils for the management endpoints (keys/users/teams)
from datetime import datetime
from functools import wraps
from litellm.proxy._types import UserAPIKeyAuth, ManagementEndpointLoggingPayload
from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
from litellm._logging import verbose_logger
from fastapi import Request
from litellm.proxy._types import LiteLLM_TeamTable, Member, UserAPIKeyAuth
from litellm.proxy.utils import PrismaClient
@@ -61,3 +67,110 @@ async def add_new_member(
"budget_id": _budget_id,
}
)
def management_endpoint_wrapper(func):
"""
This wrapper does the following:
1. Log I/O, Exceptions to OTEL
2. Create an Audit log for success calls
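Usage sketch (illustrative - mirrors how the team management endpoints apply it):
```
@router.post("/team/new")
@management_endpoint_wrapper
async def new_team(...):
...
```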
"""
@wraps(func)
async def wrapper(*args, **kwargs):
start_time = datetime.now()
try:
result = await func(*args, **kwargs)
end_time = datetime.now()
try:
if kwargs is None:
kwargs = {}
user_api_key_dict: UserAPIKeyAuth = (
kwargs.get("user_api_key_dict") or UserAPIKeyAuth()
)
_http_request: Request = kwargs.get("http_request")
parent_otel_span = user_api_key_dict.parent_otel_span
if parent_otel_span is not None:
from litellm.proxy.proxy_server import open_telemetry_logger
if open_telemetry_logger is not None:
if _http_request:
_route = _http_request.url.path
_request_body: dict = await _read_request_body(
request=_http_request
)
_response = dict(result) if result is not None else None
logging_payload = ManagementEndpointLoggingPayload(
route=_route,
request_data=_request_body,
response=_response,
start_time=start_time,
end_time=end_time,
)
await open_telemetry_logger.async_management_endpoint_success_hook(
logging_payload=logging_payload,
parent_otel_span=parent_otel_span,
)
if _http_request:
_route = _http_request.url.path
# Flush user_api_key cache if this was an update/delete call to /key, /team, or /user
if _route in [
"/key/update",
"/key/delete",
"/team/update",
"/team/delete",
"/user/update",
"/user/delete",
"/customer/update",
"/customer/delete",
]:
from litellm.proxy.proxy_server import user_api_key_cache
user_api_key_cache.flush_cache()
except Exception as e:
# Non-Blocking Exception
verbose_logger.debug("Error in management endpoint wrapper: %s", str(e))
pass
return result
except Exception as e:
end_time = datetime.now()
if kwargs is None:
kwargs = {}
user_api_key_dict: UserAPIKeyAuth = (
kwargs.get("user_api_key_dict") or UserAPIKeyAuth()
)
parent_otel_span = user_api_key_dict.parent_otel_span
if parent_otel_span is not None:
from litellm.proxy.proxy_server import open_telemetry_logger
if open_telemetry_logger is not None:
_http_request: Request = kwargs.get("http_request")
if _http_request:
_route = _http_request.url.path
_request_body: dict = await _read_request_body(
request=_http_request
)
logging_payload = ManagementEndpointLoggingPayload(
route=_route,
request_data=_request_body,
response=None,
start_time=start_time,
end_time=end_time,
exception=e,
)
await open_telemetry_logger.async_management_endpoint_failure_hook(
logging_payload=logging_payload,
parent_otel_span=parent_otel_span,
)
raise e
return wrapper


@@ -20,6 +20,7 @@ model_list:
general_settings:
master_key: sk-1234
alerting: ["slack", "email"]
litellm_settings:
success_callback: ["prometheus"]

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -12,6 +12,8 @@ import litellm
import backoff
import traceback
from pydantic import BaseModel
import litellm.litellm_core_utils
import litellm.litellm_core_utils.litellm_logging
from litellm.proxy._types import (
UserAPIKeyAuth,
DynamoDBArgs,
@@ -266,7 +268,9 @@ class ProxyLogging:
+ litellm.failure_callback
)
)
litellm.litellm_core_utils.litellm_logging.set_callbacks(
callback_list=callback_list
)
# The actual implementation of the function
async def pre_call_hook(
@@ -331,7 +335,9 @@ class ProxyLogging:
return data
except Exception as e:
if "litellm_logging_obj" in data:
logging_obj: litellm.litellm_core_utils.litellm_logging.Logging = data[
"litellm_logging_obj"
]
## ASYNC FAILURE HANDLER ##
error_message = ""
@@ -455,6 +461,7 @@ class ProxyLogging:
formatted_message += f"\n\nProxy URL: `{_proxy_base_url}`"
extra_kwargs = {}
alerting_metadata = {}
if request_data is not None:
_url = self.slack_alerting_instance._add_langfuse_trace_id_to_alert(
request_data=request_data
@@ -462,7 +469,12 @@ class ProxyLogging:
if _url is not None:
extra_kwargs["🪢 Langfuse Trace"] = _url
formatted_message += "\n\n🪢 Langfuse Trace: {}".format(_url)
if (
"metadata" in request_data
and request_data["metadata"].get("alerting_metadata", None) is not None
and isinstance(request_data["metadata"]["alerting_metadata"], dict)
):
alerting_metadata = request_data["metadata"]["alerting_metadata"]
for client in self.alerting:
if client == "slack":
await self.slack_alerting_instance.send_alert(
@@ -470,6 +482,7 @@ class ProxyLogging:
level=level,
alert_type=alert_type,
user_info=None,
alerting_metadata=alerting_metadata,
**extra_kwargs,
)
elif client == "sentry":
@@ -510,7 +523,7 @@ class ProxyLogging:
)
if hasattr(self, "service_logging_obj"):
await self.service_logging_obj.async_service_failure_hook(
service=ServiceTypes.DB,
duration=duration,
error=error_message,
@@ -1960,6 +1973,9 @@ async def send_email(receiver_email, subject, html):
email_message["From"] = sender_email
email_message["To"] = receiver_email
email_message["Subject"] = subject
verbose_proxy_logger.debug(
"sending email from %s to %s", sender_email, receiver_email
)
# Attach the body to the email
email_message.attach(MIMEText(html, "html"))
@@ -2115,6 +2131,16 @@ def _extract_from_regex(duration: str) -> Tuple[int, str]:
return value, unit
def get_last_day_of_month(year, month):
# Handle December case
if month == 12:
return 31
# First day of the next month minus one day gives the last day of this month
next_month = datetime(year=year, month=month + 1, day=1)
last_day_of_month = (next_month - timedelta(days=1)).day
return last_day_of_month
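# Illustrative: get_last_day_of_month(2024, 2) -> 29, get_last_day_of_month(2024, 12) -> 31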
def _duration_in_seconds(duration: str) -> int: def _duration_in_seconds(duration: str) -> int:
""" """
Parameters: Parameters:
@ -2141,12 +2167,28 @@ def _duration_in_seconds(duration: str) -> int:
now = time.time() now = time.time()
current_time = datetime.fromtimestamp(now) current_time = datetime.fromtimestamp(now)
# Calculate the first day of the next month
if current_time.month == 12: if current_time.month == 12:
next_month = datetime(year=current_time.year + 1, month=1, day=1) target_year = current_time.year + 1
target_month = 1
else: else:
target_year = current_time.year
target_month = current_time.month + value
# Determine the day to set for next month
target_day = current_time.day
last_day_of_target_month = get_last_day_of_month(target_year, target_month)
if target_day > last_day_of_target_month:
target_day = last_day_of_target_month
next_month = datetime( next_month = datetime(
year=current_time.year, month=current_time.month + value, day=1 year=target_year,
month=target_month,
day=target_day,
hour=current_time.hour,
minute=current_time.minute,
second=current_time.second,
microsecond=current_time.microsecond,
) )
# Calculate the duration until the first day of the next month # Calculate the duration until the first day of the next month
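To make the clamping above concrete, a hedged standalone sketch with a hypothetical anchor date: adding one month to Jan 31 would land on a non-existent Feb 31, so the target day is pulled back to the month's last day (Feb 29 in the 2024 leap year):

    from datetime import datetime
    from litellm.proxy.utils import get_last_day_of_month  # illustrative import path

    current_time = datetime(2024, 1, 31, 12, 0, 0)   # hypothetical "now"
    target_year, target_month = 2024, 2              # one month ahead
    target_day = min(
        current_time.day, get_last_day_of_month(target_year, target_month)
    )
    next_month = datetime(
        target_year, target_month, target_day,
        current_time.hour, current_time.minute,
        current_time.second, current_time.microsecond,
    )
    # next_month == datetime(2024, 2, 29, 12, 0)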
@ -2718,47 +2760,6 @@ def _is_valid_team_configs(team_id=None, team_config=None, request_data=None):
return return
def _is_user_proxy_admin(user_id_information: Optional[list]):
if user_id_information is None:
return False
if len(user_id_information) == 0 or user_id_information[0] is None:
return False
_user = user_id_information[0]
if (
_user.get("user_role", None) is not None
and _user.get("user_role") == LitellmUserRoles.PROXY_ADMIN.value
):
return True
# if user_id_information contains litellm-proxy-budget
# get first user_id that is not litellm-proxy-budget
for user in user_id_information:
if user.get("user_id") != "litellm-proxy-budget":
_user = user
break
if (
_user.get("user_role", None) is not None
and _user.get("user_role") == LitellmUserRoles.PROXY_ADMIN.value
):
return True
return False
def _get_user_role(user_id_information: Optional[list]):
if user_id_information is None:
return None
if len(user_id_information) == 0 or user_id_information[0] is None:
return None
_user = user_id_information[0]
return _user.get("user_role")
def encrypt_value(value: str, master_key: str): def encrypt_value(value: str, master_key: str):
import hashlib import hashlib
import nacl.secret import nacl.secret

View file

@ -66,6 +66,7 @@ from litellm.types.llms.openai import (
) )
from litellm.scheduler import Scheduler, FlowItem from litellm.scheduler import Scheduler, FlowItem
from typing import Iterable from typing import Iterable
from litellm.router_utils.handle_error import send_llm_exception_alert
class Router: class Router:
@ -108,6 +109,7 @@ class Router:
] = None, # generic fallbacks, works across all deployments ] = None, # generic fallbacks, works across all deployments
fallbacks: List = [], fallbacks: List = [],
context_window_fallbacks: List = [], context_window_fallbacks: List = [],
content_policy_fallbacks: List = [],
model_group_alias: Optional[dict] = {}, model_group_alias: Optional[dict] = {},
enable_pre_call_checks: bool = False, enable_pre_call_checks: bool = False,
retry_after: int = 0, # min time to wait before retrying a failed request retry_after: int = 0, # min time to wait before retrying a failed request
@ -311,6 +313,12 @@ class Router:
self.context_window_fallbacks = ( self.context_window_fallbacks = (
context_window_fallbacks or litellm.context_window_fallbacks context_window_fallbacks or litellm.context_window_fallbacks
) )
_content_policy_fallbacks = (
content_policy_fallbacks or litellm.content_policy_fallbacks
)
self.validate_fallbacks(fallback_param=_content_policy_fallbacks)
self.content_policy_fallbacks = _content_policy_fallbacks
self.total_calls: defaultdict = defaultdict( self.total_calls: defaultdict = defaultdict(
int int
) # dict to store total calls made to each model ) # dict to store total calls made to each model
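A hedged configuration sketch for the new parameter; the model names and fallback target are illustrative, and the list-of-dicts shape mirrors the existing context_window_fallbacks convention noted in the inline comments elsewhere in this diff:

    from litellm import Router

    router = Router(
        model_list=[
            {"model_name": "gpt-3.5-turbo",
             "litellm_params": {"model": "gpt-3.5-turbo"}},
            {"model_name": "claude-3-haiku",
             "litellm_params": {"model": "claude-3-haiku-20240307"}},
        ],
        # If gpt-3.5-turbo raises ContentPolicyViolationError,
        # retry the request on the claude-3-haiku model group.
        content_policy_fallbacks=[{"gpt-3.5-turbo": ["claude-3-haiku"]}],
    )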
@ -576,6 +584,14 @@ class Router:
return response return response
except Exception as e: except Exception as e:
asyncio.create_task(
send_llm_exception_alert(
litellm_router_instance=self,
request_kwargs=kwargs,
error_traceback_str=traceback.format_exc(),
original_exception=e,
)
)
raise e raise e
async def _acompletion( async def _acompletion(
@ -1097,6 +1113,14 @@ class Router:
return response return response
except Exception as e: except Exception as e:
asyncio.create_task(
send_llm_exception_alert(
litellm_router_instance=self,
request_kwargs=kwargs,
error_traceback_str=traceback.format_exc(),
original_exception=e,
)
)
raise e raise e
async def _aimage_generation(self, prompt: str, model: str, **kwargs): async def _aimage_generation(self, prompt: str, model: str, **kwargs):
@ -1221,6 +1245,14 @@ class Router:
return response return response
except Exception as e: except Exception as e:
asyncio.create_task(
send_llm_exception_alert(
litellm_router_instance=self,
request_kwargs=kwargs,
error_traceback_str=traceback.format_exc(),
original_exception=e,
)
)
raise e raise e
async def _atranscription(self, file: BinaryIO, model: str, **kwargs): async def _atranscription(self, file: BinaryIO, model: str, **kwargs):
@ -1387,6 +1419,14 @@ class Router:
return response return response
except Exception as e: except Exception as e:
asyncio.create_task(
send_llm_exception_alert(
litellm_router_instance=self,
request_kwargs=kwargs,
error_traceback_str=traceback.format_exc(),
original_exception=e,
)
)
raise e raise e
async def amoderation(self, model: str, input: str, **kwargs): async def amoderation(self, model: str, input: str, **kwargs):
@ -1402,6 +1442,14 @@ class Router:
return response return response
except Exception as e: except Exception as e:
asyncio.create_task(
send_llm_exception_alert(
litellm_router_instance=self,
request_kwargs=kwargs,
error_traceback_str=traceback.format_exc(),
original_exception=e,
)
)
raise e raise e
async def _amoderation(self, model: str, input: str, **kwargs): async def _amoderation(self, model: str, input: str, **kwargs):
@ -1546,6 +1594,14 @@ class Router:
return response return response
except Exception as e: except Exception as e:
asyncio.create_task(
send_llm_exception_alert(
litellm_router_instance=self,
request_kwargs=kwargs,
error_traceback_str=traceback.format_exc(),
original_exception=e,
)
)
raise e raise e
async def _atext_completion(self, model: str, prompt: str, **kwargs): async def _atext_completion(self, model: str, prompt: str, **kwargs):
@ -1741,6 +1797,14 @@ class Router:
response = await self.async_function_with_fallbacks(**kwargs) response = await self.async_function_with_fallbacks(**kwargs)
return response return response
except Exception as e: except Exception as e:
asyncio.create_task(
send_llm_exception_alert(
litellm_router_instance=self,
request_kwargs=kwargs,
error_traceback_str=traceback.format_exc(),
original_exception=e,
)
)
raise e raise e
async def _aembedding(self, input: Union[str, List], model: str, **kwargs): async def _aembedding(self, input: Union[str, List], model: str, **kwargs):
@ -1998,6 +2062,9 @@ class Router:
context_window_fallbacks = kwargs.get( context_window_fallbacks = kwargs.get(
"context_window_fallbacks", self.context_window_fallbacks "context_window_fallbacks", self.context_window_fallbacks
) )
content_policy_fallbacks = kwargs.get(
"content_policy_fallbacks", self.content_policy_fallbacks
)
try: try:
if mock_testing_fallbacks is not None and mock_testing_fallbacks == True: if mock_testing_fallbacks is not None and mock_testing_fallbacks == True:
raise Exception( raise Exception(
@ -2016,7 +2083,10 @@ class Router:
if ( if (
hasattr(e, "status_code") hasattr(e, "status_code")
and e.status_code == 400 # type: ignore and e.status_code == 400 # type: ignore
and not isinstance(e, litellm.ContextWindowExceededError) and not (
isinstance(e, litellm.ContextWindowExceededError)
or isinstance(e, litellm.ContentPolicyViolationError)
)
): # don't retry a malformed request ): # don't retry a malformed request
raise e raise e
if ( if (
@ -2034,6 +2104,39 @@ class Router:
if fallback_model_group is None: if fallback_model_group is None:
raise original_exception raise original_exception
for mg in fallback_model_group:
"""
Iterate through the model groups and try calling that deployment
"""
try:
kwargs["model"] = mg
kwargs.setdefault("metadata", {}).update(
{"model_group": mg}
) # update model_group used, if fallbacks are done
response = await self.async_function_with_retries(
*args, **kwargs
)
verbose_router_logger.info(
"Successful fallback b/w models."
)
return response
except Exception as e:
pass
elif (
isinstance(e, litellm.ContentPolicyViolationError)
and content_policy_fallbacks is not None
):
fallback_model_group = None
for (
item
) in content_policy_fallbacks: # [{"gpt-3.5-turbo": ["gpt-4"]}]
if list(item.keys())[0] == model_group:
fallback_model_group = item[model_group]
break
if fallback_model_group is None:
raise original_exception
for mg in fallback_model_group: for mg in fallback_model_group:
""" """
Iterate through the model groups and try calling that deployment Iterate through the model groups and try calling that deployment
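For intuition, the fallback-group lookup shared by the context-window and content-policy branches scans the configured list of single-key dicts for the failing model group; a standalone sketch with illustrative data:

    content_policy_fallbacks = [
        {"gpt-3.5-turbo": ["claude-3-haiku"]},   # illustrative mapping
        {"gpt-4": ["gpt-4o"]},
    ]

    def lookup_fallback_group(model_group, fallbacks):
        # Mirrors the loop above: first matching key wins, otherwise None.
        for item in fallbacks:
            if list(item.keys())[0] == model_group:
                return item[model_group]
        return None

    assert lookup_fallback_group("gpt-4", content_policy_fallbacks) == ["gpt-4o"]
    assert lookup_fallback_group("unknown-group", content_policy_fallbacks) is None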
@ -2114,6 +2217,9 @@ class Router:
context_window_fallbacks = kwargs.pop( context_window_fallbacks = kwargs.pop(
"context_window_fallbacks", self.context_window_fallbacks "context_window_fallbacks", self.context_window_fallbacks
) )
content_policy_fallbacks = kwargs.pop(
"content_policy_fallbacks", self.content_policy_fallbacks
)
num_retries = kwargs.pop("num_retries") num_retries = kwargs.pop("num_retries")
@ -2141,6 +2247,7 @@ class Router:
healthy_deployments=_healthy_deployments, healthy_deployments=_healthy_deployments,
context_window_fallbacks=context_window_fallbacks, context_window_fallbacks=context_window_fallbacks,
regular_fallbacks=fallbacks, regular_fallbacks=fallbacks,
content_policy_fallbacks=content_policy_fallbacks,
) )
# decides how long to sleep before retry # decides how long to sleep before retry
@ -2206,10 +2313,12 @@ class Router:
error: Exception, error: Exception,
healthy_deployments: Optional[List] = None, healthy_deployments: Optional[List] = None,
context_window_fallbacks: Optional[List] = None, context_window_fallbacks: Optional[List] = None,
content_policy_fallbacks: Optional[List] = None,
regular_fallbacks: Optional[List] = None, regular_fallbacks: Optional[List] = None,
): ):
""" """
1. raise an exception for ContextWindowExceededError if context_window_fallbacks is not None 1. raise an exception for ContextWindowExceededError if context_window_fallbacks is not None
2. raise an exception for ContentPolicyViolationError if content_policy_fallbacks is not None
2. raise an exception for RateLimitError if 2. raise an exception for RateLimitError if
- there are no fallbacks - there are no fallbacks
@ -2219,13 +2328,19 @@ class Router:
if healthy_deployments is not None and isinstance(healthy_deployments, list): if healthy_deployments is not None and isinstance(healthy_deployments, list):
_num_healthy_deployments = len(healthy_deployments) _num_healthy_deployments = len(healthy_deployments)
### CHECK IF RATE LIMIT / CONTEXT WINDOW ERROR w/ fallbacks available / Bad Request Error ### CHECK IF RATE LIMIT / CONTEXT WINDOW ERROR / CONTENT POLICY VIOLATION ERROR w/ fallbacks available / Bad Request Error
if ( if (
isinstance(error, litellm.ContextWindowExceededError) isinstance(error, litellm.ContextWindowExceededError)
and context_window_fallbacks is not None and context_window_fallbacks is not None
): ):
raise error raise error
if (
isinstance(error, litellm.ContentPolicyViolationError)
and content_policy_fallbacks is not None
):
raise error
# Error we should only retry if there are other deployments # Error we should only retry if there are other deployments
if isinstance(error, openai.RateLimitError): if isinstance(error, openai.RateLimitError):
if ( if (
@ -2256,6 +2371,9 @@ class Router:
context_window_fallbacks = kwargs.get( context_window_fallbacks = kwargs.get(
"context_window_fallbacks", self.context_window_fallbacks "context_window_fallbacks", self.context_window_fallbacks
) )
content_policy_fallbacks = kwargs.get(
"content_policy_fallbacks", self.content_policy_fallbacks
)
try: try:
if mock_testing_fallbacks is not None and mock_testing_fallbacks == True: if mock_testing_fallbacks is not None and mock_testing_fallbacks == True:
raise Exception( raise Exception(
@ -2271,7 +2389,10 @@ class Router:
if ( if (
hasattr(e, "status_code") hasattr(e, "status_code")
and e.status_code == 400 # type: ignore and e.status_code == 400 # type: ignore
and not isinstance(e, litellm.ContextWindowExceededError) and not (
isinstance(e, litellm.ContextWindowExceededError)
or isinstance(e, litellm.ContentPolicyViolationError)
)
): # don't retry a malformed request ): # don't retry a malformed request
raise e raise e
@ -2294,6 +2415,37 @@ class Router:
if fallback_model_group is None: if fallback_model_group is None:
raise original_exception raise original_exception
for mg in fallback_model_group:
"""
Iterate through the model groups and try calling that deployment
"""
try:
## LOGGING
kwargs = self.log_retry(kwargs=kwargs, e=original_exception)
kwargs["model"] = mg
kwargs.setdefault("metadata", {}).update(
{"model_group": mg}
) # update model_group used, if fallbacks are done
response = self.function_with_fallbacks(*args, **kwargs)
return response
except Exception as e:
pass
elif (
isinstance(e, litellm.ContentPolicyViolationError)
and content_policy_fallbacks is not None
):
fallback_model_group = None
for (
item
) in content_policy_fallbacks: # [{"gpt-3.5-turbo": ["gpt-4"]}]
if list(item.keys())[0] == model_group:
fallback_model_group = item[model_group]
break
if fallback_model_group is None:
raise original_exception
for mg in fallback_model_group: for mg in fallback_model_group:
""" """
Iterate through the model groups and try calling that deployment Iterate through the model groups and try calling that deployment
@ -2400,6 +2552,9 @@ class Router:
context_window_fallbacks = kwargs.pop( context_window_fallbacks = kwargs.pop(
"context_window_fallbacks", self.context_window_fallbacks "context_window_fallbacks", self.context_window_fallbacks
) )
content_policy_fallbacks = kwargs.pop(
"content_policy_fallbacks", self.content_policy_fallbacks
)
try: try:
# if the function call is successful, no exception will be raised and we'll break out of the loop # if the function call is successful, no exception will be raised and we'll break out of the loop
@ -2419,6 +2574,7 @@ class Router:
healthy_deployments=_healthy_deployments, healthy_deployments=_healthy_deployments,
context_window_fallbacks=context_window_fallbacks, context_window_fallbacks=context_window_fallbacks,
regular_fallbacks=fallbacks, regular_fallbacks=fallbacks,
content_policy_fallbacks=content_policy_fallbacks,
) )
# decides how long to sleep before retry # decides how long to sleep before retry
@ -4570,6 +4726,8 @@ class Router:
default_webhook_url=router_alerting_config.webhook_url, default_webhook_url=router_alerting_config.webhook_url,
) )
self.slack_alerting_logger = _slack_alerting_logger
litellm.callbacks.append(_slack_alerting_logger) litellm.callbacks.append(_slack_alerting_logger)
litellm.success_callback.append( litellm.success_callback.append(
_slack_alerting_logger.response_taking_too_long_callback _slack_alerting_logger.response_taking_too_long_callback

View file

@ -0,0 +1,53 @@
import asyncio
import traceback
from typing import TYPE_CHECKING, Any
if TYPE_CHECKING:
from litellm.router import Router as _Router
LitellmRouter = _Router
else:
LitellmRouter = Any
async def send_llm_exception_alert(
litellm_router_instance: LitellmRouter,
request_kwargs: dict,
error_traceback_str: str,
original_exception,
):
"""
Sends a Slack / MS Teams alert for the LLM API call failure.
Parameters:
    litellm_router_instance (_Router): The LitellmRouter instance.
    request_kwargs (dict): The kwargs of the failed LLM API call.
    error_traceback_str (str): The formatted traceback string for the failure.
    original_exception (Any): The original exception that occurred.
Returns:
None
"""
if litellm_router_instance is None:
return
if not hasattr(litellm_router_instance, "slack_alerting_logger"):
return
if litellm_router_instance.slack_alerting_logger is None:
return
if "proxy_server_request" in request_kwargs:
    # Do not alert on requests that came through the litellm proxy server;
    # the proxy is already instrumented to alert on LLM API call failures
return
litellm_debug_info = getattr(original_exception, "litellm_debug_info", None)
exception_str = str(original_exception)
if litellm_debug_info is not None:
exception_str += litellm_debug_info
exception_str += f"\n\n{error_traceback_str[:2000]}"
await litellm_router_instance.slack_alerting_logger.send_alert(
message=f"LLM API call failed: `{exception_str}`",
level="High",
alert_type="llm_exceptions",
)
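A hedged sketch of how the router methods wire this helper in; the wrapper function is illustrative, while the create_task pattern mirrors the calls added throughout router.py in this diff:

    import asyncio
    import traceback

    async def call_with_exception_alerting(router, **kwargs):
        try:
            return await router.async_function_with_fallbacks(**kwargs)
        except Exception as e:
            # Fire-and-forget: alerting must never delay or mask the original failure.
            asyncio.create_task(
                send_llm_exception_alert(
                    litellm_router_instance=router,
                    request_kwargs=kwargs,
                    error_traceback_str=traceback.format_exc(),
                    original_exception=e,
                )
            )
            raise e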

View file

@ -25,6 +25,9 @@ import pytest
from litellm.router import AlertingConfig, Router from litellm.router import AlertingConfig, Router
from litellm.proxy._types import CallInfo from litellm.proxy._types import CallInfo
from openai import APIError from openai import APIError
from litellm.router import AlertingConfig
import litellm
import os
@pytest.mark.parametrize( @pytest.mark.parametrize(
@ -159,6 +162,29 @@ async def test_response_taking_too_long_callback(slack_alerting):
mock_send_alert.assert_awaited_once() mock_send_alert.assert_awaited_once()
@pytest.mark.asyncio
async def test_alerting_metadata(slack_alerting):
"""
Test that alerting_metadata is propagated correctly for the response-taking-too-long alert
"""
start_time = datetime.now()
end_time = start_time + timedelta(seconds=301)
kwargs = {
"model": "test_model",
"messages": "test_messages",
"litellm_params": {"metadata": {"alerting_metadata": {"hello": "world"}}},
}
with patch.object(slack_alerting, "send_alert", new=AsyncMock()) as mock_send_alert:
## RESPONSE TAKING TOO LONG
await slack_alerting.response_taking_too_long_callback(
kwargs, None, start_time, end_time
)
mock_send_alert.assert_awaited_once()
assert "hello" in mock_send_alert.call_args[1]["alerting_metadata"]
# Test for budget crossed # Test for budget crossed
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_budget_alerts_crossed(slack_alerting): async def test_budget_alerts_crossed(slack_alerting):
@ -204,7 +230,9 @@ async def test_send_alert(slack_alerting):
slack_alerting.async_http_handler, "post", new=AsyncMock() slack_alerting.async_http_handler, "post", new=AsyncMock()
) as mock_post: ) as mock_post:
mock_post.return_value.status_code = 200 mock_post.return_value.status_code = 200
await slack_alerting.send_alert("Test message", "Low", "budget_alerts") await slack_alerting.send_alert(
"Test message", "Low", "budget_alerts", alerting_metadata={}
)
mock_post.assert_awaited_once() mock_post.assert_awaited_once()
@ -263,7 +291,7 @@ async def test_daily_reports_completion(slack_alerting):
await asyncio.sleep(3) await asyncio.sleep(3)
response_val = await slack_alerting.send_daily_reports(router=router) response_val = await slack_alerting.send_daily_reports(router=router)
assert response_val == True assert response_val is True
mock_send_alert.assert_awaited_once() mock_send_alert.assert_awaited_once()
@ -288,7 +316,7 @@ async def test_daily_reports_completion(slack_alerting):
await asyncio.sleep(3) await asyncio.sleep(3)
response_val = await slack_alerting.send_daily_reports(router=router) response_val = await slack_alerting.send_daily_reports(router=router)
assert response_val == True assert response_val is True
mock_send_alert.assert_awaited() mock_send_alert.assert_awaited()
@ -743,3 +771,37 @@ async def test_region_outage_alerting_called(
mock_send_alert.assert_called_once() mock_send_alert.assert_called_once()
else: else:
mock_send_alert.assert_not_called() mock_send_alert.assert_not_called()
@pytest.mark.asyncio
@pytest.mark.skip(reason="test only needs to run locally")
async def test_alerting():
router = litellm.Router(
model_list=[
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "gpt-3.5-turbo",
"api_key": "bad_key",
},
}
],
debug_level="DEBUG",
set_verbose=True,
alerting_config=AlertingConfig(
alerting_threshold=10, # threshold for slow / hanging llm responses (in seconds). Defaults to 300 seconds
webhook_url=os.getenv(
"SLACK_WEBHOOK_URL"
), # webhook you want to send alerts to
),
)
try:
await router.acompletion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
except:
pass
finally:
await asyncio.sleep(3)

View file

@ -29,19 +29,22 @@ import pytest, logging, asyncio
import litellm, asyncio import litellm, asyncio
from litellm.proxy.proxy_server import ( from litellm.proxy.proxy_server import (
new_user, new_user,
generate_key_fn,
user_api_key_auth, user_api_key_auth,
user_update, user_update,
user_info,
block_user,
)
from litellm.proxy.management_endpoints.key_management_endpoints import (
delete_key_fn, delete_key_fn,
info_key_fn, info_key_fn,
update_key_fn, update_key_fn,
generate_key_fn, generate_key_fn,
generate_key_helper_fn, generate_key_helper_fn,
)
from litellm.proxy.spend_reporting_endpoints.spend_management_endpoints import (
spend_user_fn, spend_user_fn,
spend_key_fn, spend_key_fn,
view_spend_logs, view_spend_logs,
user_info,
block_user,
) )
from litellm.proxy.utils import PrismaClient, ProxyLogging, hash_token from litellm.proxy.utils import PrismaClient, ProxyLogging, hash_token
from litellm._logging import verbose_proxy_logger from litellm._logging import verbose_proxy_logger

Some files were not shown because too many files have changed in this diff