diff --git a/.circleci/config.yml b/.circleci/config.yml index 2727cd221b..14a8cbd5a2 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -42,7 +42,7 @@ jobs: pip install lunary==0.2.5 pip install "langfuse==2.27.1" pip install numpydoc - pip install traceloop-sdk==0.0.69 + pip install traceloop-sdk==0.18.2 pip install openai pip install prisma pip install "httpx==0.24.1" diff --git a/README.md b/README.md index 684d5de730..5e94b0fd94 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ LiteLLM manages: [**Jump to OpenAI Proxy Docs**](https://github.com/BerriAI/litellm?tab=readme-ov-file#openai-proxy---docs) [**Jump to Supported LLM Providers**](https://github.com/BerriAI/litellm?tab=readme-ov-file#supported-providers-docs) -π¨ **Stable Release:** Use docker images with: `main-stable` tag. These run through 12 hr load tests (1k req./min). +π¨ **Stable Release:** Use docker images with the `-stable` tag. These have undergone 12 hour load tests, before being published. Support for more providers. Missing a provider or LLM Platform, raise a [feature request](https://github.com/BerriAI/litellm/issues/new?assignees=&labels=enhancement&projects=&template=feature_request.yml&title=%5BFeature%5D%3A+). diff --git a/docs/my-website/docs/image_generation.md b/docs/my-website/docs/image_generation.md index 002d95c030..7bb4d2c991 100644 --- a/docs/my-website/docs/image_generation.md +++ b/docs/my-website/docs/image_generation.md @@ -150,4 +150,20 @@ response = image_generation( model="bedrock/stability.stable-diffusion-xl-v0", ) print(f"response: {response}") +``` + +## VertexAI - Image Generation Models + +### Usage + +Use this for image generation models on VertexAI + +```python +response = litellm.image_generation( + prompt="An olympic size swimming pool", + model="vertex_ai/imagegeneration@006", + vertex_ai_project="adroit-crow-413218", + vertex_ai_location="us-central1", +) +print(f"response: {response}") ``` \ No newline at end of file diff --git a/docs/my-website/docs/observability/lago.md b/docs/my-website/docs/observability/lago.md new file mode 100644 index 0000000000..337a2b553e --- /dev/null +++ b/docs/my-website/docs/observability/lago.md @@ -0,0 +1,173 @@ +import Image from '@theme/IdealImage'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# Lago - Usage Based Billing + +[Lago](https://www.getlago.com/) offers a self-hosted and cloud, metering and usage-based billing solution. + + + +## Quick Start +Use just 1 lines of code, to instantly log your responses **across all providers** with Lago + +Get your Lago [API Key](https://docs.getlago.com/guide/self-hosted/docker#find-your-api-key) + +```python +litellm.callbacks = ["lago"] # logs cost + usage of successful calls to lago +``` + + + + + +```python +# pip install lago +import litellm +import os + +os.environ["LAGO_API_BASE"] = "" # http://0.0.0.0:3000 +os.environ["LAGO_API_KEY"] = "" +os.environ["LAGO_API_EVENT_CODE"] = "" # The billable metric's code - https://docs.getlago.com/guide/events/ingesting-usage#define-a-billable-metric + +# LLM API Keys +os.environ['OPENAI_API_KEY']="" + +# set lago as a callback, litellm will send the data to lago +litellm.success_callback = ["lago"] + +# openai call +response = litellm.completion( + model="gpt-3.5-turbo", + messages=[ + {"role": "user", "content": "Hi π - i'm openai"} + ], + user="your_customer_id" # π SET YOUR CUSTOMER ID HERE +) +``` + + + + +1. 
Add to Config.yaml +```yaml +model_list: +- litellm_params: + api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/ + api_key: my-fake-key + model: openai/my-fake-model + model_name: fake-openai-endpoint + +litellm_settings: + callbacks: ["lago"] # π KEY CHANGE +``` + +2. Start Proxy + +``` +litellm --config /path/to/config.yaml +``` + +3. Test it! + + + + +```bash +curl --location 'http://0.0.0.0:4000/chat/completions' \ +--header 'Content-Type: application/json' \ +--data ' { + "model": "fake-openai-endpoint", + "messages": [ + { + "role": "user", + "content": "what llm are you" + } + ], + "user": "your-customer-id" # π SET YOUR CUSTOMER ID + } +' +``` + + + +```python +import openai +client = openai.OpenAI( + api_key="anything", + base_url="http://0.0.0.0:4000" +) + +# request sent to model set on litellm proxy, `litellm --model` +response = client.chat.completions.create(model="gpt-3.5-turbo", messages = [ + { + "role": "user", + "content": "this is a test request, write a short poem" + } +], user="my_customer_id") # π whatever your customer id is + +print(response) +``` + + + +```python +from langchain.chat_models import ChatOpenAI +from langchain.prompts.chat import ( + ChatPromptTemplate, + HumanMessagePromptTemplate, + SystemMessagePromptTemplate, +) +from langchain.schema import HumanMessage, SystemMessage +import os + +os.environ["OPENAI_API_KEY"] = "anything" + +chat = ChatOpenAI( + openai_api_base="http://0.0.0.0:4000", + model = "gpt-3.5-turbo", + temperature=0.1, + extra_body={ + "user": "my_customer_id" # π whatever your customer id is + } +) + +messages = [ + SystemMessage( + content="You are a helpful assistant that im using to make a test request to." + ), + HumanMessage( + content="test from litellm. tell me why it's amazing in 1 sentence" + ), +] +response = chat(messages) + +print(response) +``` + + + + + + + + +## Advanced - Lagos Logging object + +This is what LiteLLM will log to Lagos + +``` +{ + "event": { + "transaction_id": "", + "external_customer_id": , # passed via `user` param in /chat/completion call - https://platform.openai.com/docs/api-reference/chat/create + "code": os.getenv("LAGO_API_EVENT_CODE"), + "properties": { + "input_tokens": , + "output_tokens": , + "model": , + "response_cost": , # π LITELLM CALCULATED RESPONSE COST - https://github.com/BerriAI/litellm/blob/d43f75150a65f91f60dc2c0c9462ce3ffc713c1f/litellm/utils.py#L1473 + } + } +} +``` \ No newline at end of file diff --git a/docs/my-website/docs/observability/langsmith_integration.md b/docs/my-website/docs/observability/langsmith_integration.md index 78c7e31190..b115866d54 100644 --- a/docs/my-website/docs/observability/langsmith_integration.md +++ b/docs/my-website/docs/observability/langsmith_integration.md @@ -71,6 +71,23 @@ response = litellm.completion( ) print(response) ``` + +### Make LiteLLM Proxy use Custom `LANGSMITH_BASE_URL` + +If you're using a custom LangSmith instance, you can set the +`LANGSMITH_BASE_URL` environment variable to point to your instance. 
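+The variable is read by the LangSmith integration itself, so it also applies when
+you log from the SDK. A minimal sketch, assuming a local instance at
+`http://localhost:1984` and your LangSmith credentials set as in the Quick Start above:
+
+```python
+import os
+import litellm
+
+os.environ["LANGSMITH_BASE_URL"] = "http://localhost:1984"  # your self-hosted LangSmith
+os.environ["OPENAI_API_KEY"] = ""
+
+# set langsmith as a callback, litellm will send the data to your instance
+litellm.success_callback = ["langsmith"]
+
+response = litellm.completion(
+    model="gpt-3.5-turbo",
+    messages=[{"role": "user", "content": "testing a custom LangSmith base url"}],
+)
+```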
+For example, you can make LiteLLM Proxy log to a local LangSmith instance with +this config: + +```yaml +litellm_settings: + success_callback: ["langsmith"] + +environment_variables: + LANGSMITH_BASE_URL: "http://localhost:1984" + LANGSMITH_PROJECT: "litellm-proxy" +``` + ## Support & Talk to Founders - [Schedule Demo π](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version) diff --git a/docs/my-website/docs/observability/openmeter.md b/docs/my-website/docs/observability/openmeter.md index 64d9c39d21..2f53568757 100644 --- a/docs/my-website/docs/observability/openmeter.md +++ b/docs/my-website/docs/observability/openmeter.md @@ -20,7 +20,7 @@ Use just 2 lines of code, to instantly log your responses **across all providers Get your OpenMeter API Key from https://openmeter.cloud/meters ```python -litellm.success_callback = ["openmeter"] # logs cost + usage of successful calls to openmeter +litellm.callbacks = ["openmeter"] # logs cost + usage of successful calls to openmeter ``` @@ -28,7 +28,7 @@ litellm.success_callback = ["openmeter"] # logs cost + usage of successful calls ```python -# pip install langfuse +# pip install openmeter import litellm import os @@ -39,8 +39,8 @@ os.environ["OPENMETER_API_KEY"] = "" # LLM API Keys os.environ['OPENAI_API_KEY']="" -# set langfuse as a callback, litellm will send the data to langfuse -litellm.success_callback = ["openmeter"] +# set openmeter as a callback, litellm will send the data to openmeter +litellm.callbacks = ["openmeter"] # openai call response = litellm.completion( @@ -64,7 +64,7 @@ model_list: model_name: fake-openai-endpoint litellm_settings: - success_callback: ["openmeter"] # π KEY CHANGE + callbacks: ["openmeter"] # π KEY CHANGE ``` 2. Start Proxy diff --git a/docs/my-website/docs/providers/anthropic.md b/docs/my-website/docs/providers/anthropic.md index 5bb47d780d..38be0c433a 100644 --- a/docs/my-website/docs/providers/anthropic.md +++ b/docs/my-website/docs/providers/anthropic.md @@ -223,6 +223,32 @@ assert isinstance( ``` +### Setting `anthropic-beta` Header in Requests + +Pass the the `extra_headers` param to litellm, All headers will be forwarded to Anthropic API + +```python +response = completion( + model="anthropic/claude-3-opus-20240229", + messages=messages, + tools=tools, +) +``` + +### Forcing Anthropic Tool Use + +If you want Claude to use a specific tool to answer the userβs question + +You can do this by specifying the tool in the `tool_choice` field like so: +```python +response = completion( + model="anthropic/claude-3-opus-20240229", + messages=messages, + tools=tools, + tool_choice={"type": "tool", "name": "get_weather"}, +) +``` + ### Parallel Function Calling diff --git a/docs/my-website/docs/providers/ollama.md b/docs/my-website/docs/providers/ollama.md index 1c913c08c8..c1c8fc57c8 100644 --- a/docs/my-website/docs/providers/ollama.md +++ b/docs/my-website/docs/providers/ollama.md @@ -101,13 +101,19 @@ Ollama supported models: https://github.com/ollama/ollama | Model Name | Function Call | |----------------------|----------------------------------------------------------------------------------- -| Mistral | `completion(model='ollama/mistral', messages, api_base="http://localhost:11434", stream=True)` | +| Mistral | `completion(model='ollama/mistral', messages, api_base="http://localhost:11434", stream=True)` | +| Mistral-7B-Instruct-v0.1 | `completion(model='ollama/mistral-7B-Instruct-v0.1', messages, api_base="http://localhost:11434", stream=False)` | +| Mistral-7B-Instruct-v0.2 | 
`completion(model='ollama/mistral-7B-Instruct-v0.2', messages, api_base="http://localhost:11434", stream=False)` | +| Mixtral-8x7B-Instruct-v0.1 | `completion(model='ollama/mistral-8x7B-Instruct-v0.1', messages, api_base="http://localhost:11434", stream=False)` | +| Mixtral-8x22B-Instruct-v0.1 | `completion(model='ollama/mixtral-8x22B-Instruct-v0.1', messages, api_base="http://localhost:11434", stream=False)` | | Llama2 7B | `completion(model='ollama/llama2', messages, api_base="http://localhost:11434", stream=True)` | | Llama2 13B | `completion(model='ollama/llama2:13b', messages, api_base="http://localhost:11434", stream=True)` | | Llama2 70B | `completion(model='ollama/llama2:70b', messages, api_base="http://localhost:11434", stream=True)` | | Llama2 Uncensored | `completion(model='ollama/llama2-uncensored', messages, api_base="http://localhost:11434", stream=True)` | | Code Llama | `completion(model='ollama/codellama', messages, api_base="http://localhost:11434", stream=True)` | -| Llama2 Uncensored | `completion(model='ollama/llama2-uncensored', messages, api_base="http://localhost:11434", stream=True)` | +| Llama2 Uncensored | `completion(model='ollama/llama2-uncensored', messages, api_base="http://localhost:11434", stream=True)` | +|Meta LLaMa3 8B | `completion(model='ollama/llama3', messages, api_base="http://localhost:11434", stream=False)` | +| Meta LLaMa3 70B | `completion(model='ollama/llama3:70b', messages, api_base="http://localhost:11434", stream=False)` | | Orca Mini | `completion(model='ollama/orca-mini', messages, api_base="http://localhost:11434", stream=True)` | | Vicuna | `completion(model='ollama/vicuna', messages, api_base="http://localhost:11434", stream=True)` | | Nous-Hermes | `completion(model='ollama/nous-hermes', messages, api_base="http://localhost:11434", stream=True)` | diff --git a/docs/my-website/docs/providers/openai.md b/docs/my-website/docs/providers/openai.md index c44a67412c..2f261ce178 100644 --- a/docs/my-website/docs/providers/openai.md +++ b/docs/my-website/docs/providers/openai.md @@ -188,6 +188,7 @@ These also support the `OPENAI_API_BASE` environment variable, which can be used ## OpenAI Vision Models | Model Name | Function Call | |-----------------------|-----------------------------------------------------------------| +| gpt-4o | `response = completion(model="gpt-4o", messages=messages)` | | gpt-4-turbo | `response = completion(model="gpt-4-turbo", messages=messages)` | | gpt-4-vision-preview | `response = completion(model="gpt-4-vision-preview", messages=messages)` | diff --git a/docs/my-website/docs/providers/vertex.md b/docs/my-website/docs/providers/vertex.md index b67eb350b4..32c3ea1881 100644 --- a/docs/my-website/docs/providers/vertex.md +++ b/docs/my-website/docs/providers/vertex.md @@ -508,6 +508,31 @@ All models listed [here](https://github.com/BerriAI/litellm/blob/57f37f743886a02 | text-embedding-preview-0409 | `embedding(model="vertex_ai/text-embedding-preview-0409", input)` | | text-multilingual-embedding-preview-0409 | `embedding(model="vertex_ai/text-multilingual-embedding-preview-0409", input)` | +## Image Generation Models + +Usage + +```python +response = await litellm.aimage_generation( + prompt="An olympic size swimming pool", + model="vertex_ai/imagegeneration@006", + vertex_ai_project="adroit-crow-413218", + vertex_ai_location="us-central1", +) +``` + +**Generating multiple images** + +Use the `n` parameter to pass how many images you want generated +```python +response = await litellm.aimage_generation( + prompt="An 
olympic size swimming pool", + model="vertex_ai/imagegeneration@006", + vertex_ai_project="adroit-crow-413218", + vertex_ai_location="us-central1", + n=1, +) +``` ## Extra diff --git a/docs/my-website/docs/proxy/alerting.md b/docs/my-website/docs/proxy/alerting.md index 230a3a22e9..4f47a77cb3 100644 --- a/docs/my-website/docs/proxy/alerting.md +++ b/docs/my-website/docs/proxy/alerting.md @@ -1,4 +1,4 @@ -# π¨ Alerting +# π¨ Alerting / Webhooks Get alerts for: @@ -11,7 +11,7 @@ Get alerts for: - Daily Reports: - **LLM** Top 5 slowest deployments - **LLM** Top 5 deployments with most failed requests - - **Spend** Weekly & Monthly spend per Team, Tag +- **Spend** Weekly & Monthly spend per Team, Tag ## Quick Start @@ -61,10 +61,38 @@ curl -X GET 'http://localhost:4000/health/services?service=slack' \ -H 'Authorization: Bearer sk-1234' ``` +## Advanced - Opting into specific alert types -## Extras +Set `alert_types` if you want to Opt into only specific alert types -### Using Discord Webhooks +```shell +general_settings: + alerting: ["slack"] + alert_types: ["spend_reports"] +``` + +All Possible Alert Types + +```python +alert_types: +Optional[ +List[ + Literal[ + "llm_exceptions", + "llm_too_slow", + "llm_requests_hanging", + "budget_alerts", + "db_exceptions", + "daily_reports", + "spend_reports", + "cooldown_deployment", + "new_model_added", + ] +] +``` + + +## Advanced - Using Discord Webhooks Discord provides a slack compatible webhook url that you can use for alerting @@ -96,3 +124,80 @@ environment_variables: ``` That's it ! You're ready to go ! + +## Advanced - [BETA] Webhooks for Budget Alerts + +**Note**: This is a beta feature, so the spec might change. + +Set a webhook to get notified for budget alerts. + +1. Setup config.yaml + +Add url to your environment, for testing you can use a link from [here](https://webhook.site/) + +```bash +export WEBHOOK_URL="https://webhook.site/6ab090e8-c55f-4a23-b075-3209f5c57906" +``` + +Add 'webhook' to config.yaml +```yaml +general_settings: + alerting: ["webhook"] # π KEY CHANGE +``` + +2. Start proxy + +```bash +litellm --config /path/to/config.yaml + +# RUNNING on http://0.0.0.0:4000 +``` + +3. Test it! + +```bash +curl -X GET --location 'http://0.0.0.0:4000/health/services?service=webhook' \ +--header 'Authorization: Bearer sk-1234' +``` + +**Expected Response** + +```bash +{ + "spend": 1, # the spend for the 'event_group' + "max_budget": 0, # the 'max_budget' set for the 'event_group' + "token": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b", + "user_id": "default_user_id", + "team_id": null, + "user_email": null, + "key_alias": null, + "projected_exceeded_data": null, + "projected_spend": null, + "event": "budget_crossed", # Literal["budget_crossed", "threshold_crossed", "projected_limit_exceeded"] + "event_group": "user", + "event_message": "User Budget: Budget Crossed" +} +``` + +**API Spec for Webhook Event** + +- `spend` *float*: The current spend amount for the 'event_group'. +- `max_budget` *float*: The maximum allowed budget for the 'event_group'. +- `token` *str*: A hashed value of the key, used for authentication or identification purposes. +- `user_id` *str or null*: The ID of the user associated with the event (optional). +- `team_id` *str or null*: The ID of the team associated with the event (optional). +- `user_email` *str or null*: The email of the user associated with the event (optional). +- `key_alias` *str or null*: An alias for the key associated with the event (optional). 
+- `projected_exceeded_date` *str or null*: The date when the budget is projected to be exceeded, returned when 'soft_budget' is set for key (optional). +- `projected_spend` *float or null*: The projected spend amount, returned when 'soft_budget' is set for key (optional). +- `event` *Literal["budget_crossed", "threshold_crossed", "projected_limit_exceeded"]*: The type of event that triggered the webhook. Possible values are: + * "budget_crossed": Indicates that the spend has exceeded the max budget. + * "threshold_crossed": Indicates that spend has crossed a threshold (currently sent when 85% and 95% of budget is reached). + * "projected_limit_exceeded": For "key" only - Indicates that the projected spend is expected to exceed the soft budget threshold. +- `event_group` *Literal["user", "key", "team", "proxy"]*: The group associated with the event. Possible values are: + * "user": The event is related to a specific user. + * "key": The event is related to a specific key. + * "team": The event is related to a team. + * "proxy": The event is related to a proxy. + +- `event_message` *str*: A human-readable description of the event. \ No newline at end of file diff --git a/docs/my-website/docs/proxy/billing.md b/docs/my-website/docs/proxy/billing.md new file mode 100644 index 0000000000..d3d1400cde --- /dev/null +++ b/docs/my-website/docs/proxy/billing.md @@ -0,0 +1,319 @@ +import Image from '@theme/IdealImage'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# π΅ Billing + +Bill internal teams, external customers for their usage + +**π¨ Requirements** +- [Setup Lago](https://docs.getlago.com/guide/self-hosted/docker#run-the-app), for usage-based billing. We recommend following [their Stripe tutorial](https://docs.getlago.com/templates/per-transaction/stripe#step-1-create-billable-metrics-for-transaction) + +Steps: +- Connect the proxy to Lago +- Set the id you want to bill for (customers, internal users, teams) +- Start! + +## Quick Start + +Bill internal teams for their usage + +### 1. Connect proxy to Lago + +Set 'lago' as a callback on your proxy config.yaml + +```yaml +model_name: + - model_name: fake-openai-endpoint + litellm_params: + model: openai/fake + api_key: fake-key + api_base: https://exampleopenaiendpoint-production.up.railway.app/ + +litellm_settings: + callbacks: ["lago"] # π KEY CHANGE + +general_settings: + master_key: sk-1234 +``` + +Add your Lago keys to the environment + +```bash +export LAGO_API_BASE="http://localhost:3000" # self-host - https://docs.getlago.com/guide/self-hosted/docker#run-the-app +export LAGO_API_KEY="3e29d607-de54-49aa-a019-ecf585729070" # Get key - https://docs.getlago.com/guide/self-hosted/docker#find-your-api-key +export LAGO_API_EVENT_CODE="openai_tokens" # name of lago billing code +export LAGO_API_CHARGE_BY="team_id" # π Charges 'team_id' attached to proxy key +``` + +Start proxy + +```bash +litellm --config /path/to/config.yaml +``` + +### 2. Create Key for Internal Team + +```bash +curl 'http://0.0.0.0:4000/key/generate' \ +--header 'Authorization: Bearer sk-1234' \ +--header 'Content-Type: application/json' \ +--data-raw '{"team_id": "my-unique-id"}' # π Internal Team's ID +``` + +Response Object: + +```bash +{ + "key": "sk-tXL0wt5-lOOVK9sfY2UacA", +} +``` + + +### 3. Start billing! 
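+
+Because `LAGO_API_CHARGE_BY="team_id"`, each successful request made with this key is
+sent to Lago as a usage event whose `external_customer_id` is the `team_id` from step 2
+(`my-unique-id`). Make requests with the team's key, as shown below: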
+ + + + +```bash +curl --location 'http://0.0.0.0:4000/chat/completions' \ +--header 'Content-Type: application/json' \ +--header 'Authorization: Bearer sk-tXL0wt5-lOOVK9sfY2UacA' \ # π Team's Key +--data ' { + "model": "fake-openai-endpoint", + "messages": [ + { + "role": "user", + "content": "what llm are you" + } + ], + } +' +``` + + + +```python +import openai +client = openai.OpenAI( + api_key="sk-tXL0wt5-lOOVK9sfY2UacA", # π Team's Key + base_url="http://0.0.0.0:4000" +) + +# request sent to model set on litellm proxy, `litellm --model` +response = client.chat.completions.create(model="gpt-3.5-turbo", messages = [ + { + "role": "user", + "content": "this is a test request, write a short poem" + } +]) + +print(response) +``` + + + +```python +from langchain.chat_models import ChatOpenAI +from langchain.prompts.chat import ( + ChatPromptTemplate, + HumanMessagePromptTemplate, + SystemMessagePromptTemplate, +) +from langchain.schema import HumanMessage, SystemMessage +import os + +os.environ["OPENAI_API_KEY"] = "sk-tXL0wt5-lOOVK9sfY2UacA" # π Team's Key + +chat = ChatOpenAI( + openai_api_base="http://0.0.0.0:4000", + model = "gpt-3.5-turbo", + temperature=0.1, +) + +messages = [ + SystemMessage( + content="You are a helpful assistant that im using to make a test request to." + ), + HumanMessage( + content="test from litellm. tell me why it's amazing in 1 sentence" + ), +] +response = chat(messages) + +print(response) +``` + + + +**See Results on Lago** + + + + +## Advanced - Lago Logging object + +This is what LiteLLM will log to Lagos + +``` +{ + "event": { + "transaction_id": "", + "external_customer_id": , # either 'end_user_id', 'user_id', or 'team_id'. Default 'end_user_id'. + "code": os.getenv("LAGO_API_EVENT_CODE"), + "properties": { + "input_tokens": , + "output_tokens": , + "model": , + "response_cost": , # π LITELLM CALCULATED RESPONSE COST - https://github.com/BerriAI/litellm/blob/d43f75150a65f91f60dc2c0c9462ce3ffc713c1f/litellm/utils.py#L1473 + } + } +} +``` + +## Advanced - Bill Customers, Internal Users + +For: +- Customers (id passed via 'user' param in /chat/completion call) = 'end_user_id' +- Internal Users (id set when [creating keys](https://docs.litellm.ai/docs/proxy/virtual_keys#advanced---spend-tracking)) = 'user_id' +- Teams (id set when [creating keys](https://docs.litellm.ai/docs/proxy/virtual_keys#advanced---spend-tracking)) = 'team_id' + + + + + + +1. Set 'LAGO_API_CHARGE_BY' to 'end_user_id' + + ```bash + export LAGO_API_CHARGE_BY="end_user_id" + ``` + +2. Test it! 
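+
+   LiteLLM reads the `user` field from the request body and forwards it to Lago as the
+   event's `external_customer_id`, so pass your customer's id there, as shown below: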
+ + + + + ```shell + curl --location 'http://0.0.0.0:4000/chat/completions' \ + --header 'Content-Type: application/json' \ + --data ' { + "model": "gpt-3.5-turbo", + "messages": [ + { + "role": "user", + "content": "what llm are you" + } + ], + "user": "my_customer_id" # π whatever your customer id is + } + ' + ``` + + + + ```python + import openai + client = openai.OpenAI( + api_key="anything", + base_url="http://0.0.0.0:4000" + ) + + # request sent to model set on litellm proxy, `litellm --model` + response = client.chat.completions.create(model="gpt-3.5-turbo", messages = [ + { + "role": "user", + "content": "this is a test request, write a short poem" + } + ], user="my_customer_id") # π whatever your customer id is + + print(response) + ``` + + + + + ```python + from langchain.chat_models import ChatOpenAI + from langchain.prompts.chat import ( + ChatPromptTemplate, + HumanMessagePromptTemplate, + SystemMessagePromptTemplate, + ) + from langchain.schema import HumanMessage, SystemMessage + import os + + os.environ["OPENAI_API_KEY"] = "anything" + + chat = ChatOpenAI( + openai_api_base="http://0.0.0.0:4000", + model = "gpt-3.5-turbo", + temperature=0.1, + extra_body={ + "user": "my_customer_id" # π whatever your customer id is + } + ) + + messages = [ + SystemMessage( + content="You are a helpful assistant that im using to make a test request to." + ), + HumanMessage( + content="test from litellm. tell me why it's amazing in 1 sentence" + ), + ] + response = chat(messages) + + print(response) + ``` + + + + + + + +1. Set 'LAGO_API_CHARGE_BY' to 'user_id' + +```bash +export LAGO_API_CHARGE_BY="user_id" +``` + +2. Create a key for that user + +```bash +curl 'http://0.0.0.0:4000/key/generate' \ +--header 'Authorization: Bearer ' \ +--header 'Content-Type: application/json' \ +--data-raw '{"user_id": "my-unique-id"}' # π Internal User's id +``` + +Response Object: + +```bash +{ + "key": "sk-tXL0wt5-lOOVK9sfY2UacA", +} +``` + +3. 
Make API Calls with that Key + +```python +import openai +client = openai.OpenAI( + api_key="sk-tXL0wt5-lOOVK9sfY2UacA", # π Generated key + base_url="http://0.0.0.0:4000" +) + +# request sent to model set on litellm proxy, `litellm --model` +response = client.chat.completions.create(model="gpt-3.5-turbo", messages = [ + { + "role": "user", + "content": "this is a test request, write a short poem" + } +]) + +print(response) +``` + + diff --git a/docs/my-website/docs/proxy/call_hooks.md b/docs/my-website/docs/proxy/call_hooks.md index 3195e2e5aa..3a8726e879 100644 --- a/docs/my-website/docs/proxy/call_hooks.md +++ b/docs/my-website/docs/proxy/call_hooks.md @@ -25,26 +25,45 @@ class MyCustomHandler(CustomLogger): # https://docs.litellm.ai/docs/observabilit def __init__(self): pass - #### ASYNC #### - - async def async_log_stream_event(self, kwargs, response_obj, start_time, end_time): - pass - - async def async_log_pre_api_call(self, model, messages, kwargs): - pass - - async def async_log_success_event(self, kwargs, response_obj, start_time, end_time): - pass - - async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time): - pass - #### CALL HOOKS - proxy only #### - async def async_pre_call_hook(self, user_api_key_dict: UserAPIKeyAuth, cache: DualCache, data: dict, call_type: Literal["completion", "embeddings"]): + async def async_pre_call_hook(self, user_api_key_dict: UserAPIKeyAuth, cache: DualCache, data: dict, call_type: Literal[ + "completion", + "text_completion", + "embeddings", + "image_generation", + "moderation", + "audio_transcription", + ]) -> Optional[dict, str, Exception]: data["model"] = "my-new-model" return data + async def async_post_call_failure_hook( + self, original_exception: Exception, user_api_key_dict: UserAPIKeyAuth + ): + pass + + async def async_post_call_success_hook( + self, + user_api_key_dict: UserAPIKeyAuth, + response, + ): + pass + + async def async_moderation_hook( # call made in parallel to llm api call + self, + data: dict, + user_api_key_dict: UserAPIKeyAuth, + call_type: Literal["completion", "embeddings", "image_generation"], + ): + pass + + async def async_post_call_streaming_hook( + self, + user_api_key_dict: UserAPIKeyAuth, + response: str, + ): + pass proxy_handler_instance = MyCustomHandler() ``` @@ -190,4 +209,100 @@ general_settings: **Result** - \ No newline at end of file + + +## Advanced - Return rejected message as response + +For chat completions and text completion calls, you can return a rejected message as a user response. + +Do this by returning a string. LiteLLM takes care of returning the response in the correct format depending on the endpoint and if it's streaming/non-streaming. + +For non-chat/text completion endpoints, this response is returned as a 400 status code exception. + + +### 1. 
Create Custom Handler + +```python +from litellm.integrations.custom_logger import CustomLogger +import litellm +from litellm.utils import get_formatted_prompt + +# This file includes the custom callbacks for LiteLLM Proxy +# Once defined, these can be passed in proxy_config.yaml +class MyCustomHandler(CustomLogger): + def __init__(self): + pass + + #### CALL HOOKS - proxy only #### + + async def async_pre_call_hook(self, user_api_key_dict: UserAPIKeyAuth, cache: DualCache, data: dict, call_type: Literal[ + "completion", + "text_completion", + "embeddings", + "image_generation", + "moderation", + "audio_transcription", + ]) -> Optional[dict, str, Exception]: + formatted_prompt = get_formatted_prompt(data=data, call_type=call_type) + + if "Hello world" in formatted_prompt: + return "This is an invalid response" + + return data + +proxy_handler_instance = MyCustomHandler() +``` + +### 2. Update config.yaml + +```yaml +model_list: + - model_name: gpt-3.5-turbo + litellm_params: + model: gpt-3.5-turbo + +litellm_settings: + callbacks: custom_callbacks.proxy_handler_instance # sets litellm.callbacks = [proxy_handler_instance] +``` + + +### 3. Test it! + +```shell +$ litellm /path/to/config.yaml +``` +```shell +curl --location 'http://0.0.0.0:4000/chat/completions' \ + --data ' { + "model": "gpt-3.5-turbo", + "messages": [ + { + "role": "user", + "content": "Hello world" + } + ], + }' +``` + +**Expected Response** + +``` +{ + "id": "chatcmpl-d00bbede-2d90-4618-bf7b-11a1c23cf360", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": "This is an invalid response.", # π REJECTED RESPONSE + "role": "assistant" + } + } + ], + "created": 1716234198, + "model": null, + "object": "chat.completion", + "system_fingerprint": null, + "usage": {} +} +``` \ No newline at end of file diff --git a/docs/my-website/docs/proxy/debugging.md b/docs/my-website/docs/proxy/debugging.md index c5653d90f7..b9f2ba8da9 100644 --- a/docs/my-website/docs/proxy/debugging.md +++ b/docs/my-website/docs/proxy/debugging.md @@ -5,6 +5,8 @@ - debug (prints info logs) - detailed debug (prints debug logs) +The proxy also supports json logs. [See here](#json-logs) + ## `debug` **via cli** @@ -31,4 +33,20 @@ $ litellm --detailed_debug ```python os.environ["LITELLM_LOG"] = "DEBUG" -``` \ No newline at end of file +``` + +## JSON LOGS + +Set `JSON_LOGS="True"` in your env: + +```bash +export JSON_LOGS="True" +``` + +Start proxy + +```bash +$ litellm +``` + +The proxy will now all logs in json format. \ No newline at end of file diff --git a/docs/my-website/docs/proxy/enterprise.md b/docs/my-website/docs/proxy/enterprise.md index 1831164bee..8904faa455 100644 --- a/docs/my-website/docs/proxy/enterprise.md +++ b/docs/my-website/docs/proxy/enterprise.md @@ -1,7 +1,8 @@ +import Image from '@theme/IdealImage'; import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; -# β¨ Enterprise Features - Content Mod, SSO +# β¨ Enterprise Features - Content Mod, SSO, Custom Swagger Features here are behind a commercial license in our `/enterprise` folder. [**See Code**](https://github.com/BerriAI/litellm/tree/main/enterprise) @@ -20,6 +21,7 @@ Features: - β Reject calls (incoming / outgoing) with Banned Keywords (e.g. competitors) - β Don't log/store specific requests to Langfuse, Sentry, etc. 
(eg confidential LLM requests) - β Tracking Spend for Custom Tags +- β Custom Branding + Routes on Swagger Docs @@ -526,4 +528,39 @@ curl -X GET "http://0.0.0.0:4000/spend/tags" \ \ No newline at end of file +## Tracking Spend per User --> + +## Swagger Docs - Custom Routes + Branding + +:::info + +Requires a LiteLLM Enterprise key to use. Request one [here](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat) + +::: + +Set LiteLLM Key in your environment + +```bash +LITELLM_LICENSE="" +``` + +### Customize Title + Description + +In your environment, set: + +```bash +DOCS_TITLE="TotalGPT" +DOCS_DESCRIPTION="Sample Company Description" +``` + +### Customize Routes + +Hide admin routes from users. + +In your environment, set: + +```bash +DOCS_FILTERED="True" # only shows openai routes to user +``` + + \ No newline at end of file diff --git a/docs/my-website/img/custom_swagger.png b/docs/my-website/img/custom_swagger.png new file mode 100644 index 0000000000..e17c0882bd Binary files /dev/null and b/docs/my-website/img/custom_swagger.png differ diff --git a/docs/my-website/img/lago.jpeg b/docs/my-website/img/lago.jpeg new file mode 100644 index 0000000000..546852f1c4 Binary files /dev/null and b/docs/my-website/img/lago.jpeg differ diff --git a/docs/my-website/img/lago_2.png b/docs/my-website/img/lago_2.png new file mode 100644 index 0000000000..24ecb49ef7 Binary files /dev/null and b/docs/my-website/img/lago_2.png differ diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index 62202cc7eb..f840ed7897 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -41,6 +41,7 @@ const sidebars = { "proxy/reliability", "proxy/cost_tracking", "proxy/users", + "proxy/billing", "proxy/user_keys", "proxy/enterprise", "proxy/virtual_keys", @@ -175,6 +176,7 @@ const sidebars = { "observability/custom_callback", "observability/langfuse_integration", "observability/sentry", + "observability/lago", "observability/openmeter", "observability/promptlayer_integration", "observability/wandb_integration", diff --git a/litellm/__init__.py b/litellm/__init__.py index 0db5d365a6..92610afd9d 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -27,8 +27,8 @@ input_callback: List[Union[str, Callable]] = [] success_callback: List[Union[str, Callable]] = [] failure_callback: List[Union[str, Callable]] = [] service_callback: List[Union[str, Callable]] = [] -callbacks: List[Callable] = [] -_custom_logger_compatible_callbacks: list = ["openmeter"] +_custom_logger_compatible_callbacks_literal = Literal["lago", "openmeter"] +callbacks: List[Union[Callable, _custom_logger_compatible_callbacks_literal]] = [] _langfuse_default_tags: Optional[ List[ Literal[ @@ -724,6 +724,9 @@ from .utils import ( get_supported_openai_params, get_api_base, get_first_chars_messages, + ModelResponse, + ImageResponse, + ImageObject, ) from .llms.huggingface_restapi import HuggingfaceConfig from .llms.anthropic import AnthropicConfig diff --git a/litellm/_logging.py b/litellm/_logging.py index f31ee41f8b..0759ad51e9 100644 --- a/litellm/_logging.py +++ b/litellm/_logging.py @@ -1,19 +1,33 @@ -import logging +import logging, os, json +from logging import Formatter set_verbose = False -json_logs = False +json_logs = bool(os.getenv("JSON_LOGS", False)) # Create a handler for the logger (you may need to adapt this based on your needs) handler = logging.StreamHandler() handler.setLevel(logging.DEBUG) + +class JsonFormatter(Formatter): + def __init__(self): + super(JsonFormatter, self).__init__() + 
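+    # Render each log record as a single-line JSON object (currently just the
+    # message); used when JSON_LOGS=True is set in the environment.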
+ def format(self, record): + json_record = {} + json_record["message"] = record.getMessage() + return json.dumps(json_record) + + # Create a formatter and set it for the handler -formatter = logging.Formatter( - "\033[92m%(asctime)s - %(name)s:%(levelname)s\033[0m: %(filename)s:%(lineno)s - %(message)s", - datefmt="%H:%M:%S", -) +if json_logs: + handler.setFormatter(JsonFormatter()) +else: + formatter = logging.Formatter( + "\033[92m%(asctime)s - %(name)s:%(levelname)s\033[0m: %(filename)s:%(lineno)s - %(message)s", + datefmt="%H:%M:%S", + ) - -handler.setFormatter(formatter) + handler.setFormatter(formatter) verbose_proxy_logger = logging.getLogger("LiteLLM Proxy") verbose_router_logger = logging.getLogger("LiteLLM Router") diff --git a/litellm/exceptions.py b/litellm/exceptions.py index d239f1e128..d189b7ebe2 100644 --- a/litellm/exceptions.py +++ b/litellm/exceptions.py @@ -15,11 +15,19 @@ from typing import Optional class AuthenticationError(openai.AuthenticationError): # type: ignore - def __init__(self, message, llm_provider, model, response: httpx.Response): + def __init__( + self, + message, + llm_provider, + model, + response: httpx.Response, + litellm_debug_info: Optional[str] = None, + ): self.status_code = 401 self.message = message self.llm_provider = llm_provider self.model = model + self.litellm_debug_info = litellm_debug_info super().__init__( self.message, response=response, body=None ) # Call the base class constructor with the parameters it needs @@ -27,11 +35,19 @@ class AuthenticationError(openai.AuthenticationError): # type: ignore # raise when invalid models passed, example gpt-8 class NotFoundError(openai.NotFoundError): # type: ignore - def __init__(self, message, model, llm_provider, response: httpx.Response): + def __init__( + self, + message, + model, + llm_provider, + response: httpx.Response, + litellm_debug_info: Optional[str] = None, + ): self.status_code = 404 self.message = message self.model = model self.llm_provider = llm_provider + self.litellm_debug_info = litellm_debug_info super().__init__( self.message, response=response, body=None ) # Call the base class constructor with the parameters it needs @@ -39,12 +55,18 @@ class NotFoundError(openai.NotFoundError): # type: ignore class BadRequestError(openai.BadRequestError): # type: ignore def __init__( - self, message, model, llm_provider, response: Optional[httpx.Response] = None + self, + message, + model, + llm_provider, + response: Optional[httpx.Response] = None, + litellm_debug_info: Optional[str] = None, ): self.status_code = 400 self.message = message self.model = model self.llm_provider = llm_provider + self.litellm_debug_info = litellm_debug_info response = response or httpx.Response( status_code=self.status_code, request=httpx.Request( @@ -57,18 +79,28 @@ class BadRequestError(openai.BadRequestError): # type: ignore class UnprocessableEntityError(openai.UnprocessableEntityError): # type: ignore - def __init__(self, message, model, llm_provider, response: httpx.Response): + def __init__( + self, + message, + model, + llm_provider, + response: httpx.Response, + litellm_debug_info: Optional[str] = None, + ): self.status_code = 422 self.message = message self.model = model self.llm_provider = llm_provider + self.litellm_debug_info = litellm_debug_info super().__init__( self.message, response=response, body=None ) # Call the base class constructor with the parameters it needs class Timeout(openai.APITimeoutError): # type: ignore - def __init__(self, message, model, llm_provider): + def __init__( + 
self, message, model, llm_provider, litellm_debug_info: Optional[str] = None + ): request = httpx.Request(method="POST", url="https://api.openai.com/v1") super().__init__( request=request @@ -77,6 +109,7 @@ class Timeout(openai.APITimeoutError): # type: ignore self.message = message self.model = model self.llm_provider = llm_provider + self.litellm_debug_info = litellm_debug_info # custom function to convert to str def __str__(self): @@ -84,22 +117,38 @@ class Timeout(openai.APITimeoutError): # type: ignore class PermissionDeniedError(openai.PermissionDeniedError): # type:ignore - def __init__(self, message, llm_provider, model, response: httpx.Response): + def __init__( + self, + message, + llm_provider, + model, + response: httpx.Response, + litellm_debug_info: Optional[str] = None, + ): self.status_code = 403 self.message = message self.llm_provider = llm_provider self.model = model + self.litellm_debug_info = litellm_debug_info super().__init__( self.message, response=response, body=None ) # Call the base class constructor with the parameters it needs class RateLimitError(openai.RateLimitError): # type: ignore - def __init__(self, message, llm_provider, model, response: httpx.Response): + def __init__( + self, + message, + llm_provider, + model, + response: httpx.Response, + litellm_debug_info: Optional[str] = None, + ): self.status_code = 429 self.message = message self.llm_provider = llm_provider self.modle = model + self.litellm_debug_info = litellm_debug_info super().__init__( self.message, response=response, body=None ) # Call the base class constructor with the parameters it needs @@ -107,11 +156,45 @@ class RateLimitError(openai.RateLimitError): # type: ignore # sub class of rate limit error - meant to give more granularity for error handling context window exceeded errors class ContextWindowExceededError(BadRequestError): # type: ignore - def __init__(self, message, model, llm_provider, response: httpx.Response): + def __init__( + self, + message, + model, + llm_provider, + response: httpx.Response, + litellm_debug_info: Optional[str] = None, + ): self.status_code = 400 self.message = message self.model = model self.llm_provider = llm_provider + self.litellm_debug_info = litellm_debug_info + super().__init__( + message=self.message, + model=self.model, # type: ignore + llm_provider=self.llm_provider, # type: ignore + response=response, + ) # Call the base class constructor with the parameters it needs + + +# sub class of bad request error - meant to help us catch guardrails-related errors on proxy. +class RejectedRequestError(BadRequestError): # type: ignore + def __init__( + self, + message, + model, + llm_provider, + request_data: dict, + litellm_debug_info: Optional[str] = None, + ): + self.status_code = 400 + self.message = message + self.model = model + self.llm_provider = llm_provider + self.litellm_debug_info = litellm_debug_info + self.request_data = request_data + request = httpx.Request(method="POST", url="https://api.openai.com/v1") + response = httpx.Response(status_code=500, request=request) super().__init__( message=self.message, model=self.model, # type: ignore @@ -122,11 +205,19 @@ class ContextWindowExceededError(BadRequestError): # type: ignore class ContentPolicyViolationError(BadRequestError): # type: ignore # Error code: 400 - {'error': {'code': 'content_policy_violation', 'message': 'Your request was rejected as a result of our safety system. Image descriptions generated from your prompt may contain text that is not allowed by our safety system. 
If you believe this was done in error, your request may succeed if retried, or by adjusting your prompt.', 'param': None, 'type': 'invalid_request_error'}} - def __init__(self, message, model, llm_provider, response: httpx.Response): + def __init__( + self, + message, + model, + llm_provider, + response: httpx.Response, + litellm_debug_info: Optional[str] = None, + ): self.status_code = 400 self.message = message self.model = model self.llm_provider = llm_provider + self.litellm_debug_info = litellm_debug_info super().__init__( message=self.message, model=self.model, # type: ignore @@ -136,11 +227,19 @@ class ContentPolicyViolationError(BadRequestError): # type: ignore class ServiceUnavailableError(openai.APIStatusError): # type: ignore - def __init__(self, message, llm_provider, model, response: httpx.Response): + def __init__( + self, + message, + llm_provider, + model, + response: httpx.Response, + litellm_debug_info: Optional[str] = None, + ): self.status_code = 503 self.message = message self.llm_provider = llm_provider self.model = model + self.litellm_debug_info = litellm_debug_info super().__init__( self.message, response=response, body=None ) # Call the base class constructor with the parameters it needs @@ -149,33 +248,51 @@ class ServiceUnavailableError(openai.APIStatusError): # type: ignore # raise this when the API returns an invalid response object - https://github.com/openai/openai-python/blob/1be14ee34a0f8e42d3f9aa5451aa4cb161f1781f/openai/api_requestor.py#L401 class APIError(openai.APIError): # type: ignore def __init__( - self, status_code, message, llm_provider, model, request: httpx.Request + self, + status_code, + message, + llm_provider, + model, + request: httpx.Request, + litellm_debug_info: Optional[str] = None, ): self.status_code = status_code self.message = message self.llm_provider = llm_provider self.model = model + self.litellm_debug_info = litellm_debug_info super().__init__(self.message, request=request, body=None) # type: ignore # raised if an invalid request (not get, delete, put, post) is made class APIConnectionError(openai.APIConnectionError): # type: ignore - def __init__(self, message, llm_provider, model, request: httpx.Request): + def __init__( + self, + message, + llm_provider, + model, + request: httpx.Request, + litellm_debug_info: Optional[str] = None, + ): self.message = message self.llm_provider = llm_provider self.model = model self.status_code = 500 + self.litellm_debug_info = litellm_debug_info super().__init__(message=self.message, request=request) # raised if an invalid request (not get, delete, put, post) is made class APIResponseValidationError(openai.APIResponseValidationError): # type: ignore - def __init__(self, message, llm_provider, model): + def __init__( + self, message, llm_provider, model, litellm_debug_info: Optional[str] = None + ): self.message = message self.llm_provider = llm_provider self.model = model request = httpx.Request(method="POST", url="https://api.openai.com/v1") response = httpx.Response(status_code=500, request=request) + self.litellm_debug_info = litellm_debug_info super().__init__(response=response, body=None, message=message) diff --git a/litellm/integrations/custom_logger.py b/litellm/integrations/custom_logger.py index d508825922..e192cdaea7 100644 --- a/litellm/integrations/custom_logger.py +++ b/litellm/integrations/custom_logger.py @@ -4,7 +4,6 @@ import dotenv, os from litellm.proxy._types import UserAPIKeyAuth from litellm.caching import DualCache - from typing import Literal, Union, Optional import 
traceback @@ -64,8 +63,17 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac user_api_key_dict: UserAPIKeyAuth, cache: DualCache, data: dict, - call_type: Literal["completion", "embeddings", "image_generation"], - ): + call_type: Literal[ + "completion", + "text_completion", + "embeddings", + "image_generation", + "moderation", + "audio_transcription", + ], + ) -> Optional[ + Union[Exception, str, dict] + ]: # raise exception if invalid, return a str for the user to receive - if rejected, or return a modified dictionary for passing into litellm pass async def async_post_call_failure_hook( diff --git a/litellm/integrations/lago.py b/litellm/integrations/lago.py new file mode 100644 index 0000000000..e6d38f530c --- /dev/null +++ b/litellm/integrations/lago.py @@ -0,0 +1,179 @@ +# What is this? +## On Success events log cost to Lago - https://github.com/BerriAI/litellm/issues/3639 + +import dotenv, os, json +import litellm +import traceback, httpx +from litellm.integrations.custom_logger import CustomLogger +from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler +import uuid +from typing import Optional, Literal + + +def get_utc_datetime(): + import datetime as dt + from datetime import datetime + + if hasattr(dt, "UTC"): + return datetime.now(dt.UTC) # type: ignore + else: + return datetime.utcnow() # type: ignore + + +class LagoLogger(CustomLogger): + def __init__(self) -> None: + super().__init__() + self.validate_environment() + self.async_http_handler = AsyncHTTPHandler() + self.sync_http_handler = HTTPHandler() + + def validate_environment(self): + """ + Expects + LAGO_API_BASE, + LAGO_API_KEY, + LAGO_API_EVENT_CODE, + + Optional: + LAGO_API_CHARGE_BY + + in the environment + """ + missing_keys = [] + if os.getenv("LAGO_API_KEY", None) is None: + missing_keys.append("LAGO_API_KEY") + + if os.getenv("LAGO_API_BASE", None) is None: + missing_keys.append("LAGO_API_BASE") + + if os.getenv("LAGO_API_EVENT_CODE", None) is None: + missing_keys.append("LAGO_API_EVENT_CODE") + + if len(missing_keys) > 0: + raise Exception("Missing keys={} in environment.".format(missing_keys)) + + def _common_logic(self, kwargs: dict, response_obj) -> dict: + call_id = response_obj.get("id", kwargs.get("litellm_call_id")) + dt = get_utc_datetime().isoformat() + cost = kwargs.get("response_cost", None) + model = kwargs.get("model") + usage = {} + + if ( + isinstance(response_obj, litellm.ModelResponse) + or isinstance(response_obj, litellm.EmbeddingResponse) + ) and hasattr(response_obj, "usage"): + usage = { + "prompt_tokens": response_obj["usage"].get("prompt_tokens", 0), + "completion_tokens": response_obj["usage"].get("completion_tokens", 0), + "total_tokens": response_obj["usage"].get("total_tokens"), + } + + litellm_params = kwargs.get("litellm_params", {}) or {} + proxy_server_request = litellm_params.get("proxy_server_request") or {} + end_user_id = proxy_server_request.get("body", {}).get("user", None) + user_id = litellm_params["metadata"].get("user_api_key_user_id", None) + team_id = litellm_params["metadata"].get("user_api_key_team_id", None) + org_id = litellm_params["metadata"].get("user_api_key_org_id", None) + + charge_by: Literal["end_user_id", "team_id", "user_id"] = "end_user_id" + external_customer_id: Optional[str] = None + + if os.getenv("LAGO_API_CHARGE_BY", None) is not None and isinstance( + os.environ["LAGO_API_CHARGE_BY"], str + ): + if os.environ["LAGO_API_CHARGE_BY"] in [ + "end_user_id", + "user_id", + "team_id", + ]: + 
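+                # bill the customer ('user' param), internal user, or team attached
+                # to the key, depending on LAGO_API_CHARGE_BY (default: 'end_user_id')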
charge_by = os.environ["LAGO_API_CHARGE_BY"] # type: ignore + else: + raise Exception("invalid LAGO_API_CHARGE_BY set") + + if charge_by == "end_user_id": + external_customer_id = end_user_id + elif charge_by == "team_id": + external_customer_id = team_id + elif charge_by == "user_id": + external_customer_id = user_id + + if external_customer_id is None: + raise Exception("External Customer ID is not set") + + return { + "event": { + "transaction_id": str(uuid.uuid4()), + "external_customer_id": external_customer_id, + "code": os.getenv("LAGO_API_EVENT_CODE"), + "properties": {"model": model, "response_cost": cost, **usage}, + } + } + + def log_success_event(self, kwargs, response_obj, start_time, end_time): + _url = os.getenv("LAGO_API_BASE") + assert _url is not None and isinstance( + _url, str + ), "LAGO_API_BASE missing or not set correctly. LAGO_API_BASE={}".format(_url) + if _url.endswith("/"): + _url += "api/v1/events" + else: + _url += "/api/v1/events" + + api_key = os.getenv("LAGO_API_KEY") + + _data = self._common_logic(kwargs=kwargs, response_obj=response_obj) + _headers = { + "Content-Type": "application/json", + "Authorization": "Bearer {}".format(api_key), + } + + try: + response = self.sync_http_handler.post( + url=_url, + data=json.dumps(_data), + headers=_headers, + ) + + response.raise_for_status() + except Exception as e: + if hasattr(response, "text"): + litellm.print_verbose(f"\nError Message: {response.text}") + raise e + + async def async_log_success_event(self, kwargs, response_obj, start_time, end_time): + try: + _url = os.getenv("LAGO_API_BASE") + assert _url is not None and isinstance( + _url, str + ), "LAGO_API_BASE missing or not set correctly. LAGO_API_BASE={}".format( + _url + ) + if _url.endswith("/"): + _url += "api/v1/events" + else: + _url += "/api/v1/events" + + api_key = os.getenv("LAGO_API_KEY") + + _data = self._common_logic(kwargs=kwargs, response_obj=response_obj) + _headers = { + "Content-Type": "application/json", + "Authorization": "Bearer {}".format(api_key), + } + except Exception as e: + raise e + + response: Optional[httpx.Response] = None + try: + response = await self.async_http_handler.post( + url=_url, + data=json.dumps(_data), + headers=_headers, + ) + + response.raise_for_status() + except Exception as e: + if response is not None and hasattr(response, "text"): + litellm.print_verbose(f"\nError Message: {response.text}") + raise e diff --git a/litellm/integrations/langfuse.py b/litellm/integrations/langfuse.py index feff4ad851..153b677e07 100644 --- a/litellm/integrations/langfuse.py +++ b/litellm/integrations/langfuse.py @@ -93,6 +93,7 @@ class LangFuseLogger: ) litellm_params = kwargs.get("litellm_params", {}) + litellm_call_id = kwargs.get("litellm_call_id", None) metadata = ( litellm_params.get("metadata", {}) or {} ) # if litellm_params['metadata'] == None @@ -161,6 +162,7 @@ class LangFuseLogger: response_obj, level, print_verbose, + litellm_call_id, ) elif response_obj is not None: self._log_langfuse_v1( @@ -255,6 +257,7 @@ class LangFuseLogger: response_obj, level, print_verbose, + litellm_call_id, ) -> tuple: import langfuse @@ -318,7 +321,7 @@ class LangFuseLogger: session_id = clean_metadata.pop("session_id", None) trace_name = clean_metadata.pop("trace_name", None) - trace_id = clean_metadata.pop("trace_id", None) + trace_id = clean_metadata.pop("trace_id", litellm_call_id) existing_trace_id = clean_metadata.pop("existing_trace_id", None) update_trace_keys = clean_metadata.pop("update_trace_keys", []) debug = 
clean_metadata.pop("debug_langfuse", None) @@ -351,9 +354,13 @@ class LangFuseLogger: # Special keys that are found in the function arguments and not the metadata if "input" in update_trace_keys: - trace_params["input"] = input if not mask_input else "redacted-by-litellm" + trace_params["input"] = ( + input if not mask_input else "redacted-by-litellm" + ) if "output" in update_trace_keys: - trace_params["output"] = output if not mask_output else "redacted-by-litellm" + trace_params["output"] = ( + output if not mask_output else "redacted-by-litellm" + ) else: # don't overwrite an existing trace trace_params = { "id": trace_id, @@ -375,7 +382,9 @@ class LangFuseLogger: if level == "ERROR": trace_params["status_message"] = output else: - trace_params["output"] = output if not mask_output else "redacted-by-litellm" + trace_params["output"] = ( + output if not mask_output else "redacted-by-litellm" + ) if debug == True or (isinstance(debug, str) and debug.lower() == "true"): if "metadata" in trace_params: diff --git a/litellm/integrations/langsmith.py b/litellm/integrations/langsmith.py index 92e4402155..3e25b4ee77 100644 --- a/litellm/integrations/langsmith.py +++ b/litellm/integrations/langsmith.py @@ -44,6 +44,8 @@ class LangsmithLogger: print_verbose( f"Langsmith Logging - project_name: {project_name}, run_name {run_name}" ) + langsmith_base_url = os.getenv("LANGSMITH_BASE_URL", "https://api.smith.langchain.com") + try: print_verbose( f"Langsmith Logging - Enters logging function for model {kwargs}" @@ -86,8 +88,12 @@ class LangsmithLogger: "end_time": end_time, } + url = f"{langsmith_base_url}/runs" + print_verbose( + f"Langsmith Logging - About to send data to {url} ..." + ) response = requests.post( - "https://api.smith.langchain.com/runs", + url=url, json=data, headers={"x-api-key": self.langsmith_api_key}, ) diff --git a/litellm/integrations/slack_alerting.py b/litellm/integrations/slack_alerting.py index 40258af6b8..68b8b8309d 100644 --- a/litellm/integrations/slack_alerting.py +++ b/litellm/integrations/slack_alerting.py @@ -1,7 +1,7 @@ #### What this does #### # Class for sending Slack Alerts # import dotenv, os -from litellm.proxy._types import UserAPIKeyAuth +from litellm.proxy._types import UserAPIKeyAuth, CallInfo from litellm._logging import verbose_logger, verbose_proxy_logger import litellm, threading from typing import List, Literal, Any, Union, Optional, Dict @@ -36,6 +36,13 @@ class SlackAlertingArgs(LiteLLMBase): os.getenv("SLACK_DAILY_REPORT_FREQUENCY", default_daily_report_frequency) ) report_check_interval: int = 5 * 60 # 5 minutes + budget_alert_ttl: int = 24 * 60 * 60 # 24 hours + + +class WebhookEvent(CallInfo): + event: Literal["budget_crossed", "threshold_crossed", "projected_limit_exceeded"] + event_group: Literal["user", "key", "team", "proxy"] + event_message: str # human-readable description of event class DeploymentMetrics(LiteLLMBase): @@ -87,6 +94,9 @@ class SlackAlerting(CustomLogger): "budget_alerts", "db_exceptions", "daily_reports", + "spend_reports", + "cooldown_deployment", + "new_model_added", ] ] = [ "llm_exceptions", @@ -95,6 +105,9 @@ class SlackAlerting(CustomLogger): "budget_alerts", "db_exceptions", "daily_reports", + "spend_reports", + "cooldown_deployment", + "new_model_added", ], alert_to_webhook_url: Optional[ Dict @@ -158,13 +171,28 @@ class SlackAlerting(CustomLogger): ) -> Optional[str]: """ Returns langfuse trace url + + - check: + -> existing_trace_id + -> trace_id + -> litellm_call_id """ # do nothing for now - if ( - request_data is 
not None - and request_data.get("metadata", {}).get("trace_id", None) is not None - ): - trace_id = request_data["metadata"]["trace_id"] + if request_data is not None: + trace_id = None + if ( + request_data.get("metadata", {}).get("existing_trace_id", None) + is not None + ): + trace_id = request_data["metadata"]["existing_trace_id"] + elif request_data.get("metadata", {}).get("trace_id", None) is not None: + trace_id = request_data["metadata"]["trace_id"] + elif request_data.get("litellm_logging_obj", None) is not None and hasattr( + request_data["litellm_logging_obj"], "model_call_details" + ): + trace_id = request_data["litellm_logging_obj"].model_call_details[ + "litellm_call_id" + ] if litellm.utils.langFuseLogger is not None: base_url = litellm.utils.langFuseLogger.Langfuse.base_url return f"{base_url}/trace/{trace_id}" @@ -549,127 +577,131 @@ class SlackAlerting(CustomLogger): alert_type="llm_requests_hanging", ) + async def failed_tracking_alert(self, error_message: str): + """Raise alert when tracking failed for specific model""" + _cache: DualCache = self.internal_usage_cache + message = "Failed Tracking Cost for" + error_message + _cache_key = "budget_alerts:failed_tracking:{}".format(message) + result = await _cache.async_get_cache(key=_cache_key) + if result is None: + await self.send_alert( + message=message, level="High", alert_type="budget_alerts" + ) + await _cache.async_set_cache( + key=_cache_key, + value="SENT", + ttl=self.alerting_args.budget_alert_ttl, + ) + async def budget_alerts( self, type: Literal[ "token_budget", "user_budget", - "user_and_proxy_budget", - "failed_budgets", - "failed_tracking", + "team_budget", + "proxy_budget", "projected_limit_exceeded", ], - user_max_budget: float, - user_current_spend: float, - user_info=None, - error_message="", + user_info: CallInfo, ): + ## PREVENTITIVE ALERTING ## - https://github.com/BerriAI/litellm/issues/2727 + # - Alert once within 24hr period + # - Cache this information + # - Don't re-alert, if alert already sent + _cache: DualCache = self.internal_usage_cache + if self.alerting is None or self.alert_types is None: # do nothing if alerting is not switched on return if "budget_alerts" not in self.alert_types: return _id: str = "default_id" # used for caching - if type == "user_and_proxy_budget": - user_info = dict(user_info) - user_id = user_info["user_id"] - _id = user_id - max_budget = user_info["max_budget"] - spend = user_info["spend"] - user_email = user_info["user_email"] - user_info = f"""\nUser ID: {user_id}\nMax Budget: ${max_budget}\nSpend: ${spend}\nUser Email: {user_email}""" + user_info_json = user_info.model_dump(exclude_none=True) + for k, v in user_info_json.items(): + user_info_str = "\n{}: {}\n".format(k, v) + + event: Optional[ + Literal["budget_crossed", "threshold_crossed", "projected_limit_exceeded"] + ] = None + event_group: Optional[Literal["user", "team", "key", "proxy"]] = None + event_message: str = "" + webhook_event: Optional[WebhookEvent] = None + if type == "proxy_budget": + event_group = "proxy" + event_message += "Proxy Budget: " + elif type == "user_budget": + event_group = "user" + event_message += "User Budget: " + _id = user_info.user_id or _id + elif type == "team_budget": + event_group = "team" + event_message += "Team Budget: " + _id = user_info.team_id or _id elif type == "token_budget": - token_info = dict(user_info) - token = token_info["token"] - _id = token - spend = token_info["spend"] - max_budget = token_info["max_budget"] - user_id = token_info["user_id"] - user_info 
= f"""\nToken: {token}\nSpend: ${spend}\nMax Budget: ${max_budget}\nUser ID: {user_id}""" - elif type == "failed_tracking": - user_id = str(user_info) - _id = user_id - user_info = f"\nUser ID: {user_id}\n Error {error_message}" - message = "Failed Tracking Cost for" + user_info - await self.send_alert( - message=message, level="High", alert_type="budget_alerts" - ) - return - elif type == "projected_limit_exceeded" and user_info is not None: - """ - Input variables: - user_info = { - "key_alias": key_alias, - "projected_spend": projected_spend, - "projected_exceeded_date": projected_exceeded_date, - } - user_max_budget=soft_limit, - user_current_spend=new_spend - """ - message = f"""\nπ¨ `ProjectedLimitExceededError` πΈ\n\n`Key Alias:` {user_info["key_alias"]} \n`Expected Day of Error`: {user_info["projected_exceeded_date"]} \n`Current Spend`: {user_current_spend} \n`Projected Spend at end of month`: {user_info["projected_spend"]} \n`Soft Limit`: {user_max_budget}""" - await self.send_alert( - message=message, level="High", alert_type="budget_alerts" - ) - return - else: - user_info = str(user_info) + event_group = "key" + event_message += "Key Budget: " + _id = user_info.token + elif type == "projected_limit_exceeded": + event_group = "key" + event_message += "Key Budget: Projected Limit Exceeded" + event = "projected_limit_exceeded" + _id = user_info.token # percent of max_budget left to spend - if user_max_budget > 0: - percent_left = (user_max_budget - user_current_spend) / user_max_budget + if user_info.max_budget > 0: + percent_left = ( + user_info.max_budget - user_info.spend + ) / user_info.max_budget else: percent_left = 0 - verbose_proxy_logger.debug( - f"Budget Alerts: Percent left: {percent_left} for {user_info}" - ) - - ## PREVENTITIVE ALERTING ## - https://github.com/BerriAI/litellm/issues/2727 - # - Alert once within 28d period - # - Cache this information - # - Don't re-alert, if alert already sent - _cache: DualCache = self.internal_usage_cache # check if crossed budget - if user_current_spend >= user_max_budget: - verbose_proxy_logger.debug("Budget Crossed for %s", user_info) - message = "Budget Crossed for" + user_info - result = await _cache.async_get_cache(key=message) - if result is None: - await self.send_alert( - message=message, level="High", alert_type="budget_alerts" - ) - await _cache.async_set_cache(key=message, value="SENT", ttl=2419200) - return + if user_info.spend >= user_info.max_budget: + event = "budget_crossed" + event_message += "Budget Crossed" + elif percent_left <= 0.05: + event = "threshold_crossed" + event_message += "5% Threshold Crossed" + elif percent_left <= 0.15: + event = "threshold_crossed" + event_message += "15% Threshold Crossed" - # check if 5% of max budget is left - if percent_left <= 0.05: - message = "5% budget left for" + user_info - cache_key = "alerting:{}".format(_id) - result = await _cache.async_get_cache(key=cache_key) + if event is not None and event_group is not None: + _cache_key = "budget_alerts:{}:{}".format(event, _id) + result = await _cache.async_get_cache(key=_cache_key) if result is None: + webhook_event = WebhookEvent( + event=event, + event_group=event_group, + event_message=event_message, + **user_info_json, + ) await self.send_alert( - message=message, level="Medium", alert_type="budget_alerts" + message=event_message + "\n\n" + user_info_str, + level="High", + alert_type="budget_alerts", + user_info=webhook_event, + ) + await _cache.async_set_cache( + key=_cache_key, + value="SENT", + 
ttl=self.alerting_args.budget_alert_ttl, ) - await _cache.async_set_cache(key=cache_key, value="SENT", ttl=2419200) - return - - # check if 15% of max budget is left - if percent_left <= 0.15: - message = "15% budget left for" + user_info - result = await _cache.async_get_cache(key=message) - if result is None: - await self.send_alert( - message=message, level="Low", alert_type="budget_alerts" - ) - await _cache.async_set_cache(key=message, value="SENT", ttl=2419200) - return - return - async def model_added_alert(self, model_name: str, litellm_model_name: str): - model_info = litellm.model_cost.get(litellm_model_name, {}) + async def model_added_alert( + self, model_name: str, litellm_model_name: str, passed_model_info: Any + ): + base_model_from_user = getattr(passed_model_info, "base_model", None) + model_info = {} + base_model = "" + if base_model_from_user is not None: + model_info = litellm.model_cost.get(base_model_from_user, {}) + base_model = f"Base Model: `{base_model_from_user}`\n" + else: + model_info = litellm.model_cost.get(litellm_model_name, {}) model_info_str = "" for k, v in model_info.items(): if k == "input_cost_per_token" or k == "output_cost_per_token": @@ -681,6 +713,7 @@ class SlackAlerting(CustomLogger): message = f""" *π New Model Added* Model Name: `{model_name}` +{base_model} Usage OpenAI Python SDK: ``` @@ -715,6 +748,34 @@ Model Info: async def model_removed_alert(self, model_name: str): pass + async def send_webhook_alert(self, webhook_event: WebhookEvent) -> bool: + """ + Sends structured alert to webhook, if set. + + Currently only implemented for budget alerts + + Returns -> True if sent, False if not. + """ + + webhook_url = os.getenv("WEBHOOK_URL", None) + if webhook_url is None: + raise Exception("Missing webhook_url from environment") + + payload = webhook_event.model_dump_json() + headers = {"Content-type": "application/json"} + + response = await self.async_http_handler.post( + url=webhook_url, + headers=headers, + data=payload, + ) + if response.status_code == 200: + return True + else: + print("Error sending webhook alert. 
Error=", response.text) # noqa + + return False + async def send_alert( self, message: str, @@ -726,9 +787,11 @@ Model Info: "budget_alerts", "db_exceptions", "daily_reports", + "spend_reports", "new_model_added", "cooldown_deployment", ], + user_info: Optional[WebhookEvent] = None, **kwargs, ): """ @@ -748,6 +811,19 @@ Model Info: if self.alerting is None: return + if ( + "webhook" in self.alerting + and alert_type == "budget_alerts" + and user_info is not None + ): + await self.send_webhook_alert(webhook_event=user_info) + + if "slack" not in self.alerting: + return + + if alert_type not in self.alert_types: + return + from datetime import datetime import json @@ -795,27 +871,37 @@ Model Info: async def async_log_success_event(self, kwargs, response_obj, start_time, end_time): """Log deployment latency""" - if "daily_reports" in self.alert_types: - model_id = ( - kwargs.get("litellm_params", {}).get("model_info", {}).get("id", "") - ) - response_s: timedelta = end_time - start_time - - final_value = response_s - total_tokens = 0 - - if isinstance(response_obj, litellm.ModelResponse): - completion_tokens = response_obj.usage.completion_tokens - final_value = float(response_s.total_seconds() / completion_tokens) - - await self.async_update_daily_reports( - DeploymentMetrics( - id=model_id, - failed_request=False, - latency_per_output_token=final_value, - updated_at=litellm.utils.get_utc_datetime(), + try: + if "daily_reports" in self.alert_types: + model_id = ( + kwargs.get("litellm_params", {}).get("model_info", {}).get("id", "") ) + response_s: timedelta = end_time - start_time + + final_value = response_s + total_tokens = 0 + + if isinstance(response_obj, litellm.ModelResponse): + completion_tokens = response_obj.usage.completion_tokens + if completion_tokens is not None and completion_tokens > 0: + final_value = float( + response_s.total_seconds() / completion_tokens + ) + + await self.async_update_daily_reports( + DeploymentMetrics( + id=model_id, + failed_request=False, + latency_per_output_token=final_value, + updated_at=litellm.utils.get_utc_datetime(), + ) + ) + except Exception as e: + verbose_proxy_logger.error( + "[Non-Blocking Error] Slack Alerting: Got error in logging LLM deployment latency: ", + e, ) + pass async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time): """Log failure + deployment latency""" @@ -942,7 +1028,7 @@ Model Info: await self.send_alert( message=_weekly_spend_message, level="Low", - alert_type="daily_reports", + alert_type="spend_reports", ) except Exception as e: verbose_proxy_logger.error("Error sending weekly spend report", e) @@ -993,7 +1079,7 @@ Model Info: await self.send_alert( message=_spend_message, level="Low", - alert_type="daily_reports", + alert_type="spend_reports", ) except Exception as e: verbose_proxy_logger.error("Error sending weekly spend report", e) diff --git a/litellm/llms/anthropic.py b/litellm/llms/anthropic.py index 97a473a2ee..f14dabc03a 100644 --- a/litellm/llms/anthropic.py +++ b/litellm/llms/anthropic.py @@ -93,6 +93,7 @@ class AnthropicConfig: "max_tokens", "tools", "tool_choice", + "extra_headers", ] def map_openai_params(self, non_default_params: dict, optional_params: dict): @@ -504,7 +505,9 @@ class AnthropicChatCompletion(BaseLLM): ## Handle Tool Calling if "tools" in optional_params: _is_function_call = True - headers["anthropic-beta"] = "tools-2024-04-04" + if "anthropic-beta" not in headers: + # default to v1 of "anthropic-beta" + headers["anthropic-beta"] = "tools-2024-05-16" 
anthropic_tools = [] for tool in optional_params["tools"]: diff --git a/litellm/llms/base.py b/litellm/llms/base.py index d940d94714..8c2f5101e1 100644 --- a/litellm/llms/base.py +++ b/litellm/llms/base.py @@ -21,7 +21,7 @@ class BaseLLM: messages: list, print_verbose, encoding, - ) -> litellm.utils.ModelResponse: + ) -> Union[litellm.utils.ModelResponse, litellm.utils.CustomStreamWrapper]: """ Helper function to process the response across sync + async completion calls """ diff --git a/litellm/llms/bedrock_httpx.py b/litellm/llms/bedrock_httpx.py index 1ff3767bdc..5fe0e0cc17 100644 --- a/litellm/llms/bedrock_httpx.py +++ b/litellm/llms/bedrock_httpx.py @@ -1,6 +1,6 @@ # What is this? ## Initial implementation of calling bedrock via httpx client (allows for async calls). -## V0 - just covers cohere command-r support +## V1 - covers cohere + anthropic claude-3 support import os, types import json @@ -29,12 +29,20 @@ from litellm.utils import ( get_secret, Logging, ) -import litellm -from .prompt_templates.factory import prompt_factory, custom_prompt, cohere_message_pt +import litellm, uuid +from .prompt_templates.factory import ( + prompt_factory, + custom_prompt, + cohere_message_pt, + construct_tool_use_system_prompt, + extract_between_tags, + parse_xml_params, + contains_tag, +) from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler from .base import BaseLLM import httpx # type: ignore -from .bedrock import BedrockError, convert_messages_to_prompt +from .bedrock import BedrockError, convert_messages_to_prompt, ModelResponseIterator from litellm.types.llms.bedrock import * @@ -280,7 +288,8 @@ class BedrockLLM(BaseLLM): messages: List, print_verbose, encoding, - ) -> ModelResponse: + ) -> Union[ModelResponse, CustomStreamWrapper]: + provider = model.split(".")[0] ## LOGGING logging_obj.post_call( input=messages, @@ -297,26 +306,210 @@ class BedrockLLM(BaseLLM): raise BedrockError(message=response.text, status_code=422) try: - model_response.choices[0].message.content = completion_response["text"] # type: ignore + if provider == "cohere": + if "text" in completion_response: + outputText = completion_response["text"] # type: ignore + elif "generations" in completion_response: + outputText = completion_response["generations"][0]["text"] + model_response["finish_reason"] = map_finish_reason( + completion_response["generations"][0]["finish_reason"] + ) + elif provider == "anthropic": + if model.startswith("anthropic.claude-3"): + json_schemas: dict = {} + _is_function_call = False + ## Handle Tool Calling + if "tools" in optional_params: + _is_function_call = True + for tool in optional_params["tools"]: + json_schemas[tool["function"]["name"]] = tool[ + "function" + ].get("parameters", None) + outputText = completion_response.get("content")[0].get("text", None) + if outputText is not None and contains_tag( + "invoke", outputText + ): # OUTPUT PARSE FUNCTION CALL + function_name = extract_between_tags("tool_name", outputText)[0] + function_arguments_str = extract_between_tags( + "invoke", outputText + )[0].strip() + function_arguments_str = ( + f"{function_arguments_str}" + ) + function_arguments = parse_xml_params( + function_arguments_str, + json_schema=json_schemas.get( + function_name, None + ), # check if we have a json schema for this function name) + ) + _message = litellm.Message( + tool_calls=[ + { + "id": f"call_{uuid.uuid4()}", + "type": "function", + "function": { + "name": function_name, + "arguments": json.dumps(function_arguments), + }, + } + ], + 
content=None, + ) + model_response.choices[0].message = _message # type: ignore + model_response._hidden_params["original_response"] = ( + outputText # allow user to access raw anthropic tool calling response + ) + if ( + _is_function_call == True + and stream is not None + and stream == True + ): + print_verbose( + f"INSIDE BEDROCK STREAMING TOOL CALLING CONDITION BLOCK" + ) + # return an iterator + streaming_model_response = ModelResponse(stream=True) + streaming_model_response.choices[0].finish_reason = getattr( + model_response.choices[0], "finish_reason", "stop" + ) + # streaming_model_response.choices = [litellm.utils.StreamingChoices()] + streaming_choice = litellm.utils.StreamingChoices() + streaming_choice.index = model_response.choices[0].index + _tool_calls = [] + print_verbose( + f"type of model_response.choices[0]: {type(model_response.choices[0])}" + ) + print_verbose( + f"type of streaming_choice: {type(streaming_choice)}" + ) + if isinstance(model_response.choices[0], litellm.Choices): + if getattr( + model_response.choices[0].message, "tool_calls", None + ) is not None and isinstance( + model_response.choices[0].message.tool_calls, list + ): + for tool_call in model_response.choices[ + 0 + ].message.tool_calls: + _tool_call = {**tool_call.dict(), "index": 0} + _tool_calls.append(_tool_call) + delta_obj = litellm.utils.Delta( + content=getattr( + model_response.choices[0].message, "content", None + ), + role=model_response.choices[0].message.role, + tool_calls=_tool_calls, + ) + streaming_choice.delta = delta_obj + streaming_model_response.choices = [streaming_choice] + completion_stream = ModelResponseIterator( + model_response=streaming_model_response + ) + print_verbose( + f"Returns anthropic CustomStreamWrapper with 'cached_response' streaming object" + ) + return litellm.CustomStreamWrapper( + completion_stream=completion_stream, + model=model, + custom_llm_provider="cached_response", + logging_obj=logging_obj, + ) + + model_response["finish_reason"] = map_finish_reason( + completion_response.get("stop_reason", "") + ) + _usage = litellm.Usage( + prompt_tokens=completion_response["usage"]["input_tokens"], + completion_tokens=completion_response["usage"]["output_tokens"], + total_tokens=completion_response["usage"]["input_tokens"] + + completion_response["usage"]["output_tokens"], + ) + setattr(model_response, "usage", _usage) + else: + outputText = completion_response["completion"] + + model_response["finish_reason"] = completion_response["stop_reason"] + elif provider == "ai21": + outputText = ( + completion_response.get("completions")[0].get("data").get("text") + ) + elif provider == "meta": + outputText = completion_response["generation"] + elif provider == "mistral": + outputText = completion_response["outputs"][0]["text"] + model_response["finish_reason"] = completion_response["outputs"][0][ + "stop_reason" + ] + else: # amazon titan + outputText = completion_response.get("results")[0].get("outputText") except Exception as e: - raise BedrockError(message=response.text, status_code=422) + raise BedrockError( + message="Error processing={}, Received error={}".format( + response.text, str(e) + ), + status_code=422, + ) + + try: + if ( + len(outputText) > 0 + and hasattr(model_response.choices[0], "message") + and getattr(model_response.choices[0].message, "tool_calls", None) + is None + ): + model_response["choices"][0]["message"]["content"] = outputText + elif ( + hasattr(model_response.choices[0], "message") + and getattr(model_response.choices[0].message, 
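Editor's note: the claude-3 branch above rebuilds an OpenAI-style `tool_calls` entry from XML-like tags in the model text. Below is a rough, self-contained stand-in for the `contains_tag` / `extract_between_tags` helpers imported from `prompt_templates.factory`; their exact signatures are assumed from how they are used here, and the sample output text is invented.

```python
import json
import re
import uuid
from typing import List


def contains_tag(tag: str, text: str) -> bool:
    return f"<{tag}>" in text


def extract_between_tags(tag: str, text: str) -> List[str]:
    return re.findall(rf"<{tag}>(.*?)</{tag}>", text, re.DOTALL)


output_text = (
    "<function_calls><invoke><tool_name>get_current_weather</tool_name>"
    "<parameters><location>Boston, MA</location></parameters></invoke></function_calls>"
)

if contains_tag("invoke", output_text):
    function_name = extract_between_tags("tool_name", output_text)[0]
    location = extract_between_tags("location", output_text)[0]
    tool_call = {
        "id": f"call_{uuid.uuid4()}",
        "type": "function",
        "function": {
            "name": function_name,
            "arguments": json.dumps({"location": location}),
        },
    }
    print(tool_call)
```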
"tool_calls", None) + is not None + ): + pass + else: + raise Exception() + except: + raise BedrockError( + message=json.dumps(outputText), status_code=response.status_code + ) + + if stream and provider == "ai21": + streaming_model_response = ModelResponse(stream=True) + streaming_model_response.choices[0].finish_reason = model_response.choices[ # type: ignore + 0 + ].finish_reason + # streaming_model_response.choices = [litellm.utils.StreamingChoices()] + streaming_choice = litellm.utils.StreamingChoices() + streaming_choice.index = model_response.choices[0].index + delta_obj = litellm.utils.Delta( + content=getattr(model_response.choices[0].message, "content", None), + role=model_response.choices[0].message.role, + ) + streaming_choice.delta = delta_obj + streaming_model_response.choices = [streaming_choice] + mri = ModelResponseIterator(model_response=streaming_model_response) + return CustomStreamWrapper( + completion_stream=mri, + model=model, + custom_llm_provider="cached_response", + logging_obj=logging_obj, + ) ## CALCULATING USAGE - bedrock returns usage in the headers - prompt_tokens = int( - response.headers.get( - "x-amzn-bedrock-input-token-count", - len(encoding.encode("".join(m.get("content", "") for m in messages))), - ) + bedrock_input_tokens = response.headers.get( + "x-amzn-bedrock-input-token-count", None ) + bedrock_output_tokens = response.headers.get( + "x-amzn-bedrock-output-token-count", None + ) + + prompt_tokens = int( + bedrock_input_tokens or litellm.token_counter(messages=messages) + ) + completion_tokens = int( - response.headers.get( - "x-amzn-bedrock-output-token-count", - len( - encoding.encode( - model_response.choices[0].message.content, # type: ignore - disallowed_special=(), - ) - ), + bedrock_output_tokens + or litellm.token_counter( + text=model_response.choices[0].message.content, # type: ignore + count_response_tokens=True, ) ) @@ -359,6 +552,7 @@ class BedrockLLM(BaseLLM): ## SETUP ## stream = optional_params.pop("stream", None) + provider = model.split(".")[0] ## CREDENTIALS ## # pop aws_secret_access_key, aws_access_key_id, aws_region_name from kwargs, since completion calls fail with them @@ -414,19 +608,18 @@ class BedrockLLM(BaseLLM): else: endpoint_url = f"https://bedrock-runtime.{aws_region_name}.amazonaws.com" - if stream is not None and stream == True: + if (stream is not None and stream == True) and provider != "ai21": endpoint_url = f"{endpoint_url}/model/{model}/invoke-with-response-stream" else: endpoint_url = f"{endpoint_url}/model/{model}/invoke" sigv4 = SigV4Auth(credentials, "bedrock", aws_region_name) - provider = model.split(".")[0] prompt, chat_history = self.convert_messages_to_prompt( model, messages, provider, custom_prompt_dict ) inference_params = copy.deepcopy(optional_params) - + json_schemas: dict = {} if provider == "cohere": if model.startswith("cohere.command-r"): ## LOAD CONFIG @@ -453,8 +646,114 @@ class BedrockLLM(BaseLLM): True # cohere requires stream = True in inference params ) data = json.dumps({"prompt": prompt, **inference_params}) + elif provider == "anthropic": + if model.startswith("anthropic.claude-3"): + # Separate system prompt from rest of message + system_prompt_idx: list[int] = [] + system_messages: list[str] = [] + for idx, message in enumerate(messages): + if message["role"] == "system": + system_messages.append(message["content"]) + system_prompt_idx.append(idx) + if len(system_prompt_idx) > 0: + inference_params["system"] = "\n".join(system_messages) + messages = [ + i for j, i in 
enumerate(messages) if j not in system_prompt_idx + ] + # Format rest of message according to anthropic guidelines + messages = prompt_factory( + model=model, messages=messages, custom_llm_provider="anthropic_xml" + ) # type: ignore + ## LOAD CONFIG + config = litellm.AmazonAnthropicClaude3Config.get_config() + for k, v in config.items(): + if ( + k not in inference_params + ): # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in + inference_params[k] = v + ## Handle Tool Calling + if "tools" in inference_params: + _is_function_call = True + for tool in inference_params["tools"]: + json_schemas[tool["function"]["name"]] = tool["function"].get( + "parameters", None + ) + tool_calling_system_prompt = construct_tool_use_system_prompt( + tools=inference_params["tools"] + ) + inference_params["system"] = ( + inference_params.get("system", "\n") + + tool_calling_system_prompt + ) # add the anthropic tool calling prompt to the system prompt + inference_params.pop("tools") + data = json.dumps({"messages": messages, **inference_params}) + else: + ## LOAD CONFIG + config = litellm.AmazonAnthropicConfig.get_config() + for k, v in config.items(): + if ( + k not in inference_params + ): # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in + inference_params[k] = v + data = json.dumps({"prompt": prompt, **inference_params}) + elif provider == "ai21": + ## LOAD CONFIG + config = litellm.AmazonAI21Config.get_config() + for k, v in config.items(): + if ( + k not in inference_params + ): # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in + inference_params[k] = v + + data = json.dumps({"prompt": prompt, **inference_params}) + elif provider == "mistral": + ## LOAD CONFIG + config = litellm.AmazonMistralConfig.get_config() + for k, v in config.items(): + if ( + k not in inference_params + ): # completion(top_k=3) > amazon_config(top_k=3) <- allows for dynamic variables to be passed in + inference_params[k] = v + + data = json.dumps({"prompt": prompt, **inference_params}) + elif provider == "amazon": # amazon titan + ## LOAD CONFIG + config = litellm.AmazonTitanConfig.get_config() + for k, v in config.items(): + if ( + k not in inference_params + ): # completion(top_k=3) > amazon_config(top_k=3) <- allows for dynamic variables to be passed in + inference_params[k] = v + + data = json.dumps( + { + "inputText": prompt, + "textGenerationConfig": inference_params, + } + ) + elif provider == "meta": + ## LOAD CONFIG + config = litellm.AmazonLlamaConfig.get_config() + for k, v in config.items(): + if ( + k not in inference_params + ): # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in + inference_params[k] = v + data = json.dumps({"prompt": prompt, **inference_params}) else: - raise Exception("UNSUPPORTED PROVIDER") + ## LOGGING + logging_obj.pre_call( + input=messages, + api_key="", + additional_args={ + "complete_input_dict": inference_params, + }, + ) + raise Exception( + "Bedrock HTTPX: Unsupported provider={}, model={}".format( + provider, model + ) + ) ## COMPLETION CALL @@ -482,7 +781,7 @@ class BedrockLLM(BaseLLM): if acompletion: if isinstance(client, HTTPHandler): client = None - if stream: + if stream == True and provider != "ai21": return self.async_streaming( model=model, messages=messages, @@ -511,7 +810,7 @@ class BedrockLLM(BaseLLM): encoding=encoding, logging_obj=logging_obj, optional_params=optional_params, - 
stream=False, + stream=stream, # type: ignore litellm_params=litellm_params, logger_fn=logger_fn, headers=prepped.headers, @@ -528,7 +827,7 @@ class BedrockLLM(BaseLLM): self.client = HTTPHandler(**_params) # type: ignore else: self.client = client - if stream is not None and stream == True: + if (stream is not None and stream == True) and provider != "ai21": response = self.client.post( url=prepped.url, headers=prepped.headers, # type: ignore @@ -541,7 +840,7 @@ class BedrockLLM(BaseLLM): status_code=response.status_code, message=response.text ) - decoder = AWSEventStreamDecoder() + decoder = AWSEventStreamDecoder(model=model) completion_stream = decoder.iter_bytes(response.iter_bytes(chunk_size=1024)) streaming_response = CustomStreamWrapper( @@ -550,15 +849,24 @@ class BedrockLLM(BaseLLM): custom_llm_provider="bedrock", logging_obj=logging_obj, ) + + ## LOGGING + logging_obj.post_call( + input=messages, + api_key="", + original_response=streaming_response, + additional_args={"complete_input_dict": data}, + ) return streaming_response - response = self.client.post(url=prepped.url, headers=prepped.headers, data=data) # type: ignore - try: + response = self.client.post(url=prepped.url, headers=prepped.headers, data=data) # type: ignore response.raise_for_status() except httpx.HTTPStatusError as err: error_code = err.response.status_code raise BedrockError(status_code=error_code, message=response.text) + except httpx.TimeoutException as e: + raise BedrockError(status_code=408, message="Timeout error occurred.") return self.process_response( model=model, @@ -591,7 +899,7 @@ class BedrockLLM(BaseLLM): logger_fn=None, headers={}, client: Optional[AsyncHTTPHandler] = None, - ) -> ModelResponse: + ) -> Union[ModelResponse, CustomStreamWrapper]: if client is None: _params = {} if timeout is not None: @@ -602,12 +910,20 @@ class BedrockLLM(BaseLLM): else: self.client = client # type: ignore - response = await self.client.post(api_base, headers=headers, data=data) # type: ignore + try: + response = await self.client.post(api_base, headers=headers, data=data) # type: ignore + response.raise_for_status() + except httpx.HTTPStatusError as err: + error_code = err.response.status_code + raise BedrockError(status_code=error_code, message=response.text) + except httpx.TimeoutException as e: + raise BedrockError(status_code=408, message="Timeout error occurred.") + return self.process_response( model=model, response=response, model_response=model_response, - stream=stream, + stream=stream if isinstance(stream, bool) else False, logging_obj=logging_obj, api_key="", data=data, @@ -650,7 +966,7 @@ class BedrockLLM(BaseLLM): if response.status_code != 200: raise BedrockError(status_code=response.status_code, message=response.text) - decoder = AWSEventStreamDecoder() + decoder = AWSEventStreamDecoder(model=model) completion_stream = decoder.aiter_bytes(response.aiter_bytes(chunk_size=1024)) streaming_response = CustomStreamWrapper( @@ -659,6 +975,15 @@ class BedrockLLM(BaseLLM): custom_llm_provider="bedrock", logging_obj=logging_obj, ) + + ## LOGGING + logging_obj.post_call( + input=messages, + api_key="", + original_response=streaming_response, + additional_args={"complete_input_dict": data}, + ) + return streaming_response def embedding(self, *args, **kwargs): @@ -676,11 +1001,70 @@ def get_response_stream_shape(): class AWSEventStreamDecoder: - def __init__(self) -> None: + def __init__(self, model: str) -> None: from botocore.parsers import EventStreamJSONParser + self.model = model self.parser = 
EventStreamJSONParser() + def _chunk_parser(self, chunk_data: dict) -> GenericStreamingChunk: + text = "" + is_finished = False + finish_reason = "" + if "outputText" in chunk_data: + text = chunk_data["outputText"] + # ai21 mapping + if "ai21" in self.model: # fake ai21 streaming + text = chunk_data.get("completions")[0].get("data").get("text") # type: ignore + is_finished = True + finish_reason = "stop" + ######## bedrock.anthropic mappings ############### + elif "completion" in chunk_data: # not claude-3 + text = chunk_data["completion"] # bedrock.anthropic + stop_reason = chunk_data.get("stop_reason", None) + if stop_reason != None: + is_finished = True + finish_reason = stop_reason + elif "delta" in chunk_data: + if chunk_data["delta"].get("text", None) is not None: + text = chunk_data["delta"]["text"] + stop_reason = chunk_data["delta"].get("stop_reason", None) + if stop_reason != None: + is_finished = True + finish_reason = stop_reason + ######## bedrock.mistral mappings ############### + elif "outputs" in chunk_data: + if ( + len(chunk_data["outputs"]) == 1 + and chunk_data["outputs"][0].get("text", None) is not None + ): + text = chunk_data["outputs"][0]["text"] + stop_reason = chunk_data.get("stop_reason", None) + if stop_reason != None: + is_finished = True + finish_reason = stop_reason + ######## bedrock.cohere mappings ############### + # meta mapping + elif "generation" in chunk_data: + text = chunk_data["generation"] # bedrock.meta + # cohere mapping + elif "text" in chunk_data: + text = chunk_data["text"] # bedrock.cohere + # cohere mapping for finish reason + elif "finish_reason" in chunk_data: + finish_reason = chunk_data["finish_reason"] + is_finished = True + elif chunk_data.get("completionReason", None): + is_finished = True + finish_reason = chunk_data["completionReason"] + return GenericStreamingChunk( + **{ + "text": text, + "is_finished": is_finished, + "finish_reason": finish_reason, + } + ) + def iter_bytes(self, iterator: Iterator[bytes]) -> Iterator[GenericStreamingChunk]: """Given an iterator that yields lines, iterate over it & yield every event encountered""" from botocore.eventstream import EventStreamBuffer @@ -693,12 +1077,7 @@ class AWSEventStreamDecoder: if message: # sse_event = ServerSentEvent(data=message, event="completion") _data = json.loads(message) - streaming_chunk: GenericStreamingChunk = GenericStreamingChunk( - text=_data.get("text", ""), - is_finished=_data.get("is_finished", False), - finish_reason=_data.get("finish_reason", ""), - ) - yield streaming_chunk + yield self._chunk_parser(chunk_data=_data) async def aiter_bytes( self, iterator: AsyncIterator[bytes] @@ -713,12 +1092,7 @@ class AWSEventStreamDecoder: message = self._parse_message_from_event(event) if message: _data = json.loads(message) - streaming_chunk: GenericStreamingChunk = GenericStreamingChunk( - text=_data.get("text", ""), - is_finished=_data.get("is_finished", False), - finish_reason=_data.get("finish_reason", ""), - ) - yield streaming_chunk + yield self._chunk_parser(chunk_data=_data) def _parse_message_from_event(self, event) -> Optional[str]: response_dict = event.to_response_dict() diff --git a/litellm/llms/gemini.py b/litellm/llms/gemini.py index 60220fd290..a55b39aef9 100644 --- a/litellm/llms/gemini.py +++ b/litellm/llms/gemini.py @@ -260,7 +260,7 @@ def completion( message_obj = Message(content=item.content.parts[0].text) else: message_obj = Message(content=None) - choice_obj = Choices(index=idx + 1, message=message_obj) + choice_obj = Choices(index=idx, 
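Editor's note: to make the `_chunk_parser` branches above easier to scan, here is a condensed, self-contained version of the same per-provider mapping run over representative chunk payloads. The payload shapes follow the branches in the diff; the values and the local `GenericStreamingChunk` TypedDict are stand-ins for the litellm type.

```python
from typing import TypedDict


class GenericStreamingChunk(TypedDict):
    text: str
    is_finished: bool
    finish_reason: str


def parse_chunk(chunk: dict) -> GenericStreamingChunk:
    """Condensed version of the per-provider branches in _chunk_parser."""
    text, is_finished, finish_reason = "", False, ""
    if "outputText" in chunk:                       # amazon titan
        text = chunk["outputText"]
    elif "completion" in chunk:                     # anthropic, pre claude-3
        text = chunk["completion"]
        if chunk.get("stop_reason"):
            is_finished, finish_reason = True, chunk["stop_reason"]
    elif "delta" in chunk:                          # anthropic claude-3
        text = chunk["delta"].get("text") or ""
        if chunk["delta"].get("stop_reason"):
            is_finished, finish_reason = True, chunk["delta"]["stop_reason"]
    elif "outputs" in chunk:                        # mistral
        text = chunk["outputs"][0].get("text") or ""
    elif "generation" in chunk:                     # meta llama
        text = chunk["generation"]
    elif "text" in chunk:                           # cohere
        text = chunk["text"]
    elif "finish_reason" in chunk:                  # cohere finish chunk
        is_finished, finish_reason = True, chunk["finish_reason"]
    return GenericStreamingChunk(
        text=text, is_finished=is_finished, finish_reason=finish_reason
    )


samples = [
    {"outputText": "Hello"},                                   # titan
    {"completion": " world", "stop_reason": "stop_sequence"},  # anthropic
    {"delta": {"text": "!", "stop_reason": "end_turn"}},       # claude-3
    {"outputs": [{"text": "hi", "stop_reason": None}]},        # mistral
    {"generation": "hey"},                                     # meta
    {"finish_reason": "COMPLETE"},                             # cohere
]
for s in samples:
    print(parse_chunk(s))
```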
message=message_obj) choices_list.append(choice_obj) model_response["choices"] = choices_list except Exception as e: @@ -352,7 +352,7 @@ async def async_completion( message_obj = Message(content=item.content.parts[0].text) else: message_obj = Message(content=None) - choice_obj = Choices(index=idx + 1, message=message_obj) + choice_obj = Choices(index=idx, message=message_obj) choices_list.append(choice_obj) model_response["choices"] = choices_list except Exception as e: diff --git a/litellm/llms/openai.py b/litellm/llms/openai.py index 7acbdfae02..9d143f5d9a 100644 --- a/litellm/llms/openai.py +++ b/litellm/llms/openai.py @@ -96,7 +96,7 @@ class MistralConfig: safe_prompt: Optional[bool] = None, response_format: Optional[dict] = None, ) -> None: - locals_ = locals() + locals_ = locals().copy() for key, value in locals_.items(): if key != "self" and value is not None: setattr(self.__class__, key, value) @@ -211,7 +211,7 @@ class OpenAIConfig: temperature: Optional[int] = None, top_p: Optional[int] = None, ) -> None: - locals_ = locals() + locals_ = locals().copy() for key, value in locals_.items(): if key != "self" and value is not None: setattr(self.__class__, key, value) @@ -234,6 +234,47 @@ class OpenAIConfig: and v is not None } + def get_supported_openai_params(self, model: str) -> list: + base_params = [ + "frequency_penalty", + "logit_bias", + "logprobs", + "top_logprobs", + "max_tokens", + "n", + "presence_penalty", + "seed", + "stop", + "stream", + "stream_options", + "temperature", + "top_p", + "tools", + "tool_choice", + "user", + "function_call", + "functions", + "max_retries", + "extra_headers", + ] # works across all models + + model_specific_params = [] + if ( + model != "gpt-3.5-turbo-16k" and model != "gpt-4" + ): # gpt-4 does not support 'response_format' + model_specific_params.append("response_format") + + return base_params + model_specific_params + + def map_openai_params( + self, non_default_params: dict, optional_params: dict, model: str + ) -> dict: + supported_openai_params = self.get_supported_openai_params(model) + for param, value in non_default_params.items(): + if param in supported_openai_params: + optional_params[param] = value + return optional_params + class OpenAITextCompletionConfig: """ @@ -294,7 +335,7 @@ class OpenAITextCompletionConfig: temperature: Optional[float] = None, top_p: Optional[float] = None, ) -> None: - locals_ = locals() + locals_ = locals().copy() for key, value in locals_.items(): if key != "self" and value is not None: setattr(self.__class__, key, value) diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py index cf593369c4..e6e8ef50ec 100644 --- a/litellm/llms/prompt_templates/factory.py +++ b/litellm/llms/prompt_templates/factory.py @@ -12,6 +12,7 @@ from typing import ( Sequence, ) import litellm +import litellm.types from litellm.types.completion import ( ChatCompletionUserMessageParam, ChatCompletionSystemMessageParam, @@ -20,9 +21,12 @@ from litellm.types.completion import ( ChatCompletionMessageToolCallParam, ChatCompletionToolMessageParam, ) +import litellm.types.llms from litellm.types.llms.anthropic import * import uuid +import litellm.types.llms.vertex_ai + def default_pt(messages): return " ".join(message["content"] for message in messages) @@ -841,6 +845,175 @@ def anthropic_messages_pt_xml(messages: list): # ------------------------------------------------------------------------------ +def infer_protocol_value( + value: Any, +) -> Literal[ + "string_value", + "number_value", + 
"bool_value", + "struct_value", + "list_value", + "null_value", + "unknown", +]: + if value is None: + return "null_value" + if isinstance(value, int) or isinstance(value, float): + return "number_value" + if isinstance(value, str): + return "string_value" + if isinstance(value, bool): + return "bool_value" + if isinstance(value, dict): + return "struct_value" + if isinstance(value, list): + return "list_value" + + return "unknown" + + +def convert_to_gemini_tool_call_invoke( + tool_calls: list, +) -> List[litellm.types.llms.vertex_ai.PartType]: + """ + OpenAI tool invokes: + { + "role": "assistant", + "content": null, + "tool_calls": [ + { + "id": "call_abc123", + "type": "function", + "function": { + "name": "get_current_weather", + "arguments": "{\n\"location\": \"Boston, MA\"\n}" + } + } + ] + }, + """ + """ + Gemini tool call invokes: - https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/function-calling#submit-api-output + content { + role: "model" + parts [ + { + function_call { + name: "get_current_weather" + args { + fields { + key: "unit" + value { + string_value: "fahrenheit" + } + } + fields { + key: "predicted_temperature" + value { + number_value: 45 + } + } + fields { + key: "location" + value { + string_value: "Boston, MA" + } + } + } + }, + { + function_call { + name: "get_current_weather" + args { + fields { + key: "location" + value { + string_value: "San Francisco" + } + } + } + } + } + ] + } + """ + + """ + - json.load the arguments + - iterate through arguments -> create a FunctionCallArgs for each field + """ + try: + _parts_list: List[litellm.types.llms.vertex_ai.PartType] = [] + for tool in tool_calls: + if "function" in tool: + name = tool["function"].get("name", "") + arguments = tool["function"].get("arguments", "") + arguments_dict = json.loads(arguments) + for k, v in arguments_dict.items(): + inferred_protocol_value = infer_protocol_value(value=v) + _field = litellm.types.llms.vertex_ai.Field( + key=k, value={inferred_protocol_value: v} + ) + _fields = litellm.types.llms.vertex_ai.FunctionCallArgs( + fields=_field + ) + function_call = litellm.types.llms.vertex_ai.FunctionCall( + name=name, + args=_fields, + ) + _parts_list.append( + litellm.types.llms.vertex_ai.PartType(function_call=function_call) + ) + return _parts_list + except Exception as e: + raise Exception( + "Unable to convert openai tool calls={} to gemini tool calls. 
Received error={}".format( + tool_calls, str(e) + ) + ) + + +def convert_to_gemini_tool_call_result( + message: dict, +) -> litellm.types.llms.vertex_ai.PartType: + """ + OpenAI message with a tool result looks like: + { + "tool_call_id": "tool_1", + "role": "tool", + "name": "get_current_weather", + "content": "function result goes here", + }, + + OpenAI message with a function call result looks like: + { + "role": "function", + "name": "get_current_weather", + "content": "function result goes here", + } + """ + content = message.get("content", "") + name = message.get("name", "") + + # We can't determine from openai message format whether it's a successful or + # error call result so default to the successful result template + inferred_content_value = infer_protocol_value(value=content) + + _field = litellm.types.llms.vertex_ai.Field( + key="content", value={inferred_content_value: content} + ) + + _function_call_args = litellm.types.llms.vertex_ai.FunctionCallArgs(fields=_field) + + _function_response = litellm.types.llms.vertex_ai.FunctionResponse( + name=name, response=_function_call_args + ) + + _part = litellm.types.llms.vertex_ai.PartType(function_response=_function_response) + + return _part + + def convert_to_anthropic_tool_result(message: dict) -> dict: """ OpenAI message with a tool result looks like: @@ -1328,6 +1501,7 @@ def _gemini_vision_convert_messages(messages: list): # Case 1: Image from URL image = _load_image_from_url(img) processed_images.append(image) + else: try: from PIL import Image @@ -1335,8 +1509,22 @@ def _gemini_vision_convert_messages(messages: list): raise Exception( "gemini image conversion failed please run `pip install Pillow`" ) - # Case 2: Image filepath (e.g. temp.jpeg) given - image = Image.open(img) + + if "base64" in img: + # Case 2: Base64 image data + import base64 + import io + # Extract the base64 image data + base64_data = img.split("base64,")[1] + + # Decode the base64 image data + image_data = base64.b64decode(base64_data) + + # Load the image from the decoded data + image = Image.open(io.BytesIO(image_data)) + else: + # Case 3: Image filepath (e.g. 
temp.jpeg) given + image = Image.open(img) processed_images.append(image) content = [prompt] + processed_images return content @@ -1513,7 +1701,7 @@ def prompt_factory( elif custom_llm_provider == "clarifai": if "claude" in model: return anthropic_pt(messages=messages) - + elif custom_llm_provider == "perplexity": for message in messages: message.pop("name", None) diff --git a/litellm/llms/replicate.py b/litellm/llms/replicate.py index c297281347..386d24f59b 100644 --- a/litellm/llms/replicate.py +++ b/litellm/llms/replicate.py @@ -2,11 +2,12 @@ import os, types import json import requests # type: ignore import time -from typing import Callable, Optional -from litellm.utils import ModelResponse, Usage -import litellm +from typing import Callable, Optional, Union, Tuple, Any +from litellm.utils import ModelResponse, Usage, CustomStreamWrapper +import litellm, asyncio import httpx # type: ignore from .prompt_templates.factory import prompt_factory, custom_prompt +from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler class ReplicateError(Exception): @@ -145,6 +146,65 @@ def start_prediction( ) +async def async_start_prediction( + version_id, + input_data, + api_token, + api_base, + logging_obj, + print_verbose, + http_handler: AsyncHTTPHandler, +) -> str: + base_url = api_base + if "deployments" in version_id: + print_verbose("\nLiteLLM: Request to custom replicate deployment") + version_id = version_id.replace("deployments/", "") + base_url = f"https://api.replicate.com/v1/deployments/{version_id}" + print_verbose(f"Deployment base URL: {base_url}\n") + else: # assume it's a model + base_url = f"https://api.replicate.com/v1/models/{version_id}" + headers = { + "Authorization": f"Token {api_token}", + "Content-Type": "application/json", + } + + initial_prediction_data = { + "input": input_data, + } + + if ":" in version_id and len(version_id) > 64: + model_parts = version_id.split(":") + if ( + len(model_parts) > 1 and len(model_parts[1]) == 64 + ): ## checks if model name has a 64 digit code - e.g. 
"meta/llama-2-70b-chat:02e509c789964a7ea8736978a43525956ef40397be9033abf9fd2badfe68c9e3" + initial_prediction_data["version"] = model_parts[1] + + ## LOGGING + logging_obj.pre_call( + input=input_data["prompt"], + api_key="", + additional_args={ + "complete_input_dict": initial_prediction_data, + "headers": headers, + "api_base": base_url, + }, + ) + + response = await http_handler.post( + url="{}/predictions".format(base_url), + data=json.dumps(initial_prediction_data), + headers=headers, + ) + + if response.status_code == 201: + response_data = response.json() + return response_data.get("urls", {}).get("get") + else: + raise ReplicateError( + response.status_code, f"Failed to start prediction {response.text}" + ) + + # Function to handle prediction response (non-streaming) def handle_prediction_response(prediction_url, api_token, print_verbose): output_string = "" @@ -178,6 +238,40 @@ def handle_prediction_response(prediction_url, api_token, print_verbose): return output_string, logs +async def async_handle_prediction_response( + prediction_url, api_token, print_verbose, http_handler: AsyncHTTPHandler +) -> Tuple[str, Any]: + output_string = "" + headers = { + "Authorization": f"Token {api_token}", + "Content-Type": "application/json", + } + + status = "" + logs = "" + while True and (status not in ["succeeded", "failed", "canceled"]): + print_verbose(f"replicate: polling endpoint: {prediction_url}") + await asyncio.sleep(0.5) + response = await http_handler.get(prediction_url, headers=headers) + if response.status_code == 200: + response_data = response.json() + if "output" in response_data: + output_string = "".join(response_data["output"]) + print_verbose(f"Non-streamed output:{output_string}") + status = response_data.get("status", None) + logs = response_data.get("logs", "") + if status == "failed": + replicate_error = response_data.get("error", "") + raise ReplicateError( + status_code=400, + message=f"Error: {replicate_error}, \nReplicate logs:{logs}", + ) + else: + # this can fail temporarily but it does not mean the replicate request failed, replicate request fails when status=="failed" + print_verbose("Replicate: Failed to fetch prediction status and output.") + return output_string, logs + + # Function to handle prediction response (streaming) def handle_prediction_response_streaming(prediction_url, api_token, print_verbose): previous_output = "" @@ -214,6 +308,45 @@ def handle_prediction_response_streaming(prediction_url, api_token, print_verbos ) +# Function to handle prediction response (streaming) +async def async_handle_prediction_response_streaming( + prediction_url, api_token, print_verbose +): + http_handler = AsyncHTTPHandler(concurrent_limit=1) + previous_output = "" + output_string = "" + + headers = { + "Authorization": f"Token {api_token}", + "Content-Type": "application/json", + } + status = "" + while True and (status not in ["succeeded", "failed", "canceled"]): + await asyncio.sleep(0.5) # prevent being rate limited by replicate + print_verbose(f"replicate: polling endpoint: {prediction_url}") + response = await http_handler.get(prediction_url, headers=headers) + if response.status_code == 200: + response_data = response.json() + status = response_data["status"] + if "output" in response_data: + output_string = "".join(response_data["output"]) + new_output = output_string[len(previous_output) :] + print_verbose(f"New chunk: {new_output}") + yield {"output": new_output, "status": status} + previous_output = output_string + status = response_data["status"] + if 
status == "failed": + replicate_error = response_data.get("error", "") + raise ReplicateError( + status_code=400, message=f"Error: {replicate_error}" + ) + else: + # this can fail temporarily but it does not mean the replicate request failed, replicate request fails when status=="failed" + print_verbose( + f"Replicate: Failed to fetch prediction status and output.{response.status_code}{response.text}" + ) + + # Function to extract version ID from model string def model_to_version_id(model): if ":" in model: @@ -222,6 +355,39 @@ def model_to_version_id(model): return model +def process_response( + model_response: ModelResponse, + result: str, + model: str, + encoding: Any, + prompt: str, +) -> ModelResponse: + if len(result) == 0: # edge case, where result from replicate is empty + result = " " + + ## Building RESPONSE OBJECT + if len(result) > 1: + model_response["choices"][0]["message"]["content"] = result + + # Calculate usage + prompt_tokens = len(encoding.encode(prompt, disallowed_special=())) + completion_tokens = len( + encoding.encode( + model_response["choices"][0]["message"].get("content", ""), + disallowed_special=(), + ) + ) + model_response["model"] = "replicate/" + model + usage = Usage( + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + total_tokens=prompt_tokens + completion_tokens, + ) + setattr(model_response, "usage", usage) + + return model_response + + # Main function for prediction completion def completion( model: str, @@ -229,14 +395,15 @@ def completion( api_base: str, model_response: ModelResponse, print_verbose: Callable, + optional_params: dict, logging_obj, api_key, encoding, custom_prompt_dict={}, - optional_params=None, litellm_params=None, logger_fn=None, -): + acompletion=None, +) -> Union[ModelResponse, CustomStreamWrapper]: # Start a prediction and get the prediction URL version_id = model_to_version_id(model) ## Load Config @@ -274,6 +441,12 @@ def completion( else: prompt = prompt_factory(model=model, messages=messages) + if prompt is None or not isinstance(prompt, str): + raise ReplicateError( + status_code=400, + message="LiteLLM Error - prompt is not a string - {}".format(prompt), + ) + # If system prompt is supported, and a system prompt is provided, use it if system_prompt is not None: input_data = { @@ -285,6 +458,20 @@ def completion( else: input_data = {"prompt": prompt, **optional_params} + if acompletion is not None and acompletion == True: + return async_completion( + model_response=model_response, + model=model, + prompt=prompt, + encoding=encoding, + optional_params=optional_params, + version_id=version_id, + input_data=input_data, + api_key=api_key, + api_base=api_base, + logging_obj=logging_obj, + print_verbose=print_verbose, + ) # type: ignore ## COMPLETION CALL ## Replicate Compeltion calls have 2 steps ## Step1: Start Prediction: gets a prediction url @@ -293,6 +480,7 @@ def completion( model_response["created"] = int( time.time() ) # for pricing this must remain right before calling api + prediction_url = start_prediction( version_id, input_data, @@ -306,9 +494,10 @@ def completion( # Handle the prediction response (streaming or non-streaming) if "stream" in optional_params and optional_params["stream"] == True: print_verbose("streaming request") - return handle_prediction_response_streaming( + _response = handle_prediction_response_streaming( prediction_url, api_key, print_verbose ) + return CustomStreamWrapper(_response, model, logging_obj=logging_obj, custom_llm_provider="replicate") # type: ignore else: result, 
logs = handle_prediction_response( prediction_url, api_key, print_verbose @@ -328,29 +517,56 @@ def completion( print_verbose(f"raw model_response: {result}") - if len(result) == 0: # edge case, where result from replicate is empty - result = " " - - ## Building RESPONSE OBJECT - if len(result) > 1: - model_response["choices"][0]["message"]["content"] = result - - # Calculate usage - prompt_tokens = len(encoding.encode(prompt, disallowed_special=())) - completion_tokens = len( - encoding.encode( - model_response["choices"][0]["message"].get("content", ""), - disallowed_special=(), - ) + return process_response( + model_response=model_response, + result=result, + model=model, + encoding=encoding, + prompt=prompt, ) - model_response["model"] = "replicate/" + model - usage = Usage( - prompt_tokens=prompt_tokens, - completion_tokens=completion_tokens, - total_tokens=prompt_tokens + completion_tokens, + + +async def async_completion( + model_response: ModelResponse, + model: str, + prompt: str, + encoding, + optional_params: dict, + version_id, + input_data, + api_key, + api_base, + logging_obj, + print_verbose, +) -> Union[ModelResponse, CustomStreamWrapper]: + http_handler = AsyncHTTPHandler(concurrent_limit=1) + prediction_url = await async_start_prediction( + version_id, + input_data, + api_key, + api_base, + logging_obj=logging_obj, + print_verbose=print_verbose, + http_handler=http_handler, + ) + + if "stream" in optional_params and optional_params["stream"] == True: + _response = async_handle_prediction_response_streaming( + prediction_url, api_key, print_verbose ) - setattr(model_response, "usage", usage) - return model_response + return CustomStreamWrapper(_response, model, logging_obj=logging_obj, custom_llm_provider="replicate") # type: ignore + + result, logs = await async_handle_prediction_response( + prediction_url, api_key, print_verbose, http_handler=http_handler + ) + + return process_response( + model_response=model_response, + result=result, + model=model, + encoding=encoding, + prompt=prompt, + ) # # Example usage: diff --git a/litellm/llms/vertex_ai.py b/litellm/llms/vertex_ai.py index 84fec734fd..b52e8689f1 100644 --- a/litellm/llms/vertex_ai.py +++ b/litellm/llms/vertex_ai.py @@ -3,10 +3,15 @@ import json from enum import Enum import requests # type: ignore import time -from typing import Callable, Optional, Union, List +from typing import Callable, Optional, Union, List, Literal from litellm.utils import ModelResponse, Usage, CustomStreamWrapper, map_finish_reason import litellm, uuid import httpx, inspect # type: ignore +from litellm.types.llms.vertex_ai import * +from litellm.llms.prompt_templates.factory import ( + convert_to_gemini_tool_call_result, + convert_to_gemini_tool_call_invoke, +) class VertexAIError(Exception): @@ -283,6 +288,125 @@ def _load_image_from_url(image_url: str): return Image.from_bytes(data=image_bytes) +def _convert_gemini_role(role: str) -> Literal["user", "model"]: + if role == "user": + return "user" + else: + return "model" + + +def _process_gemini_image(image_url: str) -> PartType: + try: + if "gs://" in image_url: + # Case 1: Images with Cloud Storage URIs + # The supported MIME types for images include image/png and image/jpeg. 
+ part_mime = "image/png" if "png" in image_url else "image/jpeg" + _file_data = FileDataType(mime_type=part_mime, file_uri=image_url) + return PartType(file_data=_file_data) + elif "https:/" in image_url: + # Case 2: Images with direct links + image = _load_image_from_url(image_url) + _blob = BlobType(data=image.data, mime_type=image._mime_type) + return PartType(inline_data=_blob) + elif ".mp4" in image_url and "gs://" in image_url: + # Case 3: Videos with Cloud Storage URIs + part_mime = "video/mp4" + _file_data = FileDataType(mime_type=part_mime, file_uri=image_url) + return PartType(file_data=_file_data) + elif "base64" in image_url: + # Case 4: Images with base64 encoding + import base64, re + + # base 64 is passed as data:image/jpeg;base64, + image_metadata, img_without_base_64 = image_url.split(",") + + # read mime_type from img_without_base_64=data:image/jpeg;base64 + # Extract MIME type using regular expression + mime_type_match = re.match(r"data:(.*?);base64", image_metadata) + + if mime_type_match: + mime_type = mime_type_match.group(1) + else: + mime_type = "image/jpeg" + decoded_img = base64.b64decode(img_without_base_64) + _blob = BlobType(data=decoded_img, mime_type=mime_type) + return PartType(inline_data=_blob) + raise Exception("Invalid image received - {}".format(image_url)) + except Exception as e: + raise e + + +def _gemini_convert_messages_with_history(messages: list) -> List[ContentType]: + """ + Converts given messages from OpenAI format to Gemini format + + - Parts must be iterable + - Roles must alternate b/w 'user' and 'model' (same as anthropic -> merge consecutive roles) + - Please ensure that function response turn comes immediately after a function call turn + """ + user_message_types = {"user", "system"} + contents: List[ContentType] = [] + + msg_i = 0 + while msg_i < len(messages): + user_content: List[PartType] = [] + init_msg_i = msg_i + ## MERGE CONSECUTIVE USER CONTENT ## + while msg_i < len(messages) and messages[msg_i]["role"] in user_message_types: + if isinstance(messages[msg_i]["content"], list): + _parts: List[PartType] = [] + for element in messages[msg_i]["content"]: + if isinstance(element, dict): + if element["type"] == "text": + _part = PartType(text=element["text"]) + _parts.append(_part) + elif element["type"] == "image_url": + image_url = element["image_url"]["url"] + _part = _process_gemini_image(image_url=image_url) + _parts.append(_part) # type: ignore + user_content.extend(_parts) + else: + _part = PartType(text=messages[msg_i]["content"]) + user_content.append(_part) + + msg_i += 1 + + if user_content: + contents.append(ContentType(role="user", parts=user_content)) + assistant_content = [] + ## MERGE CONSECUTIVE ASSISTANT CONTENT ## + while msg_i < len(messages) and messages[msg_i]["role"] == "assistant": + assistant_text = ( + messages[msg_i].get("content") or "" + ) # either string or none + if assistant_text: + assistant_content.append(PartType(text=assistant_text)) + if messages[msg_i].get( + "tool_calls", [] + ): # support assistant tool invoke convertion + assistant_content.extend( + convert_to_gemini_tool_call_invoke(messages[msg_i]["tool_calls"]) + ) + msg_i += 1 + + if assistant_content: + contents.append(ContentType(role="model", parts=assistant_content)) + + ## APPEND TOOL CALL MESSAGES ## + if msg_i < len(messages) and messages[msg_i]["role"] == "tool": + _part = convert_to_gemini_tool_call_result(messages[msg_i]) + contents.append(ContentType(parts=[_part])) # type: ignore + msg_i += 1 + if msg_i == init_msg_i: # prevent 
infinite loops + raise Exception( + "Invalid Message passed in - {}. File an issue https://github.com/BerriAI/litellm/issues".format( + messages[msg_i] + ) + ) + + return contents + + def _gemini_vision_convert_messages(messages: list): """ Converts given messages for GPT-4 Vision to Gemini format. @@ -396,10 +520,10 @@ def completion( print_verbose: Callable, encoding, logging_obj, + optional_params: dict, vertex_project=None, vertex_location=None, vertex_credentials=None, - optional_params=None, litellm_params=None, logger_fn=None, acompletion: bool = False, @@ -556,6 +680,7 @@ def completion( "model_response": model_response, "encoding": encoding, "messages": messages, + "request_str": request_str, "print_verbose": print_verbose, "client_options": client_options, "instances": instances, @@ -574,11 +699,9 @@ def completion( print_verbose("\nMaking VertexAI Gemini Pro / Pro Vision Call") print_verbose(f"\nProcessing input messages = {messages}") tools = optional_params.pop("tools", None) - prompt, images = _gemini_vision_convert_messages(messages=messages) - content = [prompt] + images + content = _gemini_convert_messages_with_history(messages=messages) stream = optional_params.pop("stream", False) if stream == True: - request_str += f"response = llm_model.generate_content({content}, generation_config=GenerationConfig(**{optional_params}), safety_settings={safety_settings}, stream={stream})\n" logging_obj.pre_call( input=prompt, @@ -589,7 +712,7 @@ def completion( }, ) - model_response = llm_model.generate_content( + _model_response = llm_model.generate_content( contents=content, generation_config=optional_params, safety_settings=safety_settings, @@ -597,7 +720,7 @@ def completion( tools=tools, ) - return model_response + return _model_response request_str += f"response = llm_model.generate_content({content})\n" ## LOGGING @@ -850,12 +973,12 @@ async def async_completion( mode: str, prompt: str, model: str, + messages: list, model_response: ModelResponse, - logging_obj=None, - request_str=None, + request_str: str, + print_verbose: Callable, + logging_obj, encoding=None, - messages=None, - print_verbose=None, client_options=None, instances=None, vertex_project=None, @@ -875,8 +998,7 @@ async def async_completion( tools = optional_params.pop("tools", None) stream = optional_params.pop("stream", False) - prompt, images = _gemini_vision_convert_messages(messages=messages) - content = [prompt] + images + content = _gemini_convert_messages_with_history(messages=messages) request_str += f"response = llm_model.generate_content({content})\n" ## LOGGING @@ -1076,11 +1198,11 @@ async def async_streaming( prompt: str, model: str, model_response: ModelResponse, - logging_obj=None, - request_str=None, + messages: list, + print_verbose: Callable, + logging_obj, + request_str: str, encoding=None, - messages=None, - print_verbose=None, client_options=None, instances=None, vertex_project=None, @@ -1097,8 +1219,8 @@ async def async_streaming( print_verbose("\nMaking VertexAI Gemini Pro Vision Call") print_verbose(f"\nProcessing input messages = {messages}") - prompt, images = _gemini_vision_convert_messages(messages=messages) - content = [prompt] + images + content = _gemini_convert_messages_with_history(messages=messages) + request_str += f"response = llm_model.generate_content({content}, generation_config=GenerationConfig(**{optional_params}), stream={stream})\n" logging_obj.pre_call( input=prompt, diff --git a/litellm/llms/vertex_httpx.py b/litellm/llms/vertex_httpx.py new file mode 100644 index 
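Editor's note: the `_gemini_convert_messages_with_history` helper introduced above enforces Gemini's alternating user/model turn structure by merging consecutive same-side messages. The sketch below illustrates only that merge rule with plain strings standing in for `PartType`/`ContentType`; the real function additionally converts image parts, assistant tool invokes, and tool results, which are omitted here.

```python
from typing import Dict, List


def merge_for_gemini(messages: List[Dict[str, str]]) -> List[Dict[str, object]]:
    """Collapse consecutive user/system messages into one user turn, and
    consecutive assistant messages into one model turn, so roles alternate."""
    contents: List[Dict[str, object]] = []
    user_roles = {"user", "system"}
    i = 0
    while i < len(messages):
        parts: List[str] = []
        if messages[i]["role"] in user_roles:
            while i < len(messages) and messages[i]["role"] in user_roles:
                parts.append(messages[i]["content"])
                i += 1
            contents.append({"role": "user", "parts": parts})
        else:
            while i < len(messages) and messages[i]["role"] not in user_roles:
                parts.append(messages[i]["content"])
                i += 1
            contents.append({"role": "model", "parts": parts})
    return contents


print(merge_for_gemini([
    {"role": "system", "content": "Be terse."},
    {"role": "user", "content": "Hi!"},
    {"role": "assistant", "content": "Hello."},
    {"role": "user", "content": "Weather in Boston?"},
]))
# -> user turn ["Be terse.", "Hi!"], model turn ["Hello."], user turn ["Weather in Boston?"]
```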
0000000000..b8c698c901 --- /dev/null +++ b/litellm/llms/vertex_httpx.py @@ -0,0 +1,224 @@ +import os, types +import json +from enum import Enum +import requests # type: ignore +import time +from typing import Callable, Optional, Union, List, Any, Tuple +from litellm.utils import ModelResponse, Usage, CustomStreamWrapper, map_finish_reason +import litellm, uuid +import httpx, inspect # type: ignore +from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler +from .base import BaseLLM + + +class VertexAIError(Exception): + def __init__(self, status_code, message): + self.status_code = status_code + self.message = message + self.request = httpx.Request( + method="POST", url=" https://cloud.google.com/vertex-ai/" + ) + self.response = httpx.Response(status_code=status_code, request=self.request) + super().__init__( + self.message + ) # Call the base class constructor with the parameters it needs + + +class VertexLLM(BaseLLM): + def __init__(self) -> None: + super().__init__() + self.access_token: Optional[str] = None + self.refresh_token: Optional[str] = None + self._credentials: Optional[Any] = None + self.project_id: Optional[str] = None + self.async_handler: Optional[AsyncHTTPHandler] = None + + def load_auth(self) -> Tuple[Any, str]: + from google.auth.transport.requests import Request # type: ignore[import-untyped] + from google.auth.credentials import Credentials # type: ignore[import-untyped] + import google.auth as google_auth + + credentials, project_id = google_auth.default( + scopes=["https://www.googleapis.com/auth/cloud-platform"], + ) + + credentials.refresh(Request()) + + if not project_id: + raise ValueError("Could not resolve project_id") + + if not isinstance(project_id, str): + raise TypeError( + f"Expected project_id to be a str but got {type(project_id)}" + ) + + return credentials, project_id + + def refresh_auth(self, credentials: Any) -> None: + from google.auth.transport.requests import Request # type: ignore[import-untyped] + + credentials.refresh(Request()) + + def _prepare_request(self, request: httpx.Request) -> None: + access_token = self._ensure_access_token() + + if request.headers.get("Authorization"): + # already authenticated, nothing for us to do + return + + request.headers["Authorization"] = f"Bearer {access_token}" + + def _ensure_access_token(self) -> str: + if self.access_token is not None: + return self.access_token + + if not self._credentials: + self._credentials, project_id = self.load_auth() + if not self.project_id: + self.project_id = project_id + else: + self.refresh_auth(self._credentials) + + if not self._credentials.token: + raise RuntimeError("Could not resolve API token from the environment") + + assert isinstance(self._credentials.token, str) + return self._credentials.token + + def image_generation( + self, + prompt: str, + vertex_project: str, + vertex_location: str, + model: Optional[ + str + ] = "imagegeneration", # vertex ai uses imagegeneration as the default model + client: Optional[AsyncHTTPHandler] = None, + optional_params: Optional[dict] = None, + timeout: Optional[int] = None, + logging_obj=None, + model_response=None, + aimg_generation=False, + ): + if aimg_generation == True: + response = self.aimage_generation( + prompt=prompt, + vertex_project=vertex_project, + vertex_location=vertex_location, + model=model, + client=client, + optional_params=optional_params, + timeout=timeout, + logging_obj=logging_obj, + model_response=model_response, + ) + return response + + async def aimage_generation( + self, + 
prompt: str, + vertex_project: str, + vertex_location: str, + model_response: litellm.ImageResponse, + model: Optional[ + str + ] = "imagegeneration", # vertex ai uses imagegeneration as the default model + client: Optional[AsyncHTTPHandler] = None, + optional_params: Optional[dict] = None, + timeout: Optional[int] = None, + logging_obj=None, + ): + response = None + if client is None: + _params = {} + if timeout is not None: + if isinstance(timeout, float) or isinstance(timeout, int): + _httpx_timeout = httpx.Timeout(timeout) + _params["timeout"] = _httpx_timeout + else: + _params["timeout"] = httpx.Timeout(timeout=600.0, connect=5.0) + + self.async_handler = AsyncHTTPHandler(**_params) # type: ignore + else: + self.async_handler = client # type: ignore + + # make POST request to + # https://us-central1-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/us-central1/publishers/google/models/imagegeneration:predict + url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:predict" + + """ + Docs link: https://console.cloud.google.com/vertex-ai/publishers/google/model-garden/imagegeneration?project=adroit-crow-413218 + curl -X POST \ + -H "Authorization: Bearer $(gcloud auth print-access-token)" \ + -H "Content-Type: application/json; charset=utf-8" \ + -d { + "instances": [ + { + "prompt": "a cat" + } + ], + "parameters": { + "sampleCount": 1 + } + } \ + "https://us-central1-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/us-central1/publishers/google/models/imagegeneration:predict" + """ + auth_header = self._ensure_access_token() + optional_params = optional_params or { + "sampleCount": 1 + } # default optional params + + request_data = { + "instances": [{"prompt": prompt}], + "parameters": optional_params, + } + + request_str = f"\n curl -X POST \\\n -H \"Authorization: Bearer {auth_header[:10] + 'XXXXXXXXXX'}\" \\\n -H \"Content-Type: application/json; charset=utf-8\" \\\n -d {request_data} \\\n \"{url}\"" + logging_obj.pre_call( + input=prompt, + api_key=None, + additional_args={ + "complete_input_dict": optional_params, + "request_str": request_str, + }, + ) + + response = await self.async_handler.post( + url=url, + headers={ + "Content-Type": "application/json; charset=utf-8", + "Authorization": f"Bearer {auth_header}", + }, + data=json.dumps(request_data), + ) + + if response.status_code != 200: + raise Exception(f"Error: {response.status_code} {response.text}") + """ + Vertex AI Image generation response example: + { + "predictions": [ + { + "bytesBase64Encoded": "BASE64_IMG_BYTES", + "mimeType": "image/png" + }, + { + "mimeType": "image/png", + "bytesBase64Encoded": "BASE64_IMG_BYTES" + } + ] + } + """ + + _json_response = response.json() + _predictions = _json_response["predictions"] + + _response_data: List[litellm.ImageObject] = [] + for _prediction in _predictions: + _bytes_base64_encoded = _prediction["bytesBase64Encoded"] + image_object = litellm.ImageObject(b64_json=_bytes_base64_encoded) + _response_data.append(image_object) + + model_response.data = _response_data + + return model_response diff --git a/litellm/main.py b/litellm/main.py index 3429cab4d2..7601d98a2b 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -79,6 +79,7 @@ from .llms.anthropic_text import AnthropicTextCompletion from .llms.huggingface_restapi import Huggingface from .llms.predibase import PredibaseChatCompletion from .llms.bedrock_httpx import BedrockLLM +from .llms.vertex_httpx import 
VertexLLM from .llms.triton import TritonChatCompletion from .llms.prompt_templates.factory import ( prompt_factory, @@ -118,6 +119,7 @@ huggingface = Huggingface() predibase_chat_completions = PredibaseChatCompletion() triton_chat_completions = TritonChatCompletion() bedrock_chat_completion = BedrockLLM() +vertex_chat_completion = VertexLLM() ####### COMPLETION ENDPOINTS ################ @@ -320,12 +322,13 @@ async def acompletion( or custom_llm_provider == "huggingface" or custom_llm_provider == "ollama" or custom_llm_provider == "ollama_chat" + or custom_llm_provider == "replicate" or custom_llm_provider == "vertex_ai" or custom_llm_provider == "gemini" or custom_llm_provider == "sagemaker" or custom_llm_provider == "anthropic" or custom_llm_provider == "predibase" - or (custom_llm_provider == "bedrock" and "cohere" in model) + or custom_llm_provider == "bedrock" or custom_llm_provider in litellm.openai_compatible_providers ): # currently implemented aiohttp calls for just azure, openai, hf, ollama, vertex ai soon all. init_response = await loop.run_in_executor(None, func_with_context) @@ -367,6 +370,8 @@ async def acompletion( async def _async_streaming(response, model, custom_llm_provider, args): try: print_verbose(f"received response in _async_streaming: {response}") + if asyncio.iscoroutine(response): + response = await response async for line in response: print_verbose(f"line in async streaming: {line}") yield line @@ -552,7 +557,7 @@ def completion( model_info = kwargs.get("model_info", None) proxy_server_request = kwargs.get("proxy_server_request", None) fallbacks = kwargs.get("fallbacks", None) - headers = kwargs.get("headers", None) + headers = kwargs.get("headers", None) or extra_headers num_retries = kwargs.get("num_retries", None) ## deprecated max_retries = kwargs.get("max_retries", None) context_window_fallback_dict = kwargs.get("context_window_fallback_dict", None) @@ -674,20 +679,6 @@ def completion( k: v for k, v in kwargs.items() if k not in default_params } # model-specific params - pass them straight to the model/provider - ### TIMEOUT LOGIC ### - timeout = timeout or kwargs.get("request_timeout", 600) or 600 - # set timeout for 10 minutes by default - - if ( - timeout is not None - and isinstance(timeout, httpx.Timeout) - and supports_httpx_timeout(custom_llm_provider) == False - ): - read_timeout = timeout.read or 600 - timeout = read_timeout # default 10 min timeout - elif timeout is not None and not isinstance(timeout, httpx.Timeout): - timeout = float(timeout) # type: ignore - try: if base_url is not None: api_base = base_url @@ -727,6 +718,16 @@ def completion( "aws_region_name", None ) # support region-based pricing for bedrock + ### TIMEOUT LOGIC ### + timeout = timeout or kwargs.get("request_timeout", 600) or 600 + # set timeout for 10 minutes by default + if isinstance(timeout, httpx.Timeout) and not supports_httpx_timeout( + custom_llm_provider + ): + timeout = timeout.read or 600 # default 10 min timeout + elif not isinstance(timeout, httpx.Timeout): + timeout = float(timeout) # type: ignore + ### REGISTER CUSTOM MODEL PRICING -- IF GIVEN ### if input_cost_per_token is not None and output_cost_per_token is not None: litellm.register_model( @@ -1192,7 +1193,7 @@ def completion( custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict - model_response = replicate.completion( + model_response = replicate.completion( # type: ignore model=model, messages=messages, api_base=api_base, @@ -1205,12 +1206,10 @@ def completion( api_key=replicate_key, 
logging_obj=logging, custom_prompt_dict=custom_prompt_dict, + acompletion=acompletion, ) - if "stream" in optional_params and optional_params["stream"] == True: - # don't try to access stream object, - model_response = CustomStreamWrapper(model_response, model, logging_obj=logging, custom_llm_provider="replicate") # type: ignore - if optional_params.get("stream", False) or acompletion == True: + if optional_params.get("stream", False) == True: ## LOGGING logging.post_call( input=messages, @@ -1984,23 +1983,9 @@ def completion( # boto3 reads keys from .env custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict - if "cohere" in model: - response = bedrock_chat_completion.completion( - model=model, - messages=messages, - custom_prompt_dict=litellm.custom_prompt_dict, - model_response=model_response, - print_verbose=print_verbose, - optional_params=optional_params, - litellm_params=litellm_params, - logger_fn=logger_fn, - encoding=encoding, - logging_obj=logging, - extra_headers=extra_headers, - timeout=timeout, - acompletion=acompletion, - ) - else: + if ( + "aws_bedrock_client" in optional_params + ): # use old bedrock flow for aws_bedrock_client users. response = bedrock.completion( model=model, messages=messages, @@ -2036,7 +2021,22 @@ def completion( custom_llm_provider="bedrock", logging_obj=logging, ) - + else: + response = bedrock_chat_completion.completion( + model=model, + messages=messages, + custom_prompt_dict=custom_prompt_dict, + model_response=model_response, + print_verbose=print_verbose, + optional_params=optional_params, + litellm_params=litellm_params, + logger_fn=logger_fn, + encoding=encoding, + logging_obj=logging, + extra_headers=extra_headers, + timeout=timeout, + acompletion=acompletion, + ) if optional_params.get("stream", False): ## LOGGING logging.post_call( @@ -3856,6 +3856,36 @@ def image_generation( model_response=model_response, aimg_generation=aimg_generation, ) + elif custom_llm_provider == "vertex_ai": + vertex_ai_project = ( + optional_params.pop("vertex_project", None) + or optional_params.pop("vertex_ai_project", None) + or litellm.vertex_project + or get_secret("VERTEXAI_PROJECT") + ) + vertex_ai_location = ( + optional_params.pop("vertex_location", None) + or optional_params.pop("vertex_ai_location", None) + or litellm.vertex_location + or get_secret("VERTEXAI_LOCATION") + ) + vertex_credentials = ( + optional_params.pop("vertex_credentials", None) + or optional_params.pop("vertex_ai_credentials", None) + or get_secret("VERTEXAI_CREDENTIALS") + ) + model_response = vertex_chat_completion.image_generation( + model=model, + prompt=prompt, + timeout=timeout, + logging_obj=litellm_logging_obj, + optional_params=optional_params, + model_response=model_response, + vertex_project=vertex_ai_project, + vertex_location=vertex_ai_location, + aimg_generation=aimg_generation, + ) + return model_response except Exception as e: ## Map to OpenAI Exception diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index a88d6875ca..f3db33c601 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -234,6 +234,24 @@ "litellm_provider": "openai", "mode": "chat" }, + "ft:davinci-002": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000002, + "litellm_provider": "text-completion-openai", + "mode": "completion" + }, + "ft:babbage-002": 
{ + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000004, + "output_cost_per_token": 0.0000004, + "litellm_provider": "text-completion-openai", + "mode": "completion" + }, "text-embedding-3-large": { "max_tokens": 8191, "max_input_tokens": 8191, @@ -1385,6 +1403,24 @@ "mode": "completion", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, + "gemini/gemini-1.5-flash-latest": { + "max_tokens": 8192, + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_images_per_prompt": 3000, + "max_videos_per_prompt": 10, + "max_video_length": 1, + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_pdf_size_mb": 30, + "input_cost_per_token": 0, + "output_cost_per_token": 0, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, "gemini/gemini-pro": { "max_tokens": 8192, "max_input_tokens": 32760, @@ -1744,6 +1780,30 @@ "litellm_provider": "openrouter", "mode": "chat" }, + "openrouter/openai/gpt-4o": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000005, + "output_cost_per_token": 0.000015, + "litellm_provider": "openrouter", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, + "openrouter/openai/gpt-4o-2024-05-13": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000005, + "output_cost_per_token": 0.000015, + "litellm_provider": "openrouter", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, "openrouter/openai/gpt-4-vision-preview": { "max_tokens": 130000, "input_cost_per_token": 0.00001, @@ -2943,6 +3003,24 @@ "litellm_provider": "ollama", "mode": "completion" }, + "ollama/llama3": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "ollama", + "mode": "chat" + }, + "ollama/llama3:70b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "ollama", + "mode": "chat" + }, "ollama/mistral": { "max_tokens": 8192, "max_input_tokens": 8192, @@ -2952,6 +3030,42 @@ "litellm_provider": "ollama", "mode": "completion" }, + "ollama/mistral-7B-Instruct-v0.1": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "ollama", + "mode": "chat" + }, + "ollama/mistral-7B-Instruct-v0.2": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "ollama", + "mode": "chat" + }, + "ollama/mixtral-8x7B-Instruct-v0.1": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "ollama", + "mode": "chat" + }, + "ollama/mixtral-8x22B-Instruct-v0.1": { + "max_tokens": 65536, + "max_input_tokens": 65536, + "max_output_tokens": 65536, + "input_cost_per_token": 0.0, + 
"output_cost_per_token": 0.0, + "litellm_provider": "ollama", + "mode": "chat" + }, "ollama/codellama": { "max_tokens": 4096, "max_input_tokens": 4096, diff --git a/litellm/proxy/_experimental/out/404.html b/litellm/proxy/_experimental/out/404.html index b70559084d..fa19572edc 100644 --- a/litellm/proxy/_experimental/out/404.html +++ b/litellm/proxy/_experimental/out/404.html @@ -1 +1 @@ -404: This page could not be found.LiteLLM Dashboard404This page could not be found. \ No newline at end of file +404: This page could not be found.LiteLLM Dashboard404This page could not be found. \ No newline at end of file diff --git a/litellm/proxy/_experimental/out/_next/static/chunks/app/page-6a39771cacf75ea6.js b/litellm/proxy/_experimental/out/_next/static/chunks/app/page-6a39771cacf75ea6.js deleted file mode 100644 index 7d08a80c96..0000000000 --- a/litellm/proxy/_experimental/out/_next/static/chunks/app/page-6a39771cacf75ea6.js +++ /dev/null @@ -1 +0,0 @@ -(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[931],{20661:function(e,l,t){Promise.resolve().then(t.bind(t,7926))},7926:function(e,l,t){"use strict";t.r(l),t.d(l,{default:function(){return lb}});var s,a,r=t(3827),n=t(64090),o=t(47907),i=t(8792),c=t(40491),d=t(65270),m=e=>{let{userID:l,userRole:t,userEmail:s,showSSOBanner:a}=e;console.log("User ID:",l),console.log("userEmail:",s),console.log("showSSOBanner:",a);let n=[{key:"1",label:(0,r.jsxs)(r.Fragment,{children:[(0,r.jsxs)("p",{children:["Role: ",t]}),(0,r.jsxs)("p",{children:["ID: ",l]})]})}];return(0,r.jsxs)("nav",{className:"left-0 right-0 top-0 flex justify-between items-center h-12 mb-4",children:[(0,r.jsx)("div",{className:"text-left my-2 absolute top-0 left-0",children:(0,r.jsx)("div",{className:"flex flex-col items-center",children:(0,r.jsx)(i.default,{href:"/",children:(0,r.jsx)("button",{className:"text-gray-800 rounded text-center",children:(0,r.jsx)("img",{src:"/get_image",width:160,height:160,alt:"LiteLLM Brand",className:"mr-2"})})})})}),(0,r.jsxs)("div",{className:"text-right mx-4 my-2 absolute top-0 right-0 flex items-center justify-end space-x-2",children:[a?(0,r.jsx)("div",{style:{padding:"6px",borderRadius:"8px"},children:(0,r.jsx)("a",{href:"https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat",target:"_blank",style:{fontSize:"14px",textDecoration:"underline"},children:"Request hosted proxy"})}):null,(0,r.jsx)("div",{style:{border:"1px solid #391085",padding:"6px",borderRadius:"8px"},children:(0,r.jsx)(c.Z,{menu:{items:n},children:(0,r.jsx)(d.Z,{children:s})})})]})]})},u=t(80588);let h=async()=>{try{let e=await fetch("https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"),l=await e.json();return console.log("received data: ".concat(l)),l}catch(e){throw console.error("Failed to get model cost map:",e),e}},x=async(e,l)=>{try{let t=await fetch("/model/new",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...l})});if(!t.ok){let e=await t.text();throw u.ZP.error("Failed to create key: "+e,10),console.error("Error response from the server:",e),Error("Network response was not ok")}let s=await t.json();return console.log("API Response:",s),u.ZP.success("Model created successfully. 
Wait 60s and refresh on 'All Models' page"),s}catch(e){throw console.error("Failed to create key:",e),e}},p=async(e,l)=>{console.log("model_id in model delete call: ".concat(l));try{let t=await fetch("/model/delete",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({id:l})});if(!t.ok){let e=await t.text();throw u.ZP.error("Failed to create key: "+e,10),console.error("Error response from the server:",e),Error("Network response was not ok")}let s=await t.json();return console.log("API Response:",s),u.ZP.success("Model deleted successfully. Restart server to see this."),s}catch(e){throw console.error("Failed to create key:",e),e}},j=async(e,l,t)=>{try{if(console.log("Form Values in keyCreateCall:",t),t.description&&(t.metadata||(t.metadata={}),t.metadata.description=t.description,delete t.description,t.metadata=JSON.stringify(t.metadata)),t.metadata){console.log("formValues.metadata:",t.metadata);try{t.metadata=JSON.parse(t.metadata)}catch(e){throw u.ZP.error("Failed to parse metadata: "+e,10),Error("Failed to parse metadata: "+e)}}console.log("Form Values after check:",t);let s=await fetch("/key/generate",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({user_id:l,...t})});if(!s.ok){let e=await s.text();throw u.ZP.error("Failed to create key: "+e,10),console.error("Error response from the server:",e),Error("Network response was not ok")}let a=await s.json();return console.log("API Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},g=async(e,l,t)=>{try{if(console.log("Form Values in keyCreateCall:",t),t.description&&(t.metadata||(t.metadata={}),t.metadata.description=t.description,delete t.description,t.metadata=JSON.stringify(t.metadata)),t.metadata){console.log("formValues.metadata:",t.metadata);try{t.metadata=JSON.parse(t.metadata)}catch(e){throw u.ZP.error("Failed to parse metadata: "+e,10),Error("Failed to parse metadata: "+e)}}console.log("Form Values after check:",t);let s=await fetch("/user/new",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({user_id:l,...t})});if(!s.ok){let e=await s.text();throw u.ZP.error("Failed to create key: "+e,10),console.error("Error response from the server:",e),Error("Network response was not ok")}let a=await s.json();return console.log("API Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},y=async(e,l)=>{try{console.log("in keyDeleteCall:",l);let t=await fetch("/key/delete",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({keys:[l]})});if(!t.ok){let e=await t.text();throw u.ZP.error("Failed to delete key: "+e,10),Error("Network response was not ok")}let s=await t.json();return console.log(s),s}catch(e){throw console.error("Failed to create key:",e),e}},f=async(e,l)=>{try{console.log("in teamDeleteCall:",l);let t=await fetch("/team/delete",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({team_ids:[l]})});if(!t.ok){let e=await t.text();throw u.ZP.error("Failed to delete team: "+e,10),Error("Network response was not ok")}let s=await t.json();return console.log(s),s}catch(e){throw console.error("Failed to delete key:",e),e}},Z=async function(e,l,t){let s=arguments.length>3&&void 0!==arguments[3]&&arguments[3],a=arguments.length>4?arguments[4]:void 0,r=arguments.length>5?arguments[5]:void 
0;try{let n="/user/info";"App Owner"==t&&l&&(n="".concat(n,"?user_id=").concat(l)),"App User"==t&&l&&(n="".concat(n,"?user_id=").concat(l)),console.log("in userInfoCall viewAll=",s),s&&r&&null!=a&&void 0!=a&&(n="".concat(n,"?view_all=true&page=").concat(a,"&page_size=").concat(r));let o=await fetch(n,{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let i=await o.json();return console.log("API Response:",i),i}catch(e){throw console.error("Failed to create key:",e),e}},_=async(e,l)=>{try{let t="/team/info";l&&(t="".concat(t,"?team_id=").concat(l)),console.log("in teamInfoCall");let s=await fetch(t,{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!s.ok){let e=await s.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let a=await s.json();return console.log("API Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},w=async e=>{try{let l=await fetch("/global/spend",{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!l.ok){let e=await l.text();throw u.ZP.error(e,10),Error("Network response was not ok")}return await l.json()}catch(e){throw console.error("Failed to create key:",e),e}},b=async(e,l,t)=>{try{let l=await fetch("/v2/model/info",{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!l.ok){let e=await l.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let t=await l.json();return console.log("modelInfoCall:",t),t}catch(e){throw console.error("Failed to create key:",e),e}},k=async(e,l,t,s,a,r)=>{try{let l="/model/metrics";s&&(l="".concat(l,"?_selected_model_group=").concat(s,"&startTime=").concat(a,"&endTime=").concat(r));let t=await fetch(l,{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!t.ok){let e=await t.text();throw u.ZP.error(e,10),Error("Network response was not ok")}return await t.json()}catch(e){throw console.error("Failed to create key:",e),e}},v=async(e,l,t,s,a,r)=>{try{let l="/model/metrics/slow_responses";s&&(l="".concat(l,"?_selected_model_group=").concat(s,"&startTime=").concat(a,"&endTime=").concat(r));let t=await fetch(l,{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!t.ok){let e=await t.text();throw u.ZP.error(e,10),Error("Network response was not ok")}return await t.json()}catch(e){throw console.error("Failed to create key:",e),e}},S=async(e,l,t,s,a,r)=>{try{let l="/model/metrics/exceptions";s&&(l="".concat(l,"?_selected_model_group=").concat(s,"&startTime=").concat(a,"&endTime=").concat(r));let t=await fetch(l,{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!t.ok){let e=await t.text();throw u.ZP.error(e,10),Error("Network response was not ok")}return await t.json()}catch(e){throw console.error("Failed to create key:",e),e}},N=async(e,l,t)=>{try{let l=await fetch("/models",{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!l.ok){let e=await l.text();throw u.ZP.error(e,10),Error("Network response was not ok")}return await l.json()}catch(e){throw console.error("Failed to create key:",e),e}},A=async e=>{try{let l="/global/spend/teams";console.log("in teamSpendLogsCall:",l);let t=await fetch("".concat(l),{method:"GET",headers:{Authorization:"Bearer 
".concat(e),"Content-Type":"application/json"}});if(!t.ok){let e=await t.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let s=await t.json();return console.log(s),s}catch(e){throw console.error("Failed to create key:",e),e}},E=async(e,l,t)=>{try{let s="/global/spend/tags";l&&t&&(s="".concat(s,"?start_date=").concat(l,"&end_date=").concat(t)),console.log("in tagsSpendLogsCall:",s);let a=await fetch("".concat(s),{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!a.ok){let e=await a.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let r=await a.json();return console.log(r),r}catch(e){throw console.error("Failed to create key:",e),e}},I=async(e,l,t,s,a,r)=>{try{console.log("user role in spend logs call: ".concat(t));let l="/spend/logs";l="App Owner"==t?"".concat(l,"?user_id=").concat(s,"&start_date=").concat(a,"&end_date=").concat(r):"".concat(l,"?start_date=").concat(a,"&end_date=").concat(r);let n=await fetch(l,{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!n.ok){let e=await n.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let o=await n.json();return console.log(o),o}catch(e){throw console.error("Failed to create key:",e),e}},C=async e=>{try{let l=await fetch("/global/spend/logs",{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!l.ok){let e=await l.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let t=await l.json();return console.log(t),t}catch(e){throw console.error("Failed to create key:",e),e}},P=async e=>{try{let l=await fetch("/global/spend/keys?limit=5",{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!l.ok){let e=await l.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let t=await l.json();return console.log(t),t}catch(e){throw console.error("Failed to create key:",e),e}},T=async(e,l,t,s)=>{try{let a="";a=l?JSON.stringify({api_key:l,startTime:t,endTime:s}):JSON.stringify({startTime:t,endTime:s});let r={method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}};r.body=a;let n=await fetch("/global/spend/end_users",r);if(!n.ok){let e=await n.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let o=await n.json();return console.log(o),o}catch(e){throw console.error("Failed to create key:",e),e}},O=async e=>{try{let l=await fetch("/global/spend/models?limit=5",{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!l.ok){let e=await l.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let t=await l.json();return console.log(t),t}catch(e){throw console.error("Failed to create key:",e),e}},F=async(e,l)=>{try{let t=await fetch("/v2/key/info",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({keys:l})});if(!t.ok){let e=await t.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let s=await t.json();return console.log(s),s}catch(e){throw console.error("Failed to create key:",e),e}},R=async(e,l)=>{try{let t="/user/get_users?role=".concat(l);console.log("in userGetAllUsersCall:",t);let s=await fetch(t,{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!s.ok){let e=await s.text();throw u.ZP.error("Failed to delete key: "+e,10),Error("Network response was not ok")}let a=await 
s.json();return console.log(a),a}catch(e){throw console.error("Failed to get requested models:",e),e}},M=async(e,l)=>{try{console.log("Form Values in teamCreateCall:",l);let t=await fetch("/team/new",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...l})});if(!t.ok){let e=await t.text();throw u.ZP.error("Failed to create key: "+e,10),console.error("Error response from the server:",e),Error("Network response was not ok")}let s=await t.json();return console.log("API Response:",s),s}catch(e){throw console.error("Failed to create key:",e),e}},L=async(e,l)=>{try{console.log("Form Values in keyUpdateCall:",l);let t=await fetch("/key/update",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...l})});if(!t.ok){let e=await t.text();throw u.ZP.error("Failed to update key: "+e,10),console.error("Error response from the server:",e),Error("Network response was not ok")}let s=await t.json();return console.log("Update key Response:",s),s}catch(e){throw console.error("Failed to create key:",e),e}},U=async(e,l)=>{try{console.log("Form Values in teamUpateCall:",l);let t=await fetch("/team/update",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...l})});if(!t.ok){let e=await t.text();throw u.ZP.error("Failed to update team: "+e,10),console.error("Error response from the server:",e),Error("Network response was not ok")}let s=await t.json();return console.log("Update Team Response:",s),s}catch(e){throw console.error("Failed to create key:",e),e}},D=async(e,l)=>{try{console.log("Form Values in modelUpateCall:",l);let t=await fetch("/model/update",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...l})});if(!t.ok){let e=await t.text();throw u.ZP.error("Failed to update model: "+e,10),console.error("Error update from the server:",e),Error("Network response was not ok")}let s=await t.json();return console.log("Update model Response:",s),s}catch(e){throw console.error("Failed to update model:",e),e}},K=async(e,l,t)=>{try{console.log("Form Values in teamMemberAddCall:",t);let s=await fetch("/team/member_add",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({team_id:l,member:t})});if(!s.ok){let e=await s.text();throw u.ZP.error("Failed to create key: "+e,10),console.error("Error response from the server:",e),Error("Network response was not ok")}let a=await s.json();return console.log("API Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},B=async(e,l,t)=>{try{console.log("Form Values in userUpdateUserCall:",l);let s={...l};null!==t&&(s.user_role=t),s=JSON.stringify(s);let a=await fetch("/user/update",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:s});if(!a.ok){let e=await a.text();throw u.ZP.error("Failed to create key: "+e,10),console.error("Error response from the server:",e),Error("Network response was not ok")}let r=await a.json();return console.log("API Response:",r),r}catch(e){throw console.error("Failed to create key:",e),e}},z=async(e,l)=>{try{let t="/health/services?service=".concat(l);console.log("Checking Slack Budget Alerts service health");let s=await fetch(t,{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!s.ok){let e=await s.text();throw u.ZP.error("Failed 
".concat(l," service health check ")+e),Error(e)}let a=await s.json();return u.ZP.success("Test request to ".concat(l," made - check logs/alerts on ").concat(l," to verify")),a}catch(e){throw console.error("Failed to perform health check:",e),e}},q=async(e,l,t)=>{try{let l=await fetch("/get/config/callbacks",{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!l.ok){let e=await l.text();throw u.ZP.error(e,10),Error("Network response was not ok")}return await l.json()}catch(e){throw console.error("Failed to get callbacks:",e),e}},V=async(e,l)=>{try{let t=await fetch("/config/update",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...l})});if(!t.ok){let e=await t.text();throw u.ZP.error(e,10),Error("Network response was not ok")}return await t.json()}catch(e){throw console.error("Failed to set callbacks:",e),e}},G=async e=>{try{let l=await fetch("/health",{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!l.ok){let e=await l.text();throw u.ZP.error(e),Error("Network response was not ok")}return await l.json()}catch(e){throw console.error("Failed to call /health:",e),e}};var Y=t(10384),W=t(46453),J=t(16450),H=t(52273),$=t(26780),X=t(15595),Q=t(6698),ee=t(71801),el=t(42440),et=t(42308),es=t(50670),ea=t(81583),er=t(99129),en=t(44839),eo=t(88707),ei=t(1861);let{Option:ec}=es.default;var ed=e=>{let{userID:l,team:t,userRole:s,accessToken:a,data:o,setData:i}=e,[c]=ea.Z.useForm(),[d,m]=(0,n.useState)(!1),[h,x]=(0,n.useState)(null),[p,g]=(0,n.useState)(null),[y,f]=(0,n.useState)([]),[Z,_]=(0,n.useState)([]),w=()=>{m(!1),c.resetFields()},b=()=>{m(!1),x(null),c.resetFields()};(0,n.useEffect)(()=>{(async()=>{try{if(null===l||null===s)return;if(null!==a){let e=(await N(a,l,s)).data.map(e=>e.id);console.log("available_model_names:",e),f(e)}}catch(e){console.error("Error fetching user models:",e)}})()},[a,l,s]);let k=async e=>{try{var t,s,r;let n=null!==(t=null==e?void 0:e.key_alias)&&void 0!==t?t:"",d=null!==(s=null==e?void 0:e.team_id)&&void 0!==s?s:null;if((null!==(r=null==o?void 0:o.filter(e=>e.team_id===d).map(e=>e.key_alias))&&void 0!==r?r:[]).includes(n))throw Error("Key alias ".concat(n," already exists for team with ID ").concat(d,", please provide another key alias"));u.ZP.info("Making API Call"),m(!0);let h=await j(a,l,e);console.log("key create Response:",h),i(e=>e?[...e,h]:[h]),x(h.key),g(h.soft_budget),u.ZP.success("API Key Created"),c.resetFields(),localStorage.removeItem("userData"+l)}catch(e){console.error("Error creating the key:",e),u.ZP.error("Error creating the key: ".concat(e),20)}};return(0,n.useEffect)(()=>{_(t&&t.models.length>0?t.models.includes("all-proxy-models")?y:t.models:y)},[t,y]),(0,r.jsxs)("div",{children:[(0,r.jsx)(J.Z,{className:"mx-auto",onClick:()=>m(!0),children:"+ Create New Key"}),(0,r.jsx)(er.Z,{title:"Create Key",visible:d,width:800,footer:null,onOk:w,onCancel:b,children:(0,r.jsxs)(ea.Z,{form:c,onFinish:k,labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(ea.Z.Item,{label:"Key Name",name:"key_alias",rules:[{required:!0,message:"Please input a key name"}],help:"required",children:(0,r.jsx)(H.Z,{placeholder:""})}),(0,r.jsx)(ea.Z.Item,{label:"Team 
ID",name:"team_id",hidden:!0,initialValue:t?t.team_id:null,valuePropName:"team_id",className:"mt-8",children:(0,r.jsx)(en.Z,{value:t?t.team_alias:"",disabled:!0})}),(0,r.jsx)(ea.Z.Item,{label:"Models",name:"models",rules:[{required:!0,message:"Please select a model"}],help:"required",children:(0,r.jsxs)(es.default,{mode:"multiple",placeholder:"Select models",style:{width:"100%"},onChange:e=>{e.includes("all-team-models")&&c.setFieldsValue({models:["all-team-models"]})},children:[(0,r.jsx)(ec,{value:"all-team-models",children:"All Team Models"},"all-team-models"),Z.map(e=>(0,r.jsx)(ec,{value:e,children:e},e))]})}),(0,r.jsxs)($.Z,{className:"mt-20 mb-8",children:[(0,r.jsx)(Q.Z,{children:(0,r.jsx)("b",{children:"Optional Settings"})}),(0,r.jsxs)(X.Z,{children:[(0,r.jsx)(ea.Z.Item,{className:"mt-8",label:"Max Budget (USD)",name:"max_budget",help:"Budget cannot exceed team max budget: $".concat((null==t?void 0:t.max_budget)!==null&&(null==t?void 0:t.max_budget)!==void 0?null==t?void 0:t.max_budget:"unlimited"),rules:[{validator:async(e,l)=>{if(l&&t&&null!==t.max_budget&&l>t.max_budget)throw Error("Budget cannot exceed team max budget: $".concat(t.max_budget))}}],children:(0,r.jsx)(eo.Z,{step:.01,precision:2,width:200})}),(0,r.jsx)(ea.Z.Item,{className:"mt-8",label:"Reset Budget",name:"budget_duration",help:"Team Reset Budget: ".concat((null==t?void 0:t.budget_duration)!==null&&(null==t?void 0:t.budget_duration)!==void 0?null==t?void 0:t.budget_duration:"None"),children:(0,r.jsxs)(es.default,{defaultValue:null,placeholder:"n/a",children:[(0,r.jsx)(es.default.Option,{value:"24h",children:"daily"}),(0,r.jsx)(es.default.Option,{value:"30d",children:"monthly"})]})}),(0,r.jsx)(ea.Z.Item,{className:"mt-8",label:"Tokens per minute Limit (TPM)",name:"tpm_limit",help:"TPM cannot exceed team TPM limit: ".concat((null==t?void 0:t.tpm_limit)!==null&&(null==t?void 0:t.tpm_limit)!==void 0?null==t?void 0:t.tpm_limit:"unlimited"),rules:[{validator:async(e,l)=>{if(l&&t&&null!==t.tpm_limit&&l>t.tpm_limit)throw Error("TPM limit cannot exceed team TPM limit: ".concat(t.tpm_limit))}}],children:(0,r.jsx)(eo.Z,{step:1,width:400})}),(0,r.jsx)(ea.Z.Item,{className:"mt-8",label:"Requests per minute Limit (RPM)",name:"rpm_limit",help:"RPM cannot exceed team RPM limit: ".concat((null==t?void 0:t.rpm_limit)!==null&&(null==t?void 0:t.rpm_limit)!==void 0?null==t?void 0:t.rpm_limit:"unlimited"),rules:[{validator:async(e,l)=>{if(l&&t&&null!==t.rpm_limit&&l>t.rpm_limit)throw Error("RPM limit cannot exceed team RPM limit: ".concat(t.rpm_limit))}}],children:(0,r.jsx)(eo.Z,{step:1,width:400})}),(0,r.jsx)(ea.Z.Item,{label:"Expire Key (eg: 30s, 30h, 30d)",name:"duration",className:"mt-8",children:(0,r.jsx)(H.Z,{placeholder:""})}),(0,r.jsx)(ea.Z.Item,{label:"Metadata",name:"metadata",children:(0,r.jsx)(en.Z.TextArea,{rows:4,placeholder:"Enter metadata as JSON"})})]})]})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(ei.ZP,{htmlType:"submit",children:"Create Key"})})]})}),h&&(0,r.jsx)(er.Z,{visible:d,onOk:w,onCancel:b,footer:null,children:(0,r.jsxs)(W.Z,{numItems:1,className:"gap-2 w-full",children:[(0,r.jsx)(el.Z,{children:"Save your Key"}),(0,r.jsx)(Y.Z,{numColSpan:1,children:(0,r.jsxs)("p",{children:["Please save this secret key somewhere safe and accessible. For security reasons, ",(0,r.jsx)("b",{children:"you will not be able to view it again"})," ","through your LiteLLM account. 
If you lose this secret key, you will need to generate a new one."]})}),(0,r.jsx)(Y.Z,{numColSpan:1,children:null!=h?(0,r.jsxs)("div",{children:[(0,r.jsx)(ee.Z,{className:"mt-3",children:"API Key:"}),(0,r.jsx)("div",{style:{background:"#f8f8f8",padding:"10px",borderRadius:"5px",marginBottom:"10px"},children:(0,r.jsx)("pre",{style:{wordWrap:"break-word",whiteSpace:"normal"},children:h})}),(0,r.jsx)(et.CopyToClipboard,{text:h,onCopy:()=>{u.ZP.success("API Key copied to clipboard")},children:(0,r.jsx)(J.Z,{className:"mt-3",children:"Copy API Key"})})]}):(0,r.jsx)(ee.Z,{children:"Key being created, this might take 30s"})})]})})]})},em=t(9454),eu=t(98941),eh=t(33393),ex=t(5),ep=t(13810),ej=t(61244),eg=t(10827),ey=t(3851),ef=t(2044),eZ=t(64167),e_=t(74480),ew=t(7178),eb=t(95093),ek=t(27166);let{Option:ev}=es.default;var eS=e=>{let{userID:l,userRole:t,accessToken:s,selectedTeam:a,data:o,setData:i,teams:c}=e,[d,m]=(0,n.useState)(!1),[h,x]=(0,n.useState)(!1),[p,j]=(0,n.useState)(null),[g,f]=(0,n.useState)(null),[Z,_]=(0,n.useState)(null),[w,b]=(0,n.useState)(""),[k,v]=(0,n.useState)(!1),[S,A]=(0,n.useState)(!1),[E,I]=(0,n.useState)(null),[C,P]=(0,n.useState)([]),T=new Set,[O,F]=(0,n.useState)(T);(0,n.useEffect)(()=>{(async()=>{try{if(null===l)return;if(null!==s&&null!==t){let e=(await N(s,l,t)).data.map(e=>e.id);console.log("available_model_names:",e),P(e)}}catch(e){console.error("Error fetching user models:",e)}})()},[s,l,t]),(0,n.useEffect)(()=>{if(c){let e=new Set;c.forEach((l,t)=>{let s=l.team_id;e.add(s)}),F(e)}},[c]);let R=e=>{console.log("handleEditClick:",e),null==e.token&&null!==e.token_id&&(e.token=e.token_id),I(e),v(!0)},M=async e=>{if(null==s)return;let l=e.token;e.key=l,console.log("handleEditSubmit:",e);let t=await L(s,e);console.log("handleEditSubmit: newKeyValues",t),o&&i(o.map(e=>e.token===l?t:e)),u.ZP.success("Key updated successfully"),v(!1),I(null)},U=async e=>{console.log("handleDelete:",e),null==e.token&&null!==e.token_id&&(e.token=e.token_id),null!=o&&(j(e.token),localStorage.removeItem("userData"+l),x(!0))},D=async()=>{if(null!=p&&null!=o){try{await y(s,p);let e=o.filter(e=>e.token!==p);i(e)}catch(e){console.error("Error deleting the key:",e)}x(!1),j(null)}};if(null!=o)return console.log("RERENDER TRIGGERED"),(0,r.jsxs)("div",{children:[(0,r.jsxs)(ep.Z,{className:"w-full mx-auto flex-auto overflow-y-auto max-h-[50vh] mb-4 mt-2",children:[(0,r.jsxs)(eg.Z,{className:"mt-5 max-h-[300px] min-h-[300px]",children:[(0,r.jsx)(eZ.Z,{children:(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(e_.Z,{children:"Key Alias"}),(0,r.jsx)(e_.Z,{children:"Secret Key"}),(0,r.jsx)(e_.Z,{children:"Spend (USD)"}),(0,r.jsx)(e_.Z,{children:"Budget (USD)"}),(0,r.jsx)(e_.Z,{children:"Models"}),(0,r.jsx)(e_.Z,{children:"TPM / RPM Limits"})]})}),(0,r.jsx)(ey.Z,{children:o.map(e=>{if(console.log(e),"litellm-dashboard"===e.team_id)return null;if(a){if(console.log("item team id: ".concat(e.team_id,", knownTeamIDs.has(item.team_id): ").concat(O.has(e.team_id),", selectedTeam id: ").concat(a.team_id)),(null!=a.team_id||null===e.team_id||O.has(e.team_id))&&e.team_id!=a.team_id)return null;console.log("item team id: ".concat(e.team_id,", is returned"))}return(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(ef.Z,{style:{maxWidth:"2px",whiteSpace:"pre-wrap",overflow:"hidden"},children:null!=e.key_alias?(0,r.jsx)(ee.Z,{children:e.key_alias}):(0,r.jsx)(ee.Z,{children:"Not Set"})}),(0,r.jsx)(ef.Z,{children:(0,r.jsx)(ee.Z,{children:e.key_name})}),(0,r.jsx)(ef.Z,{children:(0,r.jsx)(ee.Z,{children:(()=>{try{return 
parseFloat(e.spend).toFixed(4)}catch(l){return e.spend}})()})}),(0,r.jsx)(ef.Z,{children:null!=e.max_budget?(0,r.jsx)(ee.Z,{children:e.max_budget}):(0,r.jsx)(ee.Z,{children:"Unlimited"})}),(0,r.jsx)(ef.Z,{children:Array.isArray(e.models)?(0,r.jsx)("div",{style:{display:"flex",flexDirection:"column"},children:0===e.models.length?(0,r.jsx)(r.Fragment,{children:a&&a.models&&a.models.length>0?a.models.map((e,l)=>"all-proxy-models"===e?(0,r.jsx)(ex.Z,{size:"xs",className:"mb-1",color:"red",children:(0,r.jsx)(ee.Z,{children:"All Proxy Models"})},l):"all-team-models"===e?(0,r.jsx)(ex.Z,{size:"xs",className:"mb-1",color:"red",children:(0,r.jsx)(ee.Z,{children:"All Team Models"})},l):(0,r.jsx)(ex.Z,{size:"xs",className:"mb-1",color:"blue",children:(0,r.jsx)(ee.Z,{children:e.length>30?"".concat(e.slice(0,30),"..."):e})},l)):(0,r.jsx)(ex.Z,{size:"xs",className:"mb-1",color:"blue",children:(0,r.jsx)(ee.Z,{children:"all-proxy-models"})})}):e.models.map((e,l)=>"all-proxy-models"===e?(0,r.jsx)(ex.Z,{size:"xs",className:"mb-1",color:"red",children:(0,r.jsx)(ee.Z,{children:"All Proxy Models"})},l):"all-team-models"===e?(0,r.jsx)(ex.Z,{size:"xs",className:"mb-1",color:"red",children:(0,r.jsx)(ee.Z,{children:"All Team Models"})},l):(0,r.jsx)(ex.Z,{size:"xs",className:"mb-1",color:"blue",children:(0,r.jsx)(ee.Z,{children:e.length>30?"".concat(e.slice(0,30),"..."):e})},l))}):null}),(0,r.jsx)(ef.Z,{children:(0,r.jsxs)(ee.Z,{children:["TPM: ",e.tpm_limit?e.tpm_limit:"Unlimited"," ",(0,r.jsx)("br",{})," RPM:"," ",e.rpm_limit?e.rpm_limit:"Unlimited"]})}),(0,r.jsxs)(ef.Z,{children:[(0,r.jsx)(ej.Z,{onClick:()=>{I(e),A(!0)},icon:em.Z,size:"sm"}),(0,r.jsx)(er.Z,{open:S,onCancel:()=>{A(!1),I(null)},footer:null,width:800,children:E&&(0,r.jsxs)(r.Fragment,{children:[(0,r.jsxs)("div",{className:"grid grid-cols-1 gap-6 sm:grid-cols-2 lg:grid-cols-3 mt-8",children:[(0,r.jsxs)(ep.Z,{children:[(0,r.jsx)("p",{className:"text-tremor-default font-medium text-tremor-content dark:text-dark-tremor-content",children:"Spend"}),(0,r.jsx)("div",{className:"mt-2 flex items-baseline space-x-2.5",children:(0,r.jsx)("p",{className:"text-tremor font-semibold text-tremor-content-strong dark:text-dark-tremor-content-strong",children:(()=>{try{return parseFloat(E.spend).toFixed(4)}catch(e){return E.spend}})()})})]}),(0,r.jsxs)(ep.Z,{children:[(0,r.jsx)("p",{className:"text-tremor-default font-medium text-tremor-content dark:text-dark-tremor-content",children:"Budget"}),(0,r.jsx)("div",{className:"mt-2 flex items-baseline space-x-2.5",children:(0,r.jsx)("p",{className:"text-tremor font-semibold text-tremor-content-strong dark:text-dark-tremor-content-strong",children:null!=E.max_budget?(0,r.jsx)(r.Fragment,{children:E.max_budget}):(0,r.jsx)(r.Fragment,{children:"Unlimited"})})})]},e.name),(0,r.jsxs)(ep.Z,{children:[(0,r.jsx)("p",{className:"text-tremor-default font-medium text-tremor-content dark:text-dark-tremor-content",children:"Expires"}),(0,r.jsx)("div",{className:"mt-2 flex items-baseline space-x-2.5",children:(0,r.jsx)("p",{className:"text-tremor-default font-small text-tremor-content-strong dark:text-dark-tremor-content-strong",children:null!=E.expires?(0,r.jsx)(r.Fragment,{children:new Date(E.expires).toLocaleString(void 0,{day:"numeric",month:"long",year:"numeric",hour:"numeric",minute:"numeric",second:"numeric"})}):(0,r.jsx)(r.Fragment,{children:"Never"})})})]},e.name)]}),(0,r.jsxs)(ep.Z,{className:"my-4",children:[(0,r.jsx)(el.Z,{children:"Token 
Name"}),(0,r.jsx)(ee.Z,{className:"my-1",children:E.key_alias?E.key_alias:E.key_name}),(0,r.jsx)(el.Z,{children:"Token ID"}),(0,r.jsx)(ee.Z,{className:"my-1 text-[12px]",children:E.token}),(0,r.jsx)(el.Z,{children:"Metadata"}),(0,r.jsx)(ee.Z,{className:"my-1",children:(0,r.jsxs)("pre",{children:[JSON.stringify(E.metadata)," "]})})]}),(0,r.jsx)(J.Z,{className:"mx-auto flex items-center",onClick:()=>{A(!1),I(null)},children:"Close"})]})}),(0,r.jsx)(ej.Z,{icon:eu.Z,size:"sm",onClick:()=>R(e)}),(0,r.jsx)(ej.Z,{onClick:()=>U(e),icon:eh.Z,size:"sm"})]})]},e.token)})})]}),h&&(0,r.jsx)("div",{className:"fixed z-10 inset-0 overflow-y-auto",children:(0,r.jsxs)("div",{className:"flex items-end justify-center min-h-screen pt-4 px-4 pb-20 text-center sm:block sm:p-0",children:[(0,r.jsx)("div",{className:"fixed inset-0 transition-opacity","aria-hidden":"true",children:(0,r.jsx)("div",{className:"absolute inset-0 bg-gray-500 opacity-75"})}),(0,r.jsx)("span",{className:"hidden sm:inline-block sm:align-middle sm:h-screen","aria-hidden":"true",children:"β"}),(0,r.jsxs)("div",{className:"inline-block align-bottom bg-white rounded-lg text-left overflow-hidden shadow-xl transform transition-all sm:my-8 sm:align-middle sm:max-w-lg sm:w-full",children:[(0,r.jsx)("div",{className:"bg-white px-4 pt-5 pb-4 sm:p-6 sm:pb-4",children:(0,r.jsx)("div",{className:"sm:flex sm:items-start",children:(0,r.jsxs)("div",{className:"mt-3 text-center sm:mt-0 sm:ml-4 sm:text-left",children:[(0,r.jsx)("h3",{className:"text-lg leading-6 font-medium text-gray-900",children:"Delete Key"}),(0,r.jsx)("div",{className:"mt-2",children:(0,r.jsx)("p",{className:"text-sm text-gray-500",children:"Are you sure you want to delete this key ?"})})]})})}),(0,r.jsxs)("div",{className:"bg-gray-50 px-4 py-3 sm:px-6 sm:flex sm:flex-row-reverse",children:[(0,r.jsx)(J.Z,{onClick:D,color:"red",className:"ml-2",children:"Delete"}),(0,r.jsx)(J.Z,{onClick:()=>{x(!1),j(null)},children:"Cancel"})]})]})]})})]}),E&&(0,r.jsx)(e=>{let{visible:l,onCancel:t,token:s,onSubmit:o}=e,[i]=ea.Z.useForm(),[d,m]=(0,n.useState)(a),[u,h]=(0,n.useState)([]),[x,p]=(0,n.useState)(!1);return(0,r.jsx)(er.Z,{title:"Edit Key",visible:l,width:800,footer:null,onOk:()=>{i.validateFields().then(e=>{i.resetFields()}).catch(e=>{console.error("Validation failed:",e)})},onCancel:t,children:(0,r.jsxs)(ea.Z,{form:i,onFinish:M,initialValues:s,labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(ea.Z.Item,{label:"Key Name",name:"key_alias",rules:[{required:!0,message:"Please input a key name"}],help:"required",children:(0,r.jsx)(en.Z,{})}),(0,r.jsx)(ea.Z.Item,{label:"Models",name:"models",rules:[{validator:(e,l)=>{let t=l.filter(e=>!d.models.includes(e)&&"all-team-models"!==e&&"all-proxy-models"!==e&&!d.models.includes("all-proxy-models"));return(console.log("errorModels: ".concat(t)),t.length>0)?Promise.reject("Some models are not part of the new team's models - ".concat(t,"Team models: ").concat(d.models)):Promise.resolve()}}],children:(0,r.jsxs)(es.default,{mode:"multiple",placeholder:"Select models",style:{width:"100%"},children:[(0,r.jsx)(ev,{value:"all-team-models",children:"All Team Models"},"all-team-models"),d&&d.models?d.models.includes("all-proxy-models")?C.filter(e=>"all-proxy-models"!==e).map(e=>(0,r.jsx)(ev,{value:e,children:e},e)):d.models.map(e=>(0,r.jsx)(ev,{value:e,children:e},e)):C.map(e=>(0,r.jsx)(ev,{value:e,children:e},e))]})}),(0,r.jsx)(ea.Z.Item,{className:"mt-8",label:"Max Budget 
(USD)",name:"max_budget",help:"Budget cannot exceed team max budget: ".concat((null==d?void 0:d.max_budget)!==null&&(null==d?void 0:d.max_budget)!==void 0?null==d?void 0:d.max_budget:"unlimited"),rules:[{validator:async(e,l)=>{if(l&&d&&null!==d.max_budget&&l>d.max_budget)throw console.log("keyTeam.max_budget: ".concat(d.max_budget)),Error("Budget cannot exceed team max budget: $".concat(d.max_budget))}}],children:(0,r.jsx)(eo.Z,{step:.01,precision:2,width:200})}),(0,r.jsx)(ea.Z.Item,{label:"token",name:"token",hidden:!0}),(0,r.jsx)(ea.Z.Item,{label:"Team",name:"team_id",help:"the team this key belongs to",children:(0,r.jsx)(eb.Z,{value:s.team_alias,children:null==c?void 0:c.map((e,l)=>(0,r.jsx)(ek.Z,{value:e.team_id,onClick:()=>m(e),children:e.team_alias},l))})})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(ei.ZP,{htmlType:"submit",children:"Edit Key"})})]})})},{visible:k,onCancel:()=>{v(!1),I(null)},token:E,onSubmit:M})]})},eN=t(76032),eA=t(35152),eE=e=>{let{userID:l,userRole:t,accessToken:s,userSpend:a,selectedTeam:o}=e;console.log("userSpend: ".concat(a));let[i,c]=(0,n.useState)(null!==a?a:0),[d,m]=(0,n.useState)(0),[u,h]=(0,n.useState)([]);(0,n.useEffect)(()=>{let e=async()=>{if(s&&l&&t&&"Admin"===t&&null==a)try{let e=await w(s);e&&(e.spend?c(e.spend):c(0),e.max_budget?m(e.max_budget):m(0))}catch(e){console.error("Error fetching global spend data:",e)}};(async()=>{try{if(null===l||null===t)return;if(null!==s){let e=(await N(s,l,t)).data.map(e=>e.id);console.log("available_model_names:",e),h(e)}}catch(e){console.error("Error fetching user models:",e)}})(),e()},[t,s,l]),(0,n.useEffect)(()=>{null!==a&&c(a)},[a]);let x=[];o&&o.models&&(x=o.models),x&&x.includes("all-proxy-models")?(console.log("user models:",u),x=u):x&&x.includes("all-team-models")?x=o.models:x&&0===x.length&&(x=u);let p=void 0!==i?i.toFixed(4):null;return console.log("spend in view user spend: ".concat(i)),(0,r.jsxs)("div",{className:"flex items-center",children:[(0,r.jsxs)("div",{children:[(0,r.jsxs)("p",{className:"text-tremor-default text-tremor-content dark:text-dark-tremor-content",children:["Total Spend"," "]}),(0,r.jsxs)("p",{className:"text-2xl text-tremor-content-strong dark:text-dark-tremor-content-strong font-semibold",children:["$",p]})]}),(0,r.jsx)("div",{className:"ml-auto",children:(0,r.jsxs)($.Z,{children:[(0,r.jsx)(Q.Z,{children:(0,r.jsx)(ee.Z,{children:"Team Models"})}),(0,r.jsx)(X.Z,{className:"absolute right-0 z-10 bg-white p-2 shadow-lg max-w-xs",children:(0,r.jsx)(eN.Z,{children:x.map(e=>(0,r.jsx)(eA.Z,{children:(0,r.jsx)(ee.Z,{children:e})},e))})})]})})]})},eI=e=>{let{userID:l,userRole:t,selectedTeam:s,accessToken:a}=e,[o,i]=(0,n.useState)([]);(0,n.useEffect)(()=>{(async()=>{try{if(null===l||null===t)return;if(null!==a){let e=(await N(a,l,t)).data.map(e=>e.id);console.log("available_model_names:",e),i(e)}}catch(e){console.error("Error fetching user models:",e)}})()},[a,l,t]);let c=[];return s&&s.models&&(c=s.models),c&&c.includes("all-proxy-models")&&(console.log("user models:",o),c=o),(0,r.jsx)(r.Fragment,{children:(0,r.jsx)("div",{className:"mb-5",children:(0,r.jsx)("p",{className:"text-3xl text-tremor-content-strong dark:text-dark-tremor-content-strong font-semibold",children:null==s?void 0:s.team_alias})})})},eC=e=>{let l,{teams:t,setSelectedTeam:s,userRole:a}=e,o={models:[],team_id:null,team_alias:"Default Team"},[i,c]=(0,n.useState)(o);return(l="App User"===a?t:t?[...t,o]:[o],"App User"===a)?null:(0,r.jsxs)("div",{className:"mt-5 
mb-5",children:[(0,r.jsx)(el.Z,{children:"Select Team"}),(0,r.jsx)(ee.Z,{children:"If you belong to multiple teams, this setting controls which team is used by default when creating new API Keys."}),(0,r.jsxs)(ee.Z,{className:"mt-3 mb-3",children:[(0,r.jsx)("b",{children:"Default Team:"})," If no team_id is set for a key, it will be grouped under here."]}),l&&l.length>0?(0,r.jsx)(eb.Z,{defaultValue:"0",children:l.map((e,l)=>(0,r.jsx)(ek.Z,{value:String(l),onClick:()=>s(e),children:e.team_alias},l))}):(0,r.jsxs)(ee.Z,{children:["No team created. ",(0,r.jsx)("b",{children:"Defaulting to personal account."})]})]})},eP=t(37963),eT=t(36083);console.log("isLocal:",!1);var eO=e=>{let{userID:l,userRole:t,teams:s,keys:a,setUserRole:i,userEmail:c,setUserEmail:d,setTeams:m,setKeys:u}=e,[h,x]=(0,n.useState)(null),p=(0,o.useSearchParams)();p.get("viewSpend"),(0,o.useRouter)();let j=p.get("token"),[g,y]=(0,n.useState)(null),[f,_]=(0,n.useState)(null),[b,k]=(0,n.useState)([]),v={models:[],team_alias:"Default Team",team_id:null},[S,A]=(0,n.useState)(s?s[0]:v);if(window.addEventListener("beforeunload",function(){sessionStorage.clear()}),(0,n.useEffect)(()=>{if(j){let e=(0,eP.o)(j);if(e){if(console.log("Decoded token:",e),console.log("Decoded key:",e.key),y(e.key),e.user_role){let l=function(e){if(!e)return"Undefined Role";switch(console.log("Received user role: ".concat(e)),e.toLowerCase()){case"app_owner":case"demo_app_owner":return"App Owner";case"app_admin":case"proxy_admin":return"Admin";case"proxy_admin_viewer":return"Admin Viewer";case"app_user":return"App User";default:return"Unknown Role"}}(e.user_role);console.log("Decoded user_role:",l),i(l)}else console.log("User role not defined");e.user_email?d(e.user_email):console.log("User Email is not set ".concat(e))}}if(l&&g&&t&&!a&&!h){let e=sessionStorage.getItem("userModels"+l);e?k(JSON.parse(e)):(async()=>{try{let e=await Z(g,l,t,!1,null,null);if(console.log("received teams in user dashboard: ".concat(Object.keys(e),"; team values: ").concat(Object.entries(e.teams))),"Admin"==t){let e=await w(g);x(e),console.log("globalSpend:",e)}else x(e.user_info);u(e.keys),m(e.teams);let s=[...e.teams];s.length>0?(console.log("response['teams']: ".concat(s)),A(s[0])):A(v),sessionStorage.setItem("userData"+l,JSON.stringify(e.keys)),sessionStorage.setItem("userSpendData"+l,JSON.stringify(e.user_info));let a=(await N(g,l,t)).data.map(e=>e.id);console.log("available_model_names:",a),k(a),console.log("userModels:",b),sessionStorage.setItem("userModels"+l,JSON.stringify(a))}catch(e){console.error("There was an error fetching the data",e)}})()}},[l,j,g,a,t]),(0,n.useEffect)(()=>{if(null!==a&&null!=S){let e=0;for(let l of a)S.hasOwnProperty("team_id")&&null!==l.team_id&&l.team_id===S.team_id&&(e+=l.spend);_(e)}else if(null!==a){let e=0;for(let l of a)e+=l.spend;_(e)}},[S]),null==l||null==j){let e="/sso/key/generate";return console.log("Full URL:",e),window.location.href=e,null}if(null==g)return null;if(null==t&&i("App Owner"),t&&"Admin Viewer"==t){let{Title:e,Paragraph:l}=eT.default;return(0,r.jsxs)("div",{children:[(0,r.jsx)(e,{level:1,children:"Access Denied"}),(0,r.jsx)(l,{children:"Ask your proxy admin for access to create keys"})]})}return console.log("inside user dashboard, selected team",S),console.log("teamSpend: ".concat(f)),(0,r.jsx)("div",{className:"w-full mx-4",children:(0,r.jsx)(W.Z,{numItems:1,className:"gap-2 p-8 h-[75vh] w-full 
mt-2",children:(0,r.jsxs)(Y.Z,{numColSpan:1,children:[(0,r.jsx)(eI,{userID:l,userRole:t,selectedTeam:S||null,accessToken:g}),(0,r.jsx)(eE,{userID:l,userRole:t,accessToken:g,userSpend:f,selectedTeam:S||null}),(0,r.jsx)(eS,{userID:l,userRole:t,accessToken:g,selectedTeam:S||null,data:a,setData:u,teams:s}),(0,r.jsx)(ed,{userID:l,team:S||null,userRole:t,accessToken:g,data:a,setData:u},S?S.team_id:null),(0,r.jsx)(eC,{teams:s,setSelectedTeam:A,userRole:t})]})})})},eF=t(35087),eR=t(92836),eM=t(26734),eL=t(41608),eU=t(32126),eD=t(23682),eK=t(47047),eB=t(76628),ez=t(57750),eq=t(44041),eV=t(38302),eG=t(28683),eY=t(1460),eW=t(78578),eJ=t(63954),eH=t(90252),e$=t(7905),eX=e=>{let{modelID:l,accessToken:t}=e,[s,a]=(0,n.useState)(!1),o=async()=>{try{u.ZP.info("Making API Call"),a(!0);let e=await p(t,l);console.log("model delete Response:",e),u.ZP.success("Model ".concat(l," deleted successfully")),a(!1)}catch(e){console.error("Error deleting the model:",e)}};return(0,r.jsxs)("div",{children:[(0,r.jsx)(ej.Z,{onClick:()=>a(!0),icon:eh.Z,size:"sm"}),(0,r.jsx)(er.Z,{open:s,onOk:o,okType:"danger",onCancel:()=>a(!1),children:(0,r.jsxs)(W.Z,{numItems:1,className:"gap-2 w-full",children:[(0,r.jsx)(el.Z,{children:"Delete Model"}),(0,r.jsx)(Y.Z,{numColSpan:1,children:(0,r.jsx)("p",{children:"Are you sure you want to delete this model? This action is irreversible."})}),(0,r.jsx)(Y.Z,{numColSpan:1,children:(0,r.jsxs)("p",{children:["Model ID: ",(0,r.jsx)("b",{children:l})]})})]})})]})},eQ=t(97766),e0=t(46495);let{Title:e1,Link:e2}=eT.default;(s=a||(a={})).OpenAI="OpenAI",s.Azure="Azure",s.Anthropic="Anthropic",s.Google_AI_Studio="Gemini (Google AI Studio)",s.Bedrock="Amazon Bedrock",s.OpenAI_Compatible="OpenAI-Compatible Endpoints (Groq, Together AI, Mistral AI, etc.)",s.Vertex_AI="Vertex AI (Anthropic, Gemini, etc.)";let e4={OpenAI:"openai",Azure:"azure",Anthropic:"anthropic",Google_AI_Studio:"gemini",Bedrock:"bedrock",OpenAI_Compatible:"openai",Vertex_AI:"vertex_ai"},e8={"BadRequestError (400)":"BadRequestErrorRetries","AuthenticationError (401)":"AuthenticationErrorRetries","TimeoutError (408)":"TimeoutErrorRetries","RateLimitError (429)":"RateLimitErrorRetries","ContentPolicyViolationError (400)":"ContentPolicyViolationErrorRetries","InternalServerError (500)":"InternalServerErrorRetries"},e5=async(e,l,t)=>{try{let s=Array.isArray(e.model)?e.model:[e.model];console.log("received deployments: ".concat(s)),console.log("received type of deployments: ".concat(typeof s)),s.forEach(async t=>{console.log("litellm_model: ".concat(t));let s={},a={};s.model=t;let r="";for(let[l,t]of Object.entries(e))if(""!==t){if("model_name"==l)r+=t;else if("custom_llm_provider"==l)continue;else if("model"==l)continue;else if("base_model"===l)a[l]=t;else if("litellm_extra_params"==l){console.log("litellm_extra_params:",t);let e={};if(t&&void 0!=t){try{e=JSON.parse(t)}catch(e){throw u.ZP.error("Failed to parse LiteLLM Extra Params: "+e,10),Error("Failed to parse litellm_extra_params: "+e)}for(let[l,t]of Object.entries(e))s[l]=t}}else s[l]=t}let n={model_name:r,litellm_params:s,model_info:a},o=await x(l,n);console.log("response for model create call: ".concat(o.data))}),t.resetFields()}catch(e){u.ZP.error("Failed to create model: "+e,10)}};var e3=e=>{var 
l,t,s;let{accessToken:o,token:i,userRole:c,userID:d,modelData:m={data:[]},setModelData:x}=e,[p,j]=(0,n.useState)([]),[g]=ea.Z.useForm(),[y,f]=(0,n.useState)(null),[Z,_]=(0,n.useState)(""),[w,N]=(0,n.useState)([]),A=Object.values(a).filter(e=>isNaN(Number(e))),[E,I]=(0,n.useState)("OpenAI"),[C,P]=(0,n.useState)(""),[T,O]=(0,n.useState)(!1),[F,R]=(0,n.useState)(null),[M,L]=(0,n.useState)([]),[U,K]=(0,n.useState)(null),[B,z]=(0,n.useState)([]),[Y,et]=(0,n.useState)([]),[es,en]=(0,n.useState)([]),[ec,ed]=(0,n.useState)([]),[em,eh]=(0,n.useState)([]),[ev,eS]=(0,n.useState)([]),[eN,eA]=(0,n.useState)([]),[eE,eI]=(0,n.useState)({from:new Date(Date.now()-6048e5),to:new Date}),[eC,eP]=(0,n.useState)(null),[eO,e3]=(0,n.useState)(0),e6=e=>{R(e),O(!0)},e7=async e=>{if(console.log("handleEditSubmit:",e),null==o)return;let l={},t=null;for(let[s,a]of Object.entries(e))"model_id"!==s?l[s]=a:t=a;let s={litellm_params:l,model_info:{id:t}};console.log("handleEditSubmit payload:",s);try{await D(o,s),u.ZP.success("Model updated successfully, restart server to see updates"),O(!1),R(null)}catch(e){console.log("Error occurred")}},e9=()=>{_(new Date().toLocaleString())},le=async()=>{if(!o){console.error("Access token is missing");return}console.log("new modelGroupRetryPolicy:",eC);try{await V(o,{router_settings:{model_group_retry_policy:eC}}),u.ZP.success("Retry settings saved successfully")}catch(e){console.error("Failed to save retry settings:",e),u.ZP.error("Failed to save retry settings")}};if((0,n.useEffect)(()=>{if(!o||!i||!c||!d)return;let e=async()=>{try{var e,l,t,s,a,r;let n=await b(o,d,c);console.log("Model data response:",n.data),x(n);let i=new Set;for(let e=0;e0&&(u=m[m.length-1],console.log("_initial_model_group:",u),K(u)),console.log("selectedModelGroup:",U);let h=await k(o,d,c,u,null===(e=eE.from)||void 0===e?void 0:e.toISOString(),null===(l=eE.to)||void 0===l?void 0:l.toISOString());console.log("Model metrics response:",h),et(h.data),en(h.all_api_bases);let p=await S(o,d,c,u,null===(t=eE.from)||void 0===t?void 0:t.toISOString(),null===(s=eE.to)||void 0===s?void 0:s.toISOString());console.log("Model exceptions response:",p),ed(p.data),eh(p.exception_types);let j=await v(o,d,c,u,null===(a=eE.from)||void 0===a?void 0:a.toISOString(),null===(r=eE.to)||void 0===r?void 0:r.toISOString());console.log("slowResponses:",j),eA(j);let g=(await q(o,d,c)).router_settings;console.log("routerSettingsInfo:",g);let y=g.model_group_retry_policy,f=g.num_retries;console.log("model_group_retry_policy:",y),console.log("default_retries:",f),eP(y),e3(f)}catch(e){console.error("There was an error fetching the model data",e)}};o&&i&&c&&d&&e();let l=async()=>{let e=await h();console.log("received model cost map data: ".concat(Object.keys(e))),f(e)};null==y&&l(),e9()},[o,i,c,d,y,Z]),!m||!o||!i||!c||!d)return(0,r.jsx)("div",{children:"Loading..."});let ll=[];for(let e=0;e(console.log("GET PROVIDER CALLED! 
- ".concat(y)),null!=y&&"object"==typeof y&&e in y)?y[e].litellm_provider:"openai";if(a){let e=a.split("/"),l=e[0];n=1===e.length?u(a):l}else n="openai";r&&(o=null==r?void 0:r.input_cost_per_token,i=null==r?void 0:r.output_cost_per_token,c=null==r?void 0:r.max_tokens),(null==s?void 0:s.litellm_params)&&(d=Object.fromEntries(Object.entries(null==s?void 0:s.litellm_params).filter(e=>{let[l]=e;return"model"!==l&&"api_base"!==l}))),m.data[e].provider=n,m.data[e].input_cost=o,m.data[e].output_cost=i,m.data[e].max_tokens=c,m.data[e].api_base=null==s?void 0:null===(t=s.litellm_params)||void 0===t?void 0:t.api_base,m.data[e].cleanedLitellmParams=d,ll.push(s.model_name),console.log(m.data[e])}if(c&&"Admin Viewer"==c){let{Title:e,Paragraph:l}=eT.default;return(0,r.jsxs)("div",{children:[(0,r.jsx)(e,{level:1,children:"Access Denied"}),(0,r.jsx)(l,{children:"Ask your proxy admin for access to view all models"})]})}let lt=e=>{console.log("received provider string: ".concat(e));let l=Object.keys(a).find(l=>a[l]===e);if(l){let e=e4[l];console.log("mappingResult: ".concat(e));let t=[];"object"==typeof y&&Object.entries(y).forEach(l=>{let[s,a]=l;null!==a&&"object"==typeof a&&"litellm_provider"in a&&(a.litellm_provider===e||a.litellm_provider.includes(e))&&t.push(s)}),N(t),console.log("providerModels: ".concat(w))}},ls=async()=>{try{u.ZP.info("Running health check..."),P("");let e=await G(o);P(e)}catch(e){console.error("Error running health check:",e),P("Error running health check")}},la=async(e,l,t)=>{if(console.log("Updating model metrics for group:",e),o&&d&&c&&l&&t){console.log("inside updateModelMetrics - startTime:",l,"endTime:",t),K(e);try{let s=await k(o,d,c,e,l.toISOString(),t.toISOString());console.log("Model metrics response:",s),et(s.data),en(s.all_api_bases);let a=await S(o,d,c,e,l.toISOString(),t.toISOString());console.log("Model exceptions response:",a),ed(a.data),eh(a.exception_types);let r=await v(o,d,c,e,l.toISOString(),t.toISOString());console.log("slowResponses:",r),eA(r)}catch(e){console.error("Failed to fetch model metrics",e)}}};return console.log("selectedProvider: ".concat(E)),console.log("providerModels.length: ".concat(w.length)),(0,r.jsx)("div",{style:{width:"100%",height:"100%"},children:(0,r.jsxs)(eM.Z,{className:"gap-2 p-8 h-[75vh] w-full mt-2",children:[(0,r.jsxs)(eL.Z,{className:"flex justify-between mt-2 w-full items-center",children:[(0,r.jsxs)("div",{className:"flex",children:[(0,r.jsx)(eR.Z,{children:"All Models"}),(0,r.jsx)(eR.Z,{children:"Add Model"}),(0,r.jsx)(eR.Z,{children:(0,r.jsx)("pre",{children:"/health Models"})}),(0,r.jsx)(eR.Z,{children:"Model Analytics"}),(0,r.jsx)(eR.Z,{children:"Model Retry Settings"})]}),(0,r.jsxs)("div",{className:"flex items-center space-x-2",children:[Z&&(0,r.jsxs)(ee.Z,{children:["Last Refreshed: ",Z]}),(0,r.jsx)(ej.Z,{icon:eJ.Z,variant:"shadow",size:"xs",className:"self-center",onClick:e9})]})]}),(0,r.jsxs)(eD.Z,{children:[(0,r.jsxs)(eU.Z,{children:[(0,r.jsxs)(W.Z,{children:[(0,r.jsxs)("div",{className:"flex items-center",children:[(0,r.jsx)(ee.Z,{children:"Filter by Public Model Name"}),(0,r.jsxs)(eb.Z,{className:"mb-4 mt-2 ml-2 w-50",defaultValue:"all",onValueChange:e=>K("all"===e?"all":e),children:[(0,r.jsx)(ek.Z,{value:"all",children:"All Models"}),M.map((e,l)=>(0,r.jsx)(ek.Z,{value:e,onClick:()=>K(e),children:e},l))]})]}),(0,r.jsx)(ep.Z,{children:(0,r.jsxs)(eg.Z,{className:"mt-5",children:[(0,r.jsx)(eZ.Z,{children:(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(e_.Z,{children:"Public Model Name 
"}),(0,r.jsx)(e_.Z,{children:"Provider"}),"Admin"===c&&(0,r.jsx)(e_.Z,{children:"API Base"}),(0,r.jsx)(e_.Z,{children:"Extra litellm Params"}),(0,r.jsx)(e_.Z,{children:"Input Price per token ($)"}),(0,r.jsx)(e_.Z,{children:"Output Price per token ($)"}),(0,r.jsx)(e_.Z,{children:"Max Tokens"}),(0,r.jsx)(e_.Z,{children:"Status"})]})}),(0,r.jsx)(ey.Z,{children:m.data.filter(e=>"all"===U||e.model_name===U||null==U||""===U).map((e,l)=>(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(ef.Z,{children:(0,r.jsx)(ee.Z,{children:e.model_name})}),(0,r.jsx)(ef.Z,{children:e.provider}),"Admin"===c&&(0,r.jsx)(ef.Z,{children:e.api_base}),(0,r.jsx)(ef.Z,{children:(0,r.jsxs)($.Z,{children:[(0,r.jsx)(Q.Z,{children:(0,r.jsx)(ee.Z,{children:"Litellm params"})}),(0,r.jsx)(X.Z,{children:(0,r.jsx)("pre",{children:JSON.stringify(e.cleanedLitellmParams,null,2)})})]})}),(0,r.jsx)(ef.Z,{children:e.input_cost||e.litellm_params.input_cost_per_token||null}),(0,r.jsx)(ef.Z,{children:e.output_cost||e.litellm_params.output_cost_per_token||null}),(0,r.jsx)(ef.Z,{children:e.max_tokens}),(0,r.jsx)(ef.Z,{children:e.model_info.db_model?(0,r.jsx)(ex.Z,{icon:eH.Z,className:"text-white",children:"DB Model"}):(0,r.jsx)(ex.Z,{icon:e$.Z,className:"text-black",children:"Config Model"})}),(0,r.jsxs)(ef.Z,{children:[(0,r.jsx)(ej.Z,{icon:eu.Z,size:"sm",onClick:()=>e6(e)}),(0,r.jsx)(eX,{modelID:e.model_info.id,accessToken:o})]})]},l))})]})})]}),(0,r.jsx)(e=>{let{visible:l,onCancel:t,model:s,onSubmit:a}=e,[n]=ea.Z.useForm(),o={},i="",c="";if(s){o=s.litellm_params,i=s.model_name;let e=s.model_info;e&&(c=e.id,console.log("model_id: ".concat(c)),o.model_id=c)}return(0,r.jsx)(er.Z,{title:"Edit Model "+i,visible:l,width:800,footer:null,onOk:()=>{n.validateFields().then(e=>{a(e),n.resetFields()}).catch(e=>{console.error("Validation failed:",e)})},onCancel:t,children:(0,r.jsxs)(ea.Z,{form:n,onFinish:e7,initialValues:o,labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(ea.Z.Item,{className:"mt-8",label:"api_base",name:"api_base",children:(0,r.jsx)(H.Z,{})}),(0,r.jsx)(ea.Z.Item,{label:"tpm",name:"tpm",tooltip:"int (optional) - Tokens limit for this deployment: in tokens per minute (tpm). Find this information on your model/providers website",children:(0,r.jsx)(eo.Z,{min:0,step:1})}),(0,r.jsx)(ea.Z.Item,{label:"rpm",name:"rpm",tooltip:"int (optional) - Rate limit for this deployment: in requests per minute (rpm). 
Find this information on your model/providers website",children:(0,r.jsx)(eo.Z,{min:0,step:1})}),(0,r.jsx)(ea.Z.Item,{label:"max_retries",name:"max_retries",children:(0,r.jsx)(eo.Z,{min:0,step:1})}),(0,r.jsx)(ea.Z.Item,{label:"timeout",name:"timeout",tooltip:"int (optional) - Timeout in seconds for LLM requests (Defaults to 600 seconds)",children:(0,r.jsx)(eo.Z,{min:0,step:1})}),(0,r.jsx)(ea.Z.Item,{label:"stream_timeout",name:"stream_timeout",tooltip:"int (optional) - Timeout for stream requests (seconds)",children:(0,r.jsx)(eo.Z,{min:0,step:1})}),(0,r.jsx)(ea.Z.Item,{label:"input_cost_per_token",name:"input_cost_per_token",tooltip:"float (optional) - Input cost per token",children:(0,r.jsx)(eo.Z,{min:0,step:1e-4})}),(0,r.jsx)(ea.Z.Item,{label:"output_cost_per_token",name:"output_cost_per_token",tooltip:"float (optional) - Output cost per token",children:(0,r.jsx)(eo.Z,{min:0,step:1e-4})}),(0,r.jsx)(ea.Z.Item,{label:"model_id",name:"model_id",hidden:!0})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(ei.ZP,{htmlType:"submit",children:"Save"})})]})})},{visible:T,onCancel:()=>{O(!1),R(null)},model:F,onSubmit:e7})]}),(0,r.jsxs)(eU.Z,{className:"h-full",children:[(0,r.jsx)(e1,{level:2,children:"Add new model"}),(0,r.jsx)(ep.Z,{children:(0,r.jsxs)(ea.Z,{form:g,onFinish:()=>{g.validateFields().then(e=>{e5(e,o,g)}).catch(e=>{console.error("Validation failed:",e)})},labelCol:{span:10},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(ea.Z.Item,{rules:[{required:!0,message:"Required"}],label:"Provider:",name:"custom_llm_provider",tooltip:"E.g. OpenAI, Azure OpenAI, Anthropic, Bedrock, etc.",labelCol:{span:10},labelAlign:"left",children:(0,r.jsx)(eb.Z,{value:E.toString(),children:A.map((e,l)=>(0,r.jsx)(ek.Z,{value:e,onClick:()=>{lt(e),I(e)},children:e},l))})}),(0,r.jsx)(ea.Z.Item,{rules:[{required:!0,message:"Required"}],label:"Public Model Name",name:"model_name",tooltip:"Model name your users will pass in. Also used for load-balancing, LiteLLM will load balance between all models with this public name.",className:"mb-0",children:(0,r.jsx)(H.Z,{placeholder:"Vertex AI (Anthropic, Gemini, etc.)"===(s=E.toString())?"gemini-pro":"Anthropic"==s?"claude-3-opus":"Amazon Bedrock"==s?"claude-3-opus":"Gemini (Google AI Studio)"==s?"gemini-pro":"gpt-3.5-turbo"})}),(0,r.jsxs)(eV.Z,{children:[(0,r.jsx)(eG.Z,{span:10}),(0,r.jsx)(eG.Z,{span:10,children:(0,r.jsx)(ee.Z,{className:"mb-3 mt-1",children:"Model name your users will pass in."})})]}),(0,r.jsx)(ea.Z.Item,{rules:[{required:!0,message:"Required"}],label:"LiteLLM Model Name(s)",name:"model",tooltip:"Actual model name used for making litellm.completion() call.",className:"mb-0",children:"Azure"===E?(0,r.jsx)(H.Z,{placeholder:"Enter model name"}):w.length>0?(0,r.jsx)(eK.Z,{value:w,children:w.map((e,l)=>(0,r.jsx)(eB.Z,{value:e,children:e},l))}):(0,r.jsx)(H.Z,{placeholder:"gpt-3.5-turbo-0125"})}),(0,r.jsxs)(eV.Z,{children:[(0,r.jsx)(eG.Z,{span:10}),(0,r.jsx)(eG.Z,{span:10,children:(0,r.jsxs)(ee.Z,{className:"mb-3 mt-1",children:["Actual model name used for making ",(0,r.jsx)(e2,{href:"https://docs.litellm.ai/docs/providers",target:"_blank",children:"litellm.completion() call"}),". 
We'll ",(0,r.jsx)(e2,{href:"https://docs.litellm.ai/docs/proxy/reliability#step-1---set-deployments-on-config",target:"_blank",children:"loadbalance"})," models with the same 'public name'"]})})]}),"Amazon Bedrock"!=E&&"Vertex AI (Anthropic, Gemini, etc.)"!=E&&(0,r.jsx)(ea.Z.Item,{rules:[{required:!0,message:"Required"}],label:"API Key",name:"api_key",children:(0,r.jsx)(H.Z,{placeholder:"sk-",type:"password"})}),"OpenAI"==E&&(0,r.jsx)(ea.Z.Item,{label:"Organization ID",name:"organization_id",children:(0,r.jsx)(H.Z,{placeholder:"[OPTIONAL] my-unique-org"})}),"Vertex AI (Anthropic, Gemini, etc.)"==E&&(0,r.jsx)(ea.Z.Item,{rules:[{required:!0,message:"Required"}],label:"Vertex Project",name:"vertex_project",children:(0,r.jsx)(H.Z,{placeholder:"adroit-cadet-1234.."})}),"Vertex AI (Anthropic, Gemini, etc.)"==E&&(0,r.jsx)(ea.Z.Item,{rules:[{required:!0,message:"Required"}],label:"Vertex Location",name:"vertex_location",children:(0,r.jsx)(H.Z,{placeholder:"us-east-1"})}),"Vertex AI (Anthropic, Gemini, etc.)"==E&&(0,r.jsx)(ea.Z.Item,{rules:[{required:!0,message:"Required"}],label:"Vertex Credentials",name:"vertex_credentials",className:"mb-0",children:(0,r.jsx)(e0.Z,{name:"file",accept:".json",beforeUpload:e=>{if("application/json"===e.type){let l=new FileReader;l.onload=e=>{if(e.target){let l=e.target.result;g.setFieldsValue({vertex_credentials:l})}},l.readAsText(e)}return!1},onChange(e){"uploading"!==e.file.status&&console.log(e.file,e.fileList),"done"===e.file.status?u.ZP.success("".concat(e.file.name," file uploaded successfully")):"error"===e.file.status&&u.ZP.error("".concat(e.file.name," file upload failed."))},children:(0,r.jsx)(ei.ZP,{icon:(0,r.jsx)(eQ.Z,{}),children:"Click to Upload"})})}),"Vertex AI (Anthropic, Gemini, etc.)"==E&&(0,r.jsxs)(eV.Z,{children:[(0,r.jsx)(eG.Z,{span:10}),(0,r.jsx)(eG.Z,{span:10,children:(0,r.jsx)(ee.Z,{className:"mb-3 mt-1",children:"Give litellm a gcp service account(.json file), so it can make the relevant calls"})})]}),("Azure"==E||"OpenAI-Compatible Endpoints (Groq, Together AI, Mistral AI, etc.)"==E)&&(0,r.jsx)(ea.Z.Item,{rules:[{required:!0,message:"Required"}],label:"API Base",name:"api_base",children:(0,r.jsx)(H.Z,{placeholder:"https://..."})}),"Azure"==E&&(0,r.jsx)(ea.Z.Item,{rules:[{required:!0,message:"Required"}],label:"API Version",name:"api_version",children:(0,r.jsx)(H.Z,{placeholder:"2023-07-01-preview"})}),"Azure"==E&&(0,r.jsxs)(ea.Z.Item,{label:"Base Model",name:"base_model",children:[(0,r.jsx)(H.Z,{placeholder:"azure/gpt-3.5-turbo"}),(0,r.jsxs)(ee.Z,{children:["The actual model your azure deployment uses. Used for accurate cost tracking. Select name from ",(0,r.jsx)(e2,{href:"https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json",target:"_blank",children:"here"})]})]}),"Amazon Bedrock"==E&&(0,r.jsx)(ea.Z.Item,{rules:[{required:!0,message:"Required"}],label:"AWS Access Key ID",name:"aws_access_key_id",tooltip:"You can provide the raw key or the environment variable (e.g. `os.environ/MY_SECRET_KEY`).",children:(0,r.jsx)(H.Z,{placeholder:""})}),"Amazon Bedrock"==E&&(0,r.jsx)(ea.Z.Item,{rules:[{required:!0,message:"Required"}],label:"AWS Secret Access Key",name:"aws_secret_access_key",tooltip:"You can provide the raw key or the environment variable (e.g. 
`os.environ/MY_SECRET_KEY`).",children:(0,r.jsx)(H.Z,{placeholder:""})}),"Amazon Bedrock"==E&&(0,r.jsx)(ea.Z.Item,{rules:[{required:!0,message:"Required"}],label:"AWS Region Name",name:"aws_region_name",tooltip:"You can provide the raw key or the environment variable (e.g. `os.environ/MY_SECRET_KEY`).",children:(0,r.jsx)(H.Z,{placeholder:"us-east-1"})}),(0,r.jsx)(ea.Z.Item,{label:"LiteLLM Params",name:"litellm_extra_params",tooltip:"Optional litellm params used for making a litellm.completion() call.",className:"mb-0",children:(0,r.jsx)(eW.Z,{rows:4,placeholder:'{ "rpm": 100, "timeout": 0, "stream_timeout": 0 }'})}),(0,r.jsxs)(eV.Z,{children:[(0,r.jsx)(eG.Z,{span:10}),(0,r.jsx)(eG.Z,{span:10,children:(0,r.jsxs)(ee.Z,{className:"mb-3 mt-1",children:["Pass JSON of litellm supported params ",(0,r.jsx)(e2,{href:"https://docs.litellm.ai/docs/completion/input",target:"_blank",children:"litellm.completion() call"})]})})]})]}),(0,r.jsx)("div",{style:{textAlign:"center",marginTop:"10px"},children:(0,r.jsx)(ei.ZP,{htmlType:"submit",children:"Add Model"})}),(0,r.jsx)(eY.Z,{title:"Get help on our github",children:(0,r.jsx)(eT.default.Link,{href:"https://github.com/BerriAI/litellm/issues",children:"Need Help?"})})]})})]}),(0,r.jsx)(eU.Z,{children:(0,r.jsxs)(ep.Z,{children:[(0,r.jsx)(ee.Z,{children:"`/health` will run a very small request through your models configured on litellm"}),(0,r.jsx)(J.Z,{onClick:ls,children:"Run `/health`"}),C&&(0,r.jsx)("pre",{children:JSON.stringify(C,null,2)})]})}),(0,r.jsxs)(eU.Z,{children:[(0,r.jsxs)(W.Z,{numItems:2,className:"mt-2",children:[(0,r.jsxs)(eG.Z,{children:[(0,r.jsx)(ee.Z,{children:"Select Time Range"}),(0,r.jsx)(eF.Z,{enableSelect:!0,value:eE,onValueChange:e=>{eI(e),la(U,e.from,e.to)}})]}),(0,r.jsxs)(eG.Z,{children:[(0,r.jsx)(ee.Z,{children:"Select Model Group"}),(0,r.jsx)(eb.Z,{className:"mb-4 mt-2",defaultValue:U||M[0],value:U||M[0],children:M.map((e,l)=>(0,r.jsx)(ek.Z,{value:e,onClick:()=>la(e,eE.from,eE.to),children:e},l))})]})]}),(0,r.jsxs)(W.Z,{numItems:2,children:[(0,r.jsx)(eG.Z,{children:(0,r.jsxs)(ep.Z,{className:"mr-2 max-h-[400px] min-h-[400px]",children:[(0,r.jsx)(el.Z,{children:"Avg Latency per Token"}),(0,r.jsx)("p",{className:"text-gray-500 italic",children:" (seconds/token)"}),(0,r.jsx)(ee.Z,{className:"text-gray-500 italic mt-1 mb-1",children:"average Latency for successfull requests divided by the total tokens"}),Y&&es&&(0,r.jsx)(ez.Z,{title:"Model Latency",className:"h-72",data:Y,showLegend:!1,index:"date",categories:es,connectNulls:!0,customTooltip:e=>{var l,t;let{payload:s,active:a}=e;if(!a||!s)return null;let n=null===(t=s[0])||void 0===t?void 0:null===(l=t.payload)||void 0===l?void 0:l.date,o=s.sort((e,l)=>l.value-e.value);if(o.length>5){let e=o.length-5;(o=o.slice(0,5)).push({dataKey:"".concat(e," other deployments"),value:s.slice(5).reduce((e,l)=>e+l.value,0),color:"gray"})}return(0,r.jsxs)("div",{className:"w-150 rounded-tremor-default border border-tremor-border bg-tremor-background p-2 text-tremor-default shadow-tremor-dropdown",children:[n&&(0,r.jsxs)("p",{className:"text-tremor-content-emphasis mb-2",children:["Date: ",n]}),o.map((e,l)=>{let t=parseFloat(e.value.toFixed(5)),s=0===t&&e.value>0?"<0.00001":t.toFixed(5);return(0,r.jsxs)("div",{className:"flex justify-between",children:[(0,r.jsxs)("div",{className:"flex items-center space-x-2",children:[(0,r.jsx)("div",{className:"w-2 h-2 mt-1 rounded-full 
bg-".concat(e.color,"-500")}),(0,r.jsx)("p",{className:"text-tremor-content",children:e.dataKey})]}),(0,r.jsx)("p",{className:"font-medium text-tremor-content-emphasis text-righ ml-2",children:s})]},l)})]})}})]})}),(0,r.jsx)(eG.Z,{children:(0,r.jsx)(ep.Z,{className:"ml-2 max-h-[400px] min-h-[400px] overflow-y-auto",children:(0,r.jsxs)(eg.Z,{children:[(0,r.jsx)(eZ.Z,{children:(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(e_.Z,{children:"Deployment"}),(0,r.jsx)(e_.Z,{children:"Success Responses"}),(0,r.jsxs)(e_.Z,{children:["Slow Responses ",(0,r.jsx)("p",{children:"Success Responses taking 600+s"})]})]})}),(0,r.jsx)(ey.Z,{children:eN.map((e,l)=>(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(ef.Z,{children:e.api_base}),(0,r.jsx)(ef.Z,{children:e.total_count}),(0,r.jsx)(ef.Z,{children:e.slow_count})]},l))})]})})})]}),(0,r.jsxs)(ep.Z,{className:"mt-4",children:[(0,r.jsx)(el.Z,{children:"Exceptions per Model"}),(0,r.jsx)(eq.Z,{className:"h-72",data:ec,index:"model",categories:em,stack:!0,colors:["indigo-300","rose-200","#ffcc33"],yAxisWidth:30})]})]}),(0,r.jsxs)(eU.Z,{children:[(0,r.jsxs)("div",{className:"flex items-center",children:[(0,r.jsx)(ee.Z,{children:"Filter by Public Model Name"}),(0,r.jsx)(eb.Z,{className:"mb-4 mt-2 ml-2 w-50",defaultValue:U||M[0],value:U||M[0],onValueChange:e=>K(e),children:M.map((e,l)=>(0,r.jsx)(ek.Z,{value:e,onClick:()=>K(e),children:e},l))})]}),(0,r.jsxs)(el.Z,{children:["Retry Policy for ",U]}),(0,r.jsx)(ee.Z,{className:"mb-6",children:"How many retries should be attempted based on the Exception"}),e8&&(0,r.jsx)("table",{children:(0,r.jsx)("tbody",{children:Object.entries(e8).map((e,l)=>{var t;let[s,a]=e,n=null==eC?void 0:null===(t=eC[U])||void 0===t?void 0:t[a];return null==n&&(n=eO),(0,r.jsxs)("tr",{className:"flex justify-between items-center mt-2",children:[(0,r.jsx)("td",{children:(0,r.jsx)(ee.Z,{children:s})}),(0,r.jsx)("td",{children:(0,r.jsx)(eo.Z,{className:"ml-5",value:n,min:0,step:1,onChange:e=>{eP(l=>{var t;let s=null!==(t=null==l?void 0:l[U])&&void 0!==t?t:{};return{...null!=l?l:{},[U]:{...s,[a]:e}}})}})})]},l)})})}),(0,r.jsx)(J.Z,{className:"mt-6 mr-8",onClick:le,children:"Save"})]})]})]})})};let{Option:e6}=es.default;var e7=e=>{let{userID:l,accessToken:t,teams:s}=e,[a]=ea.Z.useForm(),[o,i]=(0,n.useState)(!1),[c,d]=(0,n.useState)(null),[m,h]=(0,n.useState)([]);(0,n.useEffect)(()=>{(async()=>{try{let e=await N(t,l,"any"),s=[];for(let l=0;l{i(!1),a.resetFields()},p=()=>{i(!1),d(null),a.resetFields()},j=async e=>{try{u.ZP.info("Making API Call"),i(!0),console.log("formValues in create user:",e);let s=await g(t,null,e);console.log("user create Response:",s),d(s.key),u.ZP.success("API user Created"),a.resetFields(),localStorage.removeItem("userData"+l)}catch(e){console.error("Error creating the user:",e)}};return(0,r.jsxs)("div",{children:[(0,r.jsx)(J.Z,{className:"mx-auto",onClick:()=>i(!0),children:"+ Invite User"}),(0,r.jsxs)(er.Z,{title:"Invite User",visible:o,width:800,footer:null,onOk:x,onCancel:p,children:[(0,r.jsx)(ee.Z,{className:"mb-1",children:"Invite a user to login to the Admin UI and create Keys"}),(0,r.jsx)(ee.Z,{className:"mb-6",children:(0,r.jsx)("b",{children:"Note: SSO Setup Required for this"})}),(0,r.jsxs)(ea.Z,{form:a,onFinish:j,labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsx)(ea.Z.Item,{label:"User Email",name:"user_email",children:(0,r.jsx)(H.Z,{placeholder:""})}),(0,r.jsx)(ea.Z.Item,{label:"Team ID",name:"team_id",children:(0,r.jsx)(es.default,{placeholder:"Select Team 
ID",style:{width:"100%"},children:s?s.map(e=>(0,r.jsx)(e6,{value:e.team_id,children:e.team_alias},e.team_id)):(0,r.jsx)(e6,{value:null,children:"Default Team"},"default")})}),(0,r.jsx)(ea.Z.Item,{label:"Metadata",name:"metadata",children:(0,r.jsx)(en.Z.TextArea,{rows:4,placeholder:"Enter metadata as JSON"})}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(ei.ZP,{htmlType:"submit",children:"Create User"})})]})]}),c&&(0,r.jsxs)(er.Z,{title:"User Created Successfully",visible:o,onOk:x,onCancel:p,footer:null,children:[(0,r.jsx)("p",{children:"User has been created to access your proxy. Please Ask them to Log In."}),(0,r.jsx)("br",{}),(0,r.jsx)("p",{children:(0,r.jsx)("b",{children:"Note: This Feature is only supported through SSO on the Admin UI"})})]})]})},e9=e=>{let{accessToken:l,token:t,keys:s,userRole:a,userID:o,teams:i,setKeys:c}=e,[d,m]=(0,n.useState)(null),[u,h]=(0,n.useState)(null),[x,p]=(0,n.useState)(0),[j,g]=n.useState(null),[y,f]=(0,n.useState)(null);return((0,n.useEffect)(()=>{if(!l||!t||!a||!o)return;let e=async()=>{try{let e=await Z(l,null,a,!0,x,25);console.log("user data response:",e),m(e)}catch(e){console.error("There was an error fetching the model data",e)}};l&&t&&a&&o&&e()},[l,t,a,o,x]),d&&l&&t&&a&&o)?(0,r.jsx)("div",{style:{width:"100%"},children:(0,r.jsxs)(W.Z,{className:"gap-2 p-2 h-[80vh] w-full mt-8",children:[(0,r.jsx)(e7,{userID:o,accessToken:l,teams:i}),(0,r.jsxs)(ep.Z,{className:"w-full mx-auto flex-auto overflow-y-auto max-h-[80vh] mb-4",children:[(0,r.jsx)("div",{className:"mb-4 mt-1",children:(0,r.jsx)(ee.Z,{children:"These are Users on LiteLLM that created API Keys. Automatically tracked by LiteLLM"})}),(0,r.jsx)(eM.Z,{children:(0,r.jsxs)(eD.Z,{children:[(0,r.jsx)(eU.Z,{children:(0,r.jsxs)(eg.Z,{className:"mt-5",children:[(0,r.jsx)(eZ.Z,{children:(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(e_.Z,{children:"User ID"}),(0,r.jsx)(e_.Z,{children:"User Email"}),(0,r.jsx)(e_.Z,{children:"User Models"}),(0,r.jsx)(e_.Z,{children:"User Spend ($ USD)"}),(0,r.jsx)(e_.Z,{children:"User Max Budget ($ USD)"}),(0,r.jsx)(e_.Z,{children:"User API Key Aliases"})]})}),(0,r.jsx)(ey.Z,{children:d.map(e=>{var l;return(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(ef.Z,{children:e.user_id}),(0,r.jsx)(ef.Z,{children:e.user_email}),(0,r.jsx)(ef.Z,{children:e.models&&e.models.length>0?e.models:"All Models"}),(0,r.jsx)(ef.Z,{children:e.spend?null===(l=e.spend)||void 0===l?void 0:l.toFixed(2):0}),(0,r.jsx)(ef.Z,{children:e.max_budget?e.max_budget:"Unlimited"}),(0,r.jsx)(ef.Z,{children:(0,r.jsx)(W.Z,{numItems:2,children:e&&e.key_aliases&&e.key_aliases.filter(e=>null!==e).length>0?(0,r.jsx)(ex.Z,{size:"xs",color:"indigo",children:e.key_aliases.filter(e=>null!==e).join(", ")}):(0,r.jsx)(ex.Z,{size:"xs",color:"gray",children:"No Keys"})})})]},e.user_id)})})]})}),(0,r.jsx)(eU.Z,{children:(0,r.jsxs)("div",{className:"flex items-center",children:[(0,r.jsx)("div",{className:"flex-1"}),(0,r.jsx)("div",{className:"flex-1 flex justify-between items-center"})]})})]})})]}),function(){if(!d)return null;let e=Math.ceil(d.length/25);return(0,r.jsxs)("div",{className:"flex justify-between items-center",children:[(0,r.jsxs)("div",{children:["Showing Page ",x+1," of ",e]}),(0,r.jsxs)("div",{className:"flex",children:[(0,r.jsx)("button",{className:"bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded-l focus:outline-none",disabled:0===x,onClick:()=>p(x-1),children:"β Prev"}),(0,r.jsx)("button",{className:"bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded-r 
focus:outline-none",onClick:()=>{p(x+1)},children:"Next β"})]})]})}()]})}):(0,r.jsx)("div",{children:"Loading..."})},le=e=>{let{teams:l,searchParams:t,accessToken:s,setTeams:a,userID:o,userRole:i}=e,[c]=ea.Z.useForm(),[d]=ea.Z.useForm(),{Title:m,Paragraph:h}=eT.default,[x,p]=(0,n.useState)(""),[j,g]=(0,n.useState)(!1),[y,Z]=(0,n.useState)(l?l[0]:null),[w,b]=(0,n.useState)(!1),[k,v]=(0,n.useState)(!1),[S,A]=(0,n.useState)([]),[E,I]=(0,n.useState)(!1),[C,P]=(0,n.useState)(null),[T,O]=(0,n.useState)({}),F=e=>{Z(e),g(!0)},R=async e=>{let t=e.team_id;if(console.log("handleEditSubmit:",e),null==s)return;let r=await U(s,e);l&&a(l.map(e=>e.team_id===t?r.data:e)),u.ZP.success("Team updated successfully"),g(!1),Z(null)},L=async e=>{P(e),I(!0)},D=async()=>{if(null!=C&&null!=l&&null!=s){try{await f(s,C);let e=l.filter(e=>e.team_id!==C);a(e)}catch(e){console.error("Error deleting the team:",e)}I(!1),P(null)}};(0,n.useEffect)(()=>{let e=async()=>{try{if(null===o||null===i||null===s||null===l)return;console.log("fetching team info:");let e={};for(let t=0;t<(null==l?void 0:l.length);t++){let a=l[t].team_id,r=await _(s,a);console.log("teamInfo response:",r),null!==r&&(e={...e,[a]:r})}O(e)}catch(e){console.error("Error fetching team info:",e)}};(async()=>{try{if(null===o||null===i)return;if(null!==s){let e=(await N(s,o,i)).data.map(e=>e.id);console.log("available_model_names:",e),A(e)}}catch(e){console.error("Error fetching user models:",e)}})(),e()},[s,o,i,l]);let B=async e=>{try{if(null!=s){var t;let r=null==e?void 0:e.team_alias;if((null!==(t=null==l?void 0:l.map(e=>e.team_alias))&&void 0!==t?t:[]).includes(r))throw Error("Team alias ".concat(r," already exists, please pick another alias"));u.ZP.info("Creating Team");let n=await M(s,e);null!==l?a([...l,n]):a([n]),console.log("response for team create call: ".concat(n)),u.ZP.success("Team created"),b(!1)}}catch(e){console.error("Error creating the team:",e),u.ZP.error("Error creating the team: "+e,20)}},z=async e=>{try{if(null!=s&&null!=l){u.ZP.info("Adding Member");let t={role:"user",user_email:e.user_email,user_id:e.user_id},r=await K(s,y.team_id,t);console.log("response for team create call: ".concat(r.data));let n=l.findIndex(e=>(console.log("team.team_id=".concat(e.team_id,"; response.data.team_id=").concat(r.data.team_id)),e.team_id===r.data.team_id));if(console.log("foundIndex: ".concat(n)),-1!==n){let e=[...l];e[n]=r.data,a(e),Z(r.data)}v(!1)}}catch(e){console.error("Error creating the team:",e)}};return console.log("received teams ".concat(JSON.stringify(l))),(0,r.jsx)("div",{className:"w-full mx-4",children:(0,r.jsxs)(W.Z,{numItems:1,className:"gap-2 p-8 h-[75vh] w-full mt-2",children:[(0,r.jsxs)(Y.Z,{numColSpan:1,children:[(0,r.jsx)(m,{level:4,children:"All Teams"}),(0,r.jsxs)(ep.Z,{className:"w-full mx-auto flex-auto overflow-y-auto max-h-[50vh]",children:[(0,r.jsxs)(eg.Z,{children:[(0,r.jsx)(eZ.Z,{children:(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(e_.Z,{children:"Team Name"}),(0,r.jsx)(e_.Z,{children:"Spend (USD)"}),(0,r.jsx)(e_.Z,{children:"Budget (USD)"}),(0,r.jsx)(e_.Z,{children:"Models"}),(0,r.jsx)(e_.Z,{children:"TPM / RPM 
Limits"}),(0,r.jsx)(e_.Z,{children:"Info"})]})}),(0,r.jsx)(ey.Z,{children:l&&l.length>0?l.map(e=>(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(ef.Z,{style:{maxWidth:"4px",whiteSpace:"pre-wrap",overflow:"hidden"},children:e.team_alias}),(0,r.jsx)(ef.Z,{style:{maxWidth:"4px",whiteSpace:"pre-wrap",overflow:"hidden"},children:e.spend}),(0,r.jsx)(ef.Z,{style:{maxWidth:"4px",whiteSpace:"pre-wrap",overflow:"hidden"},children:e.max_budget?e.max_budget:"No limit"}),(0,r.jsx)(ef.Z,{style:{maxWidth:"8-x",whiteSpace:"pre-wrap",overflow:"hidden"},children:Array.isArray(e.models)?(0,r.jsx)("div",{style:{display:"flex",flexDirection:"column"},children:0===e.models.length?(0,r.jsx)(ex.Z,{size:"xs",className:"mb-1",color:"red",children:(0,r.jsx)(ee.Z,{children:"All Proxy Models"})}):e.models.map((e,l)=>"all-proxy-models"===e?(0,r.jsx)(ex.Z,{size:"xs",className:"mb-1",color:"red",children:(0,r.jsx)(ee.Z,{children:"All Proxy Models"})},l):(0,r.jsx)(ex.Z,{size:"xs",className:"mb-1",color:"blue",children:(0,r.jsx)(ee.Z,{children:e.length>30?"".concat(e.slice(0,30),"..."):e})},l))}):null}),(0,r.jsx)(ef.Z,{style:{maxWidth:"4px",whiteSpace:"pre-wrap",overflow:"hidden"},children:(0,r.jsxs)(ee.Z,{children:["TPM: ",e.tpm_limit?e.tpm_limit:"Unlimited"," ",(0,r.jsx)("br",{}),"RPM:"," ",e.rpm_limit?e.rpm_limit:"Unlimited"]})}),(0,r.jsxs)(ef.Z,{children:[(0,r.jsxs)(ee.Z,{children:[T&&e.team_id&&T[e.team_id]&&T[e.team_id].keys&&T[e.team_id].keys.length," ","Keys"]}),(0,r.jsxs)(ee.Z,{children:[T&&e.team_id&&T[e.team_id]&&T[e.team_id].team_info&&T[e.team_id].team_info.members_with_roles&&T[e.team_id].team_info.members_with_roles.length," ","Members"]})]}),(0,r.jsxs)(ef.Z,{children:[(0,r.jsx)(ej.Z,{icon:eu.Z,size:"sm",onClick:()=>F(e)}),(0,r.jsx)(ej.Z,{onClick:()=>L(e.team_id),icon:eh.Z,size:"sm"})]})]},e.team_id)):null})]}),E&&(0,r.jsx)("div",{className:"fixed z-10 inset-0 overflow-y-auto",children:(0,r.jsxs)("div",{className:"flex items-end justify-center min-h-screen pt-4 px-4 pb-20 text-center sm:block sm:p-0",children:[(0,r.jsx)("div",{className:"fixed inset-0 transition-opacity","aria-hidden":"true",children:(0,r.jsx)("div",{className:"absolute inset-0 bg-gray-500 opacity-75"})}),(0,r.jsx)("span",{className:"hidden sm:inline-block sm:align-middle sm:h-screen","aria-hidden":"true",children:"β"}),(0,r.jsxs)("div",{className:"inline-block align-bottom bg-white rounded-lg text-left overflow-hidden shadow-xl transform transition-all sm:my-8 sm:align-middle sm:max-w-lg sm:w-full",children:[(0,r.jsx)("div",{className:"bg-white px-4 pt-5 pb-4 sm:p-6 sm:pb-4",children:(0,r.jsx)("div",{className:"sm:flex sm:items-start",children:(0,r.jsxs)("div",{className:"mt-3 text-center sm:mt-0 sm:ml-4 sm:text-left",children:[(0,r.jsx)("h3",{className:"text-lg leading-6 font-medium text-gray-900",children:"Delete Team"}),(0,r.jsx)("div",{className:"mt-2",children:(0,r.jsx)("p",{className:"text-sm text-gray-500",children:"Are you sure you want to delete this team ?"})})]})})}),(0,r.jsxs)("div",{className:"bg-gray-50 px-4 py-3 sm:px-6 sm:flex sm:flex-row-reverse",children:[(0,r.jsx)(J.Z,{onClick:D,color:"red",className:"ml-2",children:"Delete"}),(0,r.jsx)(J.Z,{onClick:()=>{I(!1),P(null)},children:"Cancel"})]})]})]})})]})]}),(0,r.jsxs)(Y.Z,{numColSpan:1,children:[(0,r.jsx)(J.Z,{className:"mx-auto",onClick:()=>b(!0),children:"+ Create New Team"}),(0,r.jsx)(er.Z,{title:"Create 
Team",visible:w,width:800,footer:null,onOk:()=>{b(!1),c.resetFields()},onCancel:()=>{b(!1),c.resetFields()},children:(0,r.jsxs)(ea.Z,{form:c,onFinish:B,labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(ea.Z.Item,{label:"Team Name",name:"team_alias",rules:[{required:!0,message:"Please input a team name"}],children:(0,r.jsx)(H.Z,{placeholder:""})}),(0,r.jsx)(ea.Z.Item,{label:"Models",name:"models",children:(0,r.jsxs)(es.default,{mode:"multiple",placeholder:"Select models",style:{width:"100%"},children:[(0,r.jsx)(es.default.Option,{value:"all-proxy-models",children:"All Proxy Models"},"all-proxy-models"),S.map(e=>(0,r.jsx)(es.default.Option,{value:e,children:e},e))]})}),(0,r.jsx)(ea.Z.Item,{label:"Max Budget (USD)",name:"max_budget",children:(0,r.jsx)(eo.Z,{step:.01,precision:2,width:200})}),(0,r.jsx)(ea.Z.Item,{label:"Tokens per minute Limit (TPM)",name:"tpm_limit",children:(0,r.jsx)(eo.Z,{step:1,width:400})}),(0,r.jsx)(ea.Z.Item,{label:"Requests per minute Limit (RPM)",name:"rpm_limit",children:(0,r.jsx)(eo.Z,{step:1,width:400})})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(ei.ZP,{htmlType:"submit",children:"Create Team"})})]})})]}),(0,r.jsxs)(Y.Z,{numColSpan:1,children:[(0,r.jsx)(m,{level:4,children:"Team Members"}),(0,r.jsx)(h,{children:"If you belong to multiple teams, this setting controls which teams members you see."}),l&&l.length>0?(0,r.jsx)(eb.Z,{defaultValue:"0",children:l.map((e,l)=>(0,r.jsx)(ek.Z,{value:String(l),onClick:()=>{Z(e)},children:e.team_alias},l))}):(0,r.jsxs)(h,{children:["No team created. ",(0,r.jsx)("b",{children:"Defaulting to personal account."})]})]}),(0,r.jsxs)(Y.Z,{numColSpan:1,children:[(0,r.jsx)(ep.Z,{className:"w-full mx-auto flex-auto overflow-y-auto max-h-[50vh]",children:(0,r.jsxs)(eg.Z,{children:[(0,r.jsx)(eZ.Z,{children:(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(e_.Z,{children:"Member Name"}),(0,r.jsx)(e_.Z,{children:"Role"})]})}),(0,r.jsx)(ey.Z,{children:y?y.members_with_roles.map((e,l)=>(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(ef.Z,{children:e.user_email?e.user_email:e.user_id?e.user_id:null}),(0,r.jsx)(ef.Z,{children:e.role})]},l)):null})]})}),y&&(0,r.jsx)(e=>{let{visible:l,onCancel:t,team:s,onSubmit:a}=e,[n]=ea.Z.useForm();return(0,r.jsx)(er.Z,{title:"Edit Team",visible:l,width:800,footer:null,onOk:()=>{n.validateFields().then(e=>{a({...e,team_id:s.team_id}),n.resetFields()}).catch(e=>{console.error("Validation failed:",e)})},onCancel:t,children:(0,r.jsxs)(ea.Z,{form:n,onFinish:R,initialValues:s,labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(ea.Z.Item,{label:"Team Name",name:"team_alias",rules:[{required:!0,message:"Please input a team name"}],children:(0,r.jsx)(H.Z,{})}),(0,r.jsx)(ea.Z.Item,{label:"Models",name:"models",children:(0,r.jsxs)(es.default,{mode:"multiple",placeholder:"Select models",style:{width:"100%"},children:[(0,r.jsx)(es.default.Option,{value:"all-proxy-models",children:"All Proxy Models"},"all-proxy-models"),S&&S.map(e=>(0,r.jsx)(es.default.Option,{value:e,children:e},e))]})}),(0,r.jsx)(ea.Z.Item,{label:"Max Budget (USD)",name:"max_budget",children:(0,r.jsx)(eo.Z,{step:.01,precision:2,width:200})}),(0,r.jsx)(ea.Z.Item,{label:"Tokens per minute Limit (TPM)",name:"tpm_limit",children:(0,r.jsx)(eo.Z,{step:1,width:400})}),(0,r.jsx)(ea.Z.Item,{label:"Requests per minute Limit (RPM)",name:"rpm_limit",children:(0,r.jsx)(eo.Z,{step:1,width:400})}),(0,r.jsx)(ea.Z.Item,{label:"Requests per 
minute Limit (RPM)",name:"team_id",hidden:!0})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(ei.ZP,{htmlType:"submit",children:"Edit Team"})})]})})},{visible:j,onCancel:()=>{g(!1),Z(null)},team:y,onSubmit:R})]}),(0,r.jsxs)(Y.Z,{numColSpan:1,children:[(0,r.jsx)(J.Z,{className:"mx-auto mb-5",onClick:()=>v(!0),children:"+ Add member"}),(0,r.jsx)(er.Z,{title:"Add member",visible:k,width:800,footer:null,onOk:()=>{v(!1),d.resetFields()},onCancel:()=>{v(!1),d.resetFields()},children:(0,r.jsxs)(ea.Z,{form:c,onFinish:z,labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(ea.Z.Item,{label:"Email",name:"user_email",className:"mb-4",children:(0,r.jsx)(en.Z,{name:"user_email",className:"px-3 py-2 border rounded-md w-full"})}),(0,r.jsx)("div",{className:"text-center mb-4",children:"OR"}),(0,r.jsx)(ea.Z.Item,{label:"User ID",name:"user_id",className:"mb-4",children:(0,r.jsx)(en.Z,{name:"user_id",className:"px-3 py-2 border rounded-md w-full"})})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(ei.ZP,{htmlType:"submit",children:"Add member"})})]})})]})]})})},ll=t(18190),lt=e=>{let l,{searchParams:t,accessToken:s,showSSOBanner:a}=e,[o]=ea.Z.useForm(),[i]=ea.Z.useForm(),{Title:c,Paragraph:d}=eT.default,[m,h]=(0,n.useState)(""),[x,p]=(0,n.useState)(null),[j,g]=(0,n.useState)(!1),[y,f]=(0,n.useState)(!1),[Z,_]=(0,n.useState)(!1),[w,b]=(0,n.useState)(!1),[k,v]=(0,n.useState)(!1);try{l=window.location.origin}catch(e){l=""}l+="/fallback/login";let S=()=>{v(!1)},N=["proxy_admin","proxy_admin_viewer"];(0,n.useEffect)(()=>{(async()=>{if(null!=s){let e=[],l=await R(s,"proxy_admin_viewer");l.forEach(l=>{e.push({user_role:l.user_role,user_id:l.user_id,user_email:l.user_email})}),console.log("proxy viewers: ".concat(l));let t=await R(s,"proxy_admin");t.forEach(l=>{e.push({user_role:l.user_role,user_id:l.user_id,user_email:l.user_email})}),console.log("proxy admins: ".concat(t)),console.log("combinedList: ".concat(e)),p(e)}})()},[s]);let A=()=>{_(!1),i.resetFields()},E=()=>{_(!1),i.resetFields()},I=e=>(0,r.jsxs)(ea.Z,{form:o,onFinish:e,labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(ea.Z.Item,{label:"Email",name:"user_email",className:"mb-4",children:(0,r.jsx)(en.Z,{name:"user_email",className:"px-3 py-2 border rounded-md w-full"})}),(0,r.jsx)("div",{className:"text-center mb-4",children:"OR"}),(0,r.jsx)(ea.Z.Item,{label:"User ID",name:"user_id",className:"mb-4",children:(0,r.jsx)(en.Z,{name:"user_id",className:"px-3 py-2 border rounded-md w-full"})})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(ei.ZP,{htmlType:"submit",children:"Add member"})})]}),C=(e,l,t)=>(0,r.jsxs)(ea.Z,{form:o,onFinish:e,labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(ea.Z.Item,{rules:[{required:!0,message:"Required"}],label:"User Role",name:"user_role",labelCol:{span:10},labelAlign:"left",children:(0,r.jsx)(eb.Z,{value:l,children:N.map((e,l)=>(0,r.jsx)(ek.Z,{value:e,children:e},l))})}),(0,r.jsx)(ea.Z.Item,{label:"Team ID",name:"user_id",hidden:!0,initialValue:t,valuePropName:"user_id",className:"mt-8",children:(0,r.jsx)(en.Z,{value:t,disabled:!0})})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(ei.ZP,{htmlType:"submit",children:"Update role"})})]}),P=async e=>{try{if(null!=s&&null!=x){u.ZP.info("Making API Call");let l=await 
B(s,e,null);console.log("response for team create call: ".concat(l));let t=x.findIndex(e=>(console.log("user.user_id=".concat(e.user_id,"; response.user_id=").concat(l.user_id)),e.user_id===l.user_id));console.log("foundIndex: ".concat(t)),-1==t&&(console.log("updates admin with new user"),x.push(l),p(x)),u.ZP.success("Refresh tab to see updated user role"),_(!1)}}catch(e){console.error("Error creating the key:",e)}},T=async e=>{try{if(null!=s&&null!=x){u.ZP.info("Making API Call");let l=await B(s,e,"proxy_admin_viewer");console.log("response for team create call: ".concat(l));let t=x.findIndex(e=>(console.log("user.user_id=".concat(e.user_id,"; response.user_id=").concat(l.user_id)),e.user_id===l.user_id));console.log("foundIndex: ".concat(t)),-1==t&&(console.log("updates admin with new user"),x.push(l),p(x)),g(!1)}}catch(e){console.error("Error creating the key:",e)}},O=async e=>{try{if(null!=s&&null!=x){u.ZP.info("Making API Call"),e.user_email,e.user_id;let l=await B(s,e,"proxy_admin");console.log("response for team create call: ".concat(l));let t=x.findIndex(e=>(console.log("user.user_id=".concat(e.user_id,"; response.user_id=").concat(l.user_id)),e.user_id===l.user_id));console.log("foundIndex: ".concat(t)),-1==t&&(console.log("updates admin with new user"),x.push(l),p(x)),f(!1)}}catch(e){console.error("Error creating the key:",e)}},F=async e=>{null!=s&&V(s,{environment_variables:{PROXY_BASE_URL:e.proxy_base_url,GOOGLE_CLIENT_ID:e.google_client_id,GOOGLE_CLIENT_SECRET:e.google_client_secret}})};return console.log("admins: ".concat(null==x?void 0:x.length)),(0,r.jsxs)("div",{className:"w-full m-2 mt-2 p-8",children:[(0,r.jsx)(c,{level:4,children:"Admin Access "}),(0,r.jsxs)(d,{children:[a&&(0,r.jsx)("a",{href:"https://docs.litellm.ai/docs/proxy/ui#restrict-ui-access",children:"Requires SSO Setup"}),(0,r.jsx)("br",{}),(0,r.jsx)("b",{children:"Proxy Admin: "})," Can create keys, teams, users, add models, etc. ",(0,r.jsx)("br",{}),(0,r.jsx)("b",{children:"Proxy Admin Viewer: "}),"Can just view spend. 
They cannot create keys, teams or grant users access to new models."," "]}),(0,r.jsxs)(W.Z,{numItems:1,className:"gap-2 p-2 w-full",children:[(0,r.jsx)(Y.Z,{numColSpan:1,children:(0,r.jsx)(ep.Z,{className:"w-full mx-auto flex-auto overflow-y-auto max-h-[50vh]",children:(0,r.jsxs)(eg.Z,{children:[(0,r.jsx)(eZ.Z,{children:(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(e_.Z,{children:"Member Name"}),(0,r.jsx)(e_.Z,{children:"Role"})]})}),(0,r.jsx)(ey.Z,{children:x?x.map((e,l)=>(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(ef.Z,{children:e.user_email?e.user_email:e.user_id?e.user_id:null}),(0,r.jsx)(ef.Z,{children:e.user_role}),(0,r.jsxs)(ef.Z,{children:[(0,r.jsx)(ej.Z,{icon:eu.Z,size:"sm",onClick:()=>_(!0)}),(0,r.jsx)(er.Z,{title:"Update role",visible:Z,width:800,footer:null,onOk:A,onCancel:E,children:C(P,e.user_role,e.user_id)})]})]},l)):null})]})})}),(0,r.jsx)(Y.Z,{numColSpan:1,children:(0,r.jsxs)("div",{className:"flex justify-start",children:[(0,r.jsx)(J.Z,{className:"mr-4 mb-5",onClick:()=>f(!0),children:"+ Add admin"}),(0,r.jsx)(er.Z,{title:"Add admin",visible:y,width:800,footer:null,onOk:()=>{f(!1),i.resetFields()},onCancel:()=>{f(!1),i.resetFields()},children:I(O)}),(0,r.jsx)(J.Z,{className:"mb-5",onClick:()=>g(!0),children:"+ Add viewer"}),(0,r.jsx)(er.Z,{title:"Add viewer",visible:j,width:800,footer:null,onOk:()=>{g(!1),i.resetFields()},onCancel:()=>{g(!1),i.resetFields()},children:I(T)})]})})]}),(0,r.jsxs)(W.Z,{children:[(0,r.jsx)(c,{level:4,children:"Add SSO"}),(0,r.jsxs)("div",{className:"flex justify-start mb-4",children:[(0,r.jsx)(J.Z,{onClick:()=>b(!0),children:"Add SSO"}),(0,r.jsx)(er.Z,{title:"Add SSO",visible:w,width:800,footer:null,onOk:()=>{b(!1),o.resetFields()},onCancel:()=>{b(!1),o.resetFields()},children:(0,r.jsxs)(ea.Z,{form:o,onFinish:e=>{O(e),F(e),b(!1),v(!0)},labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(ea.Z.Item,{label:"Admin Email",name:"user_email",rules:[{required:!0,message:"Please enter the email of the proxy admin"}],children:(0,r.jsx)(en.Z,{})}),(0,r.jsx)(ea.Z.Item,{label:"PROXY BASE URL",name:"proxy_base_url",rules:[{required:!0,message:"Please enter the proxy base url"}],children:(0,r.jsx)(en.Z,{})}),(0,r.jsx)(ea.Z.Item,{label:"GOOGLE CLIENT ID",name:"google_client_id",rules:[{required:!0,message:"Please enter the google client id"}],children:(0,r.jsx)(en.Z.Password,{})}),(0,r.jsx)(ea.Z.Item,{label:"GOOGLE CLIENT SECRET",name:"google_client_secret",rules:[{required:!0,message:"Please enter the google client secret"}],children:(0,r.jsx)(en.Z.Password,{})})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(ei.ZP,{htmlType:"submit",children:"Save"})})]})}),(0,r.jsxs)(er.Z,{title:"SSO Setup Instructions",visible:k,width:800,footer:null,onOk:S,onCancel:()=>{v(!1)},children:[(0,r.jsx)("p",{children:"Follow these steps to complete the SSO setup:"}),(0,r.jsx)(ee.Z,{className:"mt-2",children:"1. DO NOT Exit this TAB"}),(0,r.jsx)(ee.Z,{className:"mt-2",children:"2. Open a new tab, visit your proxy base url"}),(0,r.jsx)(ee.Z,{className:"mt-2",children:"3. Confirm your SSO is configured correctly and you can login on the new Tab"}),(0,r.jsx)(ee.Z,{className:"mt-2",children:"4. 
If Step 3 is successful, you can close this tab"}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(ei.ZP,{onClick:S,children:"Done"})})]})]}),(0,r.jsxs)(ll.Z,{title:"Login without SSO",color:"teal",children:["If you need to login without sso, you can access ",(0,r.jsxs)("a",{href:l,target:"_blank",children:[(0,r.jsx)("b",{children:l})," "]})]})]})]})},ls=t(42556);let la=[{name:"slack",variables:{LANGFUSE_HOST:null,LANGFUSE_PUBLIC_KEY:null,LANGFUSE_SECRET_KEY:null,OPENMETER_API_KEY:null,SLACK_WEBHOOK_URL:null}},{name:"langfuse",variables:{LANGFUSE_HOST:null,LANGFUSE_PUBLIC_KEY:null,LANGFUSE_SECRET_KEY:null,OPENMETER_API_KEY:null,SLACK_WEBHOOK_URL:null}},{name:"openmeter",variables:{LANGFUSE_HOST:null,LANGFUSE_PUBLIC_KEY:null,LANGFUSE_SECRET_KEY:null,OPENMETER_API_KEY:null,SLACK_WEBHOOK_URL:null}}];var lr=e=>{let{accessToken:l,userRole:t,userID:s}=e,[a,o]=(0,n.useState)(la),[i,c]=(0,n.useState)([]),[d,m]=(0,n.useState)(!1),[h]=ea.Z.useForm(),[x,p]=(0,n.useState)(null),[j,g]=(0,n.useState)([]),[y,f]=(0,n.useState)(""),[Z,_]=(0,n.useState)({}),[w,b]=(0,n.useState)([]),k=e=>{w.includes(e)?b(w.filter(l=>l!==e)):b([...w,e])},v={llm_exceptions:"LLM Exceptions",llm_too_slow:"LLM Responses Too Slow",llm_requests_hanging:"LLM Requests Hanging",budget_alerts:"Budget Alerts (API Keys, Users)",db_exceptions:"Database Exceptions (Read/Write)",daily_reports:"Weekly/Monthly Spend Reports"};(0,n.useEffect)(()=>{l&&t&&s&&q(l,s,t).then(e=>{console.log("callbacks",e);let l=la;o(l=l.map(l=>{let t=e.callbacks.find(e=>e.name===l.name);return t?{...l,variables:{...l.variables,...t.variables}}:l}));let t=e.alerts;if(console.log("alerts_data",t),t&&t.length>0){let e=t[0];console.log("_alert_info",e);let l=e.variables.SLACK_WEBHOOK_URL;console.log("catch_all_webhook",l),b(e.active_alerts),f(l),_(e.alerts_to_webhook)}c(t)})},[l,t,s]);let S=e=>w&&w.includes(e),N=e=>{if(!l)return;let t=Object.fromEntries(Object.entries(e.variables).map(e=>{var l;let[t,s]=e;return[t,(null===(l=document.querySelector('input[name="'.concat(t,'"]')))||void 0===l?void 0:l.value)||s]}));console.log("updatedVariables",t),console.log("updateAlertTypes",j);let s={environment_variables:t,litellm_settings:{success_callback:[e.name]}};try{V(l,s)}catch(e){u.ZP.error("Failed to update callback: "+e,20)}u.ZP.success("Callback updated successfully")},A=()=>{l&&h.validateFields().then(e=>{if(console.log("Form values:",e),"langfuse"===e.callback){V(l,{environment_variables:{LANGFUSE_PUBLIC_KEY:e.langfusePublicKey,LANGFUSE_SECRET_KEY:e.langfusePrivateKey},litellm_settings:{success_callback:[e.callback]}});let t={name:e.callback,variables:{SLACK_WEBHOOK_URL:null,LANGFUSE_HOST:null,LANGFUSE_PUBLIC_KEY:e.langfusePublicKey,LANGFUSE_SECRET_KEY:e.langfusePrivateKey,OPENMETER_API_KEY:null}};o(a?[...a,t]:[t])}else if("slack"===e.callback){console.log("values.slackWebhookUrl: ".concat(e.slackWebhookUrl)),V(l,{general_settings:{alerting:["slack"],alerting_threshold:300},environment_variables:{SLACK_WEBHOOK_URL:e.slackWebhookUrl}}),console.log("values.callback: ".concat(e.callback));let t={name:e.callback,variables:{SLACK_WEBHOOK_URL:e.slackWebhookUrl,LANGFUSE_HOST:null,LANGFUSE_PUBLIC_KEY:null,LANGFUSE_SECRET_KEY:null,OPENMETER_API_KEY:null}};o(a?[...a,t]:[t])}else if("openmeter"==e.callback){console.log("values.openMeterApiKey: ".concat(e.openMeterApiKey)),V(l,{environment_variables:{OPENMETER_API_KEY:e.openMeterApiKey},litellm_settings:{success_callback:[e.callback]}});let 
t={name:e.callback,variables:{SLACK_WEBHOOK_URL:null,LANGFUSE_HOST:null,LANGFUSE_PUBLIC_KEY:null,LANGFUSE_SECRET_KEY:null,OPENMETER_API_KEY:e.openMeterAPIKey}};o(a?[...a,t]:[t])}m(!1),h.resetFields(),p(null)})};return l?(console.log("callbacks: ".concat(a)),(0,r.jsxs)("div",{className:"w-full mx-4",children:[(0,r.jsxs)(W.Z,{numItems:1,className:"gap-2 p-8 w-full mt-2",children:[(0,r.jsx)(ll.Z,{title:"[UI] Presidio PII + Guardrails Coming Soon. https://docs.litellm.ai/docs/proxy/pii_masking",color:"sky"}),(0,r.jsxs)(eM.Z,{children:[(0,r.jsxs)(eL.Z,{variant:"line",defaultValue:"1",children:[(0,r.jsx)(eR.Z,{value:"1",children:"Logging Callbacks"}),(0,r.jsx)(eR.Z,{value:"2",children:"Alerting"})]}),(0,r.jsxs)(eD.Z,{children:[(0,r.jsx)(eU.Z,{children:(0,r.jsx)(ep.Z,{children:(0,r.jsxs)(eg.Z,{children:[(0,r.jsx)(eZ.Z,{children:(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(e_.Z,{children:"Callback"}),(0,r.jsx)(e_.Z,{children:"Callback Env Vars"})]})}),(0,r.jsx)(ey.Z,{children:a.filter(e=>"slack"!==e.name).map((e,t)=>{var s;return(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(ef.Z,{children:(0,r.jsx)(ex.Z,{color:"emerald",children:e.name})}),(0,r.jsxs)(ef.Z,{children:[(0,r.jsx)("ul",{children:Object.entries(null!==(s=e.variables)&&void 0!==s?s:{}).filter(l=>{let[t,s]=l;return t.toLowerCase().includes(e.name)}).map(e=>{let[l,t]=e;return(0,r.jsxs)("li",{children:[(0,r.jsx)(ee.Z,{className:"mt-2",children:l}),"LANGFUSE_HOST"===l?(0,r.jsx)("p",{children:"default value=https://cloud.langfuse.com"}):(0,r.jsx)("div",{}),(0,r.jsx)(H.Z,{name:l,defaultValue:t,type:"password"})]},l)})}),(0,r.jsx)(J.Z,{className:"mt-2",onClick:()=>N(e),children:"Save Changes"}),(0,r.jsx)(J.Z,{onClick:()=>z(l,e.name),className:"mx-2",children:"Test Callback"})]})]},t)})})]})})}),(0,r.jsx)(eU.Z,{children:(0,r.jsxs)(ep.Z,{children:[(0,r.jsxs)(ee.Z,{className:"my-2",children:["Alerts are only supported for Slack Webhook URLs. 
Get your webhook urls from ",(0,r.jsx)("a",{href:"https://api.slack.com/messaging/webhooks",target:"_blank",style:{color:"blue"},children:"here"})]}),(0,r.jsxs)(eg.Z,{children:[(0,r.jsx)(eZ.Z,{children:(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(e_.Z,{}),(0,r.jsx)(e_.Z,{}),(0,r.jsx)(e_.Z,{children:"Slack Webhook URL"})]})}),(0,r.jsx)(ey.Z,{children:Object.entries(v).map((e,l)=>{let[t,s]=e;return(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(ef.Z,{children:(0,r.jsx)(ls.Z,{id:"switch",name:"switch",checked:S(t),onChange:()=>k(t)})}),(0,r.jsx)(ef.Z,{children:(0,r.jsx)(ee.Z,{children:s})}),(0,r.jsx)(ef.Z,{children:(0,r.jsx)(H.Z,{name:t,type:"password",defaultValue:Z&&Z[t]?Z[t]:y})})]},l)})})]}),(0,r.jsx)(J.Z,{size:"xs",className:"mt-2",onClick:()=>{if(!l)return;let e={};Object.entries(v).forEach(l=>{let[t,s]=l,a=document.querySelector('input[name="'.concat(t,'"]'));console.log("key",t),console.log("webhookInput",a);let r=(null==a?void 0:a.value)||"";console.log("newWebhookValue",r),e[t]=r}),console.log("updatedAlertToWebhooks",e);let t={general_settings:{alert_to_webhook_url:e,alert_types:w}};console.log("payload",t);try{V(l,t)}catch(e){u.ZP.error("Failed to update alerts: "+e,20)}u.ZP.success("Alerts updated successfully")},children:"Save Changes"}),(0,r.jsx)(J.Z,{onClick:()=>z(l,"slack"),className:"mx-2",children:"Test Alerts"})]})})]})]})]}),(0,r.jsx)(er.Z,{title:"Add Callback",visible:d,onOk:A,width:800,onCancel:()=>{m(!1),h.resetFields(),p(null)},footer:null,children:(0,r.jsxs)(ea.Z,{form:h,layout:"vertical",onFinish:A,children:[(0,r.jsx)(ea.Z.Item,{label:"Callback",name:"callback",rules:[{required:!0,message:"Please select a callback"}],children:(0,r.jsxs)(es.default,{onChange:e=>{p(e)},children:[(0,r.jsx)(es.default.Option,{value:"langfuse",children:"langfuse"}),(0,r.jsx)(es.default.Option,{value:"openmeter",children:"openmeter"})]})}),"langfuse"===x&&(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(ea.Z.Item,{label:"LANGFUSE_PUBLIC_KEY",name:"langfusePublicKey",rules:[{required:!0,message:"Please enter the public key"}],children:(0,r.jsx)(H.Z,{type:"password"})}),(0,r.jsx)(ea.Z.Item,{label:"LANGFUSE_PRIVATE_KEY",name:"langfusePrivateKey",rules:[{required:!0,message:"Please enter the private key"}],children:(0,r.jsx)(H.Z,{type:"password"})})]}),"openmeter"==x&&(0,r.jsx)(r.Fragment,{children:(0,r.jsx)(ea.Z.Item,{label:"OPENMETER_API_KEY",name:"openMeterApiKey",rules:[{required:!0,message:"Please enter the openmeter api key"}],children:(0,r.jsx)(H.Z,{type:"password"})})}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(ei.ZP,{htmlType:"submit",children:"Save"})})]})})]})):null};let{Option:ln}=es.default;var lo=e=>{let{models:l,accessToken:t,routerSettings:s,setRouterSettings:a}=e,[o]=ea.Z.useForm(),[i,c]=(0,n.useState)(!1),[d,m]=(0,n.useState)("");return(0,r.jsxs)("div",{children:[(0,r.jsx)(J.Z,{className:"mx-auto",onClick:()=>c(!0),children:"+ Add Fallbacks"}),(0,r.jsx)(er.Z,{title:"Add Fallbacks",visible:i,width:800,footer:null,onOk:()=>{c(!1),o.resetFields()},onCancel:()=>{c(!1),o.resetFields()},children:(0,r.jsxs)(ea.Z,{form:o,onFinish:e=>{console.log(e);let{model_name:l,models:r}=e,n=[...s.fallbacks||[],{[l]:r}],i={...s,fallbacks:n};console.log(i);try{V(t,{router_settings:i}),a(i)}catch(e){u.ZP.error("Failed to update router settings: "+e,20)}u.ZP.success("router settings updated successfully"),c(!1),o.resetFields()},labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(ea.Z.Item,{label:"Public Model 
Name",name:"model_name",rules:[{required:!0,message:"Set the model to fallback for"}],help:"required",children:(0,r.jsx)(eb.Z,{defaultValue:d,children:l&&l.map((e,l)=>(0,r.jsx)(ek.Z,{value:e,onClick:()=>m(e),children:e},l))})}),(0,r.jsx)(ea.Z.Item,{label:"Fallback Models",name:"models",rules:[{required:!0,message:"Please select a model"}],help:"required",children:(0,r.jsx)(eK.Z,{value:l,children:l&&l.filter(e=>e!=d).map(e=>(0,r.jsx)(eB.Z,{value:e,children:e},e))})})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(ei.ZP,{htmlType:"submit",children:"Add Fallbacks"})})]})})]})},li=t(12968);async function lc(e,l){console.log("isLocal:",!1);let t=window.location.origin,s=new li.ZP.OpenAI({apiKey:l,baseURL:t,dangerouslyAllowBrowser:!0});try{let l=await s.chat.completions.create({model:e,messages:[{role:"user",content:"Hi, this is a test message"}],mock_testing_fallbacks:!0});u.ZP.success((0,r.jsxs)("span",{children:["Test model=",(0,r.jsx)("strong",{children:e}),", received model=",(0,r.jsx)("strong",{children:l.model}),". See ",(0,r.jsx)("a",{href:"#",onClick:()=>window.open("https://docs.litellm.ai/docs/proxy/reliability","_blank"),style:{textDecoration:"underline",color:"blue"},children:"curl"})]}))}catch(e){u.ZP.error("Error occurred while generating model response. Please try again. Error: ".concat(e),20)}}let ld={ttl:3600,lowest_latency_buffer:0},lm=e=>{let{selectedStrategy:l,strategyArgs:t,paramExplanation:s}=e;return(0,r.jsxs)($.Z,{children:[(0,r.jsx)(Q.Z,{className:"text-sm font-medium text-tremor-content-strong dark:text-dark-tremor-content-strong",children:"Routing Strategy Specific Args"}),(0,r.jsx)(X.Z,{children:"latency-based-routing"==l?(0,r.jsx)(ep.Z,{children:(0,r.jsxs)(eg.Z,{children:[(0,r.jsx)(eZ.Z,{children:(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(e_.Z,{children:"Setting"}),(0,r.jsx)(e_.Z,{children:"Value"})]})}),(0,r.jsx)(ey.Z,{children:Object.entries(t).map(e=>{let[l,t]=e;return(0,r.jsxs)(ew.Z,{children:[(0,r.jsxs)(ef.Z,{children:[(0,r.jsx)(ee.Z,{children:l}),(0,r.jsx)("p",{style:{fontSize:"0.65rem",color:"#808080",fontStyle:"italic"},className:"mt-1",children:s[l]})]}),(0,r.jsx)(ef.Z,{children:(0,r.jsx)(H.Z,{name:l,defaultValue:"object"==typeof t?JSON.stringify(t,null,2):t.toString()})})]},l)})})]})}):(0,r.jsx)(ee.Z,{children:"No specific settings"})})]})};var lu=e=>{let{accessToken:l,userRole:t,userID:s,modelData:a}=e,[o,i]=(0,n.useState)({}),[c,d]=(0,n.useState)(!1),[m]=ea.Z.useForm(),[h,x]=(0,n.useState)(null),[p,j]=(0,n.useState)(null),[g,y]=(0,n.useState)(null),f={routing_strategy_args:"(dict) Arguments to pass to the routing strategy",routing_strategy:"(string) Routing strategy to use",allowed_fails:"(int) Number of times a deployment can fail before being added to cooldown",cooldown_time:"(int) time in seconds to cooldown a deployment after failure",num_retries:"(int) Number of retries for failed requests. Defaults to 0.",timeout:"(float) Timeout for requests. Defaults to None.",retry_after:"(int) Minimum time to wait before retrying a failed request",ttl:"(int) Sliding window to look back over when calculating the average latency of a deployment. Default - 1 hour (in seconds).",lowest_latency_buffer:"(float) Shuffle between deployments within this % of the lowest latency. Default - 0 (i.e. 
always pick lowest latency)."};(0,n.useEffect)(()=>{l&&t&&s&&q(l,s,t).then(e=>{console.log("callbacks",e),i(e.router_settings)})},[l,t,s]);let Z=async e=>{if(l){console.log("received key: ".concat(e)),console.log("routerSettings['fallbacks']: ".concat(o.fallbacks)),o.fallbacks.map(l=>(e in l&&delete l[e],l));try{await V(l,{router_settings:o}),i({...o}),j(o.routing_strategy),u.ZP.success("Router settings updated successfully")}catch(e){u.ZP.error("Failed to update router settings: "+e,20)}}},_=e=>{if(!l)return;console.log("router_settings",e);let t=Object.fromEntries(Object.entries(e).map(e=>{let[l,t]=e;if("routing_strategy_args"!==l&&"routing_strategy"!==l){var s;return[l,(null===(s=document.querySelector('input[name="'.concat(l,'"]')))||void 0===s?void 0:s.value)||t]}if("routing_strategy"==l)return[l,p];if("routing_strategy_args"==l&&"latency-based-routing"==p){let e={},l=document.querySelector('input[name="lowest_latency_buffer"]'),t=document.querySelector('input[name="ttl"]');return(null==l?void 0:l.value)&&(e.lowest_latency_buffer=Number(l.value)),(null==t?void 0:t.value)&&(e.ttl=Number(t.value)),console.log("setRoutingStrategyArgs: ".concat(e)),["routing_strategy_args",e]}return null}).filter(e=>null!=e));console.log("updatedVariables",t);try{V(l,{router_settings:t})}catch(e){u.ZP.error("Failed to update router settings: "+e,20)}u.ZP.success("router settings updated successfully")};return l?(0,r.jsx)("div",{className:"w-full mx-4",children:(0,r.jsxs)(eM.Z,{className:"gap-2 p-8 h-[75vh] w-full mt-2",children:[(0,r.jsxs)(eL.Z,{variant:"line",defaultValue:"1",children:[(0,r.jsx)(eR.Z,{value:"1",children:"General Settings"}),(0,r.jsx)(eR.Z,{value:"2",children:"Fallbacks"})]}),(0,r.jsxs)(eD.Z,{children:[(0,r.jsx)(eU.Z,{children:(0,r.jsxs)(W.Z,{numItems:1,className:"gap-2 p-8 w-full mt-2",children:[(0,r.jsx)(el.Z,{children:"Router Settings"}),(0,r.jsxs)(ep.Z,{children:[(0,r.jsxs)(eg.Z,{children:[(0,r.jsx)(eZ.Z,{children:(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(e_.Z,{children:"Setting"}),(0,r.jsx)(e_.Z,{children:"Value"})]})}),(0,r.jsx)(ey.Z,{children:Object.entries(o).filter(e=>{let[l,t]=e;return"fallbacks"!=l&&"context_window_fallbacks"!=l&&"routing_strategy_args"!=l}).map(e=>{let[l,t]=e;return(0,r.jsxs)(ew.Z,{children:[(0,r.jsxs)(ef.Z,{children:[(0,r.jsx)(ee.Z,{children:l}),(0,r.jsx)("p",{style:{fontSize:"0.65rem",color:"#808080",fontStyle:"italic"},className:"mt-1",children:f[l]})]}),(0,r.jsx)(ef.Z,{children:"routing_strategy"==l?(0,r.jsxs)(eb.Z,{defaultValue:t,className:"w-full max-w-md",onValueChange:j,children:[(0,r.jsx)(ek.Z,{value:"usage-based-routing",children:"usage-based-routing"}),(0,r.jsx)(ek.Z,{value:"latency-based-routing",children:"latency-based-routing"}),(0,r.jsx)(ek.Z,{value:"simple-shuffle",children:"simple-shuffle"})]}):(0,r.jsx)(H.Z,{name:l,defaultValue:"object"==typeof t?JSON.stringify(t,null,2):t.toString()})})]},l)})})]}),(0,r.jsx)(lm,{selectedStrategy:p,strategyArgs:o&&o.routing_strategy_args&&Object.keys(o.routing_strategy_args).length>0?o.routing_strategy_args:ld,paramExplanation:f})]}),(0,r.jsx)(Y.Z,{children:(0,r.jsx)(J.Z,{className:"mt-2",onClick:()=>_(o),children:"Save Changes"})})]})}),(0,r.jsxs)(eU.Z,{children:[(0,r.jsxs)(eg.Z,{children:[(0,r.jsx)(eZ.Z,{children:(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(e_.Z,{children:"Model 
Name"}),(0,r.jsx)(e_.Z,{children:"Fallbacks"})]})}),(0,r.jsx)(ey.Z,{children:o.fallbacks&&o.fallbacks.map((e,t)=>Object.entries(e).map(e=>{let[s,a]=e;return(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(ef.Z,{children:s}),(0,r.jsx)(ef.Z,{children:Array.isArray(a)?a.join(", "):a}),(0,r.jsx)(ef.Z,{children:(0,r.jsx)(J.Z,{onClick:()=>lc(s,l),children:"Test Fallback"})}),(0,r.jsx)(ef.Z,{children:(0,r.jsx)(ej.Z,{icon:eh.Z,size:"sm",onClick:()=>Z(s)})})]},t.toString()+s)}))})]}),(0,r.jsx)(lo,{models:(null==a?void 0:a.data)?a.data.map(e=>e.model_name):[],accessToken:l,routerSettings:o,setRouterSettings:i})]})]})]})}):null},lh=t(67951),lx=e=>{let{}=e;return(0,r.jsx)(r.Fragment,{children:(0,r.jsx)(W.Z,{className:"gap-2 p-8 h-[80vh] w-full mt-2",children:(0,r.jsxs)("div",{className:"mb-5",children:[(0,r.jsx)("p",{className:"text-2xl text-tremor-content-strong dark:text-dark-tremor-content-strong font-semibold",children:"OpenAI Compatible Proxy: API Reference"}),(0,r.jsx)(ee.Z,{className:"mt-2 mb-2",children:"LiteLLM is OpenAI Compatible. This means your API Key works with the OpenAI SDK. Just replace the base_url to point to your litellm proxy. Example Below "}),(0,r.jsxs)(eM.Z,{children:[(0,r.jsxs)(eL.Z,{children:[(0,r.jsx)(eR.Z,{children:"OpenAI Python SDK"}),(0,r.jsx)(eR.Z,{children:"LlamaIndex"}),(0,r.jsx)(eR.Z,{children:"Langchain Py"})]}),(0,r.jsxs)(eD.Z,{children:[(0,r.jsx)(eU.Z,{children:(0,r.jsx)(lh.Z,{language:"python",children:'\nimport openai\nclient = openai.OpenAI(\n api_key="your_api_key",\n base_url="http://0.0.0.0:4000" # LiteLLM Proxy is OpenAI compatible, Read More: https://docs.litellm.ai/docs/proxy/user_keys\n)\n\nresponse = client.chat.completions.create(\n model="gpt-3.5-turbo", # model to send to the proxy\n messages = [\n {\n "role": "user",\n "content": "this is a test request, write a short poem"\n }\n ]\n)\n\nprint(response)\n '})}),(0,r.jsx)(eU.Z,{children:(0,r.jsx)(lh.Z,{language:"python",children:'\nimport os, dotenv\n\nfrom llama_index.llms import AzureOpenAI\nfrom llama_index.embeddings import AzureOpenAIEmbedding\nfrom llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext\n\nllm = AzureOpenAI(\n engine="azure-gpt-3.5", # model_name on litellm proxy\n temperature=0.0,\n azure_endpoint="http://0.0.0.0:4000", # litellm proxy endpoint\n api_key="sk-1234", # litellm proxy API Key\n api_version="2023-07-01-preview",\n)\n\nembed_model = AzureOpenAIEmbedding(\n deployment_name="azure-embedding-model",\n azure_endpoint="http://0.0.0.0:4000",\n api_key="sk-1234",\n api_version="2023-07-01-preview",\n)\n\n\ndocuments = SimpleDirectoryReader("llama_index_data").load_data()\nservice_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)\nindex = VectorStoreIndex.from_documents(documents, service_context=service_context)\n\nquery_engine = index.as_query_engine()\nresponse = query_engine.query("What did the author do growing up?")\nprint(response)\n\n '})}),(0,r.jsx)(eU.Z,{children:(0,r.jsx)(lh.Z,{language:"python",children:'\nfrom langchain.chat_models import ChatOpenAI\nfrom langchain.prompts.chat import (\n ChatPromptTemplate,\n HumanMessagePromptTemplate,\n SystemMessagePromptTemplate,\n)\nfrom langchain.schema import HumanMessage, SystemMessage\n\nchat = ChatOpenAI(\n openai_api_base="http://0.0.0.0:4000",\n model = "gpt-3.5-turbo",\n temperature=0.1\n)\n\nmessages = [\n SystemMessage(\n content="You are a helpful assistant that im using to make a test request to."\n ),\n HumanMessage(\n content="test from litellm. 
tell me why it\'s amazing in 1 sentence"\n ),\n]\nresponse = chat(messages)\n\nprint(response)\n\n '})})]})]})]})})})};async function lp(e,l,t,s){console.log("isLocal:",!1);let a=window.location.origin,r=new li.ZP.OpenAI({apiKey:s,baseURL:a,dangerouslyAllowBrowser:!0});try{for await(let s of(await r.chat.completions.create({model:t,stream:!0,messages:[{role:"user",content:e}]})))console.log(s),s.choices[0].delta.content&&l(s.choices[0].delta.content)}catch(e){u.ZP.error("Error occurred while generating model response. Please try again. Error: ".concat(e),20)}}var lj=e=>{let{accessToken:l,token:t,userRole:s,userID:a}=e,[o,i]=(0,n.useState)(""),[c,d]=(0,n.useState)(""),[m,u]=(0,n.useState)([]),[h,x]=(0,n.useState)(void 0),[p,j]=(0,n.useState)([]);(0,n.useEffect)(()=>{l&&t&&s&&a&&(async()=>{try{let e=await N(l,a,s);if(console.log("model_info:",e),(null==e?void 0:e.data.length)>0){let l=e.data.map(e=>({value:e.id,label:e.id}));console.log(l),j(l),x(e.data[0].id)}}catch(e){console.error("Error fetching model info:",e)}})()},[l,a,s]);let g=(e,l)=>{u(t=>{let s=t[t.length-1];return s&&s.role===e?[...t.slice(0,t.length-1),{role:e,content:s.content+l}]:[...t,{role:e,content:l}]})},y=async()=>{if(""!==c.trim()&&o&&t&&s&&a){u(e=>[...e,{role:"user",content:c}]);try{h&&await lp(c,e=>g("assistant",e),h,o)}catch(e){console.error("Error fetching model response",e),g("assistant","Error fetching model response")}d("")}};if(s&&"Admin Viewer"==s){let{Title:e,Paragraph:l}=eT.default;return(0,r.jsxs)("div",{children:[(0,r.jsx)(e,{level:1,children:"Access Denied"}),(0,r.jsx)(l,{children:"Ask your proxy admin for access to test models"})]})}return(0,r.jsx)("div",{style:{width:"100%",position:"relative"},children:(0,r.jsx)(W.Z,{className:"gap-2 p-8 h-[80vh] w-full mt-2",children:(0,r.jsx)(ep.Z,{children:(0,r.jsxs)(eM.Z,{children:[(0,r.jsx)(eL.Z,{children:(0,r.jsx)(eR.Z,{children:"Chat"})}),(0,r.jsx)(eD.Z,{children:(0,r.jsxs)(eU.Z,{children:[(0,r.jsx)("div",{className:"sm:max-w-2xl",children:(0,r.jsxs)(W.Z,{numItems:2,children:[(0,r.jsxs)(Y.Z,{children:[(0,r.jsx)(ee.Z,{children:"API Key"}),(0,r.jsx)(H.Z,{placeholder:"Type API Key here",type:"password",onValueChange:i,value:o})]}),(0,r.jsxs)(Y.Z,{className:"mx-2",children:[(0,r.jsx)(ee.Z,{children:"Select Model:"}),(0,r.jsx)(es.default,{placeholder:"Select a Model",onChange:e=>{console.log("selected ".concat(e)),x(e)},options:p,style:{width:"200px"}})]})]})}),(0,r.jsxs)(eg.Z,{className:"mt-5",style:{display:"block",maxHeight:"60vh",overflowY:"auto"},children:[(0,r.jsx)(eZ.Z,{children:(0,r.jsx)(ew.Z,{children:(0,r.jsx)(ef.Z,{})})}),(0,r.jsx)(ey.Z,{children:m.map((e,l)=>(0,r.jsx)(ew.Z,{children:(0,r.jsx)(ef.Z,{children:"".concat(e.role,": ").concat(e.content)})},l))})]}),(0,r.jsx)("div",{className:"mt-3",style:{position:"absolute",bottom:5,width:"95%"},children:(0,r.jsxs)("div",{className:"flex",children:[(0,r.jsx)(H.Z,{type:"text",value:c,onChange:e=>d(e.target.value),placeholder:"Type your message..."}),(0,r.jsx)(J.Z,{onClick:y,className:"ml-2",children:"Send"})]})})]})})]})})})})},lg=t(33509),ly=t(95781);let{Sider:lf}=lg.default;var lZ=e=>{let{setPage:l,userRole:t,defaultSelectedKey:s}=e;return"Admin Viewer"==t?(0,r.jsx)(lg.default,{style:{minHeight:"100vh",maxWidth:"120px"},children:(0,r.jsx)(lf,{width:120,children:(0,r.jsxs)(ly.Z,{mode:"inline",defaultSelectedKeys:s||["4"],style:{height:"100%",borderRight:0},children:[(0,r.jsx)(ly.Z.Item,{onClick:()=>l("api-keys"),children:"API 
Keys"},"4"),(0,r.jsx)(ly.Z.Item,{onClick:()=>l("models"),children:"Models"},"2"),(0,r.jsx)(ly.Z.Item,{onClick:()=>l("llm-playground"),children:"Chat UI"},"3"),(0,r.jsx)(ly.Z.Item,{onClick:()=>l("usage"),children:"Usage"},"1")]})})}):(0,r.jsx)(lg.default,{style:{minHeight:"100vh",maxWidth:"145px"},children:(0,r.jsx)(lf,{width:145,children:(0,r.jsxs)(ly.Z,{mode:"inline",defaultSelectedKeys:s||["1"],style:{height:"100%",borderRight:0},children:[(0,r.jsx)(ly.Z.Item,{onClick:()=>l("api-keys"),children:(0,r.jsx)(ee.Z,{children:"API Keys"})},"1"),(0,r.jsx)(ly.Z.Item,{onClick:()=>l("llm-playground"),children:(0,r.jsx)(ee.Z,{children:"Test Key"})},"3"),"Admin"==t?(0,r.jsx)(ly.Z.Item,{onClick:()=>l("models"),children:(0,r.jsx)(ee.Z,{children:"Models"})},"2"):null,"Admin"==t?(0,r.jsx)(ly.Z.Item,{onClick:()=>l("usage"),children:(0,r.jsx)(ee.Z,{children:"Usage"})},"4"):null,"Admin"==t?(0,r.jsx)(ly.Z.Item,{onClick:()=>l("teams"),children:(0,r.jsx)(ee.Z,{children:"Teams"})},"6"):null,"Admin"==t?(0,r.jsx)(ly.Z.Item,{onClick:()=>l("users"),children:(0,r.jsx)(ee.Z,{children:"Users"})},"5"):null,"Admin"==t?(0,r.jsx)(ly.Z.Item,{onClick:()=>l("settings"),children:(0,r.jsx)(ee.Z,{children:"Logging & Alerts"})},"8"):null,"Admin"==t?(0,r.jsx)(ly.Z.Item,{onClick:()=>l("general-settings"),children:(0,r.jsx)(ee.Z,{children:"Router Settings"})},"9"):null,"Admin"==t?(0,r.jsx)(ly.Z.Item,{onClick:()=>l("admin-panel"),children:(0,r.jsx)(ee.Z,{children:"Admin"})},"7"):null,(0,r.jsx)(ly.Z.Item,{onClick:()=>l("api_ref"),children:(0,r.jsx)(ee.Z,{children:"API Reference"})},"11")]})})})},l_=t(67989),lw=e=>{let{accessToken:l,token:t,userRole:s,userID:a,keys:o}=e,i=new Date,[c,d]=(0,n.useState)([]),[m,u]=(0,n.useState)([]),[h,x]=(0,n.useState)([]),[p,j]=(0,n.useState)([]),[g,y]=(0,n.useState)([]),[f,Z]=(0,n.useState)([]),[_,w]=(0,n.useState)([]),[b,k]=(0,n.useState)([]),[v,S]=(0,n.useState)(""),[N,R]=(0,n.useState)({from:new Date(Date.now()-6048e5),to:new Date}),M=new Date(i.getFullYear(),i.getMonth(),1),L=new Date(i.getFullYear(),i.getMonth()+1,0),U=z(M),D=z(L);console.log("keys in usage",o);let K=async(e,t,s)=>{if(!e||!t||!l)return;console.log("uiSelectedKey",s);let a=await T(l,s,e.toISOString(),t.toISOString());console.log("End user data updated successfully",a),j(a)},B=async(e,t)=>{e&&t&&l&&(Z((await E(l,e.toISOString(),t.toISOString())).spend_per_tag),console.log("Tag spend data updated successfully"))};function z(e){let l=e.getFullYear(),t=e.getMonth()+1,s=e.getDate();return"".concat(l,"-").concat(t<10?"0"+t:t,"-").concat(s<10?"0"+s:s)}return console.log("Start date is ".concat(U)),console.log("End date is ".concat(D)),(0,n.useEffect)(()=>{l&&t&&s&&a&&(async()=>{try{if(console.log("user role: ".concat(s)),"Admin"==s||"Admin Viewer"==s){var e,r;let t=await C(l);d(t);let s=(await P(l)).map(e=>({key:(e.key_name||e.key_alias||e.api_key).substring(0,10),spend:e.total_spend}));u(s);let a=(await O(l)).map(e=>({key:e.model,spend:e.total_spend}));x(a);let n=await A(l);console.log("teamSpend",n),y(n.daily_spend),w(n.teams);let o=n.total_spend_per_team;o=o.map(e=>(e.name=e.team_id||"",e.value=e.total_spend||0,e)),k(o);let i=await E(l,null===(e=N.from)||void 0===e?void 0:e.toISOString(),null===(r=N.to)||void 0===r?void 0:r.toISOString());Z(i.spend_per_tag);let c=await T(l,null,void 0,void 0);j(c),console.log("spend/user result",c)}else"App Owner"==s&&await I(l,t,s,a,U,D).then(async e=>{if(console.log("result from spend logs call",e),"daily_spend"in e){let l=e.daily_spend;console.log("daily spend",l),d(l);let 
t=e.top_api_keys;u(t)}else{let t=(await F(l,function(e){let l=[];e.forEach(e=>{Object.entries(e).forEach(e=>{let[t,s]=e;"spend"!==t&&"startTime"!==t&&"models"!==t&&"users"!==t&&l.push({key:t,spend:s})})}),l.sort((e,l)=>Number(l.spend)-Number(e.spend));let t=l.slice(0,5).map(e=>e.key);return console.log("topKeys: ".concat(Object.keys(t[0]))),t}(e))).info.map(e=>({key:(e.key_name||e.key_alias).substring(0,10),spend:e.spend}));u(t),d(e)}})}catch(e){console.error("There was an error fetching the data",e)}})()},[l,t,s,a,U,D]),(0,r.jsxs)("div",{style:{width:"100%"},className:"p-8",children:[(0,r.jsx)(eE,{userID:a,userRole:s,accessToken:l,userSpend:null,selectedTeam:null}),(0,r.jsxs)(eM.Z,{children:[(0,r.jsxs)(eL.Z,{className:"mt-2",children:[(0,r.jsx)(eR.Z,{children:"All Up"}),(0,r.jsx)(eR.Z,{children:"Team Based Usage"}),(0,r.jsx)(eR.Z,{children:"End User Usage"}),(0,r.jsx)(eR.Z,{children:"Tag Based Usage"})]}),(0,r.jsxs)(eD.Z,{children:[(0,r.jsx)(eU.Z,{children:(0,r.jsxs)(W.Z,{numItems:2,className:"gap-2 h-[75vh] w-full",children:[(0,r.jsx)(Y.Z,{numColSpan:2,children:(0,r.jsxs)(ep.Z,{children:[(0,r.jsx)(el.Z,{children:"Monthly Spend"}),(0,r.jsx)(eq.Z,{data:c,index:"date",categories:["spend"],colors:["blue"],valueFormatter:e=>"$ ".concat(new Intl.NumberFormat("us").format(e).toString()),yAxisWidth:100,tickGap:5})]})}),(0,r.jsx)(Y.Z,{numColSpan:1,children:(0,r.jsxs)(ep.Z,{children:[(0,r.jsx)(el.Z,{children:"Top API Keys"}),(0,r.jsx)(eq.Z,{className:"mt-4 h-40",data:m,index:"key",categories:["spend"],colors:["blue"],yAxisWidth:80,tickGap:5,layout:"vertical",showXAxis:!1,showLegend:!1})]})}),(0,r.jsx)(Y.Z,{numColSpan:1,children:(0,r.jsxs)(ep.Z,{children:[(0,r.jsx)(el.Z,{children:"Top Models"}),(0,r.jsx)(eq.Z,{className:"mt-4 h-40",data:h,index:"key",categories:["spend"],colors:["blue"],yAxisWidth:200,layout:"vertical",showXAxis:!1,showLegend:!1})]})}),(0,r.jsx)(Y.Z,{numColSpan:1})]})}),(0,r.jsx)(eU.Z,{children:(0,r.jsxs)(W.Z,{numItems:2,className:"gap-2 h-[75vh] w-full",children:[(0,r.jsxs)(Y.Z,{numColSpan:2,children:[(0,r.jsxs)(ep.Z,{className:"mb-2",children:[(0,r.jsx)(el.Z,{children:"Total Spend Per Team"}),(0,r.jsx)(l_.Z,{data:b})]}),(0,r.jsxs)(ep.Z,{children:[(0,r.jsx)(el.Z,{children:"Daily Spend Per Team"}),(0,r.jsx)(eq.Z,{className:"h-72",data:g,showLegend:!0,index:"date",categories:_,yAxisWidth:80,colors:["blue","green","yellow","red","purple"],stack:!0})]})]}),(0,r.jsx)(Y.Z,{numColSpan:2})]})}),(0,r.jsxs)(eU.Z,{children:[(0,r.jsxs)("p",{className:"mb-2 text-gray-500 italic text-[12px]",children:["End-Users of your LLM API calls. 
Tracked when a `user` param is passed in your LLM calls ",(0,r.jsx)("a",{className:"text-blue-500",href:"https://docs.litellm.ai/docs/proxy/users",target:"_blank",children:"docs here"})]}),(0,r.jsxs)(W.Z,{numItems:2,children:[(0,r.jsxs)(Y.Z,{children:[(0,r.jsx)(ee.Z,{children:"Select Time Range"}),(0,r.jsx)(eF.Z,{enableSelect:!0,value:N,onValueChange:e=>{R(e),K(e.from,e.to,null)}})]}),(0,r.jsxs)(Y.Z,{children:[(0,r.jsx)(ee.Z,{children:"Select Key"}),(0,r.jsxs)(eb.Z,{defaultValue:"all-keys",children:[(0,r.jsx)(ek.Z,{value:"all-keys",onClick:()=>{K(N.from,N.to,null)},children:"All Keys"},"all-keys"),null==o?void 0:o.map((e,l)=>e&&null!==e.key_alias&&e.key_alias.length>0?(0,r.jsx)(ek.Z,{value:String(l),onClick:()=>{K(N.from,N.to,e.token)},children:e.key_alias},l):null)]})]})]}),(0,r.jsx)(ep.Z,{className:"mt-4",children:(0,r.jsxs)(eg.Z,{className:"max-h-[70vh] min-h-[500px]",children:[(0,r.jsx)(eZ.Z,{children:(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(e_.Z,{children:"End User"}),(0,r.jsx)(e_.Z,{children:"Spend"}),(0,r.jsx)(e_.Z,{children:"Total Events"})]})}),(0,r.jsx)(ey.Z,{children:null==p?void 0:p.map((e,l)=>{var t;return(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(ef.Z,{children:e.end_user}),(0,r.jsx)(ef.Z,{children:null===(t=e.total_spend)||void 0===t?void 0:t.toFixed(4)}),(0,r.jsx)(ef.Z,{children:e.total_count})]},l)})})]})})]}),(0,r.jsx)(eU.Z,{children:(0,r.jsxs)(W.Z,{numItems:2,className:"gap-2 h-[75vh] w-full mb-4",children:[(0,r.jsxs)(Y.Z,{numColSpan:2,children:[(0,r.jsx)(eF.Z,{className:"mb-4",enableSelect:!0,value:N,onValueChange:e=>{R(e),B(e.from,e.to)}}),(0,r.jsxs)(ep.Z,{children:[(0,r.jsx)(el.Z,{children:"Spend Per Tag"}),(0,r.jsxs)(ee.Z,{children:["Get Started Tracking cost per tag ",(0,r.jsx)("a",{className:"text-blue-500",href:"https://docs.litellm.ai/docs/proxy/enterprise#tracking-spend-for-custom-tags",target:"_blank",children:"here"})]}),(0,r.jsx)(eq.Z,{className:"h-72",data:f,index:"name",categories:["spend"],colors:["blue"]})]})]}),(0,r.jsx)(Y.Z,{numColSpan:2})]})})]})]})]})},lb=()=>{let{Title:e,Paragraph:l}=eT.default,[t,s]=(0,n.useState)(""),[a,i]=(0,n.useState)(null),[c,d]=(0,n.useState)(null),[u,h]=(0,n.useState)(null),[x,p]=(0,n.useState)(!0),j=(0,o.useSearchParams)(),[g,y]=(0,n.useState)({data:[]}),f=j.get("userID"),Z=j.get("token"),[_,w]=(0,n.useState)("api-keys"),[b,k]=(0,n.useState)(null);return(0,n.useEffect)(()=>{if(Z){let e=(0,eP.o)(Z);if(e){if(console.log("Decoded token:",e),console.log("Decoded key:",e.key),k(e.key),e.user_role){let l=function(e){if(!e)return"Undefined Role";switch(console.log("Received user role: ".concat(e.toLowerCase())),console.log("Received user role length: ".concat(e.toLowerCase().length)),e.toLowerCase()){case"app_owner":case"demo_app_owner":return"App Owner";case"app_admin":case"proxy_admin":return"Admin";case"proxy_admin_viewer":return"Admin Viewer";case"app_user":return"App User";default:return"Unknown Role"}}(e.user_role);console.log("Decoded user_role:",l),s(l),"Admin Viewer"==l&&w("usage")}else console.log("User role not defined");e.user_email?i(e.user_email):console.log("User Email is not set ".concat(e)),e.login_method?p("username_password"==e.login_method):console.log("User Email is not set ".concat(e))}}},[Z]),(0,r.jsx)(n.Suspense,{fallback:(0,r.jsx)("div",{children:"Loading..."}),children:(0,r.jsxs)("div",{className:"flex flex-col min-h-screen",children:[(0,r.jsx)(m,{userID:f,userRole:t,userEmail:a,showSSOBanner:x}),(0,r.jsxs)("div",{className:"flex flex-1 
overflow-auto",children:[(0,r.jsx)("div",{className:"mt-8",children:(0,r.jsx)(lZ,{setPage:w,userRole:t,defaultSelectedKey:null})}),"api-keys"==_?(0,r.jsx)(eO,{userID:f,userRole:t,teams:c,keys:u,setUserRole:s,userEmail:a,setUserEmail:i,setTeams:d,setKeys:h}):"models"==_?(0,r.jsx)(e3,{userID:f,userRole:t,token:Z,accessToken:b,modelData:g,setModelData:y}):"llm-playground"==_?(0,r.jsx)(lj,{userID:f,userRole:t,token:Z,accessToken:b}):"users"==_?(0,r.jsx)(e9,{userID:f,userRole:t,token:Z,keys:u,teams:c,accessToken:b,setKeys:h}):"teams"==_?(0,r.jsx)(le,{teams:c,setTeams:d,searchParams:j,accessToken:b,userID:f,userRole:t}):"admin-panel"==_?(0,r.jsx)(lt,{setTeams:d,searchParams:j,accessToken:b,showSSOBanner:x}):"api_ref"==_?(0,r.jsx)(lx,{}):"settings"==_?(0,r.jsx)(lr,{userID:f,userRole:t,accessToken:b}):"general-settings"==_?(0,r.jsx)(lu,{userID:f,userRole:t,accessToken:b,modelData:g}):(0,r.jsx)(lw,{userID:f,userRole:t,token:Z,accessToken:b,keys:u})]})]})})}}},function(e){e.O(0,[936,884,971,69,744],function(){return e(e.s=20661)}),_N_E=e.O()}]); \ No newline at end of file diff --git a/litellm/proxy/_experimental/out/_next/static/chunks/app/page-f20fdea77aed85ba.js b/litellm/proxy/_experimental/out/_next/static/chunks/app/page-f20fdea77aed85ba.js new file mode 100644 index 0000000000..3db3281fa9 --- /dev/null +++ b/litellm/proxy/_experimental/out/_next/static/chunks/app/page-f20fdea77aed85ba.js @@ -0,0 +1 @@ +(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[931],{20661:function(e,l,t){Promise.resolve().then(t.bind(t,4858))},4858:function(e,l,t){"use strict";t.r(l),t.d(l,{default:function(){return lS}});var s,a,r=t(3827),n=t(64090),o=t(47907),i=t(8792),c=t(40491),d=t(65270),m=e=>{let{userID:l,userRole:t,userEmail:s,showSSOBanner:a}=e;console.log("User ID:",l),console.log("userEmail:",s),console.log("showSSOBanner:",a);let n=[{key:"1",label:(0,r.jsxs)(r.Fragment,{children:[(0,r.jsxs)("p",{children:["Role: ",t]}),(0,r.jsxs)("p",{children:["ID: ",l]})]})}];return(0,r.jsxs)("nav",{className:"left-0 right-0 top-0 flex justify-between items-center h-12 mb-4",children:[(0,r.jsx)("div",{className:"text-left my-2 absolute top-0 left-0",children:(0,r.jsx)("div",{className:"flex flex-col items-center",children:(0,r.jsx)(i.default,{href:"/",children:(0,r.jsx)("button",{className:"text-gray-800 rounded text-center",children:(0,r.jsx)("img",{src:"/get_image",width:160,height:160,alt:"LiteLLM Brand",className:"mr-2"})})})})}),(0,r.jsxs)("div",{className:"text-right mx-4 my-2 absolute top-0 right-0 flex items-center justify-end space-x-2",children:[a?(0,r.jsx)("div",{style:{padding:"6px",borderRadius:"8px"},children:(0,r.jsx)("a",{href:"https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat",target:"_blank",style:{fontSize:"14px",textDecoration:"underline"},children:"Request hosted proxy"})}):null,(0,r.jsx)("div",{style:{border:"1px solid #391085",padding:"6px",borderRadius:"8px"},children:(0,r.jsx)(c.Z,{menu:{items:n},children:(0,r.jsx)(d.Z,{children:s})})})]})]})},u=t(80588);let h=async()=>{try{let e=await fetch("https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"),l=await e.json();return console.log("received data: ".concat(l)),l}catch(e){throw console.error("Failed to get model cost map:",e),e}},x=async(e,l)=>{try{let t=await fetch("/model/new",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...l})});if(!t.ok){let e=await t.text();throw u.ZP.error("Failed to create key: 
"+e,10),console.error("Error response from the server:",e),Error("Network response was not ok")}let s=await t.json();return console.log("API Response:",s),u.ZP.success("Model created successfully. Wait 60s and refresh on 'All Models' page"),s}catch(e){throw console.error("Failed to create key:",e),e}},p=async(e,l)=>{console.log("model_id in model delete call: ".concat(l));try{let t=await fetch("/model/delete",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({id:l})});if(!t.ok){let e=await t.text();throw u.ZP.error("Failed to create key: "+e,10),console.error("Error response from the server:",e),Error("Network response was not ok")}let s=await t.json();return console.log("API Response:",s),u.ZP.success("Model deleted successfully. Restart server to see this."),s}catch(e){throw console.error("Failed to create key:",e),e}},j=async(e,l,t)=>{try{if(console.log("Form Values in keyCreateCall:",t),t.description&&(t.metadata||(t.metadata={}),t.metadata.description=t.description,delete t.description,t.metadata=JSON.stringify(t.metadata)),t.metadata){console.log("formValues.metadata:",t.metadata);try{t.metadata=JSON.parse(t.metadata)}catch(e){throw u.ZP.error("Failed to parse metadata: "+e,10),Error("Failed to parse metadata: "+e)}}console.log("Form Values after check:",t);let s=await fetch("/key/generate",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({user_id:l,...t})});if(!s.ok){let e=await s.text();throw u.ZP.error("Failed to create key: "+e,10),console.error("Error response from the server:",e),Error("Network response was not ok")}let a=await s.json();return console.log("API Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},g=async(e,l,t)=>{try{if(console.log("Form Values in keyCreateCall:",t),t.description&&(t.metadata||(t.metadata={}),t.metadata.description=t.description,delete t.description,t.metadata=JSON.stringify(t.metadata)),t.metadata){console.log("formValues.metadata:",t.metadata);try{t.metadata=JSON.parse(t.metadata)}catch(e){throw u.ZP.error("Failed to parse metadata: "+e,10),Error("Failed to parse metadata: "+e)}}console.log("Form Values after check:",t);let s=await fetch("/user/new",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({user_id:l,...t})});if(!s.ok){let e=await s.text();throw u.ZP.error("Failed to create key: "+e,10),console.error("Error response from the server:",e),Error("Network response was not ok")}let a=await s.json();return console.log("API Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},y=async(e,l)=>{try{console.log("in keyDeleteCall:",l);let t=await fetch("/key/delete",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({keys:[l]})});if(!t.ok){let e=await t.text();throw u.ZP.error("Failed to delete key: "+e,10),Error("Network response was not ok")}let s=await t.json();return console.log(s),s}catch(e){throw console.error("Failed to create key:",e),e}},f=async(e,l)=>{try{console.log("in teamDeleteCall:",l);let t=await fetch("/team/delete",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({team_ids:[l]})});if(!t.ok){let e=await t.text();throw u.ZP.error("Failed to delete team: "+e,10),Error("Network response was not ok")}let s=await t.json();return console.log(s),s}catch(e){throw 
console.error("Failed to delete key:",e),e}},Z=async function(e,l,t){let s=arguments.length>3&&void 0!==arguments[3]&&arguments[3],a=arguments.length>4?arguments[4]:void 0,r=arguments.length>5?arguments[5]:void 0;try{let n="/user/info";"App Owner"==t&&l&&(n="".concat(n,"?user_id=").concat(l)),"App User"==t&&l&&(n="".concat(n,"?user_id=").concat(l)),console.log("in userInfoCall viewAll=",s),s&&r&&null!=a&&void 0!=a&&(n="".concat(n,"?view_all=true&page=").concat(a,"&page_size=").concat(r));let o=await fetch(n,{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let i=await o.json();return console.log("API Response:",i),i}catch(e){throw console.error("Failed to create key:",e),e}},_=async(e,l)=>{try{let t="/team/info";l&&(t="".concat(t,"?team_id=").concat(l)),console.log("in teamInfoCall");let s=await fetch(t,{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!s.ok){let e=await s.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let a=await s.json();return console.log("API Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},w=async e=>{try{let l=await fetch("/global/spend",{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!l.ok){let e=await l.text();throw u.ZP.error(e,10),Error("Network response was not ok")}return await l.json()}catch(e){throw console.error("Failed to create key:",e),e}},b=async(e,l,t)=>{try{let l=await fetch("/v2/model/info",{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!l.ok){let e=await l.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let t=await l.json();return console.log("modelInfoCall:",t),t}catch(e){throw console.error("Failed to create key:",e),e}},k=async(e,l,t,s,a,r)=>{try{let l="/model/metrics";s&&(l="".concat(l,"?_selected_model_group=").concat(s,"&startTime=").concat(a,"&endTime=").concat(r));let t=await fetch(l,{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!t.ok){let e=await t.text();throw u.ZP.error(e,10),Error("Network response was not ok")}return await t.json()}catch(e){throw console.error("Failed to create key:",e),e}},v=async(e,l,t,s,a,r)=>{try{let l="/model/metrics/slow_responses";s&&(l="".concat(l,"?_selected_model_group=").concat(s,"&startTime=").concat(a,"&endTime=").concat(r));let t=await fetch(l,{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!t.ok){let e=await t.text();throw u.ZP.error(e,10),Error("Network response was not ok")}return await t.json()}catch(e){throw console.error("Failed to create key:",e),e}},S=async(e,l,t,s,a,r)=>{try{let l="/model/metrics/exceptions";s&&(l="".concat(l,"?_selected_model_group=").concat(s,"&startTime=").concat(a,"&endTime=").concat(r));let t=await fetch(l,{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!t.ok){let e=await t.text();throw u.ZP.error(e,10),Error("Network response was not ok")}return await t.json()}catch(e){throw console.error("Failed to create key:",e),e}},N=async(e,l,t)=>{try{let l=await fetch("/models",{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!l.ok){let e=await l.text();throw u.ZP.error(e,10),Error("Network response was not ok")}return await l.json()}catch(e){throw 
console.error("Failed to create key:",e),e}},A=async e=>{try{let l="/global/spend/teams";console.log("in teamSpendLogsCall:",l);let t=await fetch("".concat(l),{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!t.ok){let e=await t.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let s=await t.json();return console.log(s),s}catch(e){throw console.error("Failed to create key:",e),e}},E=async(e,l,t)=>{try{let s="/global/spend/tags";l&&t&&(s="".concat(s,"?start_date=").concat(l,"&end_date=").concat(t)),console.log("in tagsSpendLogsCall:",s);let a=await fetch("".concat(s),{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!a.ok)throw await a.text(),Error("Network response was not ok");let r=await a.json();return console.log(r),r}catch(e){throw console.error("Failed to create key:",e),e}},I=async(e,l,t,s,a,r)=>{try{console.log("user role in spend logs call: ".concat(t));let l="/spend/logs";l="App Owner"==t?"".concat(l,"?user_id=").concat(s,"&start_date=").concat(a,"&end_date=").concat(r):"".concat(l,"?start_date=").concat(a,"&end_date=").concat(r);let n=await fetch(l,{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!n.ok){let e=await n.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let o=await n.json();return console.log(o),o}catch(e){throw console.error("Failed to create key:",e),e}},C=async e=>{try{let l=await fetch("/global/spend/logs",{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!l.ok){let e=await l.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let t=await l.json();return console.log(t),t}catch(e){throw console.error("Failed to create key:",e),e}},T=async e=>{try{let l=await fetch("/global/spend/keys?limit=5",{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!l.ok){let e=await l.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let t=await l.json();return console.log(t),t}catch(e){throw console.error("Failed to create key:",e),e}},P=async(e,l,t,s)=>{try{let a="";a=l?JSON.stringify({api_key:l,startTime:t,endTime:s}):JSON.stringify({startTime:t,endTime:s});let r={method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}};r.body=a;let n=await fetch("/global/spend/end_users",r);if(!n.ok){let e=await n.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let o=await n.json();return console.log(o),o}catch(e){throw console.error("Failed to create key:",e),e}},O=async e=>{try{let l=await fetch("/global/spend/models?limit=5",{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!l.ok){let e=await l.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let t=await l.json();return console.log(t),t}catch(e){throw console.error("Failed to create key:",e),e}},F=async(e,l)=>{try{let t=await fetch("/v2/key/info",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({keys:l})});if(!t.ok){let e=await t.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let s=await t.json();return console.log(s),s}catch(e){throw console.error("Failed to create key:",e),e}},R=async(e,l)=>{try{let t="/user/get_users?role=".concat(l);console.log("in userGetAllUsersCall:",t);let s=await fetch(t,{method:"GET",headers:{Authorization:"Bearer 
".concat(e),"Content-Type":"application/json"}});if(!s.ok){let e=await s.text();throw u.ZP.error("Failed to delete key: "+e,10),Error("Network response was not ok")}let a=await s.json();return console.log(a),a}catch(e){throw console.error("Failed to get requested models:",e),e}},M=async(e,l)=>{try{console.log("Form Values in teamCreateCall:",l);let t=await fetch("/team/new",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...l})});if(!t.ok){let e=await t.text();throw u.ZP.error("Failed to create key: "+e,10),console.error("Error response from the server:",e),Error("Network response was not ok")}let s=await t.json();return console.log("API Response:",s),s}catch(e){throw console.error("Failed to create key:",e),e}},L=async(e,l)=>{try{console.log("Form Values in keyUpdateCall:",l);let t=await fetch("/key/update",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...l})});if(!t.ok){let e=await t.text();throw u.ZP.error("Failed to update key: "+e,10),console.error("Error response from the server:",e),Error("Network response was not ok")}let s=await t.json();return console.log("Update key Response:",s),s}catch(e){throw console.error("Failed to create key:",e),e}},U=async(e,l)=>{try{console.log("Form Values in teamUpateCall:",l);let t=await fetch("/team/update",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...l})});if(!t.ok){let e=await t.text();throw u.ZP.error("Failed to update team: "+e,10),console.error("Error response from the server:",e),Error("Network response was not ok")}let s=await t.json();return console.log("Update Team Response:",s),s}catch(e){throw console.error("Failed to create key:",e),e}},B=async(e,l)=>{try{console.log("Form Values in modelUpateCall:",l);let t=await fetch("/model/update",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...l})});if(!t.ok){let e=await t.text();throw u.ZP.error("Failed to update model: "+e,10),console.error("Error update from the server:",e),Error("Network response was not ok")}let s=await t.json();return console.log("Update model Response:",s),s}catch(e){throw console.error("Failed to update model:",e),e}},D=async(e,l,t)=>{try{console.log("Form Values in teamMemberAddCall:",t);let s=await fetch("/team/member_add",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({team_id:l,member:t})});if(!s.ok){let e=await s.text();throw u.ZP.error("Failed to create key: "+e,10),console.error("Error response from the server:",e),Error("Network response was not ok")}let a=await s.json();return console.log("API Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},K=async(e,l,t)=>{try{console.log("Form Values in userUpdateUserCall:",l);let s={...l};null!==t&&(s.user_role=t),s=JSON.stringify(s);let a=await fetch("/user/update",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:s});if(!a.ok){let e=await a.text();throw u.ZP.error("Failed to create key: "+e,10),console.error("Error response from the server:",e),Error("Network response was not ok")}let r=await a.json();return console.log("API Response:",r),r}catch(e){throw console.error("Failed to create key:",e),e}},z=async(e,l)=>{try{let t="/health/services?service=".concat(l);console.log("Checking Slack Budget Alerts service 
health");let s=await fetch(t,{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!s.ok){let e=await s.text();throw u.ZP.error("Failed ".concat(l," service health check ")+e),Error(e)}let a=await s.json();return u.ZP.success("Test request to ".concat(l," made - check logs/alerts on ").concat(l," to verify")),a}catch(e){throw console.error("Failed to perform health check:",e),e}},V=async(e,l,t)=>{try{let l=await fetch("/get/config/callbacks",{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!l.ok){let e=await l.text();throw u.ZP.error(e,10),Error("Network response was not ok")}return await l.json()}catch(e){throw console.error("Failed to get callbacks:",e),e}},q=async e=>{try{let l=await fetch("/config/list?config_type=general_settings",{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!l.ok){let e=await l.text();throw u.ZP.error(e,10),Error("Network response was not ok")}return await l.json()}catch(e){throw console.error("Failed to get callbacks:",e),e}},G=async(e,l,t)=>{try{let s=await fetch("/config/field/update",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({field_name:l,field_value:t,config_type:"general_settings"})});if(!s.ok){let e=await s.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let a=await s.json();return u.ZP.success("Successfully updated value!"),a}catch(e){throw console.error("Failed to set callbacks:",e),e}},W=async(e,l)=>{try{let t=await fetch("/config/field/delete",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({field_name:l,config_type:"general_settings"})});if(!t.ok){let e=await t.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let s=await t.json();return u.ZP.success("Field reset on proxy"),s}catch(e){throw console.error("Failed to get callbacks:",e),e}},Y=async(e,l)=>{try{let t=await fetch("/config/update",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...l})});if(!t.ok){let e=await t.text();throw u.ZP.error(e,10),Error("Network response was not ok")}return await t.json()}catch(e){throw console.error("Failed to set callbacks:",e),e}},J=async e=>{try{let l=await fetch("/health",{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!l.ok){let e=await l.text();throw u.ZP.error(e),Error("Network response was not ok")}return await l.json()}catch(e){throw console.error("Failed to call /health:",e),e}};var H=t(10384),$=t(46453),X=t(16450),Q=t(52273),ee=t(26780),el=t(15595),et=t(6698),es=t(71801),ea=t(42440),er=t(42308),en=t(50670),eo=t(81583),ei=t(99129),ec=t(44839),ed=t(88707),em=t(1861);let{Option:eu}=en.default;var eh=e=>{let{userID:l,team:t,userRole:s,accessToken:a,data:o,setData:i}=e,[c]=eo.Z.useForm(),[d,m]=(0,n.useState)(!1),[h,x]=(0,n.useState)(null),[p,g]=(0,n.useState)(null),[y,f]=(0,n.useState)([]),[Z,_]=(0,n.useState)([]),w=()=>{m(!1),c.resetFields()},b=()=>{m(!1),x(null),c.resetFields()};(0,n.useEffect)(()=>{(async()=>{try{if(null===l||null===s)return;if(null!==a){let e=(await N(a,l,s)).data.map(e=>e.id);console.log("available_model_names:",e),f(e)}}catch(e){console.error("Error fetching user models:",e)}})()},[a,l,s]);let k=async e=>{try{var t,s,r;let n=null!==(t=null==e?void 0:e.key_alias)&&void 0!==t?t:"",d=null!==(s=null==e?void 
0:e.team_id)&&void 0!==s?s:null;if((null!==(r=null==o?void 0:o.filter(e=>e.team_id===d).map(e=>e.key_alias))&&void 0!==r?r:[]).includes(n))throw Error("Key alias ".concat(n," already exists for team with ID ").concat(d,", please provide another key alias"));u.ZP.info("Making API Call"),m(!0);let h=await j(a,l,e);console.log("key create Response:",h),i(e=>e?[...e,h]:[h]),x(h.key),g(h.soft_budget),u.ZP.success("API Key Created"),c.resetFields(),localStorage.removeItem("userData"+l)}catch(e){console.error("Error creating the key:",e),u.ZP.error("Error creating the key: ".concat(e),20)}};return(0,n.useEffect)(()=>{_(t&&t.models.length>0?t.models.includes("all-proxy-models")?y:t.models:y)},[t,y]),(0,r.jsxs)("div",{children:[(0,r.jsx)(X.Z,{className:"mx-auto",onClick:()=>m(!0),children:"+ Create New Key"}),(0,r.jsx)(ei.Z,{title:"Create Key",visible:d,width:800,footer:null,onOk:w,onCancel:b,children:(0,r.jsxs)(eo.Z,{form:c,onFinish:k,labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(eo.Z.Item,{label:"Key Name",name:"key_alias",rules:[{required:!0,message:"Please input a key name"}],help:"required",children:(0,r.jsx)(Q.Z,{placeholder:""})}),(0,r.jsx)(eo.Z.Item,{label:"Team ID",name:"team_id",hidden:!0,initialValue:t?t.team_id:null,valuePropName:"team_id",className:"mt-8",children:(0,r.jsx)(ec.Z,{value:t?t.team_alias:"",disabled:!0})}),(0,r.jsx)(eo.Z.Item,{label:"Models",name:"models",rules:[{required:!0,message:"Please select a model"}],help:"required",children:(0,r.jsxs)(en.default,{mode:"multiple",placeholder:"Select models",style:{width:"100%"},onChange:e=>{e.includes("all-team-models")&&c.setFieldsValue({models:["all-team-models"]})},children:[(0,r.jsx)(eu,{value:"all-team-models",children:"All Team Models"},"all-team-models"),Z.map(e=>(0,r.jsx)(eu,{value:e,children:e},e))]})}),(0,r.jsxs)(ee.Z,{className:"mt-20 mb-8",children:[(0,r.jsx)(et.Z,{children:(0,r.jsx)("b",{children:"Optional Settings"})}),(0,r.jsxs)(el.Z,{children:[(0,r.jsx)(eo.Z.Item,{className:"mt-8",label:"Max Budget (USD)",name:"max_budget",help:"Budget cannot exceed team max budget: $".concat((null==t?void 0:t.max_budget)!==null&&(null==t?void 0:t.max_budget)!==void 0?null==t?void 0:t.max_budget:"unlimited"),rules:[{validator:async(e,l)=>{if(l&&t&&null!==t.max_budget&&l>t.max_budget)throw Error("Budget cannot exceed team max budget: $".concat(t.max_budget))}}],children:(0,r.jsx)(ed.Z,{step:.01,precision:2,width:200})}),(0,r.jsx)(eo.Z.Item,{className:"mt-8",label:"Reset Budget",name:"budget_duration",help:"Team Reset Budget: ".concat((null==t?void 0:t.budget_duration)!==null&&(null==t?void 0:t.budget_duration)!==void 0?null==t?void 0:t.budget_duration:"None"),children:(0,r.jsxs)(en.default,{defaultValue:null,placeholder:"n/a",children:[(0,r.jsx)(en.default.Option,{value:"24h",children:"daily"}),(0,r.jsx)(en.default.Option,{value:"30d",children:"monthly"})]})}),(0,r.jsx)(eo.Z.Item,{className:"mt-8",label:"Tokens per minute Limit (TPM)",name:"tpm_limit",help:"TPM cannot exceed team TPM limit: ".concat((null==t?void 0:t.tpm_limit)!==null&&(null==t?void 0:t.tpm_limit)!==void 0?null==t?void 0:t.tpm_limit:"unlimited"),rules:[{validator:async(e,l)=>{if(l&&t&&null!==t.tpm_limit&&l>t.tpm_limit)throw Error("TPM limit cannot exceed team TPM limit: ".concat(t.tpm_limit))}}],children:(0,r.jsx)(ed.Z,{step:1,width:400})}),(0,r.jsx)(eo.Z.Item,{className:"mt-8",label:"Requests per minute Limit (RPM)",name:"rpm_limit",help:"RPM cannot exceed team RPM limit: ".concat((null==t?void 
0:t.rpm_limit)!==null&&(null==t?void 0:t.rpm_limit)!==void 0?null==t?void 0:t.rpm_limit:"unlimited"),rules:[{validator:async(e,l)=>{if(l&&t&&null!==t.rpm_limit&&l>t.rpm_limit)throw Error("RPM limit cannot exceed team RPM limit: ".concat(t.rpm_limit))}}],children:(0,r.jsx)(ed.Z,{step:1,width:400})}),(0,r.jsx)(eo.Z.Item,{label:"Expire Key (eg: 30s, 30h, 30d)",name:"duration",className:"mt-8",children:(0,r.jsx)(Q.Z,{placeholder:""})}),(0,r.jsx)(eo.Z.Item,{label:"Metadata",name:"metadata",children:(0,r.jsx)(ec.Z.TextArea,{rows:4,placeholder:"Enter metadata as JSON"})})]})]})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(em.ZP,{htmlType:"submit",children:"Create Key"})})]})}),h&&(0,r.jsx)(ei.Z,{visible:d,onOk:w,onCancel:b,footer:null,children:(0,r.jsxs)($.Z,{numItems:1,className:"gap-2 w-full",children:[(0,r.jsx)(ea.Z,{children:"Save your Key"}),(0,r.jsx)(H.Z,{numColSpan:1,children:(0,r.jsxs)("p",{children:["Please save this secret key somewhere safe and accessible. For security reasons, ",(0,r.jsx)("b",{children:"you will not be able to view it again"})," ","through your LiteLLM account. If you lose this secret key, you will need to generate a new one."]})}),(0,r.jsx)(H.Z,{numColSpan:1,children:null!=h?(0,r.jsxs)("div",{children:[(0,r.jsx)(es.Z,{className:"mt-3",children:"API Key:"}),(0,r.jsx)("div",{style:{background:"#f8f8f8",padding:"10px",borderRadius:"5px",marginBottom:"10px"},children:(0,r.jsx)("pre",{style:{wordWrap:"break-word",whiteSpace:"normal"},children:h})}),(0,r.jsx)(er.CopyToClipboard,{text:h,onCopy:()=>{u.ZP.success("API Key copied to clipboard")},children:(0,r.jsx)(X.Z,{className:"mt-3",children:"Copy API Key"})})]}):(0,r.jsx)(es.Z,{children:"Key being created, this might take 30s"})})]})})]})},ex=t(9454),ep=t(98941),ej=t(33393),eg=t(5),ey=t(13810),ef=t(61244),eZ=t(10827),e_=t(3851),ew=t(2044),eb=t(64167),ek=t(74480),ev=t(7178),eS=t(95093),eN=t(27166);let{Option:eA}=en.default;var eE=e=>{let{userID:l,userRole:t,accessToken:s,selectedTeam:a,data:o,setData:i,teams:c}=e,[d,m]=(0,n.useState)(!1),[h,x]=(0,n.useState)(!1),[p,j]=(0,n.useState)(null),[g,f]=(0,n.useState)(null),[Z,_]=(0,n.useState)(null),[w,b]=(0,n.useState)(""),[k,v]=(0,n.useState)(!1),[S,A]=(0,n.useState)(!1),[E,I]=(0,n.useState)(null),[C,T]=(0,n.useState)([]),P=new Set,[O,F]=(0,n.useState)(P);(0,n.useEffect)(()=>{(async()=>{try{if(null===l)return;if(null!==s&&null!==t){let e=(await N(s,l,t)).data.map(e=>e.id);console.log("available_model_names:",e),T(e)}}catch(e){console.error("Error fetching user models:",e)}})()},[s,l,t]),(0,n.useEffect)(()=>{if(c){let e=new Set;c.forEach((l,t)=>{let s=l.team_id;e.add(s)}),F(e)}},[c]);let R=e=>{console.log("handleEditClick:",e),null==e.token&&null!==e.token_id&&(e.token=e.token_id),I(e),v(!0)},M=async e=>{if(null==s)return;let l=e.token;e.key=l,console.log("handleEditSubmit:",e);let t=await L(s,e);console.log("handleEditSubmit: newKeyValues",t),o&&i(o.map(e=>e.token===l?t:e)),u.ZP.success("Key updated successfully"),v(!1),I(null)},U=async e=>{console.log("handleDelete:",e),null==e.token&&null!==e.token_id&&(e.token=e.token_id),null!=o&&(j(e.token),localStorage.removeItem("userData"+l),x(!0))},B=async()=>{if(null!=p&&null!=o){try{await y(s,p);let e=o.filter(e=>e.token!==p);i(e)}catch(e){console.error("Error deleting the key:",e)}x(!1),j(null)}};if(null!=o)return console.log("RERENDER TRIGGERED"),(0,r.jsxs)("div",{children:[(0,r.jsxs)(ey.Z,{className:"w-full mx-auto flex-auto overflow-y-auto max-h-[50vh] mb-4 
mt-2",children:[(0,r.jsxs)(eZ.Z,{className:"mt-5 max-h-[300px] min-h-[300px]",children:[(0,r.jsx)(eb.Z,{children:(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ek.Z,{children:"Key Alias"}),(0,r.jsx)(ek.Z,{children:"Secret Key"}),(0,r.jsx)(ek.Z,{children:"Spend (USD)"}),(0,r.jsx)(ek.Z,{children:"Budget (USD)"}),(0,r.jsx)(ek.Z,{children:"Models"}),(0,r.jsx)(ek.Z,{children:"TPM / RPM Limits"})]})}),(0,r.jsx)(e_.Z,{children:o.map(e=>{if(console.log(e),"litellm-dashboard"===e.team_id)return null;if(a){if(console.log("item team id: ".concat(e.team_id,", knownTeamIDs.has(item.team_id): ").concat(O.has(e.team_id),", selectedTeam id: ").concat(a.team_id)),(null!=a.team_id||null===e.team_id||O.has(e.team_id))&&e.team_id!=a.team_id)return null;console.log("item team id: ".concat(e.team_id,", is returned"))}return(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ew.Z,{style:{maxWidth:"2px",whiteSpace:"pre-wrap",overflow:"hidden"},children:null!=e.key_alias?(0,r.jsx)(es.Z,{children:e.key_alias}):(0,r.jsx)(es.Z,{children:"Not Set"})}),(0,r.jsx)(ew.Z,{children:(0,r.jsx)(es.Z,{children:e.key_name})}),(0,r.jsx)(ew.Z,{children:(0,r.jsx)(es.Z,{children:(()=>{try{return parseFloat(e.spend).toFixed(4)}catch(l){return e.spend}})()})}),(0,r.jsx)(ew.Z,{children:null!=e.max_budget?(0,r.jsx)(es.Z,{children:e.max_budget}):(0,r.jsx)(es.Z,{children:"Unlimited"})}),(0,r.jsx)(ew.Z,{children:Array.isArray(e.models)?(0,r.jsx)("div",{style:{display:"flex",flexDirection:"column"},children:0===e.models.length?(0,r.jsx)(r.Fragment,{children:a&&a.models&&a.models.length>0?a.models.map((e,l)=>"all-proxy-models"===e?(0,r.jsx)(eg.Z,{size:"xs",className:"mb-1",color:"red",children:(0,r.jsx)(es.Z,{children:"All Proxy Models"})},l):"all-team-models"===e?(0,r.jsx)(eg.Z,{size:"xs",className:"mb-1",color:"red",children:(0,r.jsx)(es.Z,{children:"All Team Models"})},l):(0,r.jsx)(eg.Z,{size:"xs",className:"mb-1",color:"blue",children:(0,r.jsx)(es.Z,{children:e.length>30?"".concat(e.slice(0,30),"..."):e})},l)):(0,r.jsx)(eg.Z,{size:"xs",className:"mb-1",color:"blue",children:(0,r.jsx)(es.Z,{children:"all-proxy-models"})})}):e.models.map((e,l)=>"all-proxy-models"===e?(0,r.jsx)(eg.Z,{size:"xs",className:"mb-1",color:"red",children:(0,r.jsx)(es.Z,{children:"All Proxy Models"})},l):"all-team-models"===e?(0,r.jsx)(eg.Z,{size:"xs",className:"mb-1",color:"red",children:(0,r.jsx)(es.Z,{children:"All Team Models"})},l):(0,r.jsx)(eg.Z,{size:"xs",className:"mb-1",color:"blue",children:(0,r.jsx)(es.Z,{children:e.length>30?"".concat(e.slice(0,30),"..."):e})},l))}):null}),(0,r.jsx)(ew.Z,{children:(0,r.jsxs)(es.Z,{children:["TPM: ",e.tpm_limit?e.tpm_limit:"Unlimited"," ",(0,r.jsx)("br",{})," RPM:"," ",e.rpm_limit?e.rpm_limit:"Unlimited"]})}),(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(ef.Z,{onClick:()=>{I(e),A(!0)},icon:ex.Z,size:"sm"}),(0,r.jsx)(ei.Z,{open:S,onCancel:()=>{A(!1),I(null)},footer:null,width:800,children:E&&(0,r.jsxs)(r.Fragment,{children:[(0,r.jsxs)("div",{className:"grid grid-cols-1 gap-6 sm:grid-cols-2 lg:grid-cols-3 mt-8",children:[(0,r.jsxs)(ey.Z,{children:[(0,r.jsx)("p",{className:"text-tremor-default font-medium text-tremor-content dark:text-dark-tremor-content",children:"Spend"}),(0,r.jsx)("div",{className:"mt-2 flex items-baseline space-x-2.5",children:(0,r.jsx)("p",{className:"text-tremor font-semibold text-tremor-content-strong dark:text-dark-tremor-content-strong",children:(()=>{try{return parseFloat(E.spend).toFixed(4)}catch(e){return E.spend}})()})})]}),(0,r.jsxs)(ey.Z,{children:[(0,r.jsx)("p",{className:"text-tremor-default font-medium text-tremor-content 
dark:text-dark-tremor-content",children:"Budget"}),(0,r.jsx)("div",{className:"mt-2 flex items-baseline space-x-2.5",children:(0,r.jsx)("p",{className:"text-tremor font-semibold text-tremor-content-strong dark:text-dark-tremor-content-strong",children:null!=E.max_budget?(0,r.jsx)(r.Fragment,{children:E.max_budget}):(0,r.jsx)(r.Fragment,{children:"Unlimited"})})})]},e.name),(0,r.jsxs)(ey.Z,{children:[(0,r.jsx)("p",{className:"text-tremor-default font-medium text-tremor-content dark:text-dark-tremor-content",children:"Expires"}),(0,r.jsx)("div",{className:"mt-2 flex items-baseline space-x-2.5",children:(0,r.jsx)("p",{className:"text-tremor-default font-small text-tremor-content-strong dark:text-dark-tremor-content-strong",children:null!=E.expires?(0,r.jsx)(r.Fragment,{children:new Date(E.expires).toLocaleString(void 0,{day:"numeric",month:"long",year:"numeric",hour:"numeric",minute:"numeric",second:"numeric"})}):(0,r.jsx)(r.Fragment,{children:"Never"})})})]},e.name)]}),(0,r.jsxs)(ey.Z,{className:"my-4",children:[(0,r.jsx)(ea.Z,{children:"Token Name"}),(0,r.jsx)(es.Z,{className:"my-1",children:E.key_alias?E.key_alias:E.key_name}),(0,r.jsx)(ea.Z,{children:"Token ID"}),(0,r.jsx)(es.Z,{className:"my-1 text-[12px]",children:E.token}),(0,r.jsx)(ea.Z,{children:"Metadata"}),(0,r.jsx)(es.Z,{className:"my-1",children:(0,r.jsxs)("pre",{children:[JSON.stringify(E.metadata)," "]})})]}),(0,r.jsx)(X.Z,{className:"mx-auto flex items-center",onClick:()=>{A(!1),I(null)},children:"Close"})]})}),(0,r.jsx)(ef.Z,{icon:ep.Z,size:"sm",onClick:()=>R(e)}),(0,r.jsx)(ef.Z,{onClick:()=>U(e),icon:ej.Z,size:"sm"})]})]},e.token)})})]}),h&&(0,r.jsx)("div",{className:"fixed z-10 inset-0 overflow-y-auto",children:(0,r.jsxs)("div",{className:"flex items-end justify-center min-h-screen pt-4 px-4 pb-20 text-center sm:block sm:p-0",children:[(0,r.jsx)("div",{className:"fixed inset-0 transition-opacity","aria-hidden":"true",children:(0,r.jsx)("div",{className:"absolute inset-0 bg-gray-500 opacity-75"})}),(0,r.jsx)("span",{className:"hidden sm:inline-block sm:align-middle sm:h-screen","aria-hidden":"true",children:"β"}),(0,r.jsxs)("div",{className:"inline-block align-bottom bg-white rounded-lg text-left overflow-hidden shadow-xl transform transition-all sm:my-8 sm:align-middle sm:max-w-lg sm:w-full",children:[(0,r.jsx)("div",{className:"bg-white px-4 pt-5 pb-4 sm:p-6 sm:pb-4",children:(0,r.jsx)("div",{className:"sm:flex sm:items-start",children:(0,r.jsxs)("div",{className:"mt-3 text-center sm:mt-0 sm:ml-4 sm:text-left",children:[(0,r.jsx)("h3",{className:"text-lg leading-6 font-medium text-gray-900",children:"Delete Key"}),(0,r.jsx)("div",{className:"mt-2",children:(0,r.jsx)("p",{className:"text-sm text-gray-500",children:"Are you sure you want to delete this key ?"})})]})})}),(0,r.jsxs)("div",{className:"bg-gray-50 px-4 py-3 sm:px-6 sm:flex sm:flex-row-reverse",children:[(0,r.jsx)(X.Z,{onClick:B,color:"red",className:"ml-2",children:"Delete"}),(0,r.jsx)(X.Z,{onClick:()=>{x(!1),j(null)},children:"Cancel"})]})]})]})})]}),E&&(0,r.jsx)(e=>{let{visible:l,onCancel:t,token:s,onSubmit:o}=e,[i]=eo.Z.useForm(),[d,m]=(0,n.useState)(a),[u,h]=(0,n.useState)([]),[x,p]=(0,n.useState)(!1);return(0,r.jsx)(ei.Z,{title:"Edit Key",visible:l,width:800,footer:null,onOk:()=>{i.validateFields().then(e=>{i.resetFields()}).catch(e=>{console.error("Validation 
failed:",e)})},onCancel:t,children:(0,r.jsxs)(eo.Z,{form:i,onFinish:M,initialValues:s,labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(eo.Z.Item,{label:"Key Name",name:"key_alias",rules:[{required:!0,message:"Please input a key name"}],help:"required",children:(0,r.jsx)(ec.Z,{})}),(0,r.jsx)(eo.Z.Item,{label:"Models",name:"models",rules:[{validator:(e,l)=>{let t=l.filter(e=>!d.models.includes(e)&&"all-team-models"!==e&&"all-proxy-models"!==e&&!d.models.includes("all-proxy-models"));return(console.log("errorModels: ".concat(t)),t.length>0)?Promise.reject("Some models are not part of the new team's models - ".concat(t,"Team models: ").concat(d.models)):Promise.resolve()}}],children:(0,r.jsxs)(en.default,{mode:"multiple",placeholder:"Select models",style:{width:"100%"},children:[(0,r.jsx)(eA,{value:"all-team-models",children:"All Team Models"},"all-team-models"),d&&d.models?d.models.includes("all-proxy-models")?C.filter(e=>"all-proxy-models"!==e).map(e=>(0,r.jsx)(eA,{value:e,children:e},e)):d.models.map(e=>(0,r.jsx)(eA,{value:e,children:e},e)):C.map(e=>(0,r.jsx)(eA,{value:e,children:e},e))]})}),(0,r.jsx)(eo.Z.Item,{className:"mt-8",label:"Max Budget (USD)",name:"max_budget",help:"Budget cannot exceed team max budget: ".concat((null==d?void 0:d.max_budget)!==null&&(null==d?void 0:d.max_budget)!==void 0?null==d?void 0:d.max_budget:"unlimited"),rules:[{validator:async(e,l)=>{if(l&&d&&null!==d.max_budget&&l>d.max_budget)throw console.log("keyTeam.max_budget: ".concat(d.max_budget)),Error("Budget cannot exceed team max budget: $".concat(d.max_budget))}}],children:(0,r.jsx)(ed.Z,{step:.01,precision:2,width:200})}),(0,r.jsx)(eo.Z.Item,{label:"token",name:"token",hidden:!0}),(0,r.jsx)(eo.Z.Item,{label:"Team",name:"team_id",help:"the team this key belongs to",children:(0,r.jsx)(eS.Z,{value:s.team_alias,children:null==c?void 0:c.map((e,l)=>(0,r.jsx)(eN.Z,{value:e.team_id,onClick:()=>m(e),children:e.team_alias},l))})})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(em.ZP,{htmlType:"submit",children:"Edit Key"})})]})})},{visible:k,onCancel:()=>{v(!1),I(null)},token:E,onSubmit:M})]})},eI=t(76032),eC=t(35152),eT=e=>{let{userID:l,userRole:t,accessToken:s,userSpend:a,selectedTeam:o}=e;console.log("userSpend: ".concat(a));let[i,c]=(0,n.useState)(null!==a?a:0),[d,m]=(0,n.useState)(0),[u,h]=(0,n.useState)([]);(0,n.useEffect)(()=>{let e=async()=>{if(s&&l&&t&&"Admin"===t&&null==a)try{let e=await w(s);e&&(e.spend?c(e.spend):c(0),e.max_budget?m(e.max_budget):m(0))}catch(e){console.error("Error fetching global spend data:",e)}};(async()=>{try{if(null===l||null===t)return;if(null!==s){let e=(await N(s,l,t)).data.map(e=>e.id);console.log("available_model_names:",e),h(e)}}catch(e){console.error("Error fetching user models:",e)}})(),e()},[t,s,l]),(0,n.useEffect)(()=>{null!==a&&c(a)},[a]);let x=[];o&&o.models&&(x=o.models),x&&x.includes("all-proxy-models")?(console.log("user models:",u),x=u):x&&x.includes("all-team-models")?x=o.models:x&&0===x.length&&(x=u);let p=void 0!==i?i.toFixed(4):null;return console.log("spend in view user spend: ".concat(i)),(0,r.jsxs)("div",{className:"flex items-center",children:[(0,r.jsxs)("div",{children:[(0,r.jsxs)("p",{className:"text-tremor-default text-tremor-content dark:text-dark-tremor-content",children:["Total Spend"," "]}),(0,r.jsxs)("p",{className:"text-2xl text-tremor-content-strong dark:text-dark-tremor-content-strong 
font-semibold",children:["$",p]})]}),(0,r.jsx)("div",{className:"ml-auto",children:(0,r.jsxs)(ee.Z,{children:[(0,r.jsx)(et.Z,{children:(0,r.jsx)(es.Z,{children:"Team Models"})}),(0,r.jsx)(el.Z,{className:"absolute right-0 z-10 bg-white p-2 shadow-lg max-w-xs",children:(0,r.jsx)(eI.Z,{children:x.map(e=>(0,r.jsx)(eC.Z,{children:(0,r.jsx)(es.Z,{children:e})},e))})})]})})]})},eP=e=>{let{userID:l,userRole:t,selectedTeam:s,accessToken:a}=e,[o,i]=(0,n.useState)([]);(0,n.useEffect)(()=>{(async()=>{try{if(null===l||null===t)return;if(null!==a){let e=(await N(a,l,t)).data.map(e=>e.id);console.log("available_model_names:",e),i(e)}}catch(e){console.error("Error fetching user models:",e)}})()},[a,l,t]);let c=[];return s&&s.models&&(c=s.models),c&&c.includes("all-proxy-models")&&(console.log("user models:",o),c=o),(0,r.jsx)(r.Fragment,{children:(0,r.jsx)("div",{className:"mb-5",children:(0,r.jsx)("p",{className:"text-3xl text-tremor-content-strong dark:text-dark-tremor-content-strong font-semibold",children:null==s?void 0:s.team_alias})})})},eO=e=>{let l,{teams:t,setSelectedTeam:s,userRole:a}=e,o={models:[],team_id:null,team_alias:"Default Team"},[i,c]=(0,n.useState)(o);return(l="App User"===a?t:t?[...t,o]:[o],"App User"===a)?null:(0,r.jsxs)("div",{className:"mt-5 mb-5",children:[(0,r.jsx)(ea.Z,{children:"Select Team"}),(0,r.jsx)(es.Z,{children:"If you belong to multiple teams, this setting controls which team is used by default when creating new API Keys."}),(0,r.jsxs)(es.Z,{className:"mt-3 mb-3",children:[(0,r.jsx)("b",{children:"Default Team:"})," If no team_id is set for a key, it will be grouped under here."]}),l&&l.length>0?(0,r.jsx)(eS.Z,{defaultValue:"0",children:l.map((e,l)=>(0,r.jsx)(eN.Z,{value:String(l),onClick:()=>s(e),children:e.team_alias},l))}):(0,r.jsxs)(es.Z,{children:["No team created. 
",(0,r.jsx)("b",{children:"Defaulting to personal account."})]})]})},eF=t(37963),eR=t(36083);console.log("isLocal:",!1);var eM=e=>{let{userID:l,userRole:t,teams:s,keys:a,setUserRole:i,userEmail:c,setUserEmail:d,setTeams:m,setKeys:u}=e,[h,x]=(0,n.useState)(null),p=(0,o.useSearchParams)();p.get("viewSpend"),(0,o.useRouter)();let j=p.get("token"),[g,y]=(0,n.useState)(null),[f,_]=(0,n.useState)(null),[b,k]=(0,n.useState)([]),v={models:[],team_alias:"Default Team",team_id:null},[S,A]=(0,n.useState)(s?s[0]:v);if(window.addEventListener("beforeunload",function(){sessionStorage.clear()}),(0,n.useEffect)(()=>{if(j){let e=(0,eF.o)(j);if(e){if(console.log("Decoded token:",e),console.log("Decoded key:",e.key),y(e.key),e.user_role){let l=function(e){if(!e)return"Undefined Role";switch(console.log("Received user role: ".concat(e)),e.toLowerCase()){case"app_owner":case"demo_app_owner":return"App Owner";case"app_admin":case"proxy_admin":return"Admin";case"proxy_admin_viewer":return"Admin Viewer";case"app_user":return"App User";default:return"Unknown Role"}}(e.user_role);console.log("Decoded user_role:",l),i(l)}else console.log("User role not defined");e.user_email?d(e.user_email):console.log("User Email is not set ".concat(e))}}if(l&&g&&t&&!a&&!h){let e=sessionStorage.getItem("userModels"+l);e?k(JSON.parse(e)):(async()=>{try{let e=await Z(g,l,t,!1,null,null);if(console.log("received teams in user dashboard: ".concat(Object.keys(e),"; team values: ").concat(Object.entries(e.teams))),"Admin"==t){let e=await w(g);x(e),console.log("globalSpend:",e)}else x(e.user_info);u(e.keys),m(e.teams);let s=[...e.teams];s.length>0?(console.log("response['teams']: ".concat(s)),A(s[0])):A(v),sessionStorage.setItem("userData"+l,JSON.stringify(e.keys)),sessionStorage.setItem("userSpendData"+l,JSON.stringify(e.user_info));let a=(await N(g,l,t)).data.map(e=>e.id);console.log("available_model_names:",a),k(a),console.log("userModels:",b),sessionStorage.setItem("userModels"+l,JSON.stringify(a))}catch(e){console.error("There was an error fetching the data",e)}})()}},[l,j,g,a,t]),(0,n.useEffect)(()=>{if(null!==a&&null!=S){let e=0;for(let l of a)S.hasOwnProperty("team_id")&&null!==l.team_id&&l.team_id===S.team_id&&(e+=l.spend);_(e)}else if(null!==a){let e=0;for(let l of a)e+=l.spend;_(e)}},[S]),null==l||null==j){let e="/sso/key/generate";return console.log("Full URL:",e),window.location.href=e,null}if(null==g)return null;if(null==t&&i("App Owner"),t&&"Admin Viewer"==t){let{Title:e,Paragraph:l}=eR.default;return(0,r.jsxs)("div",{children:[(0,r.jsx)(e,{level:1,children:"Access Denied"}),(0,r.jsx)(l,{children:"Ask your proxy admin for access to create keys"})]})}return console.log("inside user dashboard, selected team",S),console.log("teamSpend: ".concat(f)),(0,r.jsx)("div",{className:"w-full mx-4",children:(0,r.jsx)($.Z,{numItems:1,className:"gap-2 p-8 h-[75vh] w-full 
mt-2",children:(0,r.jsxs)(H.Z,{numColSpan:1,children:[(0,r.jsx)(eP,{userID:l,userRole:t,selectedTeam:S||null,accessToken:g}),(0,r.jsx)(eT,{userID:l,userRole:t,accessToken:g,userSpend:f,selectedTeam:S||null}),(0,r.jsx)(eE,{userID:l,userRole:t,accessToken:g,selectedTeam:S||null,data:a,setData:u,teams:s}),(0,r.jsx)(eh,{userID:l,team:S||null,userRole:t,accessToken:g,data:a,setData:u},S?S.team_id:null),(0,r.jsx)(eO,{teams:s,setSelectedTeam:A,userRole:t})]})})})},eL=t(35087),eU=t(92836),eB=t(26734),eD=t(41608),eK=t(32126),ez=t(23682),eV=t(47047),eq=t(76628),eG=t(57750),eW=t(44041),eY=t(38302),eJ=t(28683),eH=t(1460),e$=t(78578),eX=t(63954),eQ=t(90252),e0=t(7905),e1=e=>{let{modelID:l,accessToken:t}=e,[s,a]=(0,n.useState)(!1),o=async()=>{try{u.ZP.info("Making API Call"),a(!0);let e=await p(t,l);console.log("model delete Response:",e),u.ZP.success("Model ".concat(l," deleted successfully")),a(!1)}catch(e){console.error("Error deleting the model:",e)}};return(0,r.jsxs)("div",{children:[(0,r.jsx)(ef.Z,{onClick:()=>a(!0),icon:ej.Z,size:"sm"}),(0,r.jsx)(ei.Z,{open:s,onOk:o,okType:"danger",onCancel:()=>a(!1),children:(0,r.jsxs)($.Z,{numItems:1,className:"gap-2 w-full",children:[(0,r.jsx)(ea.Z,{children:"Delete Model"}),(0,r.jsx)(H.Z,{numColSpan:1,children:(0,r.jsx)("p",{children:"Are you sure you want to delete this model? This action is irreversible."})}),(0,r.jsx)(H.Z,{numColSpan:1,children:(0,r.jsxs)("p",{children:["Model ID: ",(0,r.jsx)("b",{children:l})]})})]})})]})},e2=t(97766),e4=t(46495);let{Title:e5,Link:e8}=eR.default;(s=a||(a={})).OpenAI="OpenAI",s.Azure="Azure",s.Anthropic="Anthropic",s.Google_AI_Studio="Gemini (Google AI Studio)",s.Bedrock="Amazon Bedrock",s.OpenAI_Compatible="OpenAI-Compatible Endpoints (Groq, Together AI, Mistral AI, etc.)",s.Vertex_AI="Vertex AI (Anthropic, Gemini, etc.)";let e3={OpenAI:"openai",Azure:"azure",Anthropic:"anthropic",Google_AI_Studio:"gemini",Bedrock:"bedrock",OpenAI_Compatible:"openai",Vertex_AI:"vertex_ai"},e6={"BadRequestError (400)":"BadRequestErrorRetries","AuthenticationError (401)":"AuthenticationErrorRetries","TimeoutError (408)":"TimeoutErrorRetries","RateLimitError (429)":"RateLimitErrorRetries","ContentPolicyViolationError (400)":"ContentPolicyViolationErrorRetries","InternalServerError (500)":"InternalServerErrorRetries"},e7=async(e,l,t)=>{try{let s=Array.isArray(e.model)?e.model:[e.model];console.log("received deployments: ".concat(s)),console.log("received type of deployments: ".concat(typeof s)),s.forEach(async t=>{console.log("litellm_model: ".concat(t));let s={},a={};s.model=t;let r="";for(let[l,t]of(console.log("formValues add deployment:",e),Object.entries(e)))if(""!==t){if("model_name"==l)r+=t;else if("custom_llm_provider"==l)continue;else if("model"==l)continue;else if("base_model"===l)a[l]=t;else if("litellm_extra_params"==l){console.log("litellm_extra_params:",t);let e={};if(t&&void 0!=t){try{e=JSON.parse(t)}catch(e){throw u.ZP.error("Failed to parse LiteLLM Extra Params: "+e,10),Error("Failed to parse litellm_extra_params: "+e)}for(let[l,t]of Object.entries(e))s[l]=t}}else s[l]=t}let n={model_name:r,litellm_params:s,model_info:a},o=await x(l,n);console.log("response for model create call: ".concat(o.data))}),t.resetFields()}catch(e){u.ZP.error("Failed to create model: "+e,10)}};var e9=e=>{var 
l,t,s;let{accessToken:o,token:i,userRole:c,userID:d,modelData:m={data:[]},setModelData:x}=e,[p,j]=(0,n.useState)([]),[g]=eo.Z.useForm(),[y,f]=(0,n.useState)(null),[Z,_]=(0,n.useState)(""),[w,N]=(0,n.useState)([]),A=Object.values(a).filter(e=>isNaN(Number(e))),[E,I]=(0,n.useState)("OpenAI"),[C,T]=(0,n.useState)(""),[P,O]=(0,n.useState)(!1),[F,R]=(0,n.useState)(null),[M,L]=(0,n.useState)([]),[U,D]=(0,n.useState)(null),[K,z]=(0,n.useState)([]),[q,G]=(0,n.useState)([]),[W,H]=(0,n.useState)([]),[er,en]=(0,n.useState)([]),[ec,eu]=(0,n.useState)([]),[eh,ex]=(0,n.useState)([]),[ej,eA]=(0,n.useState)([]),[eE,eI]=(0,n.useState)({from:new Date(Date.now()-6048e5),to:new Date}),[eC,eT]=(0,n.useState)(null),[eP,eO]=(0,n.useState)(0),eF=e=>{R(e),O(!0)},eM=async e=>{if(console.log("handleEditSubmit:",e),null==o)return;let l={},t=null;for(let[s,a]of Object.entries(e))"model_id"!==s?l[s]=a:t=a;let s={litellm_params:l,model_info:{id:t}};console.log("handleEditSubmit payload:",s);try{await B(o,s),u.ZP.success("Model updated successfully, restart server to see updates"),O(!1),R(null)}catch(e){console.log("Error occurred")}},e9=()=>{_(new Date().toLocaleString())},le=async()=>{if(!o){console.error("Access token is missing");return}console.log("new modelGroupRetryPolicy:",eC);try{await Y(o,{router_settings:{model_group_retry_policy:eC}}),u.ZP.success("Retry settings saved successfully")}catch(e){console.error("Failed to save retry settings:",e),u.ZP.error("Failed to save retry settings")}};if((0,n.useEffect)(()=>{if(!o||!i||!c||!d)return;let e=async()=>{try{var e,l,t,s,a,r;let n=await b(o,d,c);console.log("Model data response:",n.data),x(n);let i=new Set;for(let e=0;e0&&(u=m[m.length-1],console.log("_initial_model_group:",u),D(u)),console.log("selectedModelGroup:",U);let h=await k(o,d,c,u,null===(e=eE.from)||void 0===e?void 0:e.toISOString(),null===(l=eE.to)||void 0===l?void 0:l.toISOString());console.log("Model metrics response:",h),G(h.data),H(h.all_api_bases);let p=await S(o,d,c,u,null===(t=eE.from)||void 0===t?void 0:t.toISOString(),null===(s=eE.to)||void 0===s?void 0:s.toISOString());console.log("Model exceptions response:",p),en(p.data),eu(p.exception_types);let j=await v(o,d,c,u,null===(a=eE.from)||void 0===a?void 0:a.toISOString(),null===(r=eE.to)||void 0===r?void 0:r.toISOString());console.log("slowResponses:",j),eA(j);let g=(await V(o,d,c)).router_settings;console.log("routerSettingsInfo:",g);let y=g.model_group_retry_policy,f=g.num_retries;console.log("model_group_retry_policy:",y),console.log("default_retries:",f),eT(y),eO(f)}catch(e){console.error("There was an error fetching the model data",e)}};o&&i&&c&&d&&e();let l=async()=>{let e=await h();console.log("received model cost map data: ".concat(Object.keys(e))),f(e)};null==y&&l(),e9()},[o,i,c,d,y,Z]),!m||!o||!i||!c||!d)return(0,r.jsx)("div",{children:"Loading..."});let ll=[];for(let e=0;e(console.log("GET PROVIDER CALLED! 
- ".concat(y)),null!=y&&"object"==typeof y&&e in y)?y[e].litellm_provider:"openai";if(a){let e=a.split("/"),l=e[0];n=1===e.length?h(a):l}else n="openai";r&&(o=null==r?void 0:r.input_cost_per_token,i=null==r?void 0:r.output_cost_per_token,c=null==r?void 0:r.max_tokens,d=null==r?void 0:r.max_input_tokens),(null==s?void 0:s.litellm_params)&&(u=Object.fromEntries(Object.entries(null==s?void 0:s.litellm_params).filter(e=>{let[l]=e;return"model"!==l&&"api_base"!==l}))),m.data[e].provider=n,m.data[e].input_cost=o,m.data[e].output_cost=i,m.data[e].input_cost&&(m.data[e].input_cost=(1e6*Number(m.data[e].input_cost)).toFixed(2)),m.data[e].output_cost&&(m.data[e].output_cost=(1e6*Number(m.data[e].output_cost)).toFixed(2)),m.data[e].max_tokens=c,m.data[e].max_input_tokens=d,m.data[e].api_base=null==s?void 0:null===(t=s.litellm_params)||void 0===t?void 0:t.api_base,m.data[e].cleanedLitellmParams=u,ll.push(s.model_name),console.log(m.data[e])}if(c&&"Admin Viewer"==c){let{Title:e,Paragraph:l}=eR.default;return(0,r.jsxs)("div",{children:[(0,r.jsx)(e,{level:1,children:"Access Denied"}),(0,r.jsx)(l,{children:"Ask your proxy admin for access to view all models"})]})}let lt=e=>{console.log("received provider string: ".concat(e));let l=Object.keys(a).find(l=>a[l]===e);if(l){let e=e3[l];console.log("mappingResult: ".concat(e));let t=[];"object"==typeof y&&Object.entries(y).forEach(l=>{let[s,a]=l;null!==a&&"object"==typeof a&&"litellm_provider"in a&&(a.litellm_provider===e||a.litellm_provider.includes(e))&&t.push(s)}),N(t),console.log("providerModels: ".concat(w))}},ls=async()=>{try{u.ZP.info("Running health check..."),T("");let e=await J(o);T(e)}catch(e){console.error("Error running health check:",e),T("Error running health check")}},la=async(e,l,t)=>{if(console.log("Updating model metrics for group:",e),o&&d&&c&&l&&t){console.log("inside updateModelMetrics - startTime:",l,"endTime:",t),D(e);try{let s=await k(o,d,c,e,l.toISOString(),t.toISOString());console.log("Model metrics response:",s),G(s.data),H(s.all_api_bases);let a=await S(o,d,c,e,l.toISOString(),t.toISOString());console.log("Model exceptions response:",a),en(a.data),eu(a.exception_types);let r=await v(o,d,c,e,l.toISOString(),t.toISOString());console.log("slowResponses:",r),eA(r)}catch(e){console.error("Failed to fetch model metrics",e)}}};return console.log("selectedProvider: ".concat(E)),console.log("providerModels.length: ".concat(w.length)),(0,r.jsx)("div",{style:{width:"100%",height:"100%"},children:(0,r.jsxs)(eB.Z,{className:"gap-2 p-8 h-[75vh] w-full mt-2",children:[(0,r.jsxs)(eD.Z,{className:"flex justify-between mt-2 w-full items-center",children:[(0,r.jsxs)("div",{className:"flex",children:[(0,r.jsx)(eU.Z,{children:"All Models"}),(0,r.jsx)(eU.Z,{children:"Add Model"}),(0,r.jsx)(eU.Z,{children:(0,r.jsx)("pre",{children:"/health Models"})}),(0,r.jsx)(eU.Z,{children:"Model Analytics"}),(0,r.jsx)(eU.Z,{children:"Model Retry Settings"})]}),(0,r.jsxs)("div",{className:"flex items-center space-x-2",children:[Z&&(0,r.jsxs)(es.Z,{children:["Last Refreshed: ",Z]}),(0,r.jsx)(ef.Z,{icon:eX.Z,variant:"shadow",size:"xs",className:"self-center",onClick:e9})]})]}),(0,r.jsxs)(ez.Z,{children:[(0,r.jsxs)(eK.Z,{children:[(0,r.jsxs)($.Z,{children:[(0,r.jsxs)("div",{className:"flex items-center",children:[(0,r.jsx)(es.Z,{children:"Filter by Public Model Name"}),(0,r.jsxs)(eS.Z,{className:"mb-4 mt-2 ml-2 w-50",defaultValue:U||M[0],onValueChange:e=>D("all"===e?"all":e),value:U||M[0],children:[(0,r.jsx)(eN.Z,{value:"all",children:"All 
Models"}),M.map((e,l)=>(0,r.jsx)(eN.Z,{value:e,onClick:()=>D(e),children:e},l))]})]}),(0,r.jsx)(ey.Z,{children:(0,r.jsxs)(eZ.Z,{className:"mt-5",style:{maxWidth:"1500px",width:"100%"},children:[(0,r.jsx)(eb.Z,{children:(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ek.Z,{style:{maxWidth:"150px",whiteSpace:"normal",wordBreak:"break-word"},children:"Public Model Name"}),(0,r.jsx)(ek.Z,{style:{maxWidth:"100px",whiteSpace:"normal",wordBreak:"break-word"},children:"Provider"}),"Admin"===c&&(0,r.jsx)(ek.Z,{style:{maxWidth:"150px",whiteSpace:"normal",wordBreak:"break-word"},children:"API Base"}),(0,r.jsx)(ek.Z,{style:{maxWidth:"200px",whiteSpace:"normal",wordBreak:"break-word"},children:"Extra litellm Params"}),(0,r.jsxs)(ek.Z,{style:{maxWidth:"85px",whiteSpace:"normal",wordBreak:"break-word"},children:["Input Price ",(0,r.jsx)("p",{style:{fontSize:"10px",color:"gray"},children:"/1M Tokens ($)"})]}),(0,r.jsxs)(ek.Z,{style:{maxWidth:"85px",whiteSpace:"normal",wordBreak:"break-word"},children:["Output Price ",(0,r.jsx)("p",{style:{fontSize:"10px",color:"gray"},children:"/1M Tokens ($)"})]}),(0,r.jsx)(ek.Z,{style:{maxWidth:"120px",whiteSpace:"normal",wordBreak:"break-word"},children:"Max Tokens"}),(0,r.jsx)(ek.Z,{style:{maxWidth:"50px",whiteSpace:"normal",wordBreak:"break-word"},children:"Status"})]})}),(0,r.jsx)(e_.Z,{children:m.data.filter(e=>"all"===U||e.model_name===U||null==U||""===U).map((e,l)=>(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ew.Z,{style:{maxWidth:"150px",whiteSpace:"normal",wordBreak:"break-word"},children:(0,r.jsx)(es.Z,{children:e.model_name})}),(0,r.jsx)(ew.Z,{style:{maxWidth:"100px",whiteSpace:"normal",wordBreak:"break-word"},children:e.provider}),"Admin"===c&&(0,r.jsx)(ew.Z,{style:{maxWidth:"150px",whiteSpace:"normal",wordBreak:"break-word"},children:e.api_base}),(0,r.jsx)(ew.Z,{style:{maxWidth:"200px",whiteSpace:"normal",wordBreak:"break-word"},children:(0,r.jsxs)(ee.Z,{children:[(0,r.jsx)(et.Z,{children:(0,r.jsx)(es.Z,{children:"Litellm params"})}),(0,r.jsx)(el.Z,{children:(0,r.jsx)("pre",{children:JSON.stringify(e.cleanedLitellmParams,null,2)})})]})}),(0,r.jsx)(ew.Z,{style:{maxWidth:"80px",whiteSpace:"normal",wordBreak:"break-word"},children:e.input_cost||e.litellm_params.input_cost_per_token||null}),(0,r.jsx)(ew.Z,{style:{maxWidth:"80px",whiteSpace:"normal",wordBreak:"break-word"},children:e.output_cost||e.litellm_params.output_cost_per_token||null}),(0,r.jsx)(ew.Z,{style:{maxWidth:"120px",whiteSpace:"normal",wordBreak:"break-word"},children:(0,r.jsxs)("p",{style:{fontSize:"10px"},children:["Max Tokens: ",e.max_tokens," ",(0,r.jsx)("br",{}),"Max Input Tokens: ",e.max_input_tokens]})}),(0,r.jsx)(ew.Z,{style:{maxWidth:"100px",whiteSpace:"normal",wordBreak:"break-word"},children:e.model_info.db_model?(0,r.jsx)(eg.Z,{icon:eQ.Z,size:"xs",className:"text-white",children:(0,r.jsx)("p",{style:{fontSize:"10px"},children:"DB Model"})}):(0,r.jsx)(eg.Z,{icon:e0.Z,size:"xs",className:"text-black",children:(0,r.jsx)("p",{style:{fontSize:"10px"},children:"Config Model"})})}),(0,r.jsxs)(ew.Z,{style:{maxWidth:"100px",whiteSpace:"normal",wordBreak:"break-word"},children:[(0,r.jsx)(ef.Z,{icon:ep.Z,size:"sm",onClick:()=>eF(e)}),(0,r.jsx)(e1,{modelID:e.model_info.id,accessToken:o})]})]},l))})]})})]}),(0,r.jsx)(e=>{let{visible:l,onCancel:t,model:s,onSubmit:a}=e,[n]=eo.Z.useForm(),o={},i="",c="";if(s){o=s.litellm_params,i=s.model_name;let e=s.model_info;e&&(c=e.id,console.log("model_id: ".concat(c)),o.model_id=c)}return(0,r.jsx)(ei.Z,{title:"Edit Model 
"+i,visible:l,width:800,footer:null,onOk:()=>{n.validateFields().then(e=>{a(e),n.resetFields()}).catch(e=>{console.error("Validation failed:",e)})},onCancel:t,children:(0,r.jsxs)(eo.Z,{form:n,onFinish:eM,initialValues:o,labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(eo.Z.Item,{className:"mt-8",label:"api_base",name:"api_base",children:(0,r.jsx)(Q.Z,{})}),(0,r.jsx)(eo.Z.Item,{label:"tpm",name:"tpm",tooltip:"int (optional) - Tokens limit for this deployment: in tokens per minute (tpm). Find this information on your model/providers website",children:(0,r.jsx)(ed.Z,{min:0,step:1})}),(0,r.jsx)(eo.Z.Item,{label:"rpm",name:"rpm",tooltip:"int (optional) - Rate limit for this deployment: in requests per minute (rpm). Find this information on your model/providers website",children:(0,r.jsx)(ed.Z,{min:0,step:1})}),(0,r.jsx)(eo.Z.Item,{label:"max_retries",name:"max_retries",children:(0,r.jsx)(ed.Z,{min:0,step:1})}),(0,r.jsx)(eo.Z.Item,{label:"timeout",name:"timeout",tooltip:"int (optional) - Timeout in seconds for LLM requests (Defaults to 600 seconds)",children:(0,r.jsx)(ed.Z,{min:0,step:1})}),(0,r.jsx)(eo.Z.Item,{label:"stream_timeout",name:"stream_timeout",tooltip:"int (optional) - Timeout for stream requests (seconds)",children:(0,r.jsx)(ed.Z,{min:0,step:1})}),(0,r.jsx)(eo.Z.Item,{label:"input_cost_per_token",name:"input_cost_per_token",tooltip:"float (optional) - Input cost per token",children:(0,r.jsx)(ed.Z,{min:0,step:1e-4})}),(0,r.jsx)(eo.Z.Item,{label:"output_cost_per_token",name:"output_cost_per_token",tooltip:"float (optional) - Output cost per token",children:(0,r.jsx)(ed.Z,{min:0,step:1e-4})}),(0,r.jsx)(eo.Z.Item,{label:"model_id",name:"model_id",hidden:!0})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(em.ZP,{htmlType:"submit",children:"Save"})})]})})},{visible:P,onCancel:()=>{O(!1),R(null)},model:F,onSubmit:eM})]}),(0,r.jsxs)(eK.Z,{className:"h-full",children:[(0,r.jsx)(e5,{level:2,children:"Add new model"}),(0,r.jsx)(ey.Z,{children:(0,r.jsxs)(eo.Z,{form:g,onFinish:()=>{g.validateFields().then(e=>{e7(e,o,g)}).catch(e=>{console.error("Validation failed:",e)})},labelCol:{span:10},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(eo.Z.Item,{rules:[{required:!0,message:"Required"}],label:"Provider:",name:"custom_llm_provider",tooltip:"E.g. OpenAI, Azure OpenAI, Anthropic, Bedrock, etc.",labelCol:{span:10},labelAlign:"left",children:(0,r.jsx)(eS.Z,{value:E.toString(),children:A.map((e,l)=>(0,r.jsx)(eN.Z,{value:e,onClick:()=>{lt(e),I(e)},children:e},l))})}),(0,r.jsx)(eo.Z.Item,{rules:[{required:!0,message:"Required"}],label:"Public Model Name",name:"model_name",tooltip:"Model name your users will pass in. 
Also used for load-balancing, LiteLLM will load balance between all models with this public name.",className:"mb-0",children:(0,r.jsx)(Q.Z,{placeholder:"Vertex AI (Anthropic, Gemini, etc.)"===(s=E.toString())?"gemini-pro":"Anthropic"==s?"claude-3-opus":"Amazon Bedrock"==s?"claude-3-opus":"Gemini (Google AI Studio)"==s?"gemini-pro":"gpt-3.5-turbo"})}),(0,r.jsxs)(eY.Z,{children:[(0,r.jsx)(eJ.Z,{span:10}),(0,r.jsx)(eJ.Z,{span:10,children:(0,r.jsx)(es.Z,{className:"mb-3 mt-1",children:"Model name your users will pass in."})})]}),(0,r.jsx)(eo.Z.Item,{rules:[{required:!0,message:"Required"}],label:"LiteLLM Model Name(s)",name:"model",tooltip:"Actual model name used for making litellm.completion() call.",className:"mb-0",children:"Azure"===E?(0,r.jsx)(Q.Z,{placeholder:"Enter model name"}):w.length>0?(0,r.jsx)(eV.Z,{value:w,children:w.map((e,l)=>(0,r.jsx)(eq.Z,{value:e,children:e},l))}):(0,r.jsx)(Q.Z,{placeholder:"gpt-3.5-turbo-0125"})}),(0,r.jsxs)(eY.Z,{children:[(0,r.jsx)(eJ.Z,{span:10}),(0,r.jsx)(eJ.Z,{span:10,children:(0,r.jsxs)(es.Z,{className:"mb-3 mt-1",children:["Actual model name used for making ",(0,r.jsx)(e8,{href:"https://docs.litellm.ai/docs/providers",target:"_blank",children:"litellm.completion() call"}),". We'll ",(0,r.jsx)(e8,{href:"https://docs.litellm.ai/docs/proxy/reliability#step-1---set-deployments-on-config",target:"_blank",children:"loadbalance"})," models with the same 'public name'"]})})]}),"Amazon Bedrock"!=E&&"Vertex AI (Anthropic, Gemini, etc.)"!=E&&(0,r.jsx)(eo.Z.Item,{rules:[{required:!0,message:"Required"}],label:"API Key",name:"api_key",children:(0,r.jsx)(Q.Z,{placeholder:"sk-",type:"password"})}),"OpenAI"==E&&(0,r.jsx)(eo.Z.Item,{label:"Organization ID",name:"organization_id",children:(0,r.jsx)(Q.Z,{placeholder:"[OPTIONAL] my-unique-org"})}),"Vertex AI (Anthropic, Gemini, etc.)"==E&&(0,r.jsx)(eo.Z.Item,{rules:[{required:!0,message:"Required"}],label:"Vertex Project",name:"vertex_project",children:(0,r.jsx)(Q.Z,{placeholder:"adroit-cadet-1234.."})}),"Vertex AI (Anthropic, Gemini, etc.)"==E&&(0,r.jsx)(eo.Z.Item,{rules:[{required:!0,message:"Required"}],label:"Vertex Location",name:"vertex_location",children:(0,r.jsx)(Q.Z,{placeholder:"us-east-1"})}),"Vertex AI (Anthropic, Gemini, etc.)"==E&&(0,r.jsx)(eo.Z.Item,{rules:[{required:!0,message:"Required"}],label:"Vertex Credentials",name:"vertex_credentials",className:"mb-0",children:(0,r.jsx)(e4.Z,{name:"file",accept:".json",beforeUpload:e=>{if("application/json"===e.type){let l=new FileReader;l.onload=e=>{if(e.target){let l=e.target.result;g.setFieldsValue({vertex_credentials:l})}},l.readAsText(e)}return!1},onChange(e){"uploading"!==e.file.status&&console.log(e.file,e.fileList),"done"===e.file.status?u.ZP.success("".concat(e.file.name," file uploaded successfully")):"error"===e.file.status&&u.ZP.error("".concat(e.file.name," file upload failed."))},children:(0,r.jsx)(em.ZP,{icon:(0,r.jsx)(e2.Z,{}),children:"Click to Upload"})})}),"Vertex AI (Anthropic, Gemini, etc.)"==E&&(0,r.jsxs)(eY.Z,{children:[(0,r.jsx)(eJ.Z,{span:10}),(0,r.jsx)(eJ.Z,{span:10,children:(0,r.jsx)(es.Z,{className:"mb-3 mt-1",children:"Give litellm a gcp service account(.json file), so it can make the relevant calls"})})]}),("Azure"==E||"OpenAI-Compatible Endpoints (Groq, Together AI, Mistral AI, etc.)"==E)&&(0,r.jsx)(eo.Z.Item,{rules:[{required:!0,message:"Required"}],label:"API Base",name:"api_base",children:(0,r.jsx)(Q.Z,{placeholder:"https://..."})}),"Azure"==E&&(0,r.jsx)(eo.Z.Item,{rules:[{required:!0,message:"Required"}],label:"API 
Version",name:"api_version",children:(0,r.jsx)(Q.Z,{placeholder:"2023-07-01-preview"})}),"Azure"==E&&(0,r.jsxs)("div",{children:[(0,r.jsx)(eo.Z.Item,{label:"Base Model",name:"base_model",className:"mb-0",children:(0,r.jsx)(Q.Z,{placeholder:"azure/gpt-3.5-turbo"})}),(0,r.jsxs)(eY.Z,{children:[(0,r.jsx)(eJ.Z,{span:10}),(0,r.jsx)(eJ.Z,{span:10,children:(0,r.jsxs)(es.Z,{className:"mb-2",children:["The actual model your azure deployment uses. Used for accurate cost tracking. Select name from ",(0,r.jsx)(e8,{href:"https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json",target:"_blank",children:"here"})]})})]})]}),"Amazon Bedrock"==E&&(0,r.jsx)(eo.Z.Item,{rules:[{required:!0,message:"Required"}],label:"AWS Access Key ID",name:"aws_access_key_id",tooltip:"You can provide the raw key or the environment variable (e.g. `os.environ/MY_SECRET_KEY`).",children:(0,r.jsx)(Q.Z,{placeholder:""})}),"Amazon Bedrock"==E&&(0,r.jsx)(eo.Z.Item,{rules:[{required:!0,message:"Required"}],label:"AWS Secret Access Key",name:"aws_secret_access_key",tooltip:"You can provide the raw key or the environment variable (e.g. `os.environ/MY_SECRET_KEY`).",children:(0,r.jsx)(Q.Z,{placeholder:""})}),"Amazon Bedrock"==E&&(0,r.jsx)(eo.Z.Item,{rules:[{required:!0,message:"Required"}],label:"AWS Region Name",name:"aws_region_name",tooltip:"You can provide the raw key or the environment variable (e.g. `os.environ/MY_SECRET_KEY`).",children:(0,r.jsx)(Q.Z,{placeholder:"us-east-1"})}),(0,r.jsx)(eo.Z.Item,{label:"LiteLLM Params",name:"litellm_extra_params",tooltip:"Optional litellm params used for making a litellm.completion() call.",className:"mb-0",children:(0,r.jsx)(e$.Z,{rows:4,placeholder:'{ "rpm": 100, "timeout": 0, "stream_timeout": 0 }'})}),(0,r.jsxs)(eY.Z,{children:[(0,r.jsx)(eJ.Z,{span:10}),(0,r.jsx)(eJ.Z,{span:10,children:(0,r.jsxs)(es.Z,{className:"mb-3 mt-1",children:["Pass JSON of litellm supported params ",(0,r.jsx)(e8,{href:"https://docs.litellm.ai/docs/completion/input",target:"_blank",children:"litellm.completion() call"})]})})]})]}),(0,r.jsx)("div",{style:{textAlign:"center",marginTop:"10px"},children:(0,r.jsx)(em.ZP,{htmlType:"submit",children:"Add Model"})}),(0,r.jsx)(eH.Z,{title:"Get help on our github",children:(0,r.jsx)(eR.default.Link,{href:"https://github.com/BerriAI/litellm/issues",children:"Need Help?"})})]})})]}),(0,r.jsx)(eK.Z,{children:(0,r.jsxs)(ey.Z,{children:[(0,r.jsx)(es.Z,{children:"`/health` will run a very small request through your models configured on litellm"}),(0,r.jsx)(X.Z,{onClick:ls,children:"Run `/health`"}),C&&(0,r.jsx)("pre",{children:JSON.stringify(C,null,2)})]})}),(0,r.jsxs)(eK.Z,{children:[(0,r.jsxs)($.Z,{numItems:2,className:"mt-2",children:[(0,r.jsxs)(eJ.Z,{children:[(0,r.jsx)(es.Z,{children:"Select Time Range"}),(0,r.jsx)(eL.Z,{enableSelect:!0,value:eE,onValueChange:e=>{eI(e),la(U,e.from,e.to)}})]}),(0,r.jsxs)(eJ.Z,{children:[(0,r.jsx)(es.Z,{children:"Select Model Group"}),(0,r.jsx)(eS.Z,{className:"mb-4 mt-2",defaultValue:U||M[0],value:U||M[0],children:M.map((e,l)=>(0,r.jsx)(eN.Z,{value:e,onClick:()=>la(e,eE.from,eE.to),children:e},l))})]})]}),(0,r.jsxs)($.Z,{numItems:2,children:[(0,r.jsx)(eJ.Z,{children:(0,r.jsxs)(ey.Z,{className:"mr-2 max-h-[400px] min-h-[400px]",children:[(0,r.jsx)(ea.Z,{children:"Avg Latency per Token"}),(0,r.jsx)("p",{className:"text-gray-500 italic",children:" (seconds/token)"}),(0,r.jsx)(es.Z,{className:"text-gray-500 italic mt-1 mb-1",children:"average Latency for successfull requests divided by the total 
tokens"}),q&&W&&(0,r.jsx)(eG.Z,{title:"Model Latency",className:"h-72",data:q,showLegend:!1,index:"date",categories:W,connectNulls:!0,customTooltip:e=>{var l,t;let{payload:s,active:a}=e;if(!a||!s)return null;let n=null===(t=s[0])||void 0===t?void 0:null===(l=t.payload)||void 0===l?void 0:l.date,o=s.sort((e,l)=>l.value-e.value);if(o.length>5){let e=o.length-5;(o=o.slice(0,5)).push({dataKey:"".concat(e," other deployments"),value:s.slice(5).reduce((e,l)=>e+l.value,0),color:"gray"})}return(0,r.jsxs)("div",{className:"w-150 rounded-tremor-default border border-tremor-border bg-tremor-background p-2 text-tremor-default shadow-tremor-dropdown",children:[n&&(0,r.jsxs)("p",{className:"text-tremor-content-emphasis mb-2",children:["Date: ",n]}),o.map((e,l)=>{let t=parseFloat(e.value.toFixed(5)),s=0===t&&e.value>0?"<0.00001":t.toFixed(5);return(0,r.jsxs)("div",{className:"flex justify-between",children:[(0,r.jsxs)("div",{className:"flex items-center space-x-2",children:[(0,r.jsx)("div",{className:"w-2 h-2 mt-1 rounded-full bg-".concat(e.color,"-500")}),(0,r.jsx)("p",{className:"text-tremor-content",children:e.dataKey})]}),(0,r.jsx)("p",{className:"font-medium text-tremor-content-emphasis text-righ ml-2",children:s})]},l)})]})}})]})}),(0,r.jsx)(eJ.Z,{children:(0,r.jsx)(ey.Z,{className:"ml-2 max-h-[400px] min-h-[400px] overflow-y-auto",children:(0,r.jsxs)(eZ.Z,{children:[(0,r.jsx)(eb.Z,{children:(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ek.Z,{children:"Deployment"}),(0,r.jsx)(ek.Z,{children:"Success Responses"}),(0,r.jsxs)(ek.Z,{children:["Slow Responses ",(0,r.jsx)("p",{children:"Success Responses taking 600+s"})]})]})}),(0,r.jsx)(e_.Z,{children:ej.map((e,l)=>(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ew.Z,{children:e.api_base}),(0,r.jsx)(ew.Z,{children:e.total_count}),(0,r.jsx)(ew.Z,{children:e.slow_count})]},l))})]})})})]}),(0,r.jsxs)(ey.Z,{className:"mt-4",children:[(0,r.jsx)(ea.Z,{children:"Exceptions per Model"}),(0,r.jsx)(eW.Z,{className:"h-72",data:er,index:"model",categories:ec,stack:!0,colors:["indigo-300","rose-200","#ffcc33"],yAxisWidth:30})]})]}),(0,r.jsxs)(eK.Z,{children:[(0,r.jsxs)("div",{className:"flex items-center",children:[(0,r.jsx)(es.Z,{children:"Filter by Public Model Name"}),(0,r.jsx)(eS.Z,{className:"mb-4 mt-2 ml-2 w-50",defaultValue:U||M[0],value:U||M[0],onValueChange:e=>D(e),children:M.map((e,l)=>(0,r.jsx)(eN.Z,{value:e,onClick:()=>D(e),children:e},l))})]}),(0,r.jsxs)(ea.Z,{children:["Retry Policy for ",U]}),(0,r.jsx)(es.Z,{className:"mb-6",children:"How many retries should be attempted based on the Exception"}),e6&&(0,r.jsx)("table",{children:(0,r.jsx)("tbody",{children:Object.entries(e6).map((e,l)=>{var t;let[s,a]=e,n=null==eC?void 0:null===(t=eC[U])||void 0===t?void 0:t[a];return null==n&&(n=eP),(0,r.jsxs)("tr",{className:"flex justify-between items-center mt-2",children:[(0,r.jsx)("td",{children:(0,r.jsx)(es.Z,{children:s})}),(0,r.jsx)("td",{children:(0,r.jsx)(ed.Z,{className:"ml-5",value:n,min:0,step:1,onChange:e=>{eT(l=>{var t;let s=null!==(t=null==l?void 0:l[U])&&void 0!==t?t:{};return{...null!=l?l:{},[U]:{...s,[a]:e}}})}})})]},l)})})}),(0,r.jsx)(X.Z,{className:"mt-6 mr-8",onClick:le,children:"Save"})]})]})]})})};let{Option:le}=en.default;var ll=e=>{let{userID:l,accessToken:t,teams:s}=e,[a]=eo.Z.useForm(),[o,i]=(0,n.useState)(!1),[c,d]=(0,n.useState)(null),[m,h]=(0,n.useState)([]);(0,n.useEffect)(()=>{(async()=>{try{let e=await N(t,l,"any"),s=[];for(let l=0;l{i(!1),a.resetFields()},p=()=>{i(!1),d(null),a.resetFields()},j=async e=>{try{u.ZP.info("Making API 
Call"),i(!0),console.log("formValues in create user:",e);let s=await g(t,null,e);console.log("user create Response:",s),d(s.key),u.ZP.success("API user Created"),a.resetFields(),localStorage.removeItem("userData"+l)}catch(e){console.error("Error creating the user:",e)}};return(0,r.jsxs)("div",{children:[(0,r.jsx)(X.Z,{className:"mx-auto",onClick:()=>i(!0),children:"+ Invite User"}),(0,r.jsxs)(ei.Z,{title:"Invite User",visible:o,width:800,footer:null,onOk:x,onCancel:p,children:[(0,r.jsx)(es.Z,{className:"mb-1",children:"Invite a user to login to the Admin UI and create Keys"}),(0,r.jsx)(es.Z,{className:"mb-6",children:(0,r.jsx)("b",{children:"Note: SSO Setup Required for this"})}),(0,r.jsxs)(eo.Z,{form:a,onFinish:j,labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsx)(eo.Z.Item,{label:"User Email",name:"user_email",children:(0,r.jsx)(Q.Z,{placeholder:""})}),(0,r.jsx)(eo.Z.Item,{label:"Team ID",name:"team_id",children:(0,r.jsx)(en.default,{placeholder:"Select Team ID",style:{width:"100%"},children:s?s.map(e=>(0,r.jsx)(le,{value:e.team_id,children:e.team_alias},e.team_id)):(0,r.jsx)(le,{value:null,children:"Default Team"},"default")})}),(0,r.jsx)(eo.Z.Item,{label:"Metadata",name:"metadata",children:(0,r.jsx)(ec.Z.TextArea,{rows:4,placeholder:"Enter metadata as JSON"})}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(em.ZP,{htmlType:"submit",children:"Create User"})})]})]}),c&&(0,r.jsxs)(ei.Z,{title:"User Created Successfully",visible:o,onOk:x,onCancel:p,footer:null,children:[(0,r.jsx)("p",{children:"User has been created to access your proxy. Please Ask them to Log In."}),(0,r.jsx)("br",{}),(0,r.jsx)("p",{children:(0,r.jsx)("b",{children:"Note: This Feature is only supported through SSO on the Admin UI"})})]})]})},lt=e=>{let{accessToken:l,token:t,keys:s,userRole:a,userID:o,teams:i,setKeys:c}=e,[d,m]=(0,n.useState)(null),[u,h]=(0,n.useState)(null),[x,p]=(0,n.useState)(0),[j,g]=n.useState(null),[y,f]=(0,n.useState)(null);return((0,n.useEffect)(()=>{if(!l||!t||!a||!o)return;let e=async()=>{try{let e=await Z(l,null,a,!0,x,25);console.log("user data response:",e),m(e)}catch(e){console.error("There was an error fetching the model data",e)}};l&&t&&a&&o&&e()},[l,t,a,o,x]),d&&l&&t&&a&&o)?(0,r.jsx)("div",{style:{width:"100%"},children:(0,r.jsxs)($.Z,{className:"gap-2 p-2 h-[80vh] w-full mt-8",children:[(0,r.jsx)(ll,{userID:o,accessToken:l,teams:i}),(0,r.jsxs)(ey.Z,{className:"w-full mx-auto flex-auto overflow-y-auto max-h-[80vh] mb-4",children:[(0,r.jsx)("div",{className:"mb-4 mt-1",children:(0,r.jsx)(es.Z,{children:"These are Users on LiteLLM that created API Keys. 
Automatically tracked by LiteLLM"})}),(0,r.jsx)(eB.Z,{children:(0,r.jsxs)(ez.Z,{children:[(0,r.jsx)(eK.Z,{children:(0,r.jsxs)(eZ.Z,{className:"mt-5",children:[(0,r.jsx)(eb.Z,{children:(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ek.Z,{children:"User ID"}),(0,r.jsx)(ek.Z,{children:"User Email"}),(0,r.jsx)(ek.Z,{children:"User Models"}),(0,r.jsx)(ek.Z,{children:"User Spend ($ USD)"}),(0,r.jsx)(ek.Z,{children:"User Max Budget ($ USD)"}),(0,r.jsx)(ek.Z,{children:"User API Key Aliases"})]})}),(0,r.jsx)(e_.Z,{children:d.map(e=>{var l;return(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ew.Z,{children:e.user_id}),(0,r.jsx)(ew.Z,{children:e.user_email}),(0,r.jsx)(ew.Z,{children:e.models&&e.models.length>0?e.models:"All Models"}),(0,r.jsx)(ew.Z,{children:e.spend?null===(l=e.spend)||void 0===l?void 0:l.toFixed(2):0}),(0,r.jsx)(ew.Z,{children:e.max_budget?e.max_budget:"Unlimited"}),(0,r.jsx)(ew.Z,{children:(0,r.jsx)($.Z,{numItems:2,children:e&&e.key_aliases&&e.key_aliases.filter(e=>null!==e).length>0?(0,r.jsx)(eg.Z,{size:"xs",color:"indigo",children:e.key_aliases.filter(e=>null!==e).join(", ")}):(0,r.jsx)(eg.Z,{size:"xs",color:"gray",children:"No Keys"})})})]},e.user_id)})})]})}),(0,r.jsx)(eK.Z,{children:(0,r.jsxs)("div",{className:"flex items-center",children:[(0,r.jsx)("div",{className:"flex-1"}),(0,r.jsx)("div",{className:"flex-1 flex justify-between items-center"})]})})]})})]}),function(){if(!d)return null;let e=Math.ceil(d.length/25);return(0,r.jsxs)("div",{className:"flex justify-between items-center",children:[(0,r.jsxs)("div",{children:["Showing Page ",x+1," of ",e]}),(0,r.jsxs)("div",{className:"flex",children:[(0,r.jsx)("button",{className:"bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded-l focus:outline-none",disabled:0===x,onClick:()=>p(x-1),children:"β Prev"}),(0,r.jsx)("button",{className:"bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded-r focus:outline-none",onClick:()=>{p(x+1)},children:"Next β"})]})]})}()]})}):(0,r.jsx)("div",{children:"Loading..."})},ls=e=>{let{teams:l,searchParams:t,accessToken:s,setTeams:a,userID:o,userRole:i}=e,[c]=eo.Z.useForm(),[d]=eo.Z.useForm(),{Title:m,Paragraph:h}=eR.default,[x,p]=(0,n.useState)(""),[j,g]=(0,n.useState)(!1),[y,Z]=(0,n.useState)(l?l[0]:null),[w,b]=(0,n.useState)(!1),[k,v]=(0,n.useState)(!1),[S,A]=(0,n.useState)([]),[E,I]=(0,n.useState)(!1),[C,T]=(0,n.useState)(null),[P,O]=(0,n.useState)({}),F=e=>{Z(e),g(!0)},R=async e=>{let t=e.team_id;if(console.log("handleEditSubmit:",e),null==s)return;let r=await U(s,e);l&&a(l.map(e=>e.team_id===t?r.data:e)),u.ZP.success("Team updated successfully"),g(!1),Z(null)},L=async e=>{T(e),I(!0)},B=async()=>{if(null!=C&&null!=l&&null!=s){try{await f(s,C);let e=l.filter(e=>e.team_id!==C);a(e)}catch(e){console.error("Error deleting the team:",e)}I(!1),T(null)}};(0,n.useEffect)(()=>{let e=async()=>{try{if(null===o||null===i||null===s||null===l)return;console.log("fetching team info:");let e={};for(let t=0;t<(null==l?void 0:l.length);t++){let a=l[t].team_id,r=await _(s,a);console.log("teamInfo response:",r),null!==r&&(e={...e,[a]:r})}O(e)}catch(e){console.error("Error fetching team info:",e)}};(async()=>{try{if(null===o||null===i)return;if(null!==s){let e=(await N(s,o,i)).data.map(e=>e.id);console.log("available_model_names:",e),A(e)}}catch(e){console.error("Error fetching user models:",e)}})(),e()},[s,o,i,l]);let K=async e=>{try{if(null!=s){var t;let r=null==e?void 0:e.team_alias;if((null!==(t=null==l?void 0:l.map(e=>e.team_alias))&&void 0!==t?t:[]).includes(r))throw Error("Team alias 
".concat(r," already exists, please pick another alias"));u.ZP.info("Creating Team");let n=await M(s,e);null!==l?a([...l,n]):a([n]),console.log("response for team create call: ".concat(n)),u.ZP.success("Team created"),b(!1)}}catch(e){console.error("Error creating the team:",e),u.ZP.error("Error creating the team: "+e,20)}},z=async e=>{try{if(null!=s&&null!=l){u.ZP.info("Adding Member");let t={role:"user",user_email:e.user_email,user_id:e.user_id},r=await D(s,y.team_id,t);console.log("response for team create call: ".concat(r.data));let n=l.findIndex(e=>(console.log("team.team_id=".concat(e.team_id,"; response.data.team_id=").concat(r.data.team_id)),e.team_id===r.data.team_id));if(console.log("foundIndex: ".concat(n)),-1!==n){let e=[...l];e[n]=r.data,a(e),Z(r.data)}v(!1)}}catch(e){console.error("Error creating the team:",e)}};return console.log("received teams ".concat(JSON.stringify(l))),(0,r.jsx)("div",{className:"w-full mx-4",children:(0,r.jsxs)($.Z,{numItems:1,className:"gap-2 p-8 h-[75vh] w-full mt-2",children:[(0,r.jsxs)(H.Z,{numColSpan:1,children:[(0,r.jsx)(m,{level:4,children:"All Teams"}),(0,r.jsxs)(ey.Z,{className:"w-full mx-auto flex-auto overflow-y-auto max-h-[50vh]",children:[(0,r.jsxs)(eZ.Z,{children:[(0,r.jsx)(eb.Z,{children:(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ek.Z,{children:"Team Name"}),(0,r.jsx)(ek.Z,{children:"Spend (USD)"}),(0,r.jsx)(ek.Z,{children:"Budget (USD)"}),(0,r.jsx)(ek.Z,{children:"Models"}),(0,r.jsx)(ek.Z,{children:"TPM / RPM Limits"}),(0,r.jsx)(ek.Z,{children:"Info"})]})}),(0,r.jsx)(e_.Z,{children:l&&l.length>0?l.map(e=>(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ew.Z,{style:{maxWidth:"4px",whiteSpace:"pre-wrap",overflow:"hidden"},children:e.team_alias}),(0,r.jsx)(ew.Z,{style:{maxWidth:"4px",whiteSpace:"pre-wrap",overflow:"hidden"},children:e.spend}),(0,r.jsx)(ew.Z,{style:{maxWidth:"4px",whiteSpace:"pre-wrap",overflow:"hidden"},children:e.max_budget?e.max_budget:"No limit"}),(0,r.jsx)(ew.Z,{style:{maxWidth:"8-x",whiteSpace:"pre-wrap",overflow:"hidden"},children:Array.isArray(e.models)?(0,r.jsx)("div",{style:{display:"flex",flexDirection:"column"},children:0===e.models.length?(0,r.jsx)(eg.Z,{size:"xs",className:"mb-1",color:"red",children:(0,r.jsx)(es.Z,{children:"All Proxy Models"})}):e.models.map((e,l)=>"all-proxy-models"===e?(0,r.jsx)(eg.Z,{size:"xs",className:"mb-1",color:"red",children:(0,r.jsx)(es.Z,{children:"All Proxy Models"})},l):(0,r.jsx)(eg.Z,{size:"xs",className:"mb-1",color:"blue",children:(0,r.jsx)(es.Z,{children:e.length>30?"".concat(e.slice(0,30),"..."):e})},l))}):null}),(0,r.jsx)(ew.Z,{style:{maxWidth:"4px",whiteSpace:"pre-wrap",overflow:"hidden"},children:(0,r.jsxs)(es.Z,{children:["TPM: ",e.tpm_limit?e.tpm_limit:"Unlimited"," ",(0,r.jsx)("br",{}),"RPM:"," ",e.rpm_limit?e.rpm_limit:"Unlimited"]})}),(0,r.jsxs)(ew.Z,{children:[(0,r.jsxs)(es.Z,{children:[P&&e.team_id&&P[e.team_id]&&P[e.team_id].keys&&P[e.team_id].keys.length," ","Keys"]}),(0,r.jsxs)(es.Z,{children:[P&&e.team_id&&P[e.team_id]&&P[e.team_id].team_info&&P[e.team_id].team_info.members_with_roles&&P[e.team_id].team_info.members_with_roles.length," ","Members"]})]}),(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(ef.Z,{icon:ep.Z,size:"sm",onClick:()=>F(e)}),(0,r.jsx)(ef.Z,{onClick:()=>L(e.team_id),icon:ej.Z,size:"sm"})]})]},e.team_id)):null})]}),E&&(0,r.jsx)("div",{className:"fixed z-10 inset-0 overflow-y-auto",children:(0,r.jsxs)("div",{className:"flex items-end justify-center min-h-screen pt-4 px-4 pb-20 text-center sm:block sm:p-0",children:[(0,r.jsx)("div",{className:"fixed inset-0 
transition-opacity","aria-hidden":"true",children:(0,r.jsx)("div",{className:"absolute inset-0 bg-gray-500 opacity-75"})}),(0,r.jsx)("span",{className:"hidden sm:inline-block sm:align-middle sm:h-screen","aria-hidden":"true",children:"β"}),(0,r.jsxs)("div",{className:"inline-block align-bottom bg-white rounded-lg text-left overflow-hidden shadow-xl transform transition-all sm:my-8 sm:align-middle sm:max-w-lg sm:w-full",children:[(0,r.jsx)("div",{className:"bg-white px-4 pt-5 pb-4 sm:p-6 sm:pb-4",children:(0,r.jsx)("div",{className:"sm:flex sm:items-start",children:(0,r.jsxs)("div",{className:"mt-3 text-center sm:mt-0 sm:ml-4 sm:text-left",children:[(0,r.jsx)("h3",{className:"text-lg leading-6 font-medium text-gray-900",children:"Delete Team"}),(0,r.jsx)("div",{className:"mt-2",children:(0,r.jsx)("p",{className:"text-sm text-gray-500",children:"Are you sure you want to delete this team ?"})})]})})}),(0,r.jsxs)("div",{className:"bg-gray-50 px-4 py-3 sm:px-6 sm:flex sm:flex-row-reverse",children:[(0,r.jsx)(X.Z,{onClick:B,color:"red",className:"ml-2",children:"Delete"}),(0,r.jsx)(X.Z,{onClick:()=>{I(!1),T(null)},children:"Cancel"})]})]})]})})]})]}),(0,r.jsxs)(H.Z,{numColSpan:1,children:[(0,r.jsx)(X.Z,{className:"mx-auto",onClick:()=>b(!0),children:"+ Create New Team"}),(0,r.jsx)(ei.Z,{title:"Create Team",visible:w,width:800,footer:null,onOk:()=>{b(!1),c.resetFields()},onCancel:()=>{b(!1),c.resetFields()},children:(0,r.jsxs)(eo.Z,{form:c,onFinish:K,labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(eo.Z.Item,{label:"Team Name",name:"team_alias",rules:[{required:!0,message:"Please input a team name"}],children:(0,r.jsx)(Q.Z,{placeholder:""})}),(0,r.jsx)(eo.Z.Item,{label:"Models",name:"models",children:(0,r.jsxs)(en.default,{mode:"multiple",placeholder:"Select models",style:{width:"100%"},children:[(0,r.jsx)(en.default.Option,{value:"all-proxy-models",children:"All Proxy Models"},"all-proxy-models"),S.map(e=>(0,r.jsx)(en.default.Option,{value:e,children:e},e))]})}),(0,r.jsx)(eo.Z.Item,{label:"Max Budget (USD)",name:"max_budget",children:(0,r.jsx)(ed.Z,{step:.01,precision:2,width:200})}),(0,r.jsx)(eo.Z.Item,{label:"Tokens per minute Limit (TPM)",name:"tpm_limit",children:(0,r.jsx)(ed.Z,{step:1,width:400})}),(0,r.jsx)(eo.Z.Item,{label:"Requests per minute Limit (RPM)",name:"rpm_limit",children:(0,r.jsx)(ed.Z,{step:1,width:400})})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(em.ZP,{htmlType:"submit",children:"Create Team"})})]})})]}),(0,r.jsxs)(H.Z,{numColSpan:1,children:[(0,r.jsx)(m,{level:4,children:"Team Members"}),(0,r.jsx)(h,{children:"If you belong to multiple teams, this setting controls which teams members you see."}),l&&l.length>0?(0,r.jsx)(eS.Z,{defaultValue:"0",children:l.map((e,l)=>(0,r.jsx)(eN.Z,{value:String(l),onClick:()=>{Z(e)},children:e.team_alias},l))}):(0,r.jsxs)(h,{children:["No team created. 
",(0,r.jsx)("b",{children:"Defaulting to personal account."})]})]}),(0,r.jsxs)(H.Z,{numColSpan:1,children:[(0,r.jsx)(ey.Z,{className:"w-full mx-auto flex-auto overflow-y-auto max-h-[50vh]",children:(0,r.jsxs)(eZ.Z,{children:[(0,r.jsx)(eb.Z,{children:(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ek.Z,{children:"Member Name"}),(0,r.jsx)(ek.Z,{children:"Role"})]})}),(0,r.jsx)(e_.Z,{children:y?y.members_with_roles.map((e,l)=>(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ew.Z,{children:e.user_email?e.user_email:e.user_id?e.user_id:null}),(0,r.jsx)(ew.Z,{children:e.role})]},l)):null})]})}),y&&(0,r.jsx)(e=>{let{visible:l,onCancel:t,team:s,onSubmit:a}=e,[n]=eo.Z.useForm();return(0,r.jsx)(ei.Z,{title:"Edit Team",visible:l,width:800,footer:null,onOk:()=>{n.validateFields().then(e=>{a({...e,team_id:s.team_id}),n.resetFields()}).catch(e=>{console.error("Validation failed:",e)})},onCancel:t,children:(0,r.jsxs)(eo.Z,{form:n,onFinish:R,initialValues:s,labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(eo.Z.Item,{label:"Team Name",name:"team_alias",rules:[{required:!0,message:"Please input a team name"}],children:(0,r.jsx)(Q.Z,{})}),(0,r.jsx)(eo.Z.Item,{label:"Models",name:"models",children:(0,r.jsxs)(en.default,{mode:"multiple",placeholder:"Select models",style:{width:"100%"},children:[(0,r.jsx)(en.default.Option,{value:"all-proxy-models",children:"All Proxy Models"},"all-proxy-models"),S&&S.map(e=>(0,r.jsx)(en.default.Option,{value:e,children:e},e))]})}),(0,r.jsx)(eo.Z.Item,{label:"Max Budget (USD)",name:"max_budget",children:(0,r.jsx)(ed.Z,{step:.01,precision:2,width:200})}),(0,r.jsx)(eo.Z.Item,{label:"Tokens per minute Limit (TPM)",name:"tpm_limit",children:(0,r.jsx)(ed.Z,{step:1,width:400})}),(0,r.jsx)(eo.Z.Item,{label:"Requests per minute Limit (RPM)",name:"rpm_limit",children:(0,r.jsx)(ed.Z,{step:1,width:400})}),(0,r.jsx)(eo.Z.Item,{label:"Requests per minute Limit (RPM)",name:"team_id",hidden:!0})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(em.ZP,{htmlType:"submit",children:"Edit Team"})})]})})},{visible:j,onCancel:()=>{g(!1),Z(null)},team:y,onSubmit:R})]}),(0,r.jsxs)(H.Z,{numColSpan:1,children:[(0,r.jsx)(X.Z,{className:"mx-auto mb-5",onClick:()=>v(!0),children:"+ Add member"}),(0,r.jsx)(ei.Z,{title:"Add member",visible:k,width:800,footer:null,onOk:()=>{v(!1),d.resetFields()},onCancel:()=>{v(!1),d.resetFields()},children:(0,r.jsxs)(eo.Z,{form:c,onFinish:z,labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(eo.Z.Item,{label:"Email",name:"user_email",className:"mb-4",children:(0,r.jsx)(ec.Z,{name:"user_email",className:"px-3 py-2 border rounded-md w-full"})}),(0,r.jsx)("div",{className:"text-center mb-4",children:"OR"}),(0,r.jsx)(eo.Z.Item,{label:"User ID",name:"user_id",className:"mb-4",children:(0,r.jsx)(ec.Z,{name:"user_id",className:"px-3 py-2 border rounded-md w-full"})})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(em.ZP,{htmlType:"submit",children:"Add member"})})]})})]})]})})},la=t(18190),lr=e=>{let l,{searchParams:t,accessToken:s,showSSOBanner:a}=e,[o]=eo.Z.useForm(),[i]=eo.Z.useForm(),{Title:c,Paragraph:d}=eR.default,[m,h]=(0,n.useState)(""),[x,p]=(0,n.useState)(null),[j,g]=(0,n.useState)(!1),[y,f]=(0,n.useState)(!1),[Z,_]=(0,n.useState)(!1),[w,b]=(0,n.useState)(!1),[k,v]=(0,n.useState)(!1);try{l=window.location.origin}catch(e){l=""}l+="/fallback/login";let 
S=()=>{v(!1)},N=["proxy_admin","proxy_admin_viewer"];(0,n.useEffect)(()=>{(async()=>{if(null!=s){let e=[],l=await R(s,"proxy_admin_viewer");l.forEach(l=>{e.push({user_role:l.user_role,user_id:l.user_id,user_email:l.user_email})}),console.log("proxy viewers: ".concat(l));let t=await R(s,"proxy_admin");t.forEach(l=>{e.push({user_role:l.user_role,user_id:l.user_id,user_email:l.user_email})}),console.log("proxy admins: ".concat(t)),console.log("combinedList: ".concat(e)),p(e)}})()},[s]);let A=()=>{_(!1),i.resetFields()},E=()=>{_(!1),i.resetFields()},I=e=>(0,r.jsxs)(eo.Z,{form:o,onFinish:e,labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(eo.Z.Item,{label:"Email",name:"user_email",className:"mb-4",children:(0,r.jsx)(ec.Z,{name:"user_email",className:"px-3 py-2 border rounded-md w-full"})}),(0,r.jsx)("div",{className:"text-center mb-4",children:"OR"}),(0,r.jsx)(eo.Z.Item,{label:"User ID",name:"user_id",className:"mb-4",children:(0,r.jsx)(ec.Z,{name:"user_id",className:"px-3 py-2 border rounded-md w-full"})})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(em.ZP,{htmlType:"submit",children:"Add member"})})]}),C=(e,l,t)=>(0,r.jsxs)(eo.Z,{form:o,onFinish:e,labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(eo.Z.Item,{rules:[{required:!0,message:"Required"}],label:"User Role",name:"user_role",labelCol:{span:10},labelAlign:"left",children:(0,r.jsx)(eS.Z,{value:l,children:N.map((e,l)=>(0,r.jsx)(eN.Z,{value:e,children:e},l))})}),(0,r.jsx)(eo.Z.Item,{label:"Team ID",name:"user_id",hidden:!0,initialValue:t,valuePropName:"user_id",className:"mt-8",children:(0,r.jsx)(ec.Z,{value:t,disabled:!0})})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(em.ZP,{htmlType:"submit",children:"Update role"})})]}),T=async e=>{try{if(null!=s&&null!=x){u.ZP.info("Making API Call");let l=await K(s,e,null);console.log("response for team create call: ".concat(l));let t=x.findIndex(e=>(console.log("user.user_id=".concat(e.user_id,"; response.user_id=").concat(l.user_id)),e.user_id===l.user_id));console.log("foundIndex: ".concat(t)),-1==t&&(console.log("updates admin with new user"),x.push(l),p(x)),u.ZP.success("Refresh tab to see updated user role"),_(!1)}}catch(e){console.error("Error creating the key:",e)}},P=async e=>{try{if(null!=s&&null!=x){u.ZP.info("Making API Call");let l=await K(s,e,"proxy_admin_viewer");console.log("response for team create call: ".concat(l));let t=x.findIndex(e=>(console.log("user.user_id=".concat(e.user_id,"; response.user_id=").concat(l.user_id)),e.user_id===l.user_id));console.log("foundIndex: ".concat(t)),-1==t&&(console.log("updates admin with new user"),x.push(l),p(x)),g(!1)}}catch(e){console.error("Error creating the key:",e)}},O=async e=>{try{if(null!=s&&null!=x){u.ZP.info("Making API Call"),e.user_email,e.user_id;let l=await K(s,e,"proxy_admin");console.log("response for team create call: ".concat(l));let t=x.findIndex(e=>(console.log("user.user_id=".concat(e.user_id,"; response.user_id=").concat(l.user_id)),e.user_id===l.user_id));console.log("foundIndex: ".concat(t)),-1==t&&(console.log("updates admin with new user"),x.push(l),p(x)),f(!1)}}catch(e){console.error("Error creating the key:",e)}},F=async e=>{null!=s&&Y(s,{environment_variables:{PROXY_BASE_URL:e.proxy_base_url,GOOGLE_CLIENT_ID:e.google_client_id,GOOGLE_CLIENT_SECRET:e.google_client_secret}})};return console.log("admins: ".concat(null==x?void 
0:x.length)),(0,r.jsxs)("div",{className:"w-full m-2 mt-2 p-8",children:[(0,r.jsx)(c,{level:4,children:"Admin Access "}),(0,r.jsxs)(d,{children:[a&&(0,r.jsx)("a",{href:"https://docs.litellm.ai/docs/proxy/ui#restrict-ui-access",children:"Requires SSO Setup"}),(0,r.jsx)("br",{}),(0,r.jsx)("b",{children:"Proxy Admin: "})," Can create keys, teams, users, add models, etc. ",(0,r.jsx)("br",{}),(0,r.jsx)("b",{children:"Proxy Admin Viewer: "}),"Can just view spend. They cannot create keys, teams or grant users access to new models."," "]}),(0,r.jsxs)($.Z,{numItems:1,className:"gap-2 p-2 w-full",children:[(0,r.jsx)(H.Z,{numColSpan:1,children:(0,r.jsx)(ey.Z,{className:"w-full mx-auto flex-auto overflow-y-auto max-h-[50vh]",children:(0,r.jsxs)(eZ.Z,{children:[(0,r.jsx)(eb.Z,{children:(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ek.Z,{children:"Member Name"}),(0,r.jsx)(ek.Z,{children:"Role"})]})}),(0,r.jsx)(e_.Z,{children:x?x.map((e,l)=>(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ew.Z,{children:e.user_email?e.user_email:e.user_id?e.user_id:null}),(0,r.jsx)(ew.Z,{children:e.user_role}),(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(ef.Z,{icon:ep.Z,size:"sm",onClick:()=>_(!0)}),(0,r.jsx)(ei.Z,{title:"Update role",visible:Z,width:800,footer:null,onOk:A,onCancel:E,children:C(T,e.user_role,e.user_id)})]})]},l)):null})]})})}),(0,r.jsx)(H.Z,{numColSpan:1,children:(0,r.jsxs)("div",{className:"flex justify-start",children:[(0,r.jsx)(X.Z,{className:"mr-4 mb-5",onClick:()=>f(!0),children:"+ Add admin"}),(0,r.jsx)(ei.Z,{title:"Add admin",visible:y,width:800,footer:null,onOk:()=>{f(!1),i.resetFields()},onCancel:()=>{f(!1),i.resetFields()},children:I(O)}),(0,r.jsx)(X.Z,{className:"mb-5",onClick:()=>g(!0),children:"+ Add viewer"}),(0,r.jsx)(ei.Z,{title:"Add viewer",visible:j,width:800,footer:null,onOk:()=>{g(!1),i.resetFields()},onCancel:()=>{g(!1),i.resetFields()},children:I(P)})]})})]}),(0,r.jsxs)($.Z,{children:[(0,r.jsx)(c,{level:4,children:"Add SSO"}),(0,r.jsxs)("div",{className:"flex justify-start mb-4",children:[(0,r.jsx)(X.Z,{onClick:()=>b(!0),children:"Add SSO"}),(0,r.jsx)(ei.Z,{title:"Add SSO",visible:w,width:800,footer:null,onOk:()=>{b(!1),o.resetFields()},onCancel:()=>{b(!1),o.resetFields()},children:(0,r.jsxs)(eo.Z,{form:o,onFinish:e=>{O(e),F(e),b(!1),v(!0)},labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(eo.Z.Item,{label:"Admin Email",name:"user_email",rules:[{required:!0,message:"Please enter the email of the proxy admin"}],children:(0,r.jsx)(ec.Z,{})}),(0,r.jsx)(eo.Z.Item,{label:"PROXY BASE URL",name:"proxy_base_url",rules:[{required:!0,message:"Please enter the proxy base url"}],children:(0,r.jsx)(ec.Z,{})}),(0,r.jsx)(eo.Z.Item,{label:"GOOGLE CLIENT ID",name:"google_client_id",rules:[{required:!0,message:"Please enter the google client id"}],children:(0,r.jsx)(ec.Z.Password,{})}),(0,r.jsx)(eo.Z.Item,{label:"GOOGLE CLIENT SECRET",name:"google_client_secret",rules:[{required:!0,message:"Please enter the google client secret"}],children:(0,r.jsx)(ec.Z.Password,{})})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(em.ZP,{htmlType:"submit",children:"Save"})})]})}),(0,r.jsxs)(ei.Z,{title:"SSO Setup Instructions",visible:k,width:800,footer:null,onOk:S,onCancel:()=>{v(!1)},children:[(0,r.jsx)("p",{children:"Follow these steps to complete the SSO setup:"}),(0,r.jsx)(es.Z,{className:"mt-2",children:"1. DO NOT Exit this TAB"}),(0,r.jsx)(es.Z,{className:"mt-2",children:"2. 
Open a new tab, visit your proxy base url"}),(0,r.jsx)(es.Z,{className:"mt-2",children:"3. Confirm your SSO is configured correctly and you can login on the new Tab"}),(0,r.jsx)(es.Z,{className:"mt-2",children:"4. If Step 3 is successful, you can close this tab"}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(em.ZP,{onClick:S,children:"Done"})})]})]}),(0,r.jsxs)(la.Z,{title:"Login without SSO",color:"teal",children:["If you need to login without sso, you can access ",(0,r.jsxs)("a",{href:l,target:"_blank",children:[(0,r.jsx)("b",{children:l})," "]})]})]})]})},ln=t(42556);let lo=[{name:"slack",variables:{LANGFUSE_HOST:null,LANGFUSE_PUBLIC_KEY:null,LANGFUSE_SECRET_KEY:null,OPENMETER_API_KEY:null,SLACK_WEBHOOK_URL:null}},{name:"langfuse",variables:{LANGFUSE_HOST:null,LANGFUSE_PUBLIC_KEY:null,LANGFUSE_SECRET_KEY:null,OPENMETER_API_KEY:null,SLACK_WEBHOOK_URL:null}},{name:"openmeter",variables:{LANGFUSE_HOST:null,LANGFUSE_PUBLIC_KEY:null,LANGFUSE_SECRET_KEY:null,OPENMETER_API_KEY:null,SLACK_WEBHOOK_URL:null}}];var li=e=>{let{accessToken:l,userRole:t,userID:s}=e,[a,o]=(0,n.useState)(lo),[i,c]=(0,n.useState)([]),[d,m]=(0,n.useState)(!1),[h]=eo.Z.useForm(),[x,p]=(0,n.useState)(null),[j,g]=(0,n.useState)([]),[y,f]=(0,n.useState)(""),[Z,_]=(0,n.useState)({}),[w,b]=(0,n.useState)([]),k=e=>{w.includes(e)?b(w.filter(l=>l!==e)):b([...w,e])},v={llm_exceptions:"LLM Exceptions",llm_too_slow:"LLM Responses Too Slow",llm_requests_hanging:"LLM Requests Hanging",budget_alerts:"Budget Alerts (API Keys, Users)",db_exceptions:"Database Exceptions (Read/Write)",daily_reports:"Weekly/Monthly Spend Reports"};(0,n.useEffect)(()=>{l&&t&&s&&V(l,s,t).then(e=>{console.log("callbacks",e);let l=lo;o(l=l.map(l=>{let t=e.callbacks.find(e=>e.name===l.name);return t?{...l,variables:{...l.variables,...t.variables}}:l}));let t=e.alerts;if(console.log("alerts_data",t),t&&t.length>0){let e=t[0];console.log("_alert_info",e);let l=e.variables.SLACK_WEBHOOK_URL;console.log("catch_all_webhook",l),b(e.active_alerts),f(l),_(e.alerts_to_webhook)}c(t)})},[l,t,s]);let S=e=>w&&w.includes(e),N=e=>{if(!l)return;let t=Object.fromEntries(Object.entries(e.variables).map(e=>{var l;let[t,s]=e;return[t,(null===(l=document.querySelector('input[name="'.concat(t,'"]')))||void 0===l?void 0:l.value)||s]}));console.log("updatedVariables",t),console.log("updateAlertTypes",j);let s={environment_variables:t,litellm_settings:{success_callback:[e.name]}};try{Y(l,s)}catch(e){u.ZP.error("Failed to update callback: "+e,20)}u.ZP.success("Callback updated successfully")},A=()=>{l&&h.validateFields().then(e=>{if(console.log("Form values:",e),"langfuse"===e.callback){Y(l,{environment_variables:{LANGFUSE_PUBLIC_KEY:e.langfusePublicKey,LANGFUSE_SECRET_KEY:e.langfusePrivateKey},litellm_settings:{success_callback:[e.callback]}});let t={name:e.callback,variables:{SLACK_WEBHOOK_URL:null,LANGFUSE_HOST:null,LANGFUSE_PUBLIC_KEY:e.langfusePublicKey,LANGFUSE_SECRET_KEY:e.langfusePrivateKey,OPENMETER_API_KEY:null}};o(a?[...a,t]:[t])}else if("slack"===e.callback){console.log("values.slackWebhookUrl: ".concat(e.slackWebhookUrl)),Y(l,{general_settings:{alerting:["slack"],alerting_threshold:300},environment_variables:{SLACK_WEBHOOK_URL:e.slackWebhookUrl}}),console.log("values.callback: ".concat(e.callback));let t={name:e.callback,variables:{SLACK_WEBHOOK_URL:e.slackWebhookUrl,LANGFUSE_HOST:null,LANGFUSE_PUBLIC_KEY:null,LANGFUSE_SECRET_KEY:null,OPENMETER_API_KEY:null}};o(a?[...a,t]:[t])}else 
if("openmeter"==e.callback){console.log("values.openMeterApiKey: ".concat(e.openMeterApiKey)),Y(l,{environment_variables:{OPENMETER_API_KEY:e.openMeterApiKey},litellm_settings:{success_callback:[e.callback]}});let t={name:e.callback,variables:{SLACK_WEBHOOK_URL:null,LANGFUSE_HOST:null,LANGFUSE_PUBLIC_KEY:null,LANGFUSE_SECRET_KEY:null,OPENMETER_API_KEY:e.openMeterAPIKey}};o(a?[...a,t]:[t])}m(!1),h.resetFields(),p(null)})};return l?(console.log("callbacks: ".concat(a)),(0,r.jsxs)("div",{className:"w-full mx-4",children:[(0,r.jsxs)($.Z,{numItems:1,className:"gap-2 p-8 w-full mt-2",children:[(0,r.jsx)(la.Z,{title:"[UI] Presidio PII + Guardrails Coming Soon. https://docs.litellm.ai/docs/proxy/pii_masking",color:"sky"}),(0,r.jsxs)(eB.Z,{children:[(0,r.jsxs)(eD.Z,{variant:"line",defaultValue:"1",children:[(0,r.jsx)(eU.Z,{value:"1",children:"Logging Callbacks"}),(0,r.jsx)(eU.Z,{value:"2",children:"Alerting"})]}),(0,r.jsxs)(ez.Z,{children:[(0,r.jsx)(eK.Z,{children:(0,r.jsx)(ey.Z,{children:(0,r.jsxs)(eZ.Z,{children:[(0,r.jsx)(eb.Z,{children:(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ek.Z,{children:"Callback"}),(0,r.jsx)(ek.Z,{children:"Callback Env Vars"})]})}),(0,r.jsx)(e_.Z,{children:a.filter(e=>"slack"!==e.name).map((e,t)=>{var s;return(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ew.Z,{children:(0,r.jsx)(eg.Z,{color:"emerald",children:e.name})}),(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)("ul",{children:Object.entries(null!==(s=e.variables)&&void 0!==s?s:{}).filter(l=>{let[t,s]=l;return t.toLowerCase().includes(e.name)}).map(e=>{let[l,t]=e;return(0,r.jsxs)("li",{children:[(0,r.jsx)(es.Z,{className:"mt-2",children:l}),"LANGFUSE_HOST"===l?(0,r.jsx)("p",{children:"default value=https://cloud.langfuse.com"}):(0,r.jsx)("div",{}),(0,r.jsx)(Q.Z,{name:l,defaultValue:t,type:"password"})]},l)})}),(0,r.jsx)(X.Z,{className:"mt-2",onClick:()=>N(e),children:"Save Changes"}),(0,r.jsx)(X.Z,{onClick:()=>z(l,e.name),className:"mx-2",children:"Test Callback"})]})]},t)})})]})})}),(0,r.jsx)(eK.Z,{children:(0,r.jsxs)(ey.Z,{children:[(0,r.jsxs)(es.Z,{className:"my-2",children:["Alerts are only supported for Slack Webhook URLs. 
Get your webhook urls from ",(0,r.jsx)("a",{href:"https://api.slack.com/messaging/webhooks",target:"_blank",style:{color:"blue"},children:"here"})]}),(0,r.jsxs)(eZ.Z,{children:[(0,r.jsx)(eb.Z,{children:(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ek.Z,{}),(0,r.jsx)(ek.Z,{}),(0,r.jsx)(ek.Z,{children:"Slack Webhook URL"})]})}),(0,r.jsx)(e_.Z,{children:Object.entries(v).map((e,l)=>{let[t,s]=e;return(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ew.Z,{children:(0,r.jsx)(ln.Z,{id:"switch",name:"switch",checked:S(t),onChange:()=>k(t)})}),(0,r.jsx)(ew.Z,{children:(0,r.jsx)(es.Z,{children:s})}),(0,r.jsx)(ew.Z,{children:(0,r.jsx)(Q.Z,{name:t,type:"password",defaultValue:Z&&Z[t]?Z[t]:y})})]},l)})})]}),(0,r.jsx)(X.Z,{size:"xs",className:"mt-2",onClick:()=>{if(!l)return;let e={};Object.entries(v).forEach(l=>{let[t,s]=l,a=document.querySelector('input[name="'.concat(t,'"]'));console.log("key",t),console.log("webhookInput",a);let r=(null==a?void 0:a.value)||"";console.log("newWebhookValue",r),e[t]=r}),console.log("updatedAlertToWebhooks",e);let t={general_settings:{alert_to_webhook_url:e,alert_types:w}};console.log("payload",t);try{Y(l,t)}catch(e){u.ZP.error("Failed to update alerts: "+e,20)}u.ZP.success("Alerts updated successfully")},children:"Save Changes"}),(0,r.jsx)(X.Z,{onClick:()=>z(l,"slack"),className:"mx-2",children:"Test Alerts"})]})})]})]})]}),(0,r.jsx)(ei.Z,{title:"Add Callback",visible:d,onOk:A,width:800,onCancel:()=>{m(!1),h.resetFields(),p(null)},footer:null,children:(0,r.jsxs)(eo.Z,{form:h,layout:"vertical",onFinish:A,children:[(0,r.jsx)(eo.Z.Item,{label:"Callback",name:"callback",rules:[{required:!0,message:"Please select a callback"}],children:(0,r.jsxs)(en.default,{onChange:e=>{p(e)},children:[(0,r.jsx)(en.default.Option,{value:"langfuse",children:"langfuse"}),(0,r.jsx)(en.default.Option,{value:"openmeter",children:"openmeter"})]})}),"langfuse"===x&&(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(eo.Z.Item,{label:"LANGFUSE_PUBLIC_KEY",name:"langfusePublicKey",rules:[{required:!0,message:"Please enter the public key"}],children:(0,r.jsx)(Q.Z,{type:"password"})}),(0,r.jsx)(eo.Z.Item,{label:"LANGFUSE_PRIVATE_KEY",name:"langfusePrivateKey",rules:[{required:!0,message:"Please enter the private key"}],children:(0,r.jsx)(Q.Z,{type:"password"})})]}),"openmeter"==x&&(0,r.jsx)(r.Fragment,{children:(0,r.jsx)(eo.Z.Item,{label:"OPENMETER_API_KEY",name:"openMeterApiKey",rules:[{required:!0,message:"Please enter the openmeter api key"}],children:(0,r.jsx)(Q.Z,{type:"password"})})}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(em.ZP,{htmlType:"submit",children:"Save"})})]})})]})):null};let{Option:lc}=en.default;var ld=e=>{let{models:l,accessToken:t,routerSettings:s,setRouterSettings:a}=e,[o]=eo.Z.useForm(),[i,c]=(0,n.useState)(!1),[d,m]=(0,n.useState)("");return(0,r.jsxs)("div",{children:[(0,r.jsx)(X.Z,{className:"mx-auto",onClick:()=>c(!0),children:"+ Add Fallbacks"}),(0,r.jsx)(ei.Z,{title:"Add Fallbacks",visible:i,width:800,footer:null,onOk:()=>{c(!1),o.resetFields()},onCancel:()=>{c(!1),o.resetFields()},children:(0,r.jsxs)(eo.Z,{form:o,onFinish:e=>{console.log(e);let{model_name:l,models:r}=e,n=[...s.fallbacks||[],{[l]:r}],i={...s,fallbacks:n};console.log(i);try{Y(t,{router_settings:i}),a(i)}catch(e){u.ZP.error("Failed to update router settings: "+e,20)}u.ZP.success("router settings updated successfully"),c(!1),o.resetFields()},labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(eo.Z.Item,{label:"Public Model 
Name",name:"model_name",rules:[{required:!0,message:"Set the model to fallback for"}],help:"required",children:(0,r.jsx)(eS.Z,{defaultValue:d,children:l&&l.map((e,l)=>(0,r.jsx)(eN.Z,{value:e,onClick:()=>m(e),children:e},l))})}),(0,r.jsx)(eo.Z.Item,{label:"Fallback Models",name:"models",rules:[{required:!0,message:"Please select a model"}],help:"required",children:(0,r.jsx)(eV.Z,{value:l,children:l&&l.filter(e=>e!=d).map(e=>(0,r.jsx)(eq.Z,{value:e,children:e},e))})})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(em.ZP,{htmlType:"submit",children:"Add Fallbacks"})})]})})]})},lm=t(12968);async function lu(e,l){console.log("isLocal:",!1);let t=window.location.origin,s=new lm.ZP.OpenAI({apiKey:l,baseURL:t,dangerouslyAllowBrowser:!0});try{let l=await s.chat.completions.create({model:e,messages:[{role:"user",content:"Hi, this is a test message"}],mock_testing_fallbacks:!0});u.ZP.success((0,r.jsxs)("span",{children:["Test model=",(0,r.jsx)("strong",{children:e}),", received model=",(0,r.jsx)("strong",{children:l.model}),". See"," ",(0,r.jsx)("a",{href:"#",onClick:()=>window.open("https://docs.litellm.ai/docs/proxy/reliability","_blank"),style:{textDecoration:"underline",color:"blue"},children:"curl"})]}))}catch(e){u.ZP.error("Error occurred while generating model response. Please try again. Error: ".concat(e),20)}}let lh={ttl:3600,lowest_latency_buffer:0},lx=e=>{let{selectedStrategy:l,strategyArgs:t,paramExplanation:s}=e;return(0,r.jsxs)(ee.Z,{children:[(0,r.jsx)(et.Z,{className:"text-sm font-medium text-tremor-content-strong dark:text-dark-tremor-content-strong",children:"Routing Strategy Specific Args"}),(0,r.jsx)(el.Z,{children:"latency-based-routing"==l?(0,r.jsx)(ey.Z,{children:(0,r.jsxs)(eZ.Z,{children:[(0,r.jsx)(eb.Z,{children:(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ek.Z,{children:"Setting"}),(0,r.jsx)(ek.Z,{children:"Value"})]})}),(0,r.jsx)(e_.Z,{children:Object.entries(t).map(e=>{let[l,t]=e;return(0,r.jsxs)(ev.Z,{children:[(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(es.Z,{children:l}),(0,r.jsx)("p",{style:{fontSize:"0.65rem",color:"#808080",fontStyle:"italic"},className:"mt-1",children:s[l]})]}),(0,r.jsx)(ew.Z,{children:(0,r.jsx)(Q.Z,{name:l,defaultValue:"object"==typeof t?JSON.stringify(t,null,2):t.toString()})})]},l)})})]})}):(0,r.jsx)(es.Z,{children:"No specific settings"})})]})};var lp=e=>{let{accessToken:l,userRole:t,userID:s,modelData:a}=e,[o,i]=(0,n.useState)({}),[c,d]=(0,n.useState)({}),[m,h]=(0,n.useState)([]),[x,p]=(0,n.useState)(!1),[j]=eo.Z.useForm(),[g,y]=(0,n.useState)(null),[f,Z]=(0,n.useState)(null),[_,w]=(0,n.useState)(null),b={routing_strategy_args:"(dict) Arguments to pass to the routing strategy",routing_strategy:"(string) Routing strategy to use",allowed_fails:"(int) Number of times a deployment can fail before being added to cooldown",cooldown_time:"(int) time in seconds to cooldown a deployment after failure",num_retries:"(int) Number of retries for failed requests. Defaults to 0.",timeout:"(float) Timeout for requests. Defaults to None.",retry_after:"(int) Minimum time to wait before retrying a failed request",ttl:"(int) Sliding window to look back over when calculating the average latency of a deployment. Default - 1 hour (in seconds).",lowest_latency_buffer:"(float) Shuffle between deployments within this % of the lowest latency. Default - 0 (i.e. 
always pick lowest latency)."};(0,n.useEffect)(()=>{l&&t&&s&&(V(l,s,t).then(e=>{console.log("callbacks",e),i(e.router_settings)}),q(l).then(e=>{h(e)}))},[l,t,s]);let k=async e=>{if(l){console.log("received key: ".concat(e)),console.log("routerSettings['fallbacks']: ".concat(o.fallbacks)),o.fallbacks.map(l=>(e in l&&delete l[e],l));try{await Y(l,{router_settings:o}),i({...o}),Z(o.routing_strategy),u.ZP.success("Router settings updated successfully")}catch(e){u.ZP.error("Failed to update router settings: "+e,20)}}},v=(e,l)=>{h(m.map(t=>t.field_name===e?{...t,field_value:l}:t))},S=(e,t)=>{if(!l)return;let s=m[t].field_value;if(null!=s&&void 0!=s)try{G(l,e,s);let t=m.map(l=>l.field_name===e?{...l,stored_in_db:!0}:l);h(t)}catch(e){}},N=(e,t)=>{if(l)try{W(l,e);let t=m.map(l=>l.field_name===e?{...l,stored_in_db:null,field_value:null}:l);h(t)}catch(e){}},A=e=>{if(!l)return;console.log("router_settings",e);let t=Object.fromEntries(Object.entries(e).map(e=>{let[l,t]=e;if("routing_strategy_args"!==l&&"routing_strategy"!==l){var s;return[l,(null===(s=document.querySelector('input[name="'.concat(l,'"]')))||void 0===s?void 0:s.value)||t]}if("routing_strategy"==l)return[l,f];if("routing_strategy_args"==l&&"latency-based-routing"==f){let e={},l=document.querySelector('input[name="lowest_latency_buffer"]'),t=document.querySelector('input[name="ttl"]');return(null==l?void 0:l.value)&&(e.lowest_latency_buffer=Number(l.value)),(null==t?void 0:t.value)&&(e.ttl=Number(t.value)),console.log("setRoutingStrategyArgs: ".concat(e)),["routing_strategy_args",e]}return null}).filter(e=>null!=e));console.log("updatedVariables",t);try{Y(l,{router_settings:t})}catch(e){u.ZP.error("Failed to update router settings: "+e,20)}u.ZP.success("router settings updated successfully")};return l?(0,r.jsx)("div",{className:"w-full mx-4",children:(0,r.jsxs)(eB.Z,{className:"gap-2 p-8 h-[75vh] w-full mt-2",children:[(0,r.jsxs)(eD.Z,{variant:"line",defaultValue:"1",children:[(0,r.jsx)(eU.Z,{value:"1",children:"Loadbalancing"}),(0,r.jsx)(eU.Z,{value:"2",children:"Fallbacks"}),(0,r.jsx)(eU.Z,{value:"3",children:"General"})]}),(0,r.jsxs)(ez.Z,{children:[(0,r.jsx)(eK.Z,{children:(0,r.jsxs)($.Z,{numItems:1,className:"gap-2 p-8 w-full mt-2",children:[(0,r.jsx)(ea.Z,{children:"Router Settings"}),(0,r.jsxs)(ey.Z,{children:[(0,r.jsxs)(eZ.Z,{children:[(0,r.jsx)(eb.Z,{children:(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ek.Z,{children:"Setting"}),(0,r.jsx)(ek.Z,{children:"Value"})]})}),(0,r.jsx)(e_.Z,{children:Object.entries(o).filter(e=>{let[l,t]=e;return"fallbacks"!=l&&"context_window_fallbacks"!=l&&"routing_strategy_args"!=l}).map(e=>{let[l,t]=e;return(0,r.jsxs)(ev.Z,{children:[(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(es.Z,{children:l}),(0,r.jsx)("p",{style:{fontSize:"0.65rem",color:"#808080",fontStyle:"italic"},className:"mt-1",children:b[l]})]}),(0,r.jsx)(ew.Z,{children:"routing_strategy"==l?(0,r.jsxs)(eS.Z,{defaultValue:t,className:"w-full max-w-md",onValueChange:Z,children:[(0,r.jsx)(eN.Z,{value:"usage-based-routing",children:"usage-based-routing"}),(0,r.jsx)(eN.Z,{value:"latency-based-routing",children:"latency-based-routing"}),(0,r.jsx)(eN.Z,{value:"simple-shuffle",children:"simple-shuffle"})]}):(0,r.jsx)(Q.Z,{name:l,defaultValue:"object"==typeof 
t?JSON.stringify(t,null,2):t.toString()})})]},l)})})]}),(0,r.jsx)(lx,{selectedStrategy:f,strategyArgs:o&&o.routing_strategy_args&&Object.keys(o.routing_strategy_args).length>0?o.routing_strategy_args:lh,paramExplanation:b})]}),(0,r.jsx)(H.Z,{children:(0,r.jsx)(X.Z,{className:"mt-2",onClick:()=>A(o),children:"Save Changes"})})]})}),(0,r.jsxs)(eK.Z,{children:[(0,r.jsxs)(eZ.Z,{children:[(0,r.jsx)(eb.Z,{children:(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ek.Z,{children:"Model Name"}),(0,r.jsx)(ek.Z,{children:"Fallbacks"})]})}),(0,r.jsx)(e_.Z,{children:o.fallbacks&&o.fallbacks.map((e,t)=>Object.entries(e).map(e=>{let[s,a]=e;return(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ew.Z,{children:s}),(0,r.jsx)(ew.Z,{children:Array.isArray(a)?a.join(", "):a}),(0,r.jsx)(ew.Z,{children:(0,r.jsx)(X.Z,{onClick:()=>lu(s,l),children:"Test Fallback"})}),(0,r.jsx)(ew.Z,{children:(0,r.jsx)(ef.Z,{icon:ej.Z,size:"sm",onClick:()=>k(s)})})]},t.toString()+s)}))})]}),(0,r.jsx)(ld,{models:(null==a?void 0:a.data)?a.data.map(e=>e.model_name):[],accessToken:l,routerSettings:o,setRouterSettings:i})]}),(0,r.jsx)(eK.Z,{children:(0,r.jsx)(ey.Z,{children:(0,r.jsxs)(eZ.Z,{children:[(0,r.jsx)(eb.Z,{children:(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ek.Z,{children:"Setting"}),(0,r.jsx)(ek.Z,{children:"Value"}),(0,r.jsx)(ek.Z,{children:"Status"}),(0,r.jsx)(ek.Z,{children:"Action"})]})}),(0,r.jsx)(e_.Z,{children:m.map((e,l)=>(0,r.jsxs)(ev.Z,{children:[(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(es.Z,{children:e.field_name}),(0,r.jsx)("p",{style:{fontSize:"0.65rem",color:"#808080",fontStyle:"italic"},className:"mt-1",children:e.field_description})]}),(0,r.jsx)(ew.Z,{children:"Integer"==e.field_type?(0,r.jsx)(ed.Z,{step:1,value:e.field_value,onChange:l=>v(e.field_name,l)}):null}),(0,r.jsx)(ew.Z,{children:!0==e.stored_in_db?(0,r.jsx)(eg.Z,{icon:eQ.Z,className:"text-white",children:"In DB"}):!1==e.stored_in_db?(0,r.jsx)(eg.Z,{className:"text-gray bg-white outline",children:"In Config"}):(0,r.jsx)(eg.Z,{className:"text-gray bg-white outline",children:"Not Set"})}),(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(X.Z,{onClick:()=>S(e.field_name,l),children:"Update"}),(0,r.jsx)(ef.Z,{icon:ej.Z,color:"red",onClick:()=>N(e.field_name,l),children:"Reset"})]})]},l))})]})})})]})]})}):null},lj=t(67951),lg=e=>{let{}=e;return(0,r.jsx)(r.Fragment,{children:(0,r.jsx)($.Z,{className:"gap-2 p-8 h-[80vh] w-full mt-2",children:(0,r.jsxs)("div",{className:"mb-5",children:[(0,r.jsx)("p",{className:"text-2xl text-tremor-content-strong dark:text-dark-tremor-content-strong font-semibold",children:"OpenAI Compatible Proxy: API Reference"}),(0,r.jsx)(es.Z,{className:"mt-2 mb-2",children:"LiteLLM is OpenAI Compatible. This means your API Key works with the OpenAI SDK. Just replace the base_url to point to your litellm proxy. 
Example Below "}),(0,r.jsxs)(eB.Z,{children:[(0,r.jsxs)(eD.Z,{children:[(0,r.jsx)(eU.Z,{children:"OpenAI Python SDK"}),(0,r.jsx)(eU.Z,{children:"LlamaIndex"}),(0,r.jsx)(eU.Z,{children:"Langchain Py"})]}),(0,r.jsxs)(ez.Z,{children:[(0,r.jsx)(eK.Z,{children:(0,r.jsx)(lj.Z,{language:"python",children:'\nimport openai\nclient = openai.OpenAI(\n api_key="your_api_key",\n base_url="http://0.0.0.0:4000" # LiteLLM Proxy is OpenAI compatible, Read More: https://docs.litellm.ai/docs/proxy/user_keys\n)\n\nresponse = client.chat.completions.create(\n model="gpt-3.5-turbo", # model to send to the proxy\n messages = [\n {\n "role": "user",\n "content": "this is a test request, write a short poem"\n }\n ]\n)\n\nprint(response)\n '})}),(0,r.jsx)(eK.Z,{children:(0,r.jsx)(lj.Z,{language:"python",children:'\nimport os, dotenv\n\nfrom llama_index.llms import AzureOpenAI\nfrom llama_index.embeddings import AzureOpenAIEmbedding\nfrom llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext\n\nllm = AzureOpenAI(\n engine="azure-gpt-3.5", # model_name on litellm proxy\n temperature=0.0,\n azure_endpoint="http://0.0.0.0:4000", # litellm proxy endpoint\n api_key="sk-1234", # litellm proxy API Key\n api_version="2023-07-01-preview",\n)\n\nembed_model = AzureOpenAIEmbedding(\n deployment_name="azure-embedding-model",\n azure_endpoint="http://0.0.0.0:4000",\n api_key="sk-1234",\n api_version="2023-07-01-preview",\n)\n\n\ndocuments = SimpleDirectoryReader("llama_index_data").load_data()\nservice_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)\nindex = VectorStoreIndex.from_documents(documents, service_context=service_context)\n\nquery_engine = index.as_query_engine()\nresponse = query_engine.query("What did the author do growing up?")\nprint(response)\n\n '})}),(0,r.jsx)(eK.Z,{children:(0,r.jsx)(lj.Z,{language:"python",children:'\nfrom langchain.chat_models import ChatOpenAI\nfrom langchain.prompts.chat import (\n ChatPromptTemplate,\n HumanMessagePromptTemplate,\n SystemMessagePromptTemplate,\n)\nfrom langchain.schema import HumanMessage, SystemMessage\n\nchat = ChatOpenAI(\n openai_api_base="http://0.0.0.0:4000",\n model = "gpt-3.5-turbo",\n temperature=0.1\n)\n\nmessages = [\n SystemMessage(\n content="You are a helpful assistant that im using to make a test request to."\n ),\n HumanMessage(\n content="test from litellm. tell me why it\'s amazing in 1 sentence"\n ),\n]\nresponse = chat(messages)\n\nprint(response)\n\n '})})]})]})]})})})};async function ly(e,l,t,s){console.log("isLocal:",!1);let a=window.location.origin,r=new lm.ZP.OpenAI({apiKey:s,baseURL:a,dangerouslyAllowBrowser:!0});try{for await(let s of(await r.chat.completions.create({model:t,stream:!0,messages:[{role:"user",content:e}]})))console.log(s),s.choices[0].delta.content&&l(s.choices[0].delta.content)}catch(e){u.ZP.error("Error occurred while generating model response. Please try again. 
Error: ".concat(e),20)}}var lf=e=>{let{accessToken:l,token:t,userRole:s,userID:a}=e,[o,i]=(0,n.useState)(""),[c,d]=(0,n.useState)(""),[m,u]=(0,n.useState)([]),[h,x]=(0,n.useState)(void 0),[p,j]=(0,n.useState)([]);(0,n.useEffect)(()=>{l&&t&&s&&a&&(async()=>{try{let e=await N(l,a,s);if(console.log("model_info:",e),(null==e?void 0:e.data.length)>0){let l=e.data.map(e=>({value:e.id,label:e.id}));console.log(l),j(l),x(e.data[0].id)}}catch(e){console.error("Error fetching model info:",e)}})()},[l,a,s]);let g=(e,l)=>{u(t=>{let s=t[t.length-1];return s&&s.role===e?[...t.slice(0,t.length-1),{role:e,content:s.content+l}]:[...t,{role:e,content:l}]})},y=async()=>{if(""!==c.trim()&&o&&t&&s&&a){u(e=>[...e,{role:"user",content:c}]);try{h&&await ly(c,e=>g("assistant",e),h,o)}catch(e){console.error("Error fetching model response",e),g("assistant","Error fetching model response")}d("")}};if(s&&"Admin Viewer"==s){let{Title:e,Paragraph:l}=eR.default;return(0,r.jsxs)("div",{children:[(0,r.jsx)(e,{level:1,children:"Access Denied"}),(0,r.jsx)(l,{children:"Ask your proxy admin for access to test models"})]})}return(0,r.jsx)("div",{style:{width:"100%",position:"relative"},children:(0,r.jsx)($.Z,{className:"gap-2 p-8 h-[80vh] w-full mt-2",children:(0,r.jsx)(ey.Z,{children:(0,r.jsxs)(eB.Z,{children:[(0,r.jsx)(eD.Z,{children:(0,r.jsx)(eU.Z,{children:"Chat"})}),(0,r.jsx)(ez.Z,{children:(0,r.jsxs)(eK.Z,{children:[(0,r.jsx)("div",{className:"sm:max-w-2xl",children:(0,r.jsxs)($.Z,{numItems:2,children:[(0,r.jsxs)(H.Z,{children:[(0,r.jsx)(es.Z,{children:"API Key"}),(0,r.jsx)(Q.Z,{placeholder:"Type API Key here",type:"password",onValueChange:i,value:o})]}),(0,r.jsxs)(H.Z,{className:"mx-2",children:[(0,r.jsx)(es.Z,{children:"Select Model:"}),(0,r.jsx)(en.default,{placeholder:"Select a Model",onChange:e=>{console.log("selected ".concat(e)),x(e)},options:p,style:{width:"200px"}})]})]})}),(0,r.jsxs)(eZ.Z,{className:"mt-5",style:{display:"block",maxHeight:"60vh",overflowY:"auto"},children:[(0,r.jsx)(eb.Z,{children:(0,r.jsx)(ev.Z,{children:(0,r.jsx)(ew.Z,{})})}),(0,r.jsx)(e_.Z,{children:m.map((e,l)=>(0,r.jsx)(ev.Z,{children:(0,r.jsx)(ew.Z,{children:"".concat(e.role,": ").concat(e.content)})},l))})]}),(0,r.jsx)("div",{className:"mt-3",style:{position:"absolute",bottom:5,width:"95%"},children:(0,r.jsxs)("div",{className:"flex",children:[(0,r.jsx)(Q.Z,{type:"text",value:c,onChange:e=>d(e.target.value),placeholder:"Type your message..."}),(0,r.jsx)(X.Z,{onClick:y,className:"ml-2",children:"Send"})]})})]})})]})})})})},lZ=t(33509),l_=t(95781);let{Sider:lw}=lZ.default;var lb=e=>{let{setPage:l,userRole:t,defaultSelectedKey:s}=e;return"Admin Viewer"==t?(0,r.jsx)(lZ.default,{style:{minHeight:"100vh",maxWidth:"120px"},children:(0,r.jsx)(lw,{width:120,children:(0,r.jsxs)(l_.Z,{mode:"inline",defaultSelectedKeys:s||["4"],style:{height:"100%",borderRight:0},children:[(0,r.jsx)(l_.Z.Item,{onClick:()=>l("api-keys"),children:"API Keys"},"4"),(0,r.jsx)(l_.Z.Item,{onClick:()=>l("models"),children:"Models"},"2"),(0,r.jsx)(l_.Z.Item,{onClick:()=>l("llm-playground"),children:"Chat UI"},"3"),(0,r.jsx)(l_.Z.Item,{onClick:()=>l("usage"),children:"Usage"},"1")]})})}):(0,r.jsx)(lZ.default,{style:{minHeight:"100vh",maxWidth:"145px"},children:(0,r.jsx)(lw,{width:145,children:(0,r.jsxs)(l_.Z,{mode:"inline",defaultSelectedKeys:s||["1"],style:{height:"100%",borderRight:0},children:[(0,r.jsx)(l_.Z.Item,{onClick:()=>l("api-keys"),children:(0,r.jsx)(es.Z,{children:"API 
Keys"})},"1"),(0,r.jsx)(l_.Z.Item,{onClick:()=>l("llm-playground"),children:(0,r.jsx)(es.Z,{children:"Test Key"})},"3"),"Admin"==t?(0,r.jsx)(l_.Z.Item,{onClick:()=>l("models"),children:(0,r.jsx)(es.Z,{children:"Models"})},"2"):null,"Admin"==t?(0,r.jsx)(l_.Z.Item,{onClick:()=>l("usage"),children:(0,r.jsx)(es.Z,{children:"Usage"})},"4"):null,"Admin"==t?(0,r.jsx)(l_.Z.Item,{onClick:()=>l("teams"),children:(0,r.jsx)(es.Z,{children:"Teams"})},"6"):null,"Admin"==t?(0,r.jsx)(l_.Z.Item,{onClick:()=>l("users"),children:(0,r.jsx)(es.Z,{children:"Users"})},"5"):null,"Admin"==t?(0,r.jsx)(l_.Z.Item,{onClick:()=>l("settings"),children:(0,r.jsx)(es.Z,{children:"Logging & Alerts"})},"8"):null,"Admin"==t?(0,r.jsx)(l_.Z.Item,{onClick:()=>l("general-settings"),children:(0,r.jsx)(es.Z,{children:"Router Settings"})},"9"):null,"Admin"==t?(0,r.jsx)(l_.Z.Item,{onClick:()=>l("admin-panel"),children:(0,r.jsx)(es.Z,{children:"Admin"})},"7"):null,(0,r.jsx)(l_.Z.Item,{onClick:()=>l("api_ref"),children:(0,r.jsx)(es.Z,{children:"API Reference"})},"11")]})})})},lk=t(67989),lv=e=>{let{accessToken:l,token:t,userRole:s,userID:a,keys:o}=e,i=new Date,[c,d]=(0,n.useState)([]),[m,u]=(0,n.useState)([]),[h,x]=(0,n.useState)([]),[p,j]=(0,n.useState)([]),[g,y]=(0,n.useState)([]),[f,Z]=(0,n.useState)([]),[_,w]=(0,n.useState)([]),[b,k]=(0,n.useState)([]),[v,S]=(0,n.useState)(""),[N,R]=(0,n.useState)({from:new Date(Date.now()-6048e5),to:new Date}),M=new Date(i.getFullYear(),i.getMonth(),1),L=new Date(i.getFullYear(),i.getMonth()+1,0),U=z(M),B=z(L);console.log("keys in usage",o);let D=async(e,t,s)=>{if(!e||!t||!l)return;t.setHours(23,59,59,999),e.setHours(0,0,0,0),console.log("uiSelectedKey",s);let a=await P(l,s,e.toISOString(),t.toISOString());console.log("End user data updated successfully",a),j(a)},K=async(e,t)=>{e&&t&&l&&(t.setHours(23,59,59,999),e.setHours(0,0,0,0),Z((await E(l,e.toISOString(),t.toISOString())).spend_per_tag),console.log("Tag spend data updated successfully"))};function z(e){let l=e.getFullYear(),t=e.getMonth()+1,s=e.getDate();return"".concat(l,"-").concat(t<10?"0"+t:t,"-").concat(s<10?"0"+s:s)}return console.log("Start date is ".concat(U)),console.log("End date is ".concat(B)),(0,n.useEffect)(()=>{l&&t&&s&&a&&(async()=>{try{if(console.log("user role: ".concat(s)),"Admin"==s||"Admin Viewer"==s){var e,r;let t=await C(l);d(t);let s=(await T(l)).map(e=>({key:(e.key_alias||e.key_name||e.api_key).substring(0,10),spend:e.total_spend}));u(s);let a=(await O(l)).map(e=>({key:e.model,spend:e.total_spend}));x(a);let n=await A(l);console.log("teamSpend",n),y(n.daily_spend),w(n.teams);let o=n.total_spend_per_team;o=o.map(e=>(e.name=e.team_id||"",e.value=e.total_spend||0,e.value=e.value.toFixed(2),e)),k(o);let i=await E(l,null===(e=N.from)||void 0===e?void 0:e.toISOString(),null===(r=N.to)||void 0===r?void 0:r.toISOString());Z(i.spend_per_tag);let c=await P(l,null,void 0,void 0);j(c),console.log("spend/user result",c)}else"App Owner"==s&&await I(l,t,s,a,U,B).then(async e=>{if(console.log("result from spend logs call",e),"daily_spend"in e){let l=e.daily_spend;console.log("daily spend",l),d(l);let t=e.top_api_keys;u(t)}else{let t=(await F(l,function(e){let l=[];e.forEach(e=>{Object.entries(e).forEach(e=>{let[t,s]=e;"spend"!==t&&"startTime"!==t&&"models"!==t&&"users"!==t&&l.push({key:t,spend:s})})}),l.sort((e,l)=>Number(l.spend)-Number(e.spend));let t=l.slice(0,5).map(e=>e.key);return console.log("topKeys: 
".concat(Object.keys(t[0]))),t}(e))).info.map(e=>({key:(e.key_name||e.key_alias).substring(0,10),spend:e.spend}));u(t),d(e)}})}catch(e){console.error("There was an error fetching the data",e)}})()},[l,t,s,a,U,B]),(0,r.jsxs)("div",{style:{width:"100%"},className:"p-8",children:[(0,r.jsx)(eT,{userID:a,userRole:s,accessToken:l,userSpend:null,selectedTeam:null}),(0,r.jsxs)(eB.Z,{children:[(0,r.jsxs)(eD.Z,{className:"mt-2",children:[(0,r.jsx)(eU.Z,{children:"All Up"}),(0,r.jsx)(eU.Z,{children:"Team Based Usage"}),(0,r.jsx)(eU.Z,{children:"End User Usage"}),(0,r.jsx)(eU.Z,{children:"Tag Based Usage"})]}),(0,r.jsxs)(ez.Z,{children:[(0,r.jsx)(eK.Z,{children:(0,r.jsxs)($.Z,{numItems:2,className:"gap-2 h-[75vh] w-full",children:[(0,r.jsx)(H.Z,{numColSpan:2,children:(0,r.jsxs)(ey.Z,{children:[(0,r.jsx)(ea.Z,{children:"Monthly Spend"}),(0,r.jsx)(eW.Z,{data:c,index:"date",categories:["spend"],colors:["blue"],valueFormatter:e=>"$ ".concat(new Intl.NumberFormat("us").format(e).toString()),yAxisWidth:100,tickGap:5})]})}),(0,r.jsx)(H.Z,{numColSpan:1,children:(0,r.jsxs)(ey.Z,{children:[(0,r.jsx)(ea.Z,{children:"Top API Keys"}),(0,r.jsx)(eW.Z,{className:"mt-4 h-40",data:m,index:"key",categories:["spend"],colors:["blue"],yAxisWidth:80,tickGap:5,layout:"vertical",showXAxis:!1,showLegend:!1})]})}),(0,r.jsx)(H.Z,{numColSpan:1,children:(0,r.jsxs)(ey.Z,{children:[(0,r.jsx)(ea.Z,{children:"Top Models"}),(0,r.jsx)(eW.Z,{className:"mt-4 h-40",data:h,index:"key",categories:["spend"],colors:["blue"],yAxisWidth:200,layout:"vertical",showXAxis:!1,showLegend:!1})]})}),(0,r.jsx)(H.Z,{numColSpan:1})]})}),(0,r.jsx)(eK.Z,{children:(0,r.jsxs)($.Z,{numItems:2,className:"gap-2 h-[75vh] w-full",children:[(0,r.jsxs)(H.Z,{numColSpan:2,children:[(0,r.jsxs)(ey.Z,{className:"mb-2",children:[(0,r.jsx)(ea.Z,{children:"Total Spend Per Team"}),(0,r.jsx)(lk.Z,{data:b})]}),(0,r.jsxs)(ey.Z,{children:[(0,r.jsx)(ea.Z,{children:"Daily Spend Per Team"}),(0,r.jsx)(eW.Z,{className:"h-72",data:g,showLegend:!0,index:"date",categories:_,yAxisWidth:80,colors:["blue","green","yellow","red","purple"],stack:!0})]})]}),(0,r.jsx)(H.Z,{numColSpan:2})]})}),(0,r.jsxs)(eK.Z,{children:[(0,r.jsxs)("p",{className:"mb-2 text-gray-500 italic text-[12px]",children:["End-Users of your LLM API calls. 
Tracked when a `user` param is passed in your LLM calls ",(0,r.jsx)("a",{className:"text-blue-500",href:"https://docs.litellm.ai/docs/proxy/users",target:"_blank",children:"docs here"})]}),(0,r.jsxs)($.Z,{numItems:2,children:[(0,r.jsxs)(H.Z,{children:[(0,r.jsx)(es.Z,{children:"Select Time Range"}),(0,r.jsx)(eL.Z,{enableSelect:!0,value:N,onValueChange:e=>{R(e),D(e.from,e.to,null)}})]}),(0,r.jsxs)(H.Z,{children:[(0,r.jsx)(es.Z,{children:"Select Key"}),(0,r.jsxs)(eS.Z,{defaultValue:"all-keys",children:[(0,r.jsx)(eN.Z,{value:"all-keys",onClick:()=>{D(N.from,N.to,null)},children:"All Keys"},"all-keys"),null==o?void 0:o.map((e,l)=>e&&null!==e.key_alias&&e.key_alias.length>0?(0,r.jsx)(eN.Z,{value:String(l),onClick:()=>{D(N.from,N.to,e.token)},children:e.key_alias},l):null)]})]})]}),(0,r.jsx)(ey.Z,{className:"mt-4",children:(0,r.jsxs)(eZ.Z,{className:"max-h-[70vh] min-h-[500px]",children:[(0,r.jsx)(eb.Z,{children:(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ek.Z,{children:"End User"}),(0,r.jsx)(ek.Z,{children:"Spend"}),(0,r.jsx)(ek.Z,{children:"Total Events"})]})}),(0,r.jsx)(e_.Z,{children:null==p?void 0:p.map((e,l)=>{var t;return(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ew.Z,{children:e.end_user}),(0,r.jsx)(ew.Z,{children:null===(t=e.total_spend)||void 0===t?void 0:t.toFixed(4)}),(0,r.jsx)(ew.Z,{children:e.total_count})]},l)})})]})})]}),(0,r.jsx)(eK.Z,{children:(0,r.jsxs)($.Z,{numItems:2,className:"gap-2 h-[75vh] w-full mb-4",children:[(0,r.jsxs)(H.Z,{numColSpan:2,children:[(0,r.jsx)(eL.Z,{className:"mb-4",enableSelect:!0,value:N,onValueChange:e=>{R(e),K(e.from,e.to)}}),(0,r.jsxs)(ey.Z,{children:[(0,r.jsx)(ea.Z,{children:"Spend Per Tag"}),(0,r.jsxs)(es.Z,{children:["Get Started Tracking cost per tag ",(0,r.jsx)("a",{className:"text-blue-500",href:"https://docs.litellm.ai/docs/proxy/enterprise#tracking-spend-for-custom-tags",target:"_blank",children:"here"})]}),(0,r.jsx)(eW.Z,{className:"h-72",data:f,index:"name",categories:["spend"],colors:["blue"]})]})]}),(0,r.jsx)(H.Z,{numColSpan:2})]})})]})]})]})},lS=()=>{let{Title:e,Paragraph:l}=eR.default,[t,s]=(0,n.useState)(""),[a,i]=(0,n.useState)(null),[c,d]=(0,n.useState)(null),[u,h]=(0,n.useState)(null),[x,p]=(0,n.useState)(!0),j=(0,o.useSearchParams)(),[g,y]=(0,n.useState)({data:[]}),f=j.get("userID"),Z=j.get("token"),[_,w]=(0,n.useState)("api-keys"),[b,k]=(0,n.useState)(null);return(0,n.useEffect)(()=>{if(Z){let e=(0,eF.o)(Z);if(e){if(console.log("Decoded token:",e),console.log("Decoded key:",e.key),k(e.key),e.user_role){let l=function(e){if(!e)return"Undefined Role";switch(console.log("Received user role: ".concat(e.toLowerCase())),console.log("Received user role length: ".concat(e.toLowerCase().length)),e.toLowerCase()){case"app_owner":case"demo_app_owner":return"App Owner";case"app_admin":case"proxy_admin":return"Admin";case"proxy_admin_viewer":return"Admin Viewer";case"app_user":return"App User";default:return"Unknown Role"}}(e.user_role);console.log("Decoded user_role:",l),s(l),"Admin Viewer"==l&&w("usage")}else console.log("User role not defined");e.user_email?i(e.user_email):console.log("User Email is not set ".concat(e)),e.login_method?p("username_password"==e.login_method):console.log("User Email is not set ".concat(e))}}},[Z]),(0,r.jsx)(n.Suspense,{fallback:(0,r.jsx)("div",{children:"Loading..."}),children:(0,r.jsxs)("div",{className:"flex flex-col min-h-screen",children:[(0,r.jsx)(m,{userID:f,userRole:t,userEmail:a,showSSOBanner:x}),(0,r.jsxs)("div",{className:"flex flex-1 
overflow-auto",children:[(0,r.jsx)("div",{className:"mt-8",children:(0,r.jsx)(lb,{setPage:w,userRole:t,defaultSelectedKey:null})}),"api-keys"==_?(0,r.jsx)(eM,{userID:f,userRole:t,teams:c,keys:u,setUserRole:s,userEmail:a,setUserEmail:i,setTeams:d,setKeys:h}):"models"==_?(0,r.jsx)(e9,{userID:f,userRole:t,token:Z,accessToken:b,modelData:g,setModelData:y}):"llm-playground"==_?(0,r.jsx)(lf,{userID:f,userRole:t,token:Z,accessToken:b}):"users"==_?(0,r.jsx)(lt,{userID:f,userRole:t,token:Z,keys:u,teams:c,accessToken:b,setKeys:h}):"teams"==_?(0,r.jsx)(ls,{teams:c,setTeams:d,searchParams:j,accessToken:b,userID:f,userRole:t}):"admin-panel"==_?(0,r.jsx)(lr,{setTeams:d,searchParams:j,accessToken:b,showSSOBanner:x}):"api_ref"==_?(0,r.jsx)(lg,{}):"settings"==_?(0,r.jsx)(li,{userID:f,userRole:t,accessToken:b}):"general-settings"==_?(0,r.jsx)(lp,{userID:f,userRole:t,accessToken:b,modelData:g}):(0,r.jsx)(lv,{userID:f,userRole:t,token:Z,accessToken:b,keys:u})]})]})})}}},function(e){e.O(0,[936,884,971,69,744],function(){return e(e.s=20661)}),_N_E=e.O()}]); \ No newline at end of file diff --git a/litellm/proxy/_experimental/out/_next/static/obp5wqVSVDMiDTC414cR8/_buildManifest.js b/litellm/proxy/_experimental/out/_next/static/l-0LDfSCdaUCAbcLIx_QC/_buildManifest.js similarity index 100% rename from litellm/proxy/_experimental/out/_next/static/obp5wqVSVDMiDTC414cR8/_buildManifest.js rename to litellm/proxy/_experimental/out/_next/static/l-0LDfSCdaUCAbcLIx_QC/_buildManifest.js diff --git a/litellm/proxy/_experimental/out/_next/static/obp5wqVSVDMiDTC414cR8/_ssgManifest.js b/litellm/proxy/_experimental/out/_next/static/l-0LDfSCdaUCAbcLIx_QC/_ssgManifest.js similarity index 100% rename from litellm/proxy/_experimental/out/_next/static/obp5wqVSVDMiDTC414cR8/_ssgManifest.js rename to litellm/proxy/_experimental/out/_next/static/l-0LDfSCdaUCAbcLIx_QC/_ssgManifest.js diff --git a/litellm/proxy/_experimental/out/index.html b/litellm/proxy/_experimental/out/index.html index 930018e005..66765eacb2 100644 --- a/litellm/proxy/_experimental/out/index.html +++ b/litellm/proxy/_experimental/out/index.html @@ -1 +1 @@ -LiteLLM Dashboard \ No newline at end of file +LiteLLM Dashboard \ No newline at end of file diff --git a/litellm/proxy/_experimental/out/index.txt b/litellm/proxy/_experimental/out/index.txt index d67a480b37..cecddd99e8 100644 --- a/litellm/proxy/_experimental/out/index.txt +++ b/litellm/proxy/_experimental/out/index.txt @@ -1,7 +1,7 @@ 2:I[77831,[],""] -3:I[7926,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","884","static/chunks/884-7576ee407a2ecbe6.js","931","static/chunks/app/page-6a39771cacf75ea6.js"],""] +3:I[4858,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","884","static/chunks/884-7576ee407a2ecbe6.js","931","static/chunks/app/page-f20fdea77aed85ba.js"],""] 4:I[5613,[],""] 5:I[31778,[],""] 
-0:["obp5wqVSVDMiDTC414cR8",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f04e46b02318b660.css","precedence":"next","crossOrigin":""}]],"$L6"]]]] +0:["l-0LDfSCdaUCAbcLIx_QC",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 
0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f04e46b02318b660.css","precedence":"next","crossOrigin":""}]],"$L6"]]]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 1:null diff --git a/litellm/proxy/_logging.py b/litellm/proxy/_logging.py new file mode 100644 index 0000000000..fcabad7cd6 --- /dev/null +++ b/litellm/proxy/_logging.py @@ -0,0 +1,20 @@ +import json +import logging +from logging import Formatter + + +class JsonFormatter(Formatter): + def __init__(self): + super(JsonFormatter, self).__init__() + + def format(self, record): + json_record = {} + json_record["message"] = record.getMessage() + return json.dumps(json_record) + + +logger = logging.root +handler = logging.StreamHandler() +handler.setFormatter(JsonFormatter()) +logger.handlers = [handler] +logger.setLevel(logging.DEBUG) diff --git a/litellm/proxy/_super_secret_config.yaml b/litellm/proxy/_super_secret_config.yaml index 42f9e3be50..f349bd09e9 100644 --- a/litellm/proxy/_super_secret_config.yaml +++ b/litellm/proxy/_super_secret_config.yaml @@ -1,45 +1,20 @@ model_list: -- litellm_params: - api_base: os.environ/AZURE_API_BASE - api_key: os.environ/AZURE_API_KEY - api_version: 2023-07-01-preview - model: azure/azure-embedding-model - model_info: - base_model: text-embedding-ada-002 - mode: embedding - model_name: text-embedding-ada-002 -- model_name: gpt-3.5-turbo-012 - litellm_params: - model: gpt-3.5-turbo - api_base: http://0.0.0.0:8080 - api_key: "" -- model_name: gpt-3.5-turbo-0125-preview - litellm_params: - model: azure/chatgpt-v-2 - api_key: os.environ/AZURE_API_KEY - api_base: os.environ/AZURE_API_BASE - input_cost_per_token: 0.0 - output_cost_per_token: 0.0 -- model_name: bert-classifier - litellm_params: - model: huggingface/text-classification/shahrukhx01/question-vs-statement-classifier - api_key: os.environ/HUGGINGFACE_API_KEY + - model_name: gpt-3.5-turbo-fake-model + litellm_params: + model: openai/my-fake-model + api_base: http://0.0.0.0:8080 + api_key: "" + - model_name: gpt-3.5-turbo + litellm_params: + model: azure/gpt-35-turbo + api_base: https://my-endpoint-europe-berri-992.openai.azure.com/ + api_key: os.environ/AZURE_EUROPE_API_KEY + - model_name: gpt-3.5-turbo + litellm_params: + model: azure/chatgpt-v-2 + api_base: https://openai-gpt-4-test-v-1.openai.azure.com/ + api_version: "2023-05-15" + api_key: os.environ/AZURE_API_KEY # The `os.environ/` prefix tells litellm to read this from the env. 
See https://docs.litellm.ai/docs/simple_proxy#load-api-keys-from-vault + router_settings: - redis_host: redis - # redis_password: - redis_port: 6379 enable_pre_call_checks: true - -litellm_settings: - set_verbose: True - fallbacks: [{"gpt-3.5-turbo-012": ["gpt-3.5-turbo-0125-preview"]}] - # service_callback: ["prometheus_system"] - # success_callback: ["prometheus"] - # failure_callback: ["prometheus"] - -general_settings: - enable_jwt_auth: True - disable_reset_budget: True - proxy_batch_write_at: 60 # π Frequency of batch writing logs to server (in seconds) - routing_strategy: simple-shuffle # Literal["simple-shuffle", "least-busy", "usage-based-routing","latency-based-routing"], default="simple-shuffle" - alerting: ["slack"] diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py index b1af153e81..63d82e7095 100644 --- a/litellm/proxy/_types.py +++ b/litellm/proxy/_types.py @@ -1,37 +1,11 @@ -from pydantic import ConfigDict, BaseModel, Field, root_validator, Json, VERSION +from pydantic import BaseModel, Extra, Field, root_validator, Json, validator +from dataclasses import fields import enum from typing import Optional, List, Union, Dict, Literal, Any from datetime import datetime -import uuid -import json +import uuid, json, sys, os from litellm.types.router import UpdateRouterConfig -try: - from pydantic import model_validator # type: ignore -except ImportError: - from pydantic import root_validator # pydantic v1 - - def model_validator(mode): # type: ignore - pre = mode == "before" - return root_validator(pre=pre) - - -# Function to get Pydantic version -def is_pydantic_v2() -> int: - return int(VERSION.split(".")[0]) - - -def get_model_config(arbitrary_types_allowed: bool = False) -> ConfigDict: - # Version-specific configuration - if is_pydantic_v2() >= 2: - model_config = ConfigDict(extra="allow", arbitrary_types_allowed=arbitrary_types_allowed, protected_namespaces=()) # type: ignore - else: - from pydantic import Extra - - model_config = ConfigDict(extra=Extra.allow, arbitrary_types_allowed=arbitrary_types_allowed) # type: ignore - - return model_config - def hash_token(token: str): import hashlib @@ -61,7 +35,8 @@ class LiteLLMBase(BaseModel): # if using pydantic v1 return self.__fields_set__ - model_config = get_model_config() + class Config: + protected_namespaces = () class LiteLLM_UpperboundKeyGenerateParams(LiteLLMBase): @@ -77,8 +52,18 @@ class LiteLLM_UpperboundKeyGenerateParams(LiteLLMBase): class LiteLLMRoutes(enum.Enum): + openai_route_names: List = [ + "chat_completion", + "completion", + "embeddings", + "image_generation", + "audio_transcriptions", + "moderations", + "model_list", # OpenAI /v1/models route + ] openai_routes: List = [ # chat completions + "/engines/{model}/chat/completions", "/openai/deployments/{model}/chat/completions", "/chat/completions", "/v1/chat/completions", @@ -102,11 +87,8 @@ class LiteLLMRoutes(enum.Enum): # models "/models", "/v1/models", - ] - - # NOTE: ROUTES ONLY FOR MASTER KEY - only the Master Key should be able to Reset Spend - master_key_only_routes: List = [ - "/global/spend/reset", + # token counter + "/utils/token_counter", ] info_routes: List = [ @@ -119,6 +101,11 @@ class LiteLLMRoutes(enum.Enum): "/v2/key/info", ] + # NOTE: ROUTES ONLY FOR MASTER KEY - only the Master Key should be able to Reset Spend + master_key_only_routes: List = [ + "/global/spend/reset", + ] + sso_only_routes: List = [ "/key/generate", "/key/update", @@ -227,13 +214,19 @@ class LiteLLM_JWTAuth(LiteLLMBase): "global_spend_tracking_routes", 
"info_routes", ] - team_jwt_scope: str = "litellm_team" - team_id_jwt_field: str = "client_id" + team_id_jwt_field: Optional[str] = None team_allowed_routes: List[ Literal["openai_routes", "info_routes", "management_routes"] ] = ["openai_routes", "info_routes"] + team_id_default: Optional[str] = Field( + default=None, + description="If no team_id given, default permissions/spend-tracking to this team.s", + ) org_id_jwt_field: Optional[str] = None user_id_jwt_field: Optional[str] = None + user_id_upsert: bool = Field( + default=False, description="If user doesn't exist, upsert them into the db." + ) end_user_id_jwt_field: Optional[str] = None public_key_ttl: float = 600 @@ -258,8 +251,12 @@ class LiteLLMPromptInjectionParams(LiteLLMBase): llm_api_name: Optional[str] = None llm_api_system_prompt: Optional[str] = None llm_api_fail_call_string: Optional[str] = None + reject_as_response: Optional[bool] = Field( + default=False, + description="Return rejected request error message as a string to the user. Default behaviour is to raise an exception.", + ) - @model_validator(mode="before") + @root_validator(pre=True) def check_llm_api_params(cls, values): llm_api_check = values.get("llm_api_check") if llm_api_check is True: @@ -317,7 +314,8 @@ class ProxyChatCompletionRequest(LiteLLMBase): deployment_id: Optional[str] = None request_timeout: Optional[int] = None - model_config = get_model_config() + class Config: + extra = "allow" # allow params not defined here, these fall in litellm.completion(**kwargs) class ModelInfoDelete(LiteLLMBase): @@ -344,9 +342,11 @@ class ModelInfo(LiteLLMBase): ] ] - model_config = get_model_config() + class Config: + extra = Extra.allow # Allow extra fields + protected_namespaces = () - @model_validator(mode="before") + @root_validator(pre=True) def set_model_info(cls, values): if values.get("id") is None: values.update({"id": str(uuid.uuid4())}) @@ -372,9 +372,10 @@ class ModelParams(LiteLLMBase): litellm_params: dict model_info: ModelInfo - model_config = get_model_config() + class Config: + protected_namespaces = () - @model_validator(mode="before") + @root_validator(pre=True) def set_model_info(cls, values): if values.get("model_info") is None: values.update({"model_info": ModelInfo()}) @@ -410,7 +411,8 @@ class GenerateKeyRequest(GenerateRequestBase): {} ) # {"gpt-4": 5.0, "gpt-3.5-turbo": 5.0}, defaults to {} - model_config = get_model_config() + class Config: + protected_namespaces = () class GenerateKeyResponse(GenerateKeyRequest): @@ -420,7 +422,7 @@ class GenerateKeyResponse(GenerateKeyRequest): user_id: Optional[str] = None token_id: Optional[str] = None - @model_validator(mode="before") + @root_validator(pre=True) def set_model_info(cls, values): if values.get("token") is not None: values.update({"key": values.get("token")}) @@ -460,7 +462,8 @@ class LiteLLM_ModelTable(LiteLLMBase): created_by: str updated_by: str - model_config = get_model_config() + class Config: + protected_namespaces = () class NewUserRequest(GenerateKeyRequest): @@ -488,7 +491,7 @@ class UpdateUserRequest(GenerateRequestBase): user_role: Optional[str] = None max_budget: Optional[float] = None - @model_validator(mode="before") + @root_validator(pre=True) def check_user_info(cls, values): if values.get("user_id") is None and values.get("user_email") is None: raise ValueError("Either user id or user email must be provided") @@ -508,7 +511,7 @@ class NewEndUserRequest(LiteLLMBase): None # if no equivalent model in allowed region - default all requests to this model ) - 
@model_validator(mode="before") + @root_validator(pre=True) def check_user_info(cls, values): if values.get("max_budget") is not None and values.get("budget_id") is not None: raise ValueError("Set either 'max_budget' or 'budget_id', not both.") @@ -521,7 +524,7 @@ class Member(LiteLLMBase): user_id: Optional[str] = None user_email: Optional[str] = None - @model_validator(mode="before") + @root_validator(pre=True) def check_user_info(cls, values): if values.get("user_id") is None and values.get("user_email") is None: raise ValueError("Either user id or user email must be provided") @@ -546,7 +549,8 @@ class TeamBase(LiteLLMBase): class NewTeamRequest(TeamBase): model_aliases: Optional[dict] = None - model_config = get_model_config() + class Config: + protected_namespaces = () class GlobalEndUsersSpend(LiteLLMBase): @@ -565,7 +569,7 @@ class TeamMemberDeleteRequest(LiteLLMBase): user_id: Optional[str] = None user_email: Optional[str] = None - @model_validator(mode="before") + @root_validator(pre=True) def check_user_info(cls, values): if values.get("user_id") is None and values.get("user_email") is None: raise ValueError("Either user id or user email must be provided") @@ -599,9 +603,10 @@ class LiteLLM_TeamTable(TeamBase): budget_reset_at: Optional[datetime] = None model_id: Optional[int] = None - model_config = get_model_config() + class Config: + protected_namespaces = () - @model_validator(mode="before") + @root_validator(pre=True) def set_model_info(cls, values): dict_fields = [ "metadata", @@ -637,7 +642,8 @@ class LiteLLM_BudgetTable(LiteLLMBase): model_max_budget: Optional[dict] = None budget_duration: Optional[str] = None - model_config = get_model_config() + class Config: + protected_namespaces = () class NewOrganizationRequest(LiteLLM_BudgetTable): @@ -687,7 +693,8 @@ class KeyManagementSettings(LiteLLMBase): class TeamDefaultSettings(LiteLLMBase): team_id: str - model_config = get_model_config() + class Config: + extra = "allow" # allow params not defined here, these fall in litellm.completion(**kwargs) class DynamoDBArgs(LiteLLMBase): @@ -711,6 +718,25 @@ class DynamoDBArgs(LiteLLMBase): assume_role_aws_session_name: Optional[str] = None +class ConfigFieldUpdate(LiteLLMBase): + field_name: str + field_value: Any + config_type: Literal["general_settings"] + + +class ConfigFieldDelete(LiteLLMBase): + config_type: Literal["general_settings"] + field_name: str + + +class ConfigList(LiteLLMBase): + field_name: str + field_type: str + field_description: str + field_value: Any + stored_in_db: Optional[bool] + + class ConfigGeneralSettings(LiteLLMBase): """ Documents all the fields supported by `general_settings` in config.yaml @@ -758,7 +784,11 @@ class ConfigGeneralSettings(LiteLLMBase): description="override user_api_key_auth with your own auth script - https://docs.litellm.ai/docs/proxy/virtual_keys#custom-auth", ) max_parallel_requests: Optional[int] = Field( - None, description="maximum parallel requests for each api key" + None, + description="maximum parallel requests for each api key", + ) + global_max_parallel_requests: Optional[int] = Field( + None, description="global max parallel requests to allow for a proxy instance." ) infer_model_from_keys: Optional[bool] = Field( None, @@ -828,7 +858,8 @@ class ConfigYAML(LiteLLMBase): description="litellm router object settings. 
See router.py __init__ for all, example router.num_retries=5, router.timeout=5, router.max_retries=5, router.retry_after=5", ) - model_config = get_model_config() + class Config: + protected_namespaces = () class LiteLLM_VerificationToken(LiteLLMBase): @@ -862,7 +893,8 @@ class LiteLLM_VerificationToken(LiteLLMBase): user_id_rate_limits: Optional[dict] = None team_id_rate_limits: Optional[dict] = None - model_config = get_model_config() + class Config: + protected_namespaces = () class LiteLLM_VerificationTokenView(LiteLLM_VerificationToken): @@ -892,7 +924,7 @@ class UserAPIKeyAuth( user_role: Optional[Literal["proxy_admin", "app_owner", "app_user"]] = None allowed_model_region: Optional[Literal["eu"]] = None - @model_validator(mode="before") + @root_validator(pre=True) def check_api_key(cls, values): if values.get("api_key") is not None: values.update({"token": hash_token(values.get("api_key"))}) @@ -919,7 +951,7 @@ class LiteLLM_UserTable(LiteLLMBase): tpm_limit: Optional[int] = None rpm_limit: Optional[int] = None - @model_validator(mode="before") + @root_validator(pre=True) def set_model_info(cls, values): if values.get("spend") is None: values.update({"spend": 0.0}) @@ -927,7 +959,8 @@ class LiteLLM_UserTable(LiteLLMBase): values.update({"models": []}) return values - model_config = get_model_config() + class Config: + protected_namespaces = () class LiteLLM_EndUserTable(LiteLLMBase): @@ -939,13 +972,14 @@ class LiteLLM_EndUserTable(LiteLLMBase): default_model: Optional[str] = None litellm_budget_table: Optional[LiteLLM_BudgetTable] = None - @model_validator(mode="before") + @root_validator(pre=True) def set_model_info(cls, values): if values.get("spend") is None: values.update({"spend": 0.0}) return values - model_config = get_model_config() + class Config: + protected_namespaces = () class LiteLLM_SpendLogs(LiteLLMBase): @@ -983,3 +1017,30 @@ class LiteLLM_ErrorLogs(LiteLLMBase): class LiteLLM_SpendLogs_ResponseObject(LiteLLMBase): response: Optional[List[Union[LiteLLM_SpendLogs, Any]]] = None + + +class TokenCountRequest(LiteLLMBase): + model: str + prompt: Optional[str] = None + messages: Optional[List[dict]] = None + + +class TokenCountResponse(LiteLLMBase): + total_tokens: int + request_model: str + model_used: str + tokenizer_type: str + + +class CallInfo(LiteLLMBase): + """Used for slack budget alerting""" + + spend: float + max_budget: float + token: str = Field(description="Hashed value of that key") + user_id: Optional[str] = None + team_id: Optional[str] = None + user_email: Optional[str] = None + key_alias: Optional[str] = None + projected_exceeded_date: Optional[str] = None + projected_spend: Optional[float] = None diff --git a/litellm/proxy/auth/auth_checks.py b/litellm/proxy/auth/auth_checks.py index 62e5eba013..08da25556d 100644 --- a/litellm/proxy/auth/auth_checks.py +++ b/litellm/proxy/auth/auth_checks.py @@ -26,7 +26,7 @@ all_routes = LiteLLMRoutes.openai_routes.value + LiteLLMRoutes.management_routes def common_checks( request_body: dict, - team_object: LiteLLM_TeamTable, + team_object: Optional[LiteLLM_TeamTable], user_object: Optional[LiteLLM_UserTable], end_user_object: Optional[LiteLLM_EndUserTable], global_proxy_spend: Optional[float], @@ -45,13 +45,14 @@ def common_checks( 6. [OPTIONAL] If 'litellm.max_budget' is set (>0), is proxy under budget """ _model = request_body.get("model", None) - if team_object.blocked == True: + if team_object is not None and team_object.blocked == True: raise Exception( f"Team={team_object.team_id} is blocked. 
Update via `/team/unblock` if your admin." ) # 2. If user can call model if ( _model is not None + and team_object is not None and len(team_object.models) > 0 and _model not in team_object.models ): @@ -65,7 +66,8 @@ def common_checks( ) # 3. If team is in budget if ( - team_object.max_budget is not None + team_object is not None + and team_object.max_budget is not None and team_object.spend is not None and team_object.spend > team_object.max_budget ): @@ -239,6 +241,7 @@ async def get_user_object( user_id: str, prisma_client: Optional[PrismaClient], user_api_key_cache: DualCache, + user_id_upsert: bool, ) -> Optional[LiteLLM_UserTable]: """ - Check if user id in proxy User Table @@ -252,7 +255,7 @@ async def get_user_object( return None # check if in cache - cached_user_obj = user_api_key_cache.async_get_cache(key=user_id) + cached_user_obj = await user_api_key_cache.async_get_cache(key=user_id) if cached_user_obj is not None: if isinstance(cached_user_obj, dict): return LiteLLM_UserTable(**cached_user_obj) @@ -260,16 +263,27 @@ async def get_user_object( return cached_user_obj # else, check db try: + response = await prisma_client.db.litellm_usertable.find_unique( where={"user_id": user_id} ) if response is None: - raise Exception + if user_id_upsert: + response = await prisma_client.db.litellm_usertable.create( + data={"user_id": user_id} + ) + else: + raise Exception - return LiteLLM_UserTable(**response.dict()) - except Exception as e: # if end-user not in db - raise Exception( + _response = LiteLLM_UserTable(**dict(response)) + + # save the user object to cache + await user_api_key_cache.async_set_cache(key=user_id, value=_response) + + return _response + except Exception as e: # if user not in db + raise ValueError( f"User doesn't exist in db. 'user_id'={user_id}. Create user via `/user/new` call." ) @@ -290,7 +304,7 @@ async def get_team_object( ) # check if in cache - cached_team_obj = user_api_key_cache.async_get_cache(key=team_id) + cached_team_obj = await user_api_key_cache.async_get_cache(key=team_id) if cached_team_obj is not None: if isinstance(cached_team_obj, dict): return LiteLLM_TeamTable(**cached_team_obj) @@ -305,7 +319,11 @@ async def get_team_object( if response is None: raise Exception - return LiteLLM_TeamTable(**response.dict()) + _response = LiteLLM_TeamTable(**response.dict()) + # save the team object to cache + await user_api_key_cache.async_set_cache(key=response.team_id, value=_response) + + return _response except Exception as e: raise Exception( f"Team doesn't exist in db. Team={team_id}. Create team via `/team/new` call." 
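The `auth_checks.py` hunk above adds a `user_id_upsert` flag to `get_user_object` (auto-creating unknown users instead of raising) and caches the user/team objects it fetches. A minimal usage sketch, not part of this diff — the `prisma_client` wiring and import paths are assumptions based on the file paths shown here:

```python
# Sketch only: illustrates the new user_id_upsert behaviour of get_user_object.
# Import paths and the prisma_client argument are assumptions, not part of this diff.
from litellm.caching import DualCache
from litellm.proxy.auth.auth_checks import get_user_object


async def lookup_or_create_user(user_id: str, prisma_client):
    user_api_key_cache = DualCache()
    # With user_id_upsert=True, a missing user row is created via
    # prisma_client.db.litellm_usertable.create(...) instead of raising
    # "User doesn't exist in db ...". The returned object is also written
    # back to the cache, per the hunk above.
    return await get_user_object(
        user_id=user_id,
        prisma_client=prisma_client,
        user_api_key_cache=user_api_key_cache,
        user_id_upsert=True,
    )
```

On the proxy this flag appears to be driven by the `user_id_upsert` field on `litellm_jwtauth`, read via `is_upsert_user_id()` in the `handle_jwt.py` diff that follows.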
diff --git a/litellm/proxy/auth/handle_jwt.py b/litellm/proxy/auth/handle_jwt.py index 18c0d7b2ce..0a186d7dde 100644 --- a/litellm/proxy/auth/handle_jwt.py +++ b/litellm/proxy/auth/handle_jwt.py @@ -55,12 +55,9 @@ class JWTHandler: return True return False - def is_team(self, scopes: list) -> bool: - if self.litellm_jwtauth.team_jwt_scope in scopes: - return True - return False - - def get_end_user_id(self, token: dict, default_value: Optional[str]) -> str: + def get_end_user_id( + self, token: dict, default_value: Optional[str] + ) -> Optional[str]: try: if self.litellm_jwtauth.end_user_id_jwt_field is not None: user_id = token[self.litellm_jwtauth.end_user_id_jwt_field] @@ -70,13 +67,36 @@ class JWTHandler: user_id = default_value return user_id + def is_required_team_id(self) -> bool: + """ + Returns: + - True: if 'team_id_jwt_field' is set + - False: if not + """ + if self.litellm_jwtauth.team_id_jwt_field is None: + return False + return True + def get_team_id(self, token: dict, default_value: Optional[str]) -> Optional[str]: try: - team_id = token[self.litellm_jwtauth.team_id_jwt_field] + if self.litellm_jwtauth.team_id_jwt_field is not None: + team_id = token[self.litellm_jwtauth.team_id_jwt_field] + elif self.litellm_jwtauth.team_id_default is not None: + team_id = self.litellm_jwtauth.team_id_default + else: + team_id = None except KeyError: team_id = default_value return team_id + def is_upsert_user_id(self) -> bool: + """ + Returns: + - True: if 'user_id_upsert' is set + - False: if not + """ + return self.litellm_jwtauth.user_id_upsert + def get_user_id(self, token: dict, default_value: Optional[str]) -> Optional[str]: try: if self.litellm_jwtauth.user_id_jwt_field is not None: @@ -165,7 +185,7 @@ class JWTHandler: decode_options = None if audience is None: decode_options = {"verify_aud": False} - + from jwt.algorithms import RSAAlgorithm header = jwt.get_unverified_header(token) @@ -207,12 +227,14 @@ class JWTHandler: raise Exception(f"Validation fails: {str(e)}") elif public_key is not None and isinstance(public_key, str): try: - cert = x509.load_pem_x509_certificate(public_key.encode(), default_backend()) + cert = x509.load_pem_x509_certificate( + public_key.encode(), default_backend() + ) # Extract public key key = cert.public_key().public_bytes( serialization.Encoding.PEM, - serialization.PublicFormat.SubjectPublicKeyInfo + serialization.PublicFormat.SubjectPublicKeyInfo, ) # decode the token using the public key @@ -221,7 +243,7 @@ class JWTHandler: key, algorithms=algorithms, audience=audience, - options=decode_options + options=decode_options, ) return payload diff --git a/litellm/proxy/auth/litellm_license.py b/litellm/proxy/auth/litellm_license.py new file mode 100644 index 0000000000..c6c61ad4e0 --- /dev/null +++ b/litellm/proxy/auth/litellm_license.py @@ -0,0 +1,42 @@ +# What is this? 
+## If litellm license in env, checks if it's valid +import os +from litellm.llms.custom_httpx.http_handler import HTTPHandler + + +class LicenseCheck: + """ + - Check if license in env + - Returns if license is valid + """ + + base_url = "https://license.litellm.ai" + + def __init__(self) -> None: + self.license_str = os.getenv("LITELLM_LICENSE", None) + self.http_handler = HTTPHandler() + + def _verify(self, license_str: str) -> bool: + url = "{}/verify_license/{}".format(self.base_url, license_str) + + try: # don't impact user, if call fails + response = self.http_handler.get(url=url) + + response.raise_for_status() + + response_json = response.json() + + premium = response_json["verify"] + + assert isinstance(premium, bool) + + return premium + except Exception as e: + return False + + def is_premium(self) -> bool: + if self.license_str is None: + return False + elif self._verify(license_str=self.license_str): + return True + return False diff --git a/litellm/proxy/hooks/parallel_request_limiter.py b/litellm/proxy/hooks/parallel_request_limiter.py index 28e6d1853f..26238b6c08 100644 --- a/litellm/proxy/hooks/parallel_request_limiter.py +++ b/litellm/proxy/hooks/parallel_request_limiter.py @@ -79,6 +79,9 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger): max_parallel_requests = user_api_key_dict.max_parallel_requests if max_parallel_requests is None: max_parallel_requests = sys.maxsize + global_max_parallel_requests = data.get("metadata", {}).get( + "global_max_parallel_requests", None + ) tpm_limit = getattr(user_api_key_dict, "tpm_limit", sys.maxsize) if tpm_limit is None: tpm_limit = sys.maxsize @@ -91,6 +94,24 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger): # Setup values # ------------ + if global_max_parallel_requests is not None: + # get value from cache + _key = "global_max_parallel_requests" + current_global_requests = await cache.async_get_cache( + key=_key, local_only=True + ) + # check if below limit + if current_global_requests is None: + current_global_requests = 1 + # if above -> raise error + if current_global_requests >= global_max_parallel_requests: + raise HTTPException( + status_code=429, detail="Max parallel request limit reached." 
+ ) + # if below -> increment + else: + await cache.async_increment_cache(key=_key, value=1, local_only=True) + current_date = datetime.now().strftime("%Y-%m-%d") current_hour = datetime.now().strftime("%H") current_minute = datetime.now().strftime("%M") @@ -207,6 +228,9 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger): async def async_log_success_event(self, kwargs, response_obj, start_time, end_time): try: self.print_verbose(f"INSIDE parallel request limiter ASYNC SUCCESS LOGGING") + global_max_parallel_requests = kwargs["litellm_params"]["metadata"].get( + "global_max_parallel_requests", None + ) user_api_key = kwargs["litellm_params"]["metadata"]["user_api_key"] user_api_key_user_id = kwargs["litellm_params"]["metadata"].get( "user_api_key_user_id", None @@ -222,6 +246,14 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger): # Setup values # ------------ + if global_max_parallel_requests is not None: + # get value from cache + _key = "global_max_parallel_requests" + # decrement + await self.user_api_key_cache.async_increment_cache( + key=_key, value=-1, local_only=True + ) + current_date = datetime.now().strftime("%Y-%m-%d") current_hour = datetime.now().strftime("%H") current_minute = datetime.now().strftime("%M") @@ -336,6 +368,9 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger): async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time): try: self.print_verbose(f"Inside Max Parallel Request Failure Hook") + global_max_parallel_requests = kwargs["litellm_params"]["metadata"].get( + "global_max_parallel_requests", None + ) user_api_key = ( kwargs["litellm_params"].get("metadata", {}).get("user_api_key", None) ) @@ -347,17 +382,26 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger): return ## decrement call count if call failed - if ( - hasattr(kwargs["exception"], "status_code") - and kwargs["exception"].status_code == 429 - and "Max parallel request limit reached" in str(kwargs["exception"]) - ): + if "Max parallel request limit reached" in str(kwargs["exception"]): pass # ignore failed calls due to max limit being reached else: # ------------ # Setup values # ------------ + if global_max_parallel_requests is not None: + # get value from cache + _key = "global_max_parallel_requests" + current_global_requests = ( + await self.user_api_key_cache.async_get_cache( + key=_key, local_only=True + ) + ) + # decrement + await self.user_api_key_cache.async_increment_cache( + key=_key, value=-1, local_only=True + ) + current_date = datetime.now().strftime("%Y-%m-%d") current_hour = datetime.now().strftime("%H") current_minute = datetime.now().strftime("%M") diff --git a/litellm/proxy/hooks/prompt_injection_detection.py b/litellm/proxy/hooks/prompt_injection_detection.py index 896046e943..08dbedd8c8 100644 --- a/litellm/proxy/hooks/prompt_injection_detection.py +++ b/litellm/proxy/hooks/prompt_injection_detection.py @@ -146,6 +146,7 @@ class _OPTIONAL_PromptInjectionDetection(CustomLogger): try: assert call_type in [ "completion", + "text_completion", "embeddings", "image_generation", "moderation", @@ -192,6 +193,15 @@ class _OPTIONAL_PromptInjectionDetection(CustomLogger): return data except HTTPException as e: + + if ( + e.status_code == 400 + and isinstance(e.detail, dict) + and "error" in e.detail + and self.prompt_injection_params is not None + and self.prompt_injection_params.reject_as_response + ): + return e.detail["error"] raise e except Exception as e: traceback.print_exc() diff --git a/litellm/proxy/proxy_cli.py 
b/litellm/proxy/proxy_cli.py index 50eca5ecb3..537f437364 100644 --- a/litellm/proxy/proxy_cli.py +++ b/litellm/proxy/proxy_cli.py @@ -17,6 +17,7 @@ if litellm_mode == "DEV": from importlib import resources import shutil + telemetry = None @@ -505,6 +506,7 @@ def run_server( port = random.randint(1024, 49152) from litellm.proxy.proxy_server import app + import litellm if run_gunicorn == False: if ssl_certfile_path is not None and ssl_keyfile_path is not None: @@ -519,7 +521,15 @@ def run_server( ssl_certfile=ssl_certfile_path, ) # run uvicorn else: - uvicorn.run(app, host=host, port=port) # run uvicorn + print(f"litellm.json_logs: {litellm.json_logs}") + if litellm.json_logs: + from litellm.proxy._logging import logger + + uvicorn.run( + app, host=host, port=port, log_config=None + ) # run uvicorn w/ json + else: + uvicorn.run(app, host=host, port=port) # run uvicorn elif run_gunicorn == True: import gunicorn.app.base diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index e747e1047a..08c2fee074 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -2,7 +2,7 @@ import sys, os, platform, time, copy, re, asyncio, inspect import threading, ast import shutil, random, traceback, requests from datetime import datetime, timedelta, timezone -from typing import Optional, List, Callable +from typing import Optional, List, Callable, get_args import secrets, subprocess import hashlib, uuid import warnings @@ -110,6 +110,7 @@ from litellm.router import LiteLLM_Params, Deployment, updateDeployment from litellm.router import ModelInfo as RouterModelInfo from litellm._logging import verbose_router_logger, verbose_proxy_logger from litellm.proxy.auth.handle_jwt import JWTHandler +from litellm.proxy.auth.litellm_license import LicenseCheck from litellm.proxy.hooks.prompt_injection_detection import ( _OPTIONAL_PromptInjectionDetection, ) @@ -123,6 +124,7 @@ from litellm.proxy.auth.auth_checks import ( get_actual_routes, ) from litellm.llms.custom_httpx.httpx_handler import HTTPHandler +from litellm.exceptions import RejectedRequestError try: from litellm._version import version @@ -150,6 +152,7 @@ from fastapi.responses import ( ORJSONResponse, JSONResponse, ) +from fastapi.openapi.utils import get_openapi from fastapi.responses import RedirectResponse from fastapi.middleware.cors import CORSMiddleware from fastapi.staticfiles import StaticFiles @@ -169,17 +172,30 @@ except Exception as e: except Exception as e: pass +_license_check = LicenseCheck() +premium_user: bool = _license_check.is_premium() + ui_link = f"/ui/" ui_message = ( f"π [```LiteLLM Admin Panel on /ui```]({ui_link}). 
Create, Edit Keys with SSO" ) +### CUSTOM BRANDING [ENTERPRISE FEATURE] ### _docs_url = None if os.getenv("NO_DOCS", "False") == "True" else "/" +_title = os.getenv("DOCS_TITLE", "LiteLLM API") if premium_user else "LiteLLM API" +_description = ( + os.getenv( + "DOCS_DESCRIPTION", + f"Proxy Server to call 100+ LLMs in the OpenAI format\n\n{ui_message}", + ) + if premium_user + else f"Proxy Server to call 100+ LLMs in the OpenAI format\n\n{ui_message}" +) app = FastAPI( docs_url=_docs_url, - title="LiteLLM API", - description=f"Proxy Server to call 100+ LLMs in the OpenAI format\n\n{ui_message}", + title=_title, + description=_description, version=version, root_path=os.environ.get( "SERVER_ROOT_PATH", "" @@ -187,6 +203,31 @@ app = FastAPI( ) +### CUSTOM API DOCS [ENTERPRISE FEATURE] ### +# Custom OpenAPI schema generator to include only selected routes +def custom_openapi(): + if app.openapi_schema: + return app.openapi_schema + openapi_schema = get_openapi( + title=app.title, + version=app.version, + description=app.description, + routes=app.routes, + ) + # Filter routes to include only specific ones + openai_routes = LiteLLMRoutes.openai_routes.value + paths_to_include: dict = {} + for route in openai_routes: + paths_to_include[route] = openapi_schema["paths"][route] + openapi_schema["paths"] = paths_to_include + app.openapi_schema = openapi_schema + return app.openapi_schema + + +if os.getenv("DOCS_FILTERED", "False") == "True" and premium_user: + app.openapi = custom_openapi # type: ignore + + class ProxyException(Exception): # NOTE: DO NOT MODIFY THIS # This is used to map exactly to OPENAI Exceptions @@ -234,6 +275,7 @@ class SpecialModelNames(enum.Enum): class CommonProxyErrors(enum.Enum): db_not_connected_error = "DB not connected" no_llm_router = "No models configured on proxy" + not_allowed_access = "Admin-only endpoint. Not allowed to access this." @app.exception_handler(ProxyException) @@ -351,6 +393,31 @@ def _get_pydantic_json_dict(pydantic_obj: BaseModel) -> dict: return pydantic_obj.dict() +def get_custom_headers( + *, + model_id: Optional[str] = None, + cache_key: Optional[str] = None, + api_base: Optional[str] = None, + version: Optional[str] = None, + model_region: Optional[str] = None, +) -> dict: + exclude_values = {"", None} + headers = { + "x-litellm-model-id": model_id, + "x-litellm-cache-key": cache_key, + "x-litellm-model-api-base": api_base, + "x-litellm-version": version, + "x-litellm-model-region": model_region, + } + try: + return { + key: value for key, value in headers.items() if value not in exclude_values + } + except Exception as e: + verbose_proxy_logger.error(f"Error setting custom headers: {e}") + return {} + + async def check_request_disconnection(request: Request, llm_api_call_task): """ Asynchronously checks if the request is disconnected at regular intervals. @@ -363,7 +430,10 @@ async def check_request_disconnection(request: Request, llm_api_call_task): Returns: - None """ - while True: + + # only run this function for 10 mins -> if these don't get cancelled -> we don't want the server to have many while loops + start_time = time.time() + while time.time() - start_time < 600: await asyncio.sleep(1) if await request.is_disconnected(): @@ -440,29 +510,32 @@ async def user_api_key_auth( # get team id team_id = jwt_handler.get_team_id(token=valid_token, default_value=None) - if team_id is None: + if team_id is None and jwt_handler.is_required_team_id() == True: raise Exception( f"No team id passed in. 
Field checked in jwt token - '{jwt_handler.litellm_jwtauth.team_id_jwt_field}'" ) - # check allowed team routes - is_allowed = allowed_routes_check( - user_role="team", - user_route=route, - litellm_proxy_roles=jwt_handler.litellm_jwtauth, - ) - if is_allowed == False: - allowed_routes = jwt_handler.litellm_jwtauth.team_allowed_routes # type: ignore - actual_routes = get_actual_routes(allowed_routes=allowed_routes) - raise Exception( - f"Team not allowed to access this route. Route={route}, Allowed Routes={actual_routes}" - ) - # check if team in db - team_object = await get_team_object( - team_id=team_id, - prisma_client=prisma_client, - user_api_key_cache=user_api_key_cache, - ) + team_object: Optional[LiteLLM_TeamTable] = None + if team_id is not None: + # check allowed team routes + is_allowed = allowed_routes_check( + user_role="team", + user_route=route, + litellm_proxy_roles=jwt_handler.litellm_jwtauth, + ) + if is_allowed == False: + allowed_routes = jwt_handler.litellm_jwtauth.team_allowed_routes # type: ignore + actual_routes = get_actual_routes(allowed_routes=allowed_routes) + raise Exception( + f"Team not allowed to access this route. Route={route}, Allowed Routes={actual_routes}" + ) + + # check if team in db + team_object = await get_team_object( + team_id=team_id, + prisma_client=prisma_client, + user_api_key_cache=user_api_key_cache, + ) # [OPTIONAL] track spend for an org id - `LiteLLM_OrganizationTable` org_id = jwt_handler.get_org_id(token=valid_token, default_value=None) @@ -481,11 +554,9 @@ async def user_api_key_auth( user_id=user_id, prisma_client=prisma_client, user_api_key_cache=user_api_key_cache, + user_id_upsert=jwt_handler.is_upsert_user_id(), ) - # save the user object to cache - await user_api_key_cache.async_set_cache( - key=user_id, value=user_object - ) + # [OPTIONAL] track spend against an external user - `LiteLLM_EndUserTable` end_user_object = None end_user_id = jwt_handler.get_end_user_id( @@ -519,17 +590,15 @@ async def user_api_key_auth( ttl=UserAPIKeyCacheTTLEnum.global_proxy_spend.value, ) if global_proxy_spend is not None: - user_info = { - "user_id": litellm_proxy_admin_name, - "max_budget": litellm.max_budget, - "spend": global_proxy_spend, - "user_email": "", - } + user_info = CallInfo( + user_id=litellm_proxy_admin_name, + max_budget=litellm.max_budget, + spend=global_proxy_spend, + token=valid_token["token"], + ) asyncio.create_task( proxy_logging_obj.budget_alerts( - user_max_budget=litellm.max_budget, - user_current_spend=global_proxy_spend, - type="user_and_proxy_budget", + type="proxy_budget", user_info=user_info, ) ) @@ -547,18 +616,18 @@ async def user_api_key_auth( global_proxy_spend=global_proxy_spend, route=route, ) - # save team object in cache - await user_api_key_cache.async_set_cache( - key=team_object.team_id, value=team_object - ) # return UserAPIKeyAuth object return UserAPIKeyAuth( api_key=None, - team_id=team_object.team_id, - team_tpm_limit=team_object.tpm_limit, - team_rpm_limit=team_object.rpm_limit, - team_models=team_object.models, + team_id=team_object.team_id if team_object is not None else None, + team_tpm_limit=( + team_object.tpm_limit if team_object is not None else None + ), + team_rpm_limit=( + team_object.rpm_limit if team_object is not None else None + ), + team_models=team_object.models if team_object is not None else [], user_role="app_owner", user_id=user_id, org_id=org_id, @@ -566,9 +635,9 @@ async def user_api_key_auth( #### ELSE #### if master_key is None: if isinstance(api_key, str): - return 
UserAPIKeyAuth(api_key=api_key) + return UserAPIKeyAuth(api_key=api_key, user_role="proxy_admin") else: - return UserAPIKeyAuth() + return UserAPIKeyAuth(user_role="proxy_admin") elif api_key is None: # only require api key if master key is set raise Exception("No api key passed in.") elif api_key == "": @@ -599,16 +668,51 @@ async def user_api_key_auth( ): return valid_token + ## Check END-USER OBJECT + request_data = await _read_request_body(request=request) + _end_user_object = None + end_user_params = {} + if "user" in request_data: + try: + _end_user_object = await get_end_user_object( + end_user_id=request_data["user"], + prisma_client=prisma_client, + user_api_key_cache=user_api_key_cache, + ) + if _end_user_object is not None: + end_user_params["allowed_model_region"] = ( + _end_user_object.allowed_model_region + ) + except Exception as e: + verbose_proxy_logger.debug( + "Unable to find user in db. Error - {}".format(str(e)) + ) + pass + try: - is_master_key_valid = secrets.compare_digest(api_key, master_key) + is_master_key_valid = secrets.compare_digest(api_key, master_key) # type: ignore except Exception as e: is_master_key_valid = False + ## VALIDATE MASTER KEY ## + try: + assert isinstance(master_key, str) + except Exception as e: + raise HTTPException( + status_code=500, + detail={ + "Master key must be a valid string. Current type={}".format( + type(master_key) + ) + }, + ) + if is_master_key_valid: _user_api_key_obj = UserAPIKeyAuth( api_key=master_key, user_role="proxy_admin", user_id=litellm_proxy_admin_name, + **end_user_params, ) await user_api_key_cache.async_set_cache( key=hash_token(master_key), value=_user_api_key_obj @@ -659,6 +763,7 @@ async def user_api_key_auth( verbose_proxy_logger.debug("Token from db: %s", valid_token) elif valid_token is not None: verbose_proxy_logger.debug("API Key Cache Hit!") + user_id_information = None if valid_token: # Got Valid Token from Cache, DB @@ -672,10 +777,6 @@ async def user_api_key_auth( # 7. If token spend is under team budget # 8. If team spend is under team budget - request_data = await _read_request_body( - request=request - ) # request data, used across all checks. Making this easily available - # Check 1. If token can call model _model_alias_map = {} if ( @@ -820,12 +921,18 @@ async def user_api_key_auth( user_max_budget is not None and user_current_spend is not None ): + call_info = CallInfo( + token=valid_token.token, + spend=user_current_spend, + max_budget=user_max_budget, + user_id=_user.get("user_id", None), + user_email=_user.get("user_email", None), + key_alias=valid_token.key_alias, + ) asyncio.create_task( proxy_logging_obj.budget_alerts( - user_max_budget=user_max_budget, - user_current_spend=user_current_spend, - type="user_and_proxy_budget", - user_info=_user, + type="user_budget", + user_info=call_info, ) ) @@ -845,12 +952,20 @@ async def user_api_key_auth( user_max_budget is not None and user_current_spend is not None ): + call_info = CallInfo( + token=valid_token.token, + spend=user_current_spend, + max_budget=user_max_budget, + user_id=getattr(user_id_information, "user_id", None), + user_email=getattr( + user_id_information, "user_email", None + ), + key_alias=valid_token.key_alias, + ) asyncio.create_task( proxy_logging_obj.budget_alerts( - user_max_budget=user_max_budget, - user_current_spend=user_current_spend, type="user_budget", - user_info=user_id_information, + user_info=call_info, ) ) @@ -880,12 +995,17 @@ async def user_api_key_auth( # Check 4. 
Token Spend is under budget if valid_token.spend is not None and valid_token.max_budget is not None: + call_info = CallInfo( + token=valid_token.token, + spend=valid_token.spend, + max_budget=valid_token.max_budget, + user_id=valid_token.user_id, + team_id=valid_token.team_id, + ) asyncio.create_task( proxy_logging_obj.budget_alerts( - user_max_budget=valid_token.max_budget, - user_current_spend=valid_token.spend, type="token_budget", - user_info=valid_token, + user_info=call_info, ) ) @@ -914,7 +1034,7 @@ async def user_api_key_auth( {"startTime": {"gt": twenty_eight_days_ago}}, {"model": current_model}, ] - }, + }, # type: ignore ) if ( len(model_spend) > 0 @@ -930,39 +1050,24 @@ async def user_api_key_auth( raise Exception( f"ExceededModelBudget: Current spend for model: {current_model_spend}; Max Budget for Model: {current_model_budget}" ) - # Check 6. Token spend is under Team budget - if ( - valid_token.spend is not None - and hasattr(valid_token, "team_max_budget") - and valid_token.team_max_budget is not None - ): - asyncio.create_task( - proxy_logging_obj.budget_alerts( - user_max_budget=valid_token.team_max_budget, - user_current_spend=valid_token.spend, - type="token_budget", - user_info=valid_token, - ) - ) - if valid_token.spend >= valid_token.team_max_budget: - raise Exception( - f"ExceededTokenBudget: Current spend for token: {valid_token.spend}; Max Budget for Team: {valid_token.team_max_budget}" - ) - - # Check 7. Team spend is under Team budget + # Check 6. Team spend is under Team budget if ( hasattr(valid_token, "team_spend") and valid_token.team_spend is not None and hasattr(valid_token, "team_max_budget") and valid_token.team_max_budget is not None ): + call_info = CallInfo( + token=valid_token.token, + spend=valid_token.team_spend, + max_budget=valid_token.team_max_budget, + user_id=valid_token.user_id, + ) asyncio.create_task( proxy_logging_obj.budget_alerts( - user_max_budget=valid_token.team_max_budget, - user_current_spend=valid_token.team_spend, - type="token_budget", - user_info=valid_token, + type="team_budget", + user_info=call_info, ) ) @@ -986,14 +1091,6 @@ async def user_api_key_auth( key=valid_token.team_id, value=_team_obj ) # save team table in cache - used for tpm/rpm limiting - tpm_rpm_limiter.py - _end_user_object = None - if "user" in request_data: - _end_user_object = await get_end_user_object( - end_user_id=request_data["user"], - prisma_client=prisma_client, - user_api_key_cache=user_api_key_cache, - ) - global_proxy_spend = None if ( litellm.max_budget > 0 and prisma_client is not None @@ -1016,18 +1113,17 @@ async def user_api_key_auth( ) if global_proxy_spend is not None: - user_info = { - "user_id": litellm_proxy_admin_name, - "max_budget": litellm.max_budget, - "spend": global_proxy_spend, - "user_email": "", - } + call_info = CallInfo( + token=valid_token.token, + spend=global_proxy_spend, + max_budget=litellm.max_budget, + user_id=litellm_proxy_admin_name, + team_id=valid_token.team_id, + ) asyncio.create_task( proxy_logging_obj.budget_alerts( - user_max_budget=litellm.max_budget, - user_current_spend=global_proxy_spend, - type="user_and_proxy_budget", - user_info=user_info, + type="proxy_budget", + user_info=call_info, ) ) _ = common_checks( @@ -1073,6 +1169,8 @@ async def user_api_key_auth( if not _is_user_proxy_admin(user_id_information): # if non-admin if route in LiteLLMRoutes.openai_routes.value: pass + elif request["route"].name in LiteLLMRoutes.openai_route_names.value: + pass elif ( route in LiteLLMRoutes.info_routes.value ): # 
check if user allowed to call an info route @@ -1187,7 +1285,18 @@ async def user_api_key_auth( # No token was found when looking up in the DB raise Exception("Invalid token passed") if valid_token_dict is not None: - return UserAPIKeyAuth(api_key=api_key, **valid_token_dict) + if user_id_information is not None and _is_user_proxy_admin( + user_id_information + ): + return UserAPIKeyAuth( + api_key=api_key, user_role="proxy_admin", **valid_token_dict + ) + elif _has_user_setup_sso() and route in LiteLLMRoutes.sso_only_routes.value: + return UserAPIKeyAuth( + api_key=api_key, user_role="app_owner", **valid_token_dict + ) + else: + return UserAPIKeyAuth(api_key=api_key, **valid_token_dict) else: raise Exception() except Exception as e: @@ -1406,13 +1515,8 @@ async def _PROXY_track_cost_callback( model = kwargs.get("model", "") metadata = kwargs.get("litellm_params", {}).get("metadata", {}) error_msg += f"\n Args to _PROXY_track_cost_callback\n model: {model}\n metadata: {metadata}\n" - user_id = user_id or "not-found" asyncio.create_task( - proxy_logging_obj.budget_alerts( - user_max_budget=0, - user_current_spend=0, - type="failed_tracking", - user_info=user_id, + proxy_logging_obj.failed_tracking_alert( error_message=error_msg, ) ) @@ -1499,68 +1603,6 @@ async def update_database( end_user_id, 0 ) ) - elif custom_db_client is not None: - for id in user_ids: - if id is None: - continue - if ( - custom_db_client is not None - and id != litellm_proxy_budget_name - ): - existing_spend_obj = await custom_db_client.get_data( - key=id, table_name="user" - ) - verbose_proxy_logger.debug( - f"Updating existing_spend_obj: {existing_spend_obj}" - ) - if existing_spend_obj is None: - # if user does not exist in LiteLLM_UserTable, create a new user - existing_spend = 0 - max_user_budget = None - if litellm.max_user_budget is not None: - max_user_budget = litellm.max_user_budget - existing_spend_obj = LiteLLM_UserTable( - user_id=id, - spend=0, - max_budget=max_user_budget, - user_email=None, - ) - else: - existing_spend = existing_spend_obj.spend - - # Calculate the new cost by adding the existing cost and response_cost - existing_spend_obj.spend = existing_spend + response_cost - - # track cost per model, for the given user - spend_per_model = existing_spend_obj.model_spend or {} - current_model = kwargs.get("model") - - if current_model is not None and spend_per_model is not None: - if spend_per_model.get(current_model) is None: - spend_per_model[current_model] = response_cost - else: - spend_per_model[current_model] += response_cost - existing_spend_obj.model_spend = spend_per_model - - valid_token = user_api_key_cache.get_cache(key=id) - if valid_token is not None and isinstance(valid_token, dict): - user_api_key_cache.set_cache( - key=id, value=existing_spend_obj.json() - ) - - verbose_proxy_logger.debug( - f"user - new cost: {existing_spend_obj.spend}, user_id: {id}" - ) - data_list.append(existing_spend_obj) - - if custom_db_client is not None and user_id is not None: - new_spend = data_list[0].spend - await custom_db_client.update_data( - key=user_id, - value={"spend": new_spend}, - table_name="user", - ) - except Exception as e: verbose_proxy_logger.info( "\033[91m" @@ -1580,31 +1622,6 @@ async def update_database( response_cost + prisma_client.key_list_transactons.get(hashed_token, 0) ) - elif custom_db_client is not None: - # Fetch the existing cost for the given token - existing_spend_obj = await custom_db_client.get_data( - key=token, table_name="key" - ) - verbose_proxy_logger.debug( - 
f"_update_key_db existing spend: {existing_spend_obj}" - ) - if existing_spend_obj is None: - existing_spend = 0 - else: - existing_spend = existing_spend_obj.spend - # Calculate the new cost by adding the existing cost and response_cost - new_spend = existing_spend + response_cost - - verbose_proxy_logger.debug("new cost: %s", new_spend) - # Update the cost column for the given token - await custom_db_client.update_data( - key=token, value={"spend": new_spend}, table_name="key" - ) - - valid_token = user_api_key_cache.get_cache(key=token) - if valid_token is not None: - valid_token.spend = new_spend - user_api_key_cache.set_cache(key=token, value=valid_token) except Exception as e: verbose_proxy_logger.info( f"Update Key DB Call failed to execute - {str(e)}\n{traceback.format_exc()}" @@ -1622,6 +1639,7 @@ async def update_database( response_obj=completion_response, start_time=start_time, end_time=end_time, + end_user_id=end_user_id, ) payload["spend"] = response_cost @@ -1652,31 +1670,6 @@ async def update_database( response_cost + prisma_client.team_list_transactons.get(team_id, 0) ) - elif custom_db_client is not None: - # Fetch the existing cost for the given token - existing_spend_obj = await custom_db_client.get_data( - key=token, table_name="key" - ) - verbose_proxy_logger.debug( - f"_update_key_db existing spend: {existing_spend_obj}" - ) - if existing_spend_obj is None: - existing_spend = 0 - else: - existing_spend = existing_spend_obj.spend - # Calculate the new cost by adding the existing cost and response_cost - new_spend = existing_spend + response_cost - - verbose_proxy_logger.debug("new cost: %s", new_spend) - # Update the cost column for the given token - await custom_db_client.update_data( - key=token, value={"spend": new_spend}, table_name="key" - ) - - valid_token = user_api_key_cache.get_cache(key=token) - if valid_token is not None: - valid_token.spend = new_spend - user_api_key_cache.set_cache(key=token, value=valid_token) except Exception as e: verbose_proxy_logger.info( f"Update Team DB failed to execute - {str(e)}\n{traceback.format_exc()}" @@ -1735,14 +1728,14 @@ async def update_cache( """ ### UPDATE KEY SPEND ### - async def _update_key_cache(): + async def _update_key_cache(token: str, response_cost: float): # Fetch the existing cost for the given token if isinstance(token, str) and token.startswith("sk-"): hashed_token = hash_token(token=token) else: hashed_token = token verbose_proxy_logger.debug("_update_key_cache: hashed_token=%s", hashed_token) - existing_spend_obj = await user_api_key_cache.async_get_cache(key=hashed_token) + existing_spend_obj: LiteLLM_VerificationTokenView = await user_api_key_cache.async_get_cache(key=hashed_token) # type: ignore verbose_proxy_logger.debug( f"_update_key_cache: existing_spend_obj={existing_spend_obj}" ) @@ -1751,7 +1744,7 @@ async def update_cache( ) if existing_spend_obj is None: existing_spend = 0 - existing_spend_obj = LiteLLM_VerificationTokenView() + existing_spend_obj = LiteLLM_VerificationTokenView(token=token) else: existing_spend = existing_spend_obj.spend # Calculate the new cost by adding the existing cost and response_cost @@ -1765,29 +1758,36 @@ async def update_cache( and ( _is_projected_spend_over_limit( current_spend=new_spend, - soft_budget_limit=existing_spend_obj.litellm_budget_table.soft_budget, + soft_budget_limit=existing_spend_obj.litellm_budget_table[ + "soft_budget" + ], ) == True ) ): - key_alias = existing_spend_obj.key_alias projected_spend, projected_exceeded_date = 
_get_projected_spend_over_limit( current_spend=new_spend, - soft_budget_limit=existing_spend_obj.litellm_budget_table.soft_budget, + soft_budget_limit=existing_spend_obj.litellm_budget_table.get( + "soft_budget", None + ), + ) # type: ignore + soft_limit = existing_spend_obj.litellm_budget_table.get( + "soft_budget", float("inf") + ) + call_info = CallInfo( + token=existing_spend_obj.token or "", + spend=new_spend, + key_alias=existing_spend_obj.key_alias, + max_budget=soft_limit, + user_id=existing_spend_obj.user_id, + projected_spend=projected_spend, + projected_exceeded_date=projected_exceeded_date, ) - soft_limit = existing_spend_obj.litellm_budget_table.soft_budget - user_info = { - "key_alias": key_alias, - "projected_spend": projected_spend, - "projected_exceeded_date": projected_exceeded_date, - } # alert user asyncio.create_task( proxy_logging_obj.budget_alerts( type="projected_limit_exceeded", - user_info=user_info, - user_max_budget=soft_limit, - user_current_spend=new_spend, + user_info=call_info, ) ) # set cooldown on alert @@ -1797,7 +1797,7 @@ async def update_cache( existing_spend_obj is not None and getattr(existing_spend_obj, "team_spend", None) is not None ): - existing_team_spend = existing_spend_obj.team_spend + existing_team_spend = existing_spend_obj.team_spend or 0 # Calculate the new cost by adding the existing cost and response_cost existing_spend_obj.team_spend = existing_team_spend + response_cost @@ -1914,8 +1914,8 @@ async def update_cache( f"An error occurred updating end user cache: {str(e)}\n\n{traceback.format_exc()}" ) - if token is not None: - asyncio.create_task(_update_key_cache()) + if token is not None and response_cost is not None: + asyncio.create_task(_update_key_cache(token=token, response_cost=response_cost)) asyncio.create_task(_update_user_cache()) @@ -2193,8 +2193,18 @@ class ProxyConfig: elif key == "callbacks": if isinstance(value, list): imported_list: List[Any] = [] + known_compatible_callbacks = list( + get_args( + litellm._custom_logger_compatible_callbacks_literal + ) + ) for callback in value: # ["presidio", ] - if isinstance(callback, str) and callback == "presidio": + if ( + isinstance(callback, str) + and callback in known_compatible_callbacks + ): + imported_list.append(callback) + elif isinstance(callback, str) and callback == "presidio": from litellm.proxy.hooks.presidio_pii_masking import ( _OPTIONAL_PresidioPIIMasking, ) @@ -2472,6 +2482,12 @@ class ProxyConfig: ) if master_key and master_key.startswith("os.environ/"): master_key = litellm.get_secret(master_key) + if not isinstance(master_key, str): + raise Exception( + "Master key must be a string. 
Current type - {}".format( + type(master_key) + ) + ) if master_key is not None and isinstance(master_key, str): litellm_master_key_hash = hash_token(master_key) @@ -2594,6 +2610,11 @@ class ProxyConfig: Return model info w/ id """ + _id: Optional[str] = getattr(model, "model_id", None) + if _id is not None: + model.model_info["id"] = _id + model.model_info["db_model"] = True + if model.model_info is not None and isinstance(model.model_info, dict): if "id" not in model.model_info: model.model_info["id"] = model.model_id @@ -2795,7 +2816,19 @@ class ProxyConfig: "Error setting env variable: %s - %s", k, str(e) ) - # general_settings + # router settings + if llm_router is not None and prisma_client is not None: + db_router_settings = await prisma_client.db.litellm_config.find_first( + where={"param_name": "router_settings"} + ) + if ( + db_router_settings is not None + and db_router_settings.param_value is not None + ): + _router_settings = db_router_settings.param_value + llm_router.update_settings(**_router_settings) + + ## ALERTING ## [TODO] move this to the _update_general_settings() block _general_settings = config_data.get("general_settings", {}) if "alerting" in _general_settings: general_settings["alerting"] = _general_settings["alerting"] @@ -2819,17 +2852,24 @@ class ProxyConfig: alert_to_webhook_url=general_settings["alert_to_webhook_url"] ) - # router settings - if llm_router is not None and prisma_client is not None: - db_router_settings = await prisma_client.db.litellm_config.find_first( - where={"param_name": "router_settings"} - ) - if ( - db_router_settings is not None - and db_router_settings.param_value is not None - ): - _router_settings = db_router_settings.param_value - llm_router.update_settings(**_router_settings) + async def _update_general_settings(self, db_general_settings: Optional[Json]): + """ + Pull from DB, read general settings value + """ + global general_settings + if db_general_settings is None: + return + _general_settings = dict(db_general_settings) + ## MAX PARALLEL REQUESTS ## + if "max_parallel_requests" in _general_settings: + general_settings["max_parallel_requests"] = _general_settings[ + "max_parallel_requests" + ] + + if "global_max_parallel_requests" in _general_settings: + general_settings["global_max_parallel_requests"] = _general_settings[ + "global_max_parallel_requests" + ] async def add_deployment( self, @@ -2837,7 +2877,7 @@ class ProxyConfig: proxy_logging_obj: ProxyLogging, ): """ - - Check db for new models (last 10 most recently updated) + - Check db for new models - Check if model id's in router already - If not, add to router """ @@ -2850,9 +2890,21 @@ class ProxyConfig: ) verbose_proxy_logger.debug(f"llm_router: {llm_router}") new_models = await prisma_client.db.litellm_proxymodeltable.find_many() + # update llm router await self._update_llm_router( new_models=new_models, proxy_logging_obj=proxy_logging_obj ) + + db_general_settings = await prisma_client.db.litellm_config.find_first( + where={"param_name": "general_settings"} + ) + + # update general settings + if db_general_settings is not None: + await self._update_general_settings( + db_general_settings=db_general_settings.param_value, + ) + except Exception as e: verbose_proxy_logger.error( "{}\nTraceback:{}".format(str(e), traceback.format_exc()) @@ -2929,19 +2981,19 @@ async def generate_key_helper_fn( expires = None else: duration_s = _duration_in_seconds(duration=duration) - expires = datetime.utcnow() + timedelta(seconds=duration_s) + expires = datetime.now(timezone.utc) + 
timedelta(seconds=duration_s) if key_budget_duration is None: # one-time budget key_reset_at = None else: duration_s = _duration_in_seconds(duration=key_budget_duration) - key_reset_at = datetime.utcnow() + timedelta(seconds=duration_s) + key_reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s) if budget_duration is None: # one-time budget reset_at = None else: duration_s = _duration_in_seconds(duration=budget_duration) - reset_at = datetime.utcnow() + timedelta(seconds=duration_s) + reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s) aliases_json = json.dumps(aliases) config_json = json.dumps(config) @@ -3052,27 +3104,6 @@ async def generate_key_helper_fn( data=key_data, table_name="key" ) key_data["token_id"] = getattr(create_key_response, "token", None) - elif custom_db_client is not None: - if table_name is None or table_name == "user": - ## CREATE USER (If necessary) - verbose_proxy_logger.debug( - "CustomDBClient: Creating User= %s", user_data - ) - user_row = await custom_db_client.insert_data( - value=user_data, table_name="user" - ) - if user_row is None: - # GET USER ROW - user_row = await custom_db_client.get_data( - key=user_id, table_name="user" # type: ignore - ) - - ## use default user model list if no key-specific model list provided - if len(user_row.models) > 0 and len(key_data["models"]) == 0: # type: ignore - key_data["models"] = user_row.models - ## CREATE KEY - verbose_proxy_logger.debug("CustomDBClient: Creating Key= %s", key_data) - await custom_db_client.insert_data(value=key_data, table_name="key") except Exception as e: traceback.print_exc() if isinstance(e, HTTPException): @@ -3501,7 +3532,7 @@ async def startup_event(): store_model_in_db = ( litellm.get_secret("STORE_MODEL_IN_DB", store_model_in_db) or store_model_in_db - ) + ) # type: ignore if store_model_in_db == True: scheduler.add_job( proxy_config.add_deployment, @@ -3619,7 +3650,6 @@ async def chat_completion( ): global general_settings, user_debug, proxy_logging_obj, llm_model_list data = {} - check_request_disconnected = None try: body = await request.body() body_str = body.decode() @@ -3668,6 +3698,9 @@ async def chat_completion( data["metadata"]["user_api_key_alias"] = getattr( user_api_key_dict, "key_alias", None ) + data["metadata"]["global_max_parallel_requests"] = general_settings.get( + "global_max_parallel_requests", None + ) data["metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id data["metadata"]["user_api_key_org_id"] = user_api_key_dict.org_id data["metadata"]["user_api_key_team_id"] = getattr( @@ -3734,8 +3767,8 @@ async def chat_completion( data["litellm_logging_obj"] = logging_obj - ### CALL HOOKS ### - modify incoming data before calling the model - data = await proxy_logging_obj.pre_call_hook( + ### CALL HOOKS ### - modify/reject incoming data before calling the model + data = await proxy_logging_obj.pre_call_hook( # type: ignore user_api_key_dict=user_api_key_dict, data=data, call_type="completion" ) @@ -3799,9 +3832,6 @@ async def chat_completion( *tasks ) # run the moderation check in parallel to the actual llm api call - check_request_disconnected = asyncio.create_task( - check_request_disconnection(request, llm_responses) - ) responses = await llm_responses response = responses[1] @@ -3819,13 +3849,13 @@ async def chat_completion( if ( "stream" in data and data["stream"] == True ): # use generate_responses to stream responses - custom_headers = { - "x-litellm-model-id": model_id, - "x-litellm-cache-key": cache_key, - 
"x-litellm-model-api-base": api_base, - "x-litellm-version": version, - "x-litellm-model-region": user_api_key_dict.allowed_model_region or "", - } + custom_headers = get_custom_headers( + model_id=model_id, + cache_key=cache_key, + api_base=api_base, + version=version, + model_region=getattr(user_api_key_dict, "allowed_model_region", ""), + ) selected_data_generator = select_data_generator( response=response, user_api_key_dict=user_api_key_dict, @@ -3837,12 +3867,14 @@ async def chat_completion( headers=custom_headers, ) - fastapi_response.headers["x-litellm-model-id"] = model_id - fastapi_response.headers["x-litellm-cache-key"] = cache_key - fastapi_response.headers["x-litellm-model-api-base"] = api_base - fastapi_response.headers["x-litellm-version"] = version - fastapi_response.headers["x-litellm-model-region"] = ( - user_api_key_dict.allowed_model_region or "" + fastapi_response.headers.update( + get_custom_headers( + model_id=model_id, + cache_key=cache_key, + api_base=api_base, + version=version, + model_region=getattr(user_api_key_dict, "allowed_model_region", ""), + ) ) ### CALL HOOKS ### - modify outgoing data @@ -3851,14 +3883,51 @@ async def chat_completion( ) return response + except RejectedRequestError as e: + _data = e.request_data + _data["litellm_status"] = "fail" # used for alerting + await proxy_logging_obj.post_call_failure_hook( + user_api_key_dict=user_api_key_dict, + original_exception=e, + request_data=_data, + ) + _chat_response = litellm.ModelResponse() + _chat_response.choices[0].message.content = e.message # type: ignore + + if data.get("stream", None) is not None and data["stream"] == True: + _iterator = litellm.utils.ModelResponseIterator( + model_response=_chat_response, convert_to_delta=True + ) + _streaming_response = litellm.CustomStreamWrapper( + completion_stream=_iterator, + model=data.get("model", ""), + custom_llm_provider="cached_response", + logging_obj=data.get("litellm_logging_obj", None), + ) + selected_data_generator = select_data_generator( + response=_streaming_response, + user_api_key_dict=user_api_key_dict, + request_data=_data, + ) + + return StreamingResponse( + selected_data_generator, + media_type="text/event-stream", + ) + _usage = litellm.Usage(prompt_tokens=0, completion_tokens=0, total_tokens=0) + _chat_response.usage = _usage # type: ignore + return _chat_response except Exception as e: data["litellm_status"] = "fail" # used for alerting traceback.print_exc() await proxy_logging_obj.post_call_failure_hook( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) + litellm_debug_info = getattr(e, "litellm_debug_info", "") verbose_proxy_logger.debug( - f"\033[1;31mAn error occurred: {e}\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`" + "\033[1;31mAn error occurred: %s %s\n\n Debug this by setting `--debug`, e.g. 
`litellm --model gpt-3.5-turbo --debug`", + e, + litellm_debug_info, ) router_model_names = llm_router.model_names if llm_router is not None else [] if user_debug: @@ -3878,9 +3947,6 @@ async def chat_completion( param=getattr(e, "param", "None"), code=getattr(e, "status_code", 500), ) - finally: - if check_request_disconnected is not None: - check_request_disconnected.cancel() @router.post( @@ -3906,7 +3972,7 @@ async def completion( user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), ): global user_temperature, user_request_timeout, user_max_tokens, user_api_base - check_request_disconnected = None + data = {} try: body = await request.body() body_str = body.decode() @@ -3935,6 +4001,9 @@ async def completion( data["metadata"]["user_api_key_team_id"] = getattr( user_api_key_dict, "team_id", None ) + data["metadata"]["global_max_parallel_requests"] = general_settings.get( + "global_max_parallel_requests", None + ) data["metadata"]["user_api_key_team_alias"] = getattr( user_api_key_dict, "team_alias", None ) @@ -3962,8 +4031,8 @@ async def completion( data["model"] = litellm.model_alias_map[data["model"]] ### CALL HOOKS ### - modify incoming data before calling the model - data = await proxy_logging_obj.pre_call_hook( - user_api_key_dict=user_api_key_dict, data=data, call_type="completion" + data = await proxy_logging_obj.pre_call_hook( # type: ignore + user_api_key_dict=user_api_key_dict, data=data, call_type="text_completion" ) ### ROUTE THE REQUESTs ### @@ -4003,9 +4072,6 @@ async def completion( + data.get("model", "") }, ) - check_request_disconnected = asyncio.create_task( - check_request_disconnection(request, llm_response) - ) # Await the llm_response task response = await llm_response @@ -4015,16 +4081,19 @@ async def completion( cache_key = hidden_params.get("cache_key", None) or "" api_base = hidden_params.get("api_base", None) or "" + ### ALERTING ### + data["litellm_status"] = "success" # used for alerting + verbose_proxy_logger.debug("final response: %s", response) if ( "stream" in data and data["stream"] == True ): # use generate_responses to stream responses - custom_headers = { - "x-litellm-model-id": model_id, - "x-litellm-cache-key": cache_key, - "x-litellm-model-api-base": api_base, - "x-litellm-version": version, - } + custom_headers = get_custom_headers( + model_id=model_id, + cache_key=cache_key, + api_base=api_base, + version=version, + ) selected_data_generator = select_data_generator( response=response, user_api_key_dict=user_api_key_dict, @@ -4036,19 +4105,67 @@ async def completion( media_type="text/event-stream", headers=custom_headers, ) - - fastapi_response.headers["x-litellm-model-id"] = model_id - fastapi_response.headers["x-litellm-cache-key"] = cache_key - fastapi_response.headers["x-litellm-model-api-base"] = api_base - fastapi_response.headers["x-litellm-version"] = version + fastapi_response.headers.update( + get_custom_headers( + model_id=model_id, + cache_key=cache_key, + api_base=api_base, + version=version, + ) + ) return response + except RejectedRequestError as e: + _data = e.request_data + _data["litellm_status"] = "fail" # used for alerting + await proxy_logging_obj.post_call_failure_hook( + user_api_key_dict=user_api_key_dict, + original_exception=e, + request_data=_data, + ) + if _data.get("stream", None) is not None and _data["stream"] == True: + _chat_response = litellm.ModelResponse() + _usage = litellm.Usage( + prompt_tokens=0, + completion_tokens=0, + total_tokens=0, + ) + _chat_response.usage = _usage # type: ignore + 
_chat_response.choices[0].message.content = e.message # type: ignore + _iterator = litellm.utils.ModelResponseIterator( + model_response=_chat_response, convert_to_delta=True + ) + _streaming_response = litellm.TextCompletionStreamWrapper( + completion_stream=_iterator, + model=_data.get("model", ""), + ) + + selected_data_generator = select_data_generator( + response=_streaming_response, + user_api_key_dict=user_api_key_dict, + request_data=data, + ) + + return StreamingResponse( + selected_data_generator, + media_type="text/event-stream", + headers={}, + ) + else: + _response = litellm.TextCompletionResponse() + _response.choices[0].text = e.message + return _response except Exception as e: data["litellm_status"] = "fail" # used for alerting + await proxy_logging_obj.post_call_failure_hook( + user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data + ) verbose_proxy_logger.debug("EXCEPTION RAISED IN PROXY MAIN.PY") + litellm_debug_info = getattr(e, "litellm_debug_info", "") verbose_proxy_logger.debug( - "\033[1;31mAn error occurred: %s\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`", + "\033[1;31mAn error occurred: %s %s\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`", e, + litellm_debug_info, ) traceback.print_exc() error_traceback = traceback.format_exc() @@ -4059,9 +4176,6 @@ async def completion( param=getattr(e, "param", "None"), code=getattr(e, "status_code", 500), ) - finally: - if check_request_disconnected is not None: - check_request_disconnected.cancel() @router.post( @@ -4126,6 +4240,9 @@ async def embeddings( data["metadata"]["user_api_key_alias"] = getattr( user_api_key_dict, "key_alias", None ) + data["metadata"]["global_max_parallel_requests"] = general_settings.get( + "global_max_parallel_requests", None + ) data["metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id data["metadata"]["user_api_key_team_id"] = getattr( user_api_key_dict, "team_id", None @@ -4237,12 +4354,14 @@ async def embeddings( cache_key = hidden_params.get("cache_key", None) or "" api_base = hidden_params.get("api_base", None) or "" - fastapi_response.headers["x-litellm-model-id"] = model_id - fastapi_response.headers["x-litellm-cache-key"] = cache_key - fastapi_response.headers["x-litellm-model-api-base"] = api_base - fastapi_response.headers["x-litellm-version"] = version - fastapi_response.headers["x-litellm-model-region"] = ( - user_api_key_dict.allowed_model_region or "" + fastapi_response.headers.update( + get_custom_headers( + model_id=model_id, + cache_key=cache_key, + api_base=api_base, + version=version, + model_region=getattr(user_api_key_dict, "allowed_model_region", ""), + ) ) return response @@ -4251,6 +4370,12 @@ async def embeddings( await proxy_logging_obj.post_call_failure_hook( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) + litellm_debug_info = getattr(e, "litellm_debug_info", "") + verbose_proxy_logger.debug( + "\033[1;31mAn error occurred: %s %s\n\n Debug this by setting `--debug`, e.g. 
`litellm --model gpt-3.5-turbo --debug`", + e, + litellm_debug_info, + ) traceback.print_exc() if isinstance(e, HTTPException): raise ProxyException( @@ -4324,6 +4449,9 @@ async def image_generation( data["metadata"]["user_api_key_alias"] = getattr( user_api_key_dict, "key_alias", None ) + data["metadata"]["global_max_parallel_requests"] = general_settings.get( + "global_max_parallel_requests", None + ) data["metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id data["metadata"]["user_api_key_team_id"] = getattr( user_api_key_dict, "team_id", None @@ -4409,12 +4537,14 @@ async def image_generation( cache_key = hidden_params.get("cache_key", None) or "" api_base = hidden_params.get("api_base", None) or "" - fastapi_response.headers["x-litellm-model-id"] = model_id - fastapi_response.headers["x-litellm-cache-key"] = cache_key - fastapi_response.headers["x-litellm-model-api-base"] = api_base - fastapi_response.headers["x-litellm-version"] = version - fastapi_response.headers["x-litellm-model-region"] = ( - user_api_key_dict.allowed_model_region or "" + fastapi_response.headers.update( + get_custom_headers( + model_id=model_id, + cache_key=cache_key, + api_base=api_base, + version=version, + model_region=getattr(user_api_key_dict, "allowed_model_region", ""), + ) ) return response @@ -4504,6 +4634,9 @@ async def audio_transcriptions( data["metadata"]["user_api_key_team_id"] = getattr( user_api_key_dict, "team_id", None ) + data["metadata"]["global_max_parallel_requests"] = general_settings.get( + "global_max_parallel_requests", None + ) data["metadata"]["user_api_key_team_alias"] = getattr( user_api_key_dict, "team_alias", None ) @@ -4605,12 +4738,14 @@ async def audio_transcriptions( cache_key = hidden_params.get("cache_key", None) or "" api_base = hidden_params.get("api_base", None) or "" - fastapi_response.headers["x-litellm-model-id"] = model_id - fastapi_response.headers["x-litellm-cache-key"] = cache_key - fastapi_response.headers["x-litellm-model-api-base"] = api_base - fastapi_response.headers["x-litellm-version"] = version - fastapi_response.headers["x-litellm-model-region"] = ( - user_api_key_dict.allowed_model_region or "" + fastapi_response.headers.update( + get_custom_headers( + model_id=model_id, + cache_key=cache_key, + api_base=api_base, + version=version, + model_region=getattr(user_api_key_dict, "allowed_model_region", ""), + ) ) return response @@ -4701,6 +4836,9 @@ async def moderations( "authorization", None ) # do not store the original `sk-..` api key in the db data["metadata"]["headers"] = _headers + data["metadata"]["global_max_parallel_requests"] = general_settings.get( + "global_max_parallel_requests", None + ) data["metadata"]["user_api_key_alias"] = getattr( user_api_key_dict, "key_alias", None ) @@ -4783,12 +4921,14 @@ async def moderations( cache_key = hidden_params.get("cache_key", None) or "" api_base = hidden_params.get("api_base", None) or "" - fastapi_response.headers["x-litellm-model-id"] = model_id - fastapi_response.headers["x-litellm-cache-key"] = cache_key - fastapi_response.headers["x-litellm-model-api-base"] = api_base - fastapi_response.headers["x-litellm-version"] = version - fastapi_response.headers["x-litellm-model-region"] = ( - user_api_key_dict.allowed_model_region or "" + fastapi_response.headers.update( + get_custom_headers( + model_id=model_id, + cache_key=cache_key, + api_base=api_base, + version=version, + model_region=getattr(user_api_key_dict, "allowed_model_region", ""), + ) ) return response @@ -4816,6 +4956,90 @@ async def 
moderations( ) +#### DEV UTILS #### + + +@router.post( + "/utils/token_counter", + tags=["llm utils"], + dependencies=[Depends(user_api_key_auth)], + response_model=TokenCountResponse, +) +async def token_counter(request: TokenCountRequest): + """ + Count the tokens for a given prompt or list of messages, using the tokenizer of the matching router deployment when one exists (otherwise the tokenizer for the requested model). + """ + from litellm import token_counter + + global llm_router + + prompt = request.prompt + messages = request.messages + if prompt is None and messages is None: + raise HTTPException( + status_code=400, detail="prompt or messages must be provided" + ) + + deployment = None + litellm_model_name = None + if llm_router is not None: + # get 1 deployment corresponding to the model + for _model in llm_router.model_list: + if _model["model_name"] == request.model: + deployment = _model + break + if deployment is not None: + litellm_model_name = deployment.get("litellm_params", {}).get("model") + # remove the custom_llm_provider_prefix in the litellm_model_name + if "/" in litellm_model_name: + litellm_model_name = litellm_model_name.split("/", 1)[1] + + model_to_use = ( + litellm_model_name or request.model + ) # use the litellm model name; if it's not available, fall back to request.model + _tokenizer_used = litellm.utils._select_tokenizer(model=model_to_use) + tokenizer_used = str(_tokenizer_used["type"]) + total_tokens = token_counter( + model=model_to_use, + text=prompt, + messages=messages, + ) + return TokenCountResponse( + total_tokens=total_tokens, + request_model=request.model, + model_used=model_to_use, + tokenizer_type=tokenizer_used, + ) + + +@router.get( + "/utils/supported_openai_params", + tags=["llm utils"], + dependencies=[Depends(user_api_key_auth)], +) +async def supported_openai_params(model: str): + """ + Returns supported openai params for a given litellm model name + + e.g. `gpt-4` vs `gpt-3.5-turbo` + + Example curl: + ``` + curl -X GET --location 'http://localhost:4000/utils/supported_openai_params?model=gpt-3.5-turbo-16k' \ + --header 'Authorization: Bearer sk-1234' + ``` + """ + try: + model, custom_llm_provider, _, _ = litellm.get_llm_provider(model=model) + return { + "supported_openai_params": litellm.get_supported_openai_params( + model=model, custom_llm_provider=custom_llm_provider + ) + } + except Exception as e: + raise HTTPException( + status_code=400, detail={"error": "Could not map model={}".format(model)} + ) + + #### KEY MANAGEMENT #### @@ -5025,7 +5249,7 @@ async def update_key_fn(request: Request, data: UpdateKeyRequest): if "duration" in non_default_values: duration = non_default_values.pop("duration") duration_s = _duration_in_seconds(duration=duration) - expires = datetime.utcnow() + timedelta(seconds=duration_s) + expires = datetime.now(timezone.utc) + timedelta(seconds=duration_s) non_default_values["expires"] = expires response = await prisma_client.update_data( @@ -5717,6 +5941,8 @@ async def calculate_spend(request: Request): Calculate spend **before** making call: + Note: If you see a spend of $0.0 you need to set custom_pricing for your model: https://docs.litellm.ai/docs/proxy/custom_pricing + ``` curl --location 'http://localhost:4000/spend/calculate' --header 'Authorization: Bearer sk-1234' @@ -6622,7 +6848,19 @@ async def user_info( ) async def user_update(data: UpdateUserRequest): """ - [TODO]: Use this to update user budget + Example curl + + ``` + curl --location 'http://0.0.0.0:4000/user/update' \ + --header 'Authorization: Bearer sk-1234' \ + --header 'Content-Type: application/json' \ + --data '{ + "user_id": "test-litellm-user-4", + "user_role": "proxy_admin_viewer" + }' + + See below for all 
params + ``` """ global prisma_client try: @@ -7020,16 +7258,29 @@ async def new_end_user( return end_user_record -@router.post( +@router.get( "/end_user/info", tags=["End User Management"], dependencies=[Depends(user_api_key_auth)], ) -async def end_user_info(): - """ - [TODO] Needs to be implemented. - """ - pass +async def end_user_info( + end_user_id: str = fastapi.Query( + description="End User ID in the request parameters" + ), +): + global prisma_client + + if prisma_client is None: + raise HTTPException( + status_code=500, + detail={"error": CommonProxyErrors.db_not_connected_error.value}, + ) + + user_info = await prisma_client.db.litellm_endusertable.find_first( + where={"user_id": end_user_id} + ) + + return user_info @router.post( @@ -7271,6 +7522,11 @@ async def update_team( existing_team_row = await prisma_client.get_data( team_id=data.team_id, table_name="team", query_type="find_unique" ) + if existing_team_row is None: + raise HTTPException( + status_code=404, + detail={"error": f"Team not found, passed team_id={data.team_id}"}, + ) updated_kv = data.json(exclude_none=True) team_row = await prisma_client.update_data( @@ -7524,6 +7780,17 @@ async def delete_team( if data.team_ids is None: raise HTTPException(status_code=400, detail={"error": "No team id passed in"}) + # check that all teams passed exist + for team_id in data.team_ids: + team_row = await prisma_client.get_data( # type: ignore + team_id=team_id, table_name="team", query_type="find_unique" + ) + if team_row is None: + raise HTTPException( + status_code=404, + detail={"error": f"Team not found, passed team_id={team_id}"}, + ) + ## DELETE ASSOCIATED KEYS await prisma_client.delete_data(team_id_list=data.team_ids, table_name="key") ## DELETE TEAMS @@ -7571,6 +7838,12 @@ async def team_info( team_info = await prisma_client.get_data( team_id=team_id, table_name="team", query_type="find_unique" ) + if team_info is None: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail={"message": f"Team not found, passed team id: {team_id}."}, + ) + ## GET ALL KEYS ## keys = await prisma_client.get_data( team_id=team_id, @@ -7783,7 +8056,12 @@ async def new_organization( If none provided, create one based on provided values """ - budget_row = LiteLLM_BudgetTable(**data.json(exclude_none=True)) + budget_params = LiteLLM_BudgetTable.model_fields.keys() + + # Only include Budget Params when creating an entry in litellm_budgettable + _json_data = data.json(exclude_none=True) + _budget_data = {k: v for k, v in _json_data.items() if k in budget_params} + budget_row = LiteLLM_BudgetTable(**_budget_data) new_budget = prisma_client.jsonify_object(budget_row.json(exclude_none=True)) @@ -7986,6 +8264,7 @@ async def add_new_model( await proxy_logging_obj.slack_alerting_instance.model_added_alert( model_name=model_params.model_name, litellm_model_name=_orignal_litellm_model_name, + passed_model_info=model_params.model_info, ) except: pass @@ -8797,9 +9076,25 @@ async def google_login(request: Request): PROXY_BASE_URL should be the your deployed proxy endpoint, e.g. 
PROXY_BASE_URL="https://litellm-production-7002.up.railway.app/" Example: """ + global premium_user microsoft_client_id = os.getenv("MICROSOFT_CLIENT_ID", None) google_client_id = os.getenv("GOOGLE_CLIENT_ID", None) generic_client_id = os.getenv("GENERIC_CLIENT_ID", None) + + ####### Check if user is a Enterprise / Premium User ####### + if ( + microsoft_client_id is not None + or google_client_id is not None + or generic_client_id is not None + ): + if premium_user != True: + raise ProxyException( + message="You must be a LiteLLM Enterprise user to use SSO. If you have a license please set `LITELLM_LICENSE` in your env. If you want to obtain a license meet with us here: https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat You are seeing this error message because You set one of `MICROSOFT_CLIENT_ID`, `GOOGLE_CLIENT_ID`, or `GENERIC_CLIENT_ID` in your env. Please unset this", + type="auth_error", + param="premium_user", + code=status.HTTP_403_FORBIDDEN, + ) + # get url from request redirect_url = os.getenv("PROXY_BASE_URL", str(request.base_url)) ui_username = os.getenv("UI_USERNAME") @@ -9374,7 +9669,7 @@ async def auth_callback(request: Request): return RedirectResponse(url=litellm_dashboard_ui) -#### BASIC ENDPOINTS #### +#### CONFIG MANAGEMENT #### @router.post( "/config/update", tags=["config.yaml"], @@ -9510,6 +9805,299 @@ async def update_config(config_info: ConfigYAML): ) +### CONFIG GENERAL SETTINGS +""" +- Update config settings +- Get config settings + +Keep it more precise, to prevent overwrite other values unintentially +""" + + +@router.post( + "/config/field/update", + tags=["config.yaml"], + dependencies=[Depends(user_api_key_auth)], +) +async def update_config_general_settings( + data: ConfigFieldUpdate, + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), +): + """ + Update a specific field in litellm general settings + """ + global prisma_client + ## VALIDATION ## + """ + - Check if prisma_client is None + - Check if user allowed to call this endpoint (admin-only) + - Check if param in general settings + - Check if config value is valid type + """ + + if prisma_client is None: + raise HTTPException( + status_code=400, + detail={"error": CommonProxyErrors.db_not_connected_error.value}, + ) + + if user_api_key_dict.user_role != "proxy_admin": + raise HTTPException( + status_code=400, + detail={"error": CommonProxyErrors.not_allowed_access.value}, + ) + + if data.field_name not in ConfigGeneralSettings.model_fields: + raise HTTPException( + status_code=400, + detail={"error": "Invalid field={} passed in.".format(data.field_name)}, + ) + + try: + cgs = ConfigGeneralSettings(**{data.field_name: data.field_value}) + except: + raise HTTPException( + status_code=400, + detail={ + "error": "Invalid type of field value={} passed in.".format( + type(data.field_value), + ) + }, + ) + + ## get general settings from db + db_general_settings = await prisma_client.db.litellm_config.find_first( + where={"param_name": "general_settings"} + ) + ### update value + + if db_general_settings is None or db_general_settings.param_value is None: + general_settings = {} + else: + general_settings = dict(db_general_settings.param_value) + + ## update db + + general_settings[data.field_name] = data.field_value + + response = await prisma_client.db.litellm_config.upsert( + where={"param_name": "general_settings"}, + data={ + "create": {"param_name": "general_settings", "param_value": json.dumps(general_settings)}, # type: ignore + "update": {"param_value": 
json.dumps(general_settings)}, # type: ignore + }, + ) + + return response + + +@router.get( + "/config/field/info", + tags=["config.yaml"], + dependencies=[Depends(user_api_key_auth)], +) +async def get_config_general_settings( + field_name: str, + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), +): + """ + Get the current value of a single field in litellm general settings, as stored in the DB + """ + global prisma_client + + ## VALIDATION ## + """ + - Check if prisma_client is None + - Check if user allowed to call this endpoint (admin-only) + - Check if param in general settings + """ + if prisma_client is None: + raise HTTPException( + status_code=400, + detail={"error": CommonProxyErrors.db_not_connected_error.value}, + ) + + if user_api_key_dict.user_role != "proxy_admin": + raise HTTPException( + status_code=400, + detail={"error": CommonProxyErrors.not_allowed_access.value}, + ) + + if field_name not in ConfigGeneralSettings.model_fields: + raise HTTPException( + status_code=400, + detail={"error": "Invalid field={} passed in.".format(field_name)}, + ) + + ## get general settings from db + db_general_settings = await prisma_client.db.litellm_config.find_first( + where={"param_name": "general_settings"} + ) + ### pop the value + + if db_general_settings is None or db_general_settings.param_value is None: + raise HTTPException( + status_code=400, + detail={"error": "Field name={} not in DB".format(field_name)}, + ) + else: + general_settings = dict(db_general_settings.param_value) + + if field_name in general_settings: + return { + "field_name": field_name, + "field_value": general_settings[field_name], + } + else: + raise HTTPException( + status_code=400, + detail={"error": "Field name={} not in DB".format(field_name)}, + ) + + +@router.get( + "/config/list", + tags=["config.yaml"], + dependencies=[Depends(user_api_key_auth)], +) +async def get_config_list( + config_type: Literal["general_settings"], + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), +) -> List[ConfigList]: + """ + List the available fields + current values for a given type of setting (currently just 'general_settings') + """ + global prisma_client, general_settings + + ## VALIDATION ## + """ + - Check if prisma_client is None + - Check if user allowed to call this endpoint (admin-only) + - Check if param in general settings + """ + if prisma_client is None: + raise HTTPException( + status_code=400, + detail={"error": CommonProxyErrors.db_not_connected_error.value}, + ) + + if user_api_key_dict.user_role != "proxy_admin": + raise HTTPException( + status_code=400, + detail={ + "error": "{}, your role={}".format( + CommonProxyErrors.not_allowed_access.value, + user_api_key_dict.user_role, + ) + }, + ) + + ## get general settings from db + db_general_settings = await prisma_client.db.litellm_config.find_first( + where={"param_name": "general_settings"} + ) + + if db_general_settings is not None and db_general_settings.param_value is not None: + db_general_settings_dict = dict(db_general_settings.param_value) + else: + db_general_settings_dict = {} + + allowed_args = { + "max_parallel_requests": {"type": "Integer"}, + "global_max_parallel_requests": {"type": "Integer"}, + } + + return_val = [] + + for field_name, field_info in ConfigGeneralSettings.model_fields.items(): + if field_name in allowed_args: + + _stored_in_db = None + if field_name in db_general_settings_dict: + _stored_in_db = True + elif field_name in general_settings: + _stored_in_db = False + + _response_obj = ConfigList( + field_name=field_name, 
field_type=allowed_args[field_name]["type"], + field_description=field_info.description or "", + field_value=general_settings.get(field_name, None), + stored_in_db=_stored_in_db, + ) + return_val.append(_response_obj) + + return return_val + + +@router.post( + "/config/field/delete", + tags=["config.yaml"], + dependencies=[Depends(user_api_key_auth)], +) +async def delete_config_general_settings( + data: ConfigFieldDelete, + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), +): + """ + Delete the db value of this field in litellm general settings. Resets it to it's initial default value on litellm. + """ + global prisma_client + ## VALIDATION ## + """ + - Check if prisma_client is None + - Check if user allowed to call this endpoint (admin-only) + - Check if param in general settings + """ + if prisma_client is None: + raise HTTPException( + status_code=400, + detail={"error": CommonProxyErrors.db_not_connected_error.value}, + ) + + if user_api_key_dict.user_role != "proxy_admin": + raise HTTPException( + status_code=400, + detail={ + "error": "{}, your role={}".format( + CommonProxyErrors.not_allowed_access.value, + user_api_key_dict.user_role, + ) + }, + ) + + if data.field_name not in ConfigGeneralSettings.model_fields: + raise HTTPException( + status_code=400, + detail={"error": "Invalid field={} passed in.".format(data.field_name)}, + ) + + ## get general settings from db + db_general_settings = await prisma_client.db.litellm_config.find_first( + where={"param_name": "general_settings"} + ) + ### pop the value + + if db_general_settings is None or db_general_settings.param_value is None: + raise HTTPException( + status_code=400, + detail={"error": "Field name={} not in config".format(data.field_name)}, + ) + else: + general_settings = dict(db_general_settings.param_value) + + ## update db + + general_settings.pop(data.field_name, None) + + response = await prisma_client.db.litellm_config.upsert( + where={"param_name": "general_settings"}, + data={ + "create": {"param_name": "general_settings", "param_value": json.dumps(general_settings)}, # type: ignore + "update": {"param_value": json.dumps(general_settings)}, # type: ignore + }, + ) + + return response + + @router.get( "/get/config/callbacks", tags=["config.yaml"], @@ -9677,6 +10265,7 @@ async def config_yaml_endpoint(config_info: ConfigYAML): return {"hello": "world"} +#### BASIC ENDPOINTS #### @router.get( "/test", tags=["health"], @@ -9707,7 +10296,7 @@ async def test_endpoint(request: Request): async def health_services_endpoint( user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), service: Literal[ - "slack_budget_alerts", "langfuse", "slack", "openmeter" + "slack_budget_alerts", "langfuse", "slack", "openmeter", "webhook" ] = fastapi.Query(description="Specify the service being hit."), ): """ @@ -9722,7 +10311,13 @@ async def health_services_endpoint( raise HTTPException( status_code=400, detail={"error": "Service must be specified."} ) - if service not in ["slack_budget_alerts", "langfuse", "slack", "openmeter"]: + if service not in [ + "slack_budget_alerts", + "langfuse", + "slack", + "openmeter", + "webhook", + ]: raise HTTPException( status_code=400, detail={ @@ -9758,6 +10353,20 @@ async def health_services_endpoint( "message": "Mock LLM request made - check langfuse.", } + if service == "webhook": + user_info = CallInfo( + token=user_api_key_dict.token or "", + spend=1, + max_budget=0, + user_id=user_api_key_dict.user_id, + key_alias=user_api_key_dict.key_alias, + 
team_id=user_api_key_dict.team_id, + ) + await proxy_logging_obj.budget_alerts( + type="user_budget", + user_info=user_info, + ) + if service == "slack" or service == "slack_budget_alerts": if "slack" in general_settings.get("alerting", []): # test_message = f"""\nπ¨ `ProjectedLimitExceededError` πΈ\n\n`Key Alias:` litellm-ui-test-alert \n`Expected Day of Error`: 28th March \n`Current Spend`: $100.00 \n`Projected Spend at end of month`: $1000.00 \n`Soft Limit`: $700""" @@ -9813,8 +10422,14 @@ async def health_services_endpoint( asyncio.create_task( proxy_logging_obj.slack_alerting_instance.send_weekly_spend_report() ) + + alert_types = ( + proxy_logging_obj.slack_alerting_instance.alert_types or [] + ) + alert_types = list(alert_types) return { "status": "success", + "alert_types": alert_types, "message": "Mock Slack Alert sent, verify Slack Alert Received on your channel", } else: @@ -9827,6 +10442,7 @@ async def health_services_endpoint( }, ) except Exception as e: + traceback.print_exc() if isinstance(e, HTTPException): raise ProxyException( message=getattr(e, "detail", f"Authentication Error({str(e)})"), @@ -10350,6 +10966,7 @@ def _has_user_setup_sso(): @router.on_event("shutdown") async def shutdown_event(): global prisma_client, master_key, user_custom_auth, user_custom_key_generate + verbose_proxy_logger.info("Shutting down LiteLLM Proxy Server") if prisma_client: verbose_proxy_logger.debug("Disconnecting from Prisma") await prisma_client.disconnect() @@ -10361,6 +10978,18 @@ async def shutdown_event(): if db_writer_client is not None: await db_writer_client.close() + + # flush remaining langfuse logs + if "langfuse" in litellm.success_callback: + try: + # flush langfuse logs on shutdow + from litellm.utils import langFuseLogger + + langFuseLogger.Langfuse.flush() + except: + # [DO NOT BLOCK shutdown events for this] + pass + ## RESET CUSTOM VARIABLES ## cleanup_router_config_variables() diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index a97864f000..f12cd4b43a 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -11,6 +11,7 @@ from litellm.proxy._types import ( LiteLLM_EndUserTable, LiteLLM_TeamTable, Member, + CallInfo, ) from litellm.caching import DualCache, RedisCache from litellm.router import Deployment, ModelInfo, LiteLLM_Params @@ -18,8 +19,18 @@ from litellm.llms.custom_httpx.httpx_handler import HTTPHandler from litellm.proxy.hooks.parallel_request_limiter import ( _PROXY_MaxParallelRequestsHandler, ) +from litellm.exceptions import RejectedRequestError from litellm._service_logger import ServiceLogging, ServiceTypes -from litellm import ModelResponse, EmbeddingResponse, ImageResponse +from litellm import ( + ModelResponse, + EmbeddingResponse, + ImageResponse, + TranscriptionResponse, + TextCompletionResponse, + CustomStreamWrapper, + TextCompletionStreamWrapper, +) +from litellm.utils import ModelResponseIterator from litellm.proxy.hooks.max_budget_limiter import _PROXY_MaxBudgetLimiter from litellm.proxy.hooks.tpm_rpm_limiter import _PROXY_MaxTPMRPMLimiter from litellm.proxy.hooks.cache_control_check import _PROXY_CacheControlCheck @@ -32,6 +43,7 @@ from email.mime.text import MIMEText from email.mime.multipart import MIMEMultipart from datetime import datetime, timedelta from litellm.integrations.slack_alerting import SlackAlerting +from typing_extensions import overload def print_verbose(print_statement): @@ -74,6 +86,9 @@ class ProxyLogging: "budget_alerts", "db_exceptions", "daily_reports", + "spend_reports", + "cooldown_deployment", 
+ "new_model_added", ] ] = [ "llm_exceptions", @@ -82,6 +97,9 @@ class ProxyLogging: "budget_alerts", "db_exceptions", "daily_reports", + "spend_reports", + "cooldown_deployment", + "new_model_added", ] self.slack_alerting_instance = SlackAlerting( alerting_threshold=self.alerting_threshold, @@ -104,6 +122,9 @@ class ProxyLogging: "budget_alerts", "db_exceptions", "daily_reports", + "spend_reports", + "cooldown_deployment", + "new_model_added", ] ] ] = None, @@ -122,7 +143,13 @@ class ProxyLogging: alerting_args=alerting_args, ) - if "daily_reports" in self.alert_types: + if ( + self.alerting is not None + and "slack" in self.alerting + and "daily_reports" in self.alert_types + ): + # NOTE: ENSURE we only add callbacks when alerting is on + # We should NOT add callbacks when alerting is off litellm.callbacks.append(self.slack_alerting_instance) # type: ignore if redis_cache is not None: @@ -140,6 +167,8 @@ class ProxyLogging: self.slack_alerting_instance.response_taking_too_long_callback ) for callback in litellm.callbacks: + if isinstance(callback, str): + callback = litellm.utils._init_custom_logger_compatible_class(callback) if callback not in litellm.input_callback: litellm.input_callback.append(callback) if callback not in litellm.success_callback: @@ -165,18 +194,20 @@ class ProxyLogging: ) litellm.utils.set_callbacks(callback_list=callback_list) + # The actual implementation of the function async def pre_call_hook( self, user_api_key_dict: UserAPIKeyAuth, data: dict, call_type: Literal[ "completion", + "text_completion", "embeddings", "image_generation", "moderation", "audio_transcription", ], - ): + ) -> dict: """ Allows users to modify/reject the incoming request to the proxy, without having to deal with parsing Request body. @@ -203,8 +234,25 @@ class ProxyLogging: call_type=call_type, ) if response is not None: - data = response - + if isinstance(response, Exception): + raise response + elif isinstance(response, dict): + data = response + elif isinstance(response, str): + if ( + call_type == "completion" + or call_type == "text_completion" + ): + raise RejectedRequestError( + message=response, + model=data.get("model", ""), + llm_provider="", + request_data=data, + ) + else: + raise HTTPException( + status_code=400, detail={"error": response} + ) print_verbose(f"final data being sent to {call_type} call: {data}") return data except Exception as e: @@ -252,8 +300,8 @@ class ProxyLogging: """ Runs the CustomLogger's async_moderation_hook() """ + new_data = copy.deepcopy(data) for callback in litellm.callbacks: - new_data = copy.deepcopy(data) try: if isinstance(callback, CustomLogger): await callback.async_moderation_hook( @@ -265,30 +313,30 @@ class ProxyLogging: raise e return data + async def failed_tracking_alert(self, error_message: str): + if self.alerting is None: + return + await self.slack_alerting_instance.failed_tracking_alert( + error_message=error_message + ) + async def budget_alerts( self, type: Literal[ "token_budget", "user_budget", - "user_and_proxy_budget", - "failed_budgets", - "failed_tracking", + "team_budget", + "proxy_budget", "projected_limit_exceeded", ], - user_max_budget: float, - user_current_spend: float, - user_info=None, - error_message="", + user_info: CallInfo, ): if self.alerting is None: # do nothing if alerting is not switched on return await self.slack_alerting_instance.budget_alerts( type=type, - user_max_budget=user_max_budget, - user_current_spend=user_current_spend, user_info=user_info, - error_message=error_message, ) async def 
alerting_handler( @@ -344,7 +392,11 @@ class ProxyLogging: for client in self.alerting: if client == "slack": await self.slack_alerting_instance.send_alert( - message=message, level=level, alert_type=alert_type, **extra_kwargs + message=message, + level=level, + alert_type=alert_type, + user_info=None, + **extra_kwargs, ) elif client == "sentry": if litellm.utils.sentry_sdk_instance is not None: @@ -418,9 +470,14 @@ class ProxyLogging: Related issue - https://github.com/BerriAI/litellm/issues/3395 """ + litellm_debug_info = getattr(original_exception, "litellm_debug_info", None) + exception_str = str(original_exception) + if litellm_debug_info is not None: + exception_str += litellm_debug_info + asyncio.create_task( self.alerting_handler( - message=f"LLM API call failed: {str(original_exception)}", + message=f"LLM API call failed: `{exception_str}`", level="High", alert_type="llm_exceptions", request_data=request_data, @@ -1787,7 +1844,9 @@ def hash_token(token: str): return hashed_token -def get_logging_payload(kwargs, response_obj, start_time, end_time): +def get_logging_payload( + kwargs, response_obj, start_time, end_time, end_user_id: Optional[str] +): from litellm.proxy._types import LiteLLM_SpendLogs from pydantic import Json import uuid @@ -1865,7 +1924,7 @@ def get_logging_payload(kwargs, response_obj, start_time, end_time): "prompt_tokens": usage.get("prompt_tokens", 0), "completion_tokens": usage.get("completion_tokens", 0), "request_tags": metadata.get("tags", []), - "end_user": kwargs.get("user", ""), + "end_user": end_user_id or "", "api_base": litellm_params.get("api_base", ""), } @@ -2028,6 +2087,11 @@ async def update_spend( raise e ### UPDATE END-USER TABLE ### + verbose_proxy_logger.debug( + "End-User Spend transactions: {}".format( + len(prisma_client.end_user_list_transactons.keys()) + ) + ) if len(prisma_client.end_user_list_transactons.keys()) > 0: for i in range(n_retry_times + 1): start_time = time.time() @@ -2043,13 +2107,18 @@ async def update_spend( max_end_user_budget = None if litellm.max_end_user_budget is not None: max_end_user_budget = litellm.max_end_user_budget - new_user_obj = LiteLLM_EndUserTable( - user_id=end_user_id, spend=response_cost, blocked=False - ) - batcher.litellm_endusertable.update_many( + batcher.litellm_endusertable.upsert( where={"user_id": end_user_id}, - data={"spend": {"increment": response_cost}}, + data={ + "create": { + "user_id": end_user_id, + "spend": response_cost, + "blocked": False, + }, + "update": {"spend": {"increment": response_cost}}, + }, ) + prisma_client.end_user_list_transactons = ( {} ) # Clear the remaining transactions after processing all batches in the loop. 
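The end-user spend hunk above swaps `update_many` for an upsert, so a first-time end-user gets a spend row created instead of the increment silently matching nothing. A minimal, self-contained sketch of that create-or-increment behaviour (plain-Python stand-ins only; `spend_table` and `upsert_end_user_spend` are illustrative names, not part of this diff):

```python
# Sketch of the create-or-increment upsert used for end-user spend tracking.
# A dict stands in for the litellm_endusertable table; no Prisma client is needed here.
from typing import Dict


def upsert_end_user_spend(
    spend_table: Dict[str, dict], end_user_id: str, response_cost: float
) -> None:
    row = spend_table.get(end_user_id)
    if row is None:
        # "create" branch: first spend recorded for this end-user
        spend_table[end_user_id] = {
            "user_id": end_user_id,
            "spend": response_cost,
            "blocked": False,
        }
    else:
        # "update" branch: increment the running spend
        row["spend"] += response_cost


table: Dict[str, dict] = {}
upsert_end_user_spend(table, "end-user-1", 1.0)
upsert_end_user_spend(table, "end-user-1", 2.0)
assert table["end-user-1"]["spend"] == 3.0
```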
diff --git a/litellm/router.py b/litellm/router.py index ec7df1124c..d678e5912f 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -262,13 +262,22 @@ class Router: self.retry_after = retry_after self.routing_strategy = routing_strategy - self.fallbacks = fallbacks or litellm.fallbacks + + ## SETTING FALLBACKS ## + ### validate if it's set + in correct format + _fallbacks = fallbacks or litellm.fallbacks + + self.validate_fallbacks(fallback_param=_fallbacks) + ### set fallbacks + self.fallbacks = _fallbacks + if default_fallbacks is not None or litellm.default_fallbacks is not None: _fallbacks = default_fallbacks or litellm.default_fallbacks if self.fallbacks is not None: self.fallbacks.append({"*": _fallbacks}) else: self.fallbacks = [{"*": _fallbacks}] + self.context_window_fallbacks = ( context_window_fallbacks or litellm.context_window_fallbacks ) @@ -336,6 +345,21 @@ class Router: if self.alerting_config is not None: self._initialize_alerting() + def validate_fallbacks(self, fallback_param: Optional[List]): + if fallback_param is None: + return + if len(fallback_param) > 0: # if set + ## for dictionary in list, check if only 1 key in dict + for _dict in fallback_param: + assert isinstance(_dict, dict), "Item={}, not a dictionary".format( + _dict + ) + assert ( + len(_dict.keys()) == 1 + ), "Only 1 key allows in dictionary. You set={} for dict={}".format( + len(_dict.keys()), _dict + ) + def routing_strategy_init(self, routing_strategy: str, routing_strategy_args: dict): if routing_strategy == "least-busy": self.leastbusy_logger = LeastBusyLoggingHandler( @@ -638,6 +662,10 @@ class Router: async def abatch_completion( self, models: List[str], messages: List[Dict[str, str]], **kwargs ): + """ + Async Batch Completion - Batch Process 1 request to multiple model_group on litellm.Router + Use this for sending the same request to N models + """ async def _async_completion_no_exceptions( model: str, messages: List[Dict[str, str]], **kwargs @@ -662,6 +690,51 @@ class Router: response = await asyncio.gather(*_tasks) return response + async def abatch_completion_one_model_multiple_requests( + self, model: str, messages: List[List[Dict[str, str]]], **kwargs + ): + """ + Async Batch Completion - Batch Process multiple Messages to one model_group on litellm.Router + + Use this for sending multiple requests to 1 model + + Args: + model (List[str]): model group + messages (List[List[Dict[str, str]]]): list of messages. 
Each element in the list is one request + **kwargs: additional kwargs + Usage: + response = await self.abatch_completion_one_model_multiple_requests( + model="gpt-3.5-turbo", + messages=[ + [{"role": "user", "content": "hello"}, {"role": "user", "content": "tell me something funny"}], + [{"role": "user", "content": "hello good morning"}], + ] + ) + """ + + async def _async_completion_no_exceptions( + model: str, messages: List[Dict[str, str]], **kwargs + ): + """ + Wrapper around self.acompletion that catches exceptions and returns them as a result + """ + try: + return await self.acompletion(model=model, messages=messages, **kwargs) + except Exception as e: + return e + + _tasks = [] + for message_request in messages: + # add each task; if a task fails, the exception is returned in the results instead of being raised + _tasks.append( + _async_completion_no_exceptions( + model=model, messages=message_request, **kwargs + ) + ) + + response = await asyncio.gather(*_tasks) + return response + def image_generation(self, prompt: str, model: str, **kwargs): try: kwargs["model"] = model @@ -1899,10 +1972,28 @@ class Router: metadata = kwargs.get("litellm_params", {}).get("metadata", None) _model_info = kwargs.get("litellm_params", {}).get("model_info", {}) + exception_response = getattr(exception, "response", {}) + exception_headers = getattr(exception_response, "headers", None) + _time_to_cooldown = self.cooldown_time + + if exception_headers is not None: + + _time_to_cooldown = ( + litellm.utils._get_retry_after_from_exception_header( + response_headers=exception_headers + ) + ) + + if _time_to_cooldown < 0: + # if no retry-after value could be read from the response headers -> fall back to the default cooldown time + _time_to_cooldown = self.cooldown_time + if isinstance(_model_info, dict): deployment_id = _model_info.get("id", None) self._set_cooldown_deployments( - exception_status=exception_status, deployment=deployment_id + exception_status=exception_status, + deployment=deployment_id, + time_to_cooldown=_time_to_cooldown, ) # setting deployment_id in cooldown deployments if custom_llm_provider: model_name = f"{custom_llm_provider}/{model_name}" @@ -1962,8 +2053,50 @@ class Router: key=rpm_key, value=request_count, local_only=True ) # don't change existing ttl + def _is_cooldown_required(self, exception_status: Union[str, int]): + """ + A function to determine if a cooldown is required based on the exception status. + + Parameters: + exception_status (Union[str, int]): The status of the exception. + + Returns: + bool: True if a cooldown is required, False otherwise. 
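+ + Per the checks below: 429, 401 and 408 responses trigger a cooldown; any other 4XX status does not; all non-4XX errors do. 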
+ """ + try: + + if isinstance(exception_status, str): + exception_status = int(exception_status) + + if exception_status >= 400 and exception_status < 500: + if exception_status == 429: + # Cool down 429 Rate Limit Errors + return True + + elif exception_status == 401: + # Cool down 401 Auth Errors + return True + + elif exception_status == 408: + return True + + else: + # Do NOT cool down all other 4XX Errors + return False + + else: + # should cool down for all other errors + return True + + except: + # Catch all - if any exceptions default to cooling down + return True + def _set_cooldown_deployments( - self, exception_status: Union[str, int], deployment: Optional[str] = None + self, + exception_status: Union[str, int], + deployment: Optional[str] = None, + time_to_cooldown: Optional[float] = None, ): """ Add a model to the list of models being cooled down for that minute, if it exceeds the allowed fails / minute @@ -1975,6 +2108,9 @@ class Router: if deployment is None: return + if self._is_cooldown_required(exception_status=exception_status) == False: + return + dt = get_utc_datetime() current_minute = dt.strftime("%H-%M") # get current fails for deployment @@ -1987,6 +2123,8 @@ class Router: f"Attempting to add {deployment} to cooldown list. updated_fails: {updated_fails}; self.allowed_fails: {self.allowed_fails}" ) cooldown_time = self.cooldown_time or 1 + if time_to_cooldown is not None: + cooldown_time = time_to_cooldown if isinstance(exception_status, str): try: @@ -2024,7 +2162,9 @@ class Router: ) self.send_deployment_cooldown_alert( - deployment_id=deployment, exception_status=exception_status + deployment_id=deployment, + exception_status=exception_status, + cooldown_time=cooldown_time, ) else: self.failed_calls.set_cache( @@ -2309,7 +2449,7 @@ class Router: organization = litellm.get_secret(organization_env_name) litellm_params["organization"] = organization - if "azure" in model_name and isinstance(api_key, str): + if "azure" in model_name: if api_base is None or not isinstance(api_base, str): raise ValueError( f"api_base is required for Azure OpenAI. Set it on your config. Model - {model}" @@ -3185,7 +3325,7 @@ class Router: if _rate_limit_error == True: # allow generic fallback logic to take place raise ValueError( - f"{RouterErrors.no_deployments_available.value}, passed model={model}" + f"{RouterErrors.no_deployments_available.value}, Try again in {self.cooldown_time} seconds. Passed model={model}. Try again in {self.cooldown_time} seconds." ) elif _context_window_error == True: raise litellm.ContextWindowExceededError( @@ -3257,7 +3397,9 @@ class Router: litellm.print_verbose(f"initial list of deployments: {healthy_deployments}") if len(healthy_deployments) == 0: - raise ValueError(f"No healthy deployment available, passed model={model}. ") + raise ValueError( + f"No healthy deployment available, passed model={model}. Try again in {self.cooldown_time} seconds" + ) if litellm.model_alias_map and model in litellm.model_alias_map: model = litellm.model_alias_map[ model @@ -3347,7 +3489,7 @@ class Router: if _allowed_model_region is None: _allowed_model_region = "n/a" raise ValueError( - f"{RouterErrors.no_deployments_available.value}, passed model={model}. Enable pre-call-checks={self.enable_pre_call_checks}, allowed_model_region={_allowed_model_region}" + f"{RouterErrors.no_deployments_available.value}, Try again in {self.cooldown_time} seconds. Passed model={model}. 
Enable pre-call-checks={self.enable_pre_call_checks}, allowed_model_region={_allowed_model_region}" ) if ( @@ -3415,7 +3557,7 @@ class Router: f"get_available_deployment for model: {model}, No deployment available" ) raise ValueError( - f"{RouterErrors.no_deployments_available.value}, passed model={model}" + f"{RouterErrors.no_deployments_available.value}, Try again in {self.cooldown_time} seconds. Passed model={model}" ) verbose_router_logger.info( f"get_available_deployment for model: {model}, Selected deployment: {self.print_deployment(deployment)} for model: {model}" @@ -3545,7 +3687,7 @@ class Router: f"get_available_deployment for model: {model}, No deployment available" ) raise ValueError( - f"{RouterErrors.no_deployments_available.value}, passed model={model}" + f"{RouterErrors.no_deployments_available.value}, Try again in {self.cooldown_time} seconds. Passed model={model}" ) verbose_router_logger.info( f"get_available_deployment for model: {model}, Selected deployment: {self.print_deployment(deployment)} for model: {model}" @@ -3683,7 +3825,10 @@ class Router: print("\033[94m\nInitialized Alerting for litellm.Router\033[0m\n") # noqa def send_deployment_cooldown_alert( - self, deployment_id: str, exception_status: Union[str, int] + self, + deployment_id: str, + exception_status: Union[str, int], + cooldown_time: float, ): try: from litellm.proxy.proxy_server import proxy_logging_obj @@ -3707,7 +3852,7 @@ class Router: ) asyncio.create_task( proxy_logging_obj.slack_alerting_instance.send_alert( - message=f"Router: Cooling down deployment: {_api_base}, for {self.cooldown_time} seconds. Got exception: {str(exception_status)}. Change 'cooldown_time' + 'allowed_fails' under 'Router Settings' on proxy UI, or via config - https://docs.litellm.ai/docs/proxy/reliability#fallbacks--retries--timeouts--cooldowns", + message=f"Router: Cooling down Deployment:\nModel Name: `{_model_name}`\nAPI Base: `{_api_base}`\nCooldown Time: `{cooldown_time} seconds`\nException Status Code: `{str(exception_status)}`\n\nChange 'cooldown_time' + 'allowed_fails' under 'Router Settings' on proxy UI, or via config - https://docs.litellm.ai/docs/proxy/reliability#fallbacks--retries--timeouts--cooldowns", alert_type="cooldown_deployment", level="Low", ) diff --git a/litellm/router_strategy/lowest_latency.py b/litellm/router_strategy/lowest_latency.py index 81a0133a90..c56db83f0b 100644 --- a/litellm/router_strategy/lowest_latency.py +++ b/litellm/router_strategy/lowest_latency.py @@ -27,7 +27,7 @@ class LiteLLMBase(BaseModel): class RoutingArgs(LiteLLMBase): - ttl: int = 1 * 60 * 60 # 1 hour + ttl: float = 1 * 60 * 60 # 1 hour lowest_latency_buffer: float = 0 max_latency_list_size: int = 10 diff --git a/litellm/tests/log.txt b/litellm/tests/log.txt index 4d3027355c..ea07ca7e12 100644 --- a/litellm/tests/log.txt +++ b/litellm/tests/log.txt @@ -1,30 +1,413 @@ ============================= test session starts ============================== -platform darwin -- Python 3.11.9, pytest-7.3.1, pluggy-1.3.0 -rootdir: /Users/krrishdholakia/Documents/litellm/litellm/tests -plugins: timeout-2.2.0, asyncio-0.23.2, anyio-3.7.1, xdist-3.3.1 +platform darwin -- Python 3.11.4, pytest-8.2.0, pluggy-1.5.0 +rootdir: /Users/krrishdholakia/Documents/litellm +configfile: pyproject.toml +plugins: asyncio-0.23.6, mock-3.14.0, anyio-4.2.0 asyncio: mode=Mode.STRICT -collected 2 items +collected 1 item -test_streaming.py .Token Counter - using hugging face token counter, for model=llama-3-8b-instruct -Looking up model=llama-3-8b-instruct in 
model_cost_map -F [100%] +test_amazing_vertex_completion.py F [100%] =================================== FAILURES =================================== -__________________ test_completion_predibase_streaming[True] ___________________ +____________________________ test_gemini_pro_vision ____________________________ -model = 'llama-3-8b-instruct' -messages = [{'content': 'What is the meaning of life?', 'role': 'user'}] -timeout = 600.0, temperature = None, top_p = None, n = None, stream = True +model = 'gemini-1.5-flash-preview-0514' +messages = [{'content': [{'text': 'Whats in this image?', 'type': 'text'}, {'image_url': {'url': 'gs://cloud-samples-data/generative-ai/image/boats.jpeg'}, 'type': 'image_url'}], 'role': 'user'}] +model_response = ModelResponse(id='chatcmpl-722df0e7-4e2d-44e6-9e2c-49823faa0189', choices=[Choices(finish_reason='stop', index=0, mess... role='assistant'))], created=1716145725, model=None, object='chat.completion', system_fingerprint=None, usage=Usage()) +print_verbose = +encoding = +logging_obj = +vertex_project = None, vertex_location = None, vertex_credentials = None +optional_params = {} +litellm_params = {'acompletion': False, 'api_base': '', 'api_key': None, 'completion_call_id': None, ...} +logger_fn = None, acompletion = False + + def completion( + model: str, + messages: list, + model_response: ModelResponse, + print_verbose: Callable, + encoding, + logging_obj, + vertex_project=None, + vertex_location=None, + vertex_credentials=None, + optional_params=None, + litellm_params=None, + logger_fn=None, + acompletion: bool = False, + ): + try: + import vertexai + except: + raise VertexAIError( + status_code=400, + message="vertexai import failed please run `pip install google-cloud-aiplatform`", + ) + + if not ( + hasattr(vertexai, "preview") or hasattr(vertexai.preview, "language_models") + ): + raise VertexAIError( + status_code=400, + message="""Upgrade vertex ai. 
Run `pip install "google-cloud-aiplatform>=1.38"`""", + ) + try: + from vertexai.preview.language_models import ( + ChatModel, + CodeChatModel, + InputOutputTextPair, + ) + from vertexai.language_models import TextGenerationModel, CodeGenerationModel + from vertexai.preview.generative_models import ( + GenerativeModel, + Part, + GenerationConfig, + ) + from google.cloud import aiplatform # type: ignore + from google.protobuf import json_format # type: ignore + from google.protobuf.struct_pb2 import Value # type: ignore + from google.cloud.aiplatform_v1beta1.types import content as gapic_content_types # type: ignore + import google.auth # type: ignore + import proto # type: ignore + + ## Load credentials with the correct quota project ref: https://github.com/googleapis/python-aiplatform/issues/2557#issuecomment-1709284744 + print_verbose( + f"VERTEX AI: vertex_project={vertex_project}; vertex_location={vertex_location}" + ) + if vertex_credentials is not None and isinstance(vertex_credentials, str): + import google.oauth2.service_account + + json_obj = json.loads(vertex_credentials) + + creds = google.oauth2.service_account.Credentials.from_service_account_info( + json_obj, + scopes=["https://www.googleapis.com/auth/cloud-platform"], + ) + else: + creds, _ = google.auth.default(quota_project_id=vertex_project) + print_verbose( + f"VERTEX AI: creds={creds}; google application credentials: {os.getenv('GOOGLE_APPLICATION_CREDENTIALS')}" + ) + vertexai.init( + project=vertex_project, location=vertex_location, credentials=creds + ) + + ## Load Config + config = litellm.VertexAIConfig.get_config() + for k, v in config.items(): + if k not in optional_params: + optional_params[k] = v + + ## Process safety settings into format expected by vertex AI + safety_settings = None + if "safety_settings" in optional_params: + safety_settings = optional_params.pop("safety_settings") + if not isinstance(safety_settings, list): + raise ValueError("safety_settings must be a list") + if len(safety_settings) > 0 and not isinstance(safety_settings[0], dict): + raise ValueError("safety_settings must be a list of dicts") + safety_settings = [ + gapic_content_types.SafetySetting(x) for x in safety_settings + ] + + # vertexai does not use an API key, it looks for credentials.json in the environment + + prompt = " ".join( + [ + message["content"] + for message in messages + if isinstance(message["content"], str) + ] + ) + + mode = "" + + request_str = "" + response_obj = None + async_client = None + instances = None + client_options = { + "api_endpoint": f"{vertex_location}-aiplatform.googleapis.com" + } + if ( + model in litellm.vertex_language_models + or model in litellm.vertex_vision_models + ): + llm_model = GenerativeModel(model) + mode = "vision" + request_str += f"llm_model = GenerativeModel({model})\n" + elif model in litellm.vertex_chat_models: + llm_model = ChatModel.from_pretrained(model) + mode = "chat" + request_str += f"llm_model = ChatModel.from_pretrained({model})\n" + elif model in litellm.vertex_text_models: + llm_model = TextGenerationModel.from_pretrained(model) + mode = "text" + request_str += f"llm_model = TextGenerationModel.from_pretrained({model})\n" + elif model in litellm.vertex_code_text_models: + llm_model = CodeGenerationModel.from_pretrained(model) + mode = "text" + request_str += f"llm_model = CodeGenerationModel.from_pretrained({model})\n" + elif model in litellm.vertex_code_chat_models: # vertex_code_llm_models + llm_model = CodeChatModel.from_pretrained(model) + mode = "chat" + 
request_str += f"llm_model = CodeChatModel.from_pretrained({model})\n" + elif model == "private": + mode = "private" + model = optional_params.pop("model_id", None) + # private endpoint requires a dict instead of JSON + instances = [optional_params.copy()] + instances[0]["prompt"] = prompt + llm_model = aiplatform.PrivateEndpoint( + endpoint_name=model, + project=vertex_project, + location=vertex_location, + ) + request_str += f"llm_model = aiplatform.PrivateEndpoint(endpoint_name={model}, project={vertex_project}, location={vertex_location})\n" + else: # assume vertex model garden on public endpoint + mode = "custom" + + instances = [optional_params.copy()] + instances[0]["prompt"] = prompt + instances = [ + json_format.ParseDict(instance_dict, Value()) + for instance_dict in instances + ] + # Will determine the API used based on async parameter + llm_model = None + + # NOTE: async prediction and streaming under "private" mode isn't supported by aiplatform right now + if acompletion == True: + data = { + "llm_model": llm_model, + "mode": mode, + "prompt": prompt, + "logging_obj": logging_obj, + "request_str": request_str, + "model": model, + "model_response": model_response, + "encoding": encoding, + "messages": messages, + "print_verbose": print_verbose, + "client_options": client_options, + "instances": instances, + "vertex_location": vertex_location, + "vertex_project": vertex_project, + "safety_settings": safety_settings, + **optional_params, + } + if optional_params.get("stream", False) is True: + # async streaming + return async_streaming(**data) + + return async_completion(**data) + + if mode == "vision": + print_verbose("\nMaking VertexAI Gemini Pro / Pro Vision Call") + print_verbose(f"\nProcessing input messages = {messages}") + tools = optional_params.pop("tools", None) + content = _gemini_convert_messages_text(messages=messages) + stream = optional_params.pop("stream", False) + if stream == True: + request_str += f"response = llm_model.generate_content({content}, generation_config=GenerationConfig(**{optional_params}), safety_settings={safety_settings}, stream={stream})\n" + logging_obj.pre_call( + input=prompt, + api_key=None, + additional_args={ + "complete_input_dict": optional_params, + "request_str": request_str, + }, + ) + + model_response = llm_model.generate_content( + contents={"content": content}, + generation_config=optional_params, + safety_settings=safety_settings, + stream=True, + tools=tools, + ) + + return model_response + + request_str += f"response = llm_model.generate_content({content})\n" + ## LOGGING + logging_obj.pre_call( + input=prompt, + api_key=None, + additional_args={ + "complete_input_dict": optional_params, + "request_str": request_str, + }, + ) + + ## LLM Call +> response = llm_model.generate_content( + contents=content, + generation_config=optional_params, + safety_settings=safety_settings, + tools=tools, + ) + +../llms/vertex_ai.py:740: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../proxy/myenv/lib/python3.11/site-packages/vertexai/generative_models/_generative_models.py:405: in generate_content + return self._generate_content( +../proxy/myenv/lib/python3.11/site-packages/vertexai/generative_models/_generative_models.py:487: in _generate_content + request = self._prepare_request( +../proxy/myenv/lib/python3.11/site-packages/vertexai/generative_models/_generative_models.py:274: in _prepare_request + contents = [ 
+../proxy/myenv/lib/python3.11/site-packages/vertexai/generative_models/_generative_models.py:275: in + gapic_content_types.Content(content_dict) for content_dict in contents +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = <[AttributeError('Unknown field for Content: _pb') raised in repr()] Content object at 0x1646aaa90> +mapping = {'parts': [{'text': 'Whats in this image?'}, file_data { + mime_type: "image/jpeg" + file_uri: "gs://cloud-samples-data/generative-ai/image/boats.jpeg" +} +], 'role': 'user'} +ignore_unknown_fields = False, kwargs = {} +params = {'parts': [text: "Whats in this image?" +, file_data { + mime_type: "image/jpeg" + file_uri: "gs://cloud-samples-data/generative-ai/image/boats.jpeg" +} +], 'role': 'user'} +marshal = , key = 'parts' +value = [{'text': 'Whats in this image?'}, file_data { + mime_type: "image/jpeg" + file_uri: "gs://cloud-samples-data/generative-ai/image/boats.jpeg" +} +] +pb_value = [text: "Whats in this image?" +, file_data { + mime_type: "image/jpeg" + file_uri: "gs://cloud-samples-data/generative-ai/image/boats.jpeg" +} +] + + def __init__( + self, + mapping=None, + *, + ignore_unknown_fields=False, + **kwargs, + ): + # We accept several things for `mapping`: + # * An instance of this class. + # * An instance of the underlying protobuf descriptor class. + # * A dict + # * Nothing (keyword arguments only). + if mapping is None: + if not kwargs: + # Special fast path for empty construction. + super().__setattr__("_pb", self._meta.pb()) + return + + mapping = kwargs + elif isinstance(mapping, self._meta.pb): + # Make a copy of the mapping. + # This is a constructor for a new object, so users will assume + # that it will not have side effects on the arguments being + # passed in. + # + # The `wrap` method on the metaclass is the public API for taking + # ownership of the passed in protobuf object. + mapping = copy.deepcopy(mapping) + if kwargs: + mapping.MergeFrom(self._meta.pb(**kwargs)) + + super().__setattr__("_pb", mapping) + return + elif isinstance(mapping, type(self)): + # Just use the above logic on mapping's underlying pb. + self.__init__(mapping=mapping._pb, **kwargs) + return + elif isinstance(mapping, collections.abc.Mapping): + # Can't have side effects on mapping. + mapping = copy.copy(mapping) + # kwargs entries take priority for duplicate keys. + mapping.update(kwargs) + else: + # Sanity check: Did we get something not a map? Error if so. + raise TypeError( + "Invalid constructor input for %s: %r" + % ( + self.__class__.__name__, + mapping, + ) + ) + + params = {} + # Update the mapping to address any values that need to be + # coerced. + marshal = self._meta.marshal + for key, value in mapping.items(): + (key, pb_type) = self._get_pb_type_from_key(key) + if pb_type is None: + if ignore_unknown_fields: + continue + + raise ValueError( + "Unknown field for {}: {}".format(self.__class__.__name__, key) + ) + + try: + pb_value = marshal.to_proto(pb_type, value) + except ValueError: + # Underscores may be appended to field names + # that collide with python or proto-plus keywords. + # In case a key only exists with a `_` suffix, coerce the key + # to include the `_` suffix. It's not possible to + # natively define the same field with a trailing underscore in protobuf. + # See related issue + # https://github.com/googleapis/python-api-core/issues/227 + if isinstance(value, dict): + if _upb: + # In UPB, pb_type is MessageMeta which doesn't expose attrs like it used to in Python/CPP. 
+ keys_to_update = [ + item + for item in value + if item not in pb_type.DESCRIPTOR.fields_by_name + and f"{item}_" in pb_type.DESCRIPTOR.fields_by_name + ] + else: + keys_to_update = [ + item + for item in value + if not hasattr(pb_type, item) + and hasattr(pb_type, f"{item}_") + ] + for item in keys_to_update: + value[f"{item}_"] = value.pop(item) + + pb_value = marshal.to_proto(pb_type, value) + + if pb_value is not None: + params[key] = pb_value + + # Create the internal protocol buffer. +> super().__setattr__("_pb", self._meta.pb(**params)) +E TypeError: Parameter to MergeFrom() must be instance of same class: expected got . + +../proxy/myenv/lib/python3.11/site-packages/proto/message.py:615: TypeError + +During handling of the above exception, another exception occurred: + +model = 'gemini-1.5-flash-preview-0514' +messages = [{'content': [{'text': 'Whats in this image?', 'type': 'text'}, {'image_url': {'url': 'gs://cloud-samples-data/generative-ai/image/boats.jpeg'}, 'type': 'image_url'}], 'role': 'user'}] +timeout = 600.0, temperature = None, top_p = None, n = None, stream = None stream_options = None, stop = None, max_tokens = None, presence_penalty = None frequency_penalty = None, logit_bias = None, user = None, response_format = None seed = None, tools = None, tool_choice = None, logprobs = None top_logprobs = None, deployment_id = None, extra_headers = None functions = None, function_call = None, base_url = None, api_version = None -api_key = 'pb_Qg9YbQo7UqqHdu0ozxN_aw', model_list = None -kwargs = {'api_base': 'https://serving.app.predibase.com', 'litellm_call_id': 'cf0ea464-1b45-4473-8e55-6bf6809df7a7', 'litellm_logging_obj': , 'tenant_id': 'c4768f95'} -args = {'acompletion': False, 'api_base': None, 'api_key': 'pb_Qg9YbQo7UqqHdu0ozxN_aw', 'api_version': None, ...} +api_key = None, model_list = None +kwargs = {'litellm_call_id': '7f48b7ab-47b3-4beb-b2b5-fa298be49d3f', 'litellm_logging_obj': } +args = {'acompletion': False, 'api_base': None, 'api_key': None, 'api_version': None, ...} api_base = None, mock_response = None, force_timeout = 600, logger_fn = None -verbose = False, custom_llm_provider = 'predibase' +verbose = False, custom_llm_provider = 'vertex_ai' @client def completion( @@ -118,7 +501,7 @@ verbose = False, custom_llm_provider = 'predibase' model_info = kwargs.get("model_info", None) proxy_server_request = kwargs.get("proxy_server_request", None) fallbacks = kwargs.get("fallbacks", None) - headers = kwargs.get("headers", None) + headers = kwargs.get("headers", None) or extra_headers num_retries = kwargs.get("num_retries", None) ## deprecated max_retries = kwargs.get("max_retries", None) context_window_fallback_dict = kwargs.get("context_window_fallback_dict", None) @@ -232,26 +615,14 @@ verbose = False, custom_llm_provider = 'predibase' "supports_system_message", "region_name", "allowed_model_region", + "model_config", ] + default_params = openai_params + litellm_params non_default_params = { k: v for k, v in kwargs.items() if k not in default_params } # model-specific params - pass them straight to the model/provider - ### TIMEOUT LOGIC ### - timeout = timeout or kwargs.get("request_timeout", 600) or 600 - # set timeout for 10 minutes by default - - if ( - timeout is not None - and isinstance(timeout, httpx.Timeout) - and supports_httpx_timeout(custom_llm_provider) == False - ): - read_timeout = timeout.read or 600 - timeout = read_timeout # default 10 min timeout - elif timeout is not None and not isinstance(timeout, httpx.Timeout): - timeout = float(timeout) # 
type: ignore - try: if base_url is not None: api_base = base_url @@ -291,9 +662,18 @@ verbose = False, custom_llm_provider = 'predibase' "aws_region_name", None ) # support region-based pricing for bedrock + ### TIMEOUT LOGIC ### + timeout = timeout or kwargs.get("request_timeout", 600) or 600 + # set timeout for 10 minutes by default + if isinstance(timeout, httpx.Timeout) and not supports_httpx_timeout( + custom_llm_provider + ): + timeout = timeout.read or 600 # default 10 min timeout + elif not isinstance(timeout, httpx.Timeout): + timeout = float(timeout) # type: ignore + ### REGISTER CUSTOM MODEL PRICING -- IF GIVEN ### if input_cost_per_token is not None and output_cost_per_token is not None: - print_verbose(f"Registering model={model} in model cost map") litellm.register_model( { f"{custom_llm_provider}/{model}": { @@ -415,6 +795,10 @@ verbose = False, custom_llm_provider = 'predibase' proxy_server_request=proxy_server_request, preset_cache_key=preset_cache_key, no_log=no_log, + input_cost_per_second=input_cost_per_second, + input_cost_per_token=input_cost_per_token, + output_cost_per_second=output_cost_per_second, + output_cost_per_token=output_cost_per_token, ) logging.update_environment_variables( model=model, @@ -753,7 +1137,7 @@ verbose = False, custom_llm_provider = 'predibase' custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict - model_response = replicate.completion( + model_response = replicate.completion( # type: ignore model=model, messages=messages, api_base=api_base, @@ -766,12 +1150,10 @@ verbose = False, custom_llm_provider = 'predibase' api_key=replicate_key, logging_obj=logging, custom_prompt_dict=custom_prompt_dict, + acompletion=acompletion, ) - if "stream" in optional_params and optional_params["stream"] == True: - # don't try to access stream object, - model_response = CustomStreamWrapper(model_response, model, logging_obj=logging, custom_llm_provider="replicate") # type: ignore - if optional_params.get("stream", False) or acompletion == True: + if optional_params.get("stream", False) == True: ## LOGGING logging.post_call( input=messages, @@ -780,6 +1162,61 @@ verbose = False, custom_llm_provider = 'predibase' ) response = model_response + elif ( + "clarifai" in model + or custom_llm_provider == "clarifai" + or model in litellm.clarifai_models + ): + clarifai_key = None + clarifai_key = ( + api_key + or litellm.clarifai_key + or litellm.api_key + or get_secret("CLARIFAI_API_KEY") + or get_secret("CLARIFAI_API_TOKEN") + ) + + api_base = ( + api_base + or litellm.api_base + or get_secret("CLARIFAI_API_BASE") + or "https://api.clarifai.com/v2" + ) + + custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict + model_response = clarifai.completion( + model=model, + messages=messages, + api_base=api_base, + model_response=model_response, + print_verbose=print_verbose, + optional_params=optional_params, + litellm_params=litellm_params, + acompletion=acompletion, + logger_fn=logger_fn, + encoding=encoding, # for calculating input/output tokens + api_key=clarifai_key, + logging_obj=logging, + custom_prompt_dict=custom_prompt_dict, + ) + + if "stream" in optional_params and optional_params["stream"] == True: + # don't try to access stream object, + ## LOGGING + logging.post_call( + input=messages, + api_key=api_key, + original_response=model_response, + ) + + if optional_params.get("stream", False) or acompletion == True: + ## LOGGING + logging.post_call( + input=messages, + api_key=clarifai_key, + original_response=model_response, + ) + 
response = model_response elif custom_llm_provider == "anthropic": api_key = ( @@ -1330,7 +1767,7 @@ verbose = False, custom_llm_provider = 'predibase' acompletion=acompletion, ) else: - model_response = vertex_ai.completion( +> model_response = vertex_ai.completion( model=model, messages=messages, model_response=model_response, @@ -1345,228 +1782,484 @@ verbose = False, custom_llm_provider = 'predibase' logging_obj=logging, acompletion=acompletion, ) - - if ( - "stream" in optional_params - and optional_params["stream"] == True - and acompletion == False - ): - response = CustomStreamWrapper( - model_response, - model, - custom_llm_provider="vertex_ai", - logging_obj=logging, - ) - return response - response = model_response - elif custom_llm_provider == "predibase": - tenant_id = ( - optional_params.pop("tenant_id", None) - or optional_params.pop("predibase_tenant_id", None) - or litellm.predibase_tenant_id - or get_secret("PREDIBASE_TENANT_ID") - ) - - api_base = ( - optional_params.pop("api_base", None) - or optional_params.pop("base_url", None) - or litellm.api_base - or get_secret("PREDIBASE_API_BASE") - ) - - api_key = ( - api_key - or litellm.api_key - or litellm.predibase_key - or get_secret("PREDIBASE_API_KEY") - ) - -> model_response = predibase_chat_completions.completion( - model=model, - messages=messages, - model_response=model_response, - print_verbose=print_verbose, - optional_params=optional_params, - litellm_params=litellm_params, - logger_fn=logger_fn, - encoding=encoding, - logging_obj=logging, - acompletion=acompletion, - api_base=api_base, - custom_prompt_dict=custom_prompt_dict, - api_key=api_key, - tenant_id=tenant_id, - ) -../main.py:1813: +../main.py:1824: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ -self = -model = 'llama-3-8b-instruct' -messages = [{'content': 'What is the meaning of life?', 'role': 'user'}] -api_base = None, custom_prompt_dict = {} -model_response = ModelResponse(id='chatcmpl-755fcb98-22ba-46a2-9d6d-1a85b4363e98', choices=[Choices(finish_reason='stop', index=0, mess... role='assistant'))], created=1715301477, model=None, object='chat.completion', system_fingerprint=None, usage=Usage()) -print_verbose = -encoding = , api_key = 'pb_Qg9YbQo7UqqHdu0ozxN_aw' -logging_obj = -optional_params = {'details': True, 'max_new_tokens': 256, 'return_full_text': False} -tenant_id = 'c4768f95', acompletion = False -litellm_params = {'acompletion': False, 'api_base': 'https://serving.app.predibase.com/c4768f95/deployments/v2/llms/llama-3-8b-instruct/generate_stream', 'api_key': 'pb_Qg9YbQo7UqqHdu0ozxN_aw', 'completion_call_id': None, ...} -logger_fn = None -headers = {'Authorization': 'Bearer pb_Qg9YbQo7UqqHdu0ozxN_aw', 'content-type': 'application/json'} +model = 'gemini-1.5-flash-preview-0514' +messages = [{'content': [{'text': 'Whats in this image?', 'type': 'text'}, {'image_url': {'url': 'gs://cloud-samples-data/generative-ai/image/boats.jpeg'}, 'type': 'image_url'}], 'role': 'user'}] +model_response = ModelResponse(id='chatcmpl-722df0e7-4e2d-44e6-9e2c-49823faa0189', choices=[Choices(finish_reason='stop', index=0, mess... 
role='assistant'))], created=1716145725, model=None, object='chat.completion', system_fingerprint=None, usage=Usage()) +print_verbose = +encoding = +logging_obj = +vertex_project = None, vertex_location = None, vertex_credentials = None +optional_params = {} +litellm_params = {'acompletion': False, 'api_base': '', 'api_key': None, 'completion_call_id': None, ...} +logger_fn = None, acompletion = False def completion( - self, model: str, messages: list, - api_base: str, - custom_prompt_dict: dict, model_response: ModelResponse, print_verbose: Callable, encoding, - api_key: str, logging_obj, - optional_params: dict, - tenant_id: str, - acompletion=None, + vertex_project=None, + vertex_location=None, + vertex_credentials=None, + optional_params=None, litellm_params=None, logger_fn=None, - headers: dict = {}, - ) -> Union[ModelResponse, CustomStreamWrapper]: - headers = self.validate_environment(api_key, headers) - completion_url = "" - input_text = "" - base_url = "https://serving.app.predibase.com" - if "https" in model: - completion_url = model - elif api_base: - base_url = api_base - elif "PREDIBASE_API_BASE" in os.environ: - base_url = os.getenv("PREDIBASE_API_BASE", "") - - completion_url = f"{base_url}/{tenant_id}/deployments/v2/llms/{model}" - - if optional_params.get("stream", False) == True: - completion_url += "/generate_stream" - else: - completion_url += "/generate" - - if model in custom_prompt_dict: - # check if the model has a registered custom prompt - model_prompt_details = custom_prompt_dict[model] - prompt = custom_prompt( - role_dict=model_prompt_details["roles"], - initial_prompt_value=model_prompt_details["initial_prompt_value"], - final_prompt_value=model_prompt_details["final_prompt_value"], - messages=messages, + acompletion: bool = False, + ): + try: + import vertexai + except: + raise VertexAIError( + status_code=400, + message="vertexai import failed please run `pip install google-cloud-aiplatform`", ) - else: - prompt = prompt_factory(model=model, messages=messages) - ## Load Config - config = litellm.PredibaseConfig.get_config() - for k, v in config.items(): - if ( - k not in optional_params - ): # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in - optional_params[k] = v + if not ( + hasattr(vertexai, "preview") or hasattr(vertexai.preview, "language_models") + ): + raise VertexAIError( + status_code=400, + message="""Upgrade vertex ai. 
Run `pip install "google-cloud-aiplatform>=1.38"`""", + ) + try: + from vertexai.preview.language_models import ( + ChatModel, + CodeChatModel, + InputOutputTextPair, + ) + from vertexai.language_models import TextGenerationModel, CodeGenerationModel + from vertexai.preview.generative_models import ( + GenerativeModel, + Part, + GenerationConfig, + ) + from google.cloud import aiplatform # type: ignore + from google.protobuf import json_format # type: ignore + from google.protobuf.struct_pb2 import Value # type: ignore + from google.cloud.aiplatform_v1beta1.types import content as gapic_content_types # type: ignore + import google.auth # type: ignore + import proto # type: ignore - stream = optional_params.pop("stream", False) + ## Load credentials with the correct quota project ref: https://github.com/googleapis/python-aiplatform/issues/2557#issuecomment-1709284744 + print_verbose( + f"VERTEX AI: vertex_project={vertex_project}; vertex_location={vertex_location}" + ) + if vertex_credentials is not None and isinstance(vertex_credentials, str): + import google.oauth2.service_account - data = { - "inputs": prompt, - "parameters": optional_params, - } - input_text = prompt - ## LOGGING - logging_obj.pre_call( - input=input_text, - api_key=api_key, - additional_args={ - "complete_input_dict": data, - "headers": headers, - "api_base": completion_url, - "acompletion": acompletion, - }, - ) - ## COMPLETION CALL - if acompletion is True: - ### ASYNC STREAMING - if stream == True: - return self.async_streaming( - model=model, - messages=messages, - data=data, - api_base=completion_url, - model_response=model_response, - print_verbose=print_verbose, - encoding=encoding, - api_key=api_key, - logging_obj=logging_obj, - optional_params=optional_params, - litellm_params=litellm_params, - logger_fn=logger_fn, - headers=headers, - ) # type: ignore + json_obj = json.loads(vertex_credentials) + + creds = google.oauth2.service_account.Credentials.from_service_account_info( + json_obj, + scopes=["https://www.googleapis.com/auth/cloud-platform"], + ) else: - ### ASYNC COMPLETION - return self.async_completion( - model=model, - messages=messages, - data=data, - api_base=api_base, - model_response=model_response, - print_verbose=print_verbose, - encoding=encoding, - api_key=api_key, - logging_obj=logging_obj, - optional_params=optional_params, - stream=False, - litellm_params=litellm_params, - logger_fn=logger_fn, - headers=headers, - ) # type: ignore - - ### SYNC STREAMING - if stream == True: - response = requests.post( - completion_url, - headers=headers, - data=json.dumps(data), -> stream=optional_params["stream"], + creds, _ = google.auth.default(quota_project_id=vertex_project) + print_verbose( + f"VERTEX AI: creds={creds}; google application credentials: {os.getenv('GOOGLE_APPLICATION_CREDENTIALS')}" ) -E KeyError: 'stream' + vertexai.init( + project=vertex_project, location=vertex_location, credentials=creds + ) + + ## Load Config + config = litellm.VertexAIConfig.get_config() + for k, v in config.items(): + if k not in optional_params: + optional_params[k] = v + + ## Process safety settings into format expected by vertex AI + safety_settings = None + if "safety_settings" in optional_params: + safety_settings = optional_params.pop("safety_settings") + if not isinstance(safety_settings, list): + raise ValueError("safety_settings must be a list") + if len(safety_settings) > 0 and not isinstance(safety_settings[0], dict): + raise ValueError("safety_settings must be a list of dicts") + safety_settings = [ + 
gapic_content_types.SafetySetting(x) for x in safety_settings + ] + + # vertexai does not use an API key, it looks for credentials.json in the environment + + prompt = " ".join( + [ + message["content"] + for message in messages + if isinstance(message["content"], str) + ] + ) + + mode = "" + + request_str = "" + response_obj = None + async_client = None + instances = None + client_options = { + "api_endpoint": f"{vertex_location}-aiplatform.googleapis.com" + } + if ( + model in litellm.vertex_language_models + or model in litellm.vertex_vision_models + ): + llm_model = GenerativeModel(model) + mode = "vision" + request_str += f"llm_model = GenerativeModel({model})\n" + elif model in litellm.vertex_chat_models: + llm_model = ChatModel.from_pretrained(model) + mode = "chat" + request_str += f"llm_model = ChatModel.from_pretrained({model})\n" + elif model in litellm.vertex_text_models: + llm_model = TextGenerationModel.from_pretrained(model) + mode = "text" + request_str += f"llm_model = TextGenerationModel.from_pretrained({model})\n" + elif model in litellm.vertex_code_text_models: + llm_model = CodeGenerationModel.from_pretrained(model) + mode = "text" + request_str += f"llm_model = CodeGenerationModel.from_pretrained({model})\n" + elif model in litellm.vertex_code_chat_models: # vertex_code_llm_models + llm_model = CodeChatModel.from_pretrained(model) + mode = "chat" + request_str += f"llm_model = CodeChatModel.from_pretrained({model})\n" + elif model == "private": + mode = "private" + model = optional_params.pop("model_id", None) + # private endpoint requires a dict instead of JSON + instances = [optional_params.copy()] + instances[0]["prompt"] = prompt + llm_model = aiplatform.PrivateEndpoint( + endpoint_name=model, + project=vertex_project, + location=vertex_location, + ) + request_str += f"llm_model = aiplatform.PrivateEndpoint(endpoint_name={model}, project={vertex_project}, location={vertex_location})\n" + else: # assume vertex model garden on public endpoint + mode = "custom" + + instances = [optional_params.copy()] + instances[0]["prompt"] = prompt + instances = [ + json_format.ParseDict(instance_dict, Value()) + for instance_dict in instances + ] + # Will determine the API used based on async parameter + llm_model = None + + # NOTE: async prediction and streaming under "private" mode isn't supported by aiplatform right now + if acompletion == True: + data = { + "llm_model": llm_model, + "mode": mode, + "prompt": prompt, + "logging_obj": logging_obj, + "request_str": request_str, + "model": model, + "model_response": model_response, + "encoding": encoding, + "messages": messages, + "print_verbose": print_verbose, + "client_options": client_options, + "instances": instances, + "vertex_location": vertex_location, + "vertex_project": vertex_project, + "safety_settings": safety_settings, + **optional_params, + } + if optional_params.get("stream", False) is True: + # async streaming + return async_streaming(**data) + + return async_completion(**data) + + if mode == "vision": + print_verbose("\nMaking VertexAI Gemini Pro / Pro Vision Call") + print_verbose(f"\nProcessing input messages = {messages}") + tools = optional_params.pop("tools", None) + content = _gemini_convert_messages_text(messages=messages) + stream = optional_params.pop("stream", False) + if stream == True: + request_str += f"response = llm_model.generate_content({content}, generation_config=GenerationConfig(**{optional_params}), safety_settings={safety_settings}, stream={stream})\n" + logging_obj.pre_call( + 
input=prompt, + api_key=None, + additional_args={ + "complete_input_dict": optional_params, + "request_str": request_str, + }, + ) + + model_response = llm_model.generate_content( + contents={"content": content}, + generation_config=optional_params, + safety_settings=safety_settings, + stream=True, + tools=tools, + ) + + return model_response + + request_str += f"response = llm_model.generate_content({content})\n" + ## LOGGING + logging_obj.pre_call( + input=prompt, + api_key=None, + additional_args={ + "complete_input_dict": optional_params, + "request_str": request_str, + }, + ) + + ## LLM Call + response = llm_model.generate_content( + contents=content, + generation_config=optional_params, + safety_settings=safety_settings, + tools=tools, + ) + + if tools is not None and bool( + getattr(response.candidates[0].content.parts[0], "function_call", None) + ): + function_call = response.candidates[0].content.parts[0].function_call + args_dict = {} + + # Check if it's a RepeatedComposite instance + for key, val in function_call.args.items(): + if isinstance( + val, proto.marshal.collections.repeated.RepeatedComposite + ): + # If so, convert to list + args_dict[key] = [v for v in val] + else: + args_dict[key] = val + + try: + args_str = json.dumps(args_dict) + except Exception as e: + raise VertexAIError(status_code=422, message=str(e)) + message = litellm.Message( + content=None, + tool_calls=[ + { + "id": f"call_{str(uuid.uuid4())}", + "function": { + "arguments": args_str, + "name": function_call.name, + }, + "type": "function", + } + ], + ) + completion_response = message + else: + completion_response = response.text + response_obj = response._raw_response + optional_params["tools"] = tools + elif mode == "chat": + chat = llm_model.start_chat() + request_str += f"chat = llm_model.start_chat()\n" + + if "stream" in optional_params and optional_params["stream"] == True: + # NOTE: VertexAI does not accept stream=True as a param and raises an error, + # we handle this by removing 'stream' from optional params and sending the request + # after we get the response we add optional_params["stream"] = True, since main.py needs to know it's a streaming response to then transform it for the OpenAI format + optional_params.pop( + "stream", None + ) # vertex ai raises an error when passing stream in optional params + request_str += ( + f"chat.send_message_streaming({prompt}, **{optional_params})\n" + ) + ## LOGGING + logging_obj.pre_call( + input=prompt, + api_key=None, + additional_args={ + "complete_input_dict": optional_params, + "request_str": request_str, + }, + ) + model_response = chat.send_message_streaming(prompt, **optional_params) + + return model_response + + request_str += f"chat.send_message({prompt}, **{optional_params}).text\n" + ## LOGGING + logging_obj.pre_call( + input=prompt, + api_key=None, + additional_args={ + "complete_input_dict": optional_params, + "request_str": request_str, + }, + ) + completion_response = chat.send_message(prompt, **optional_params).text + elif mode == "text": + if "stream" in optional_params and optional_params["stream"] == True: + optional_params.pop( + "stream", None + ) # See note above on handling streaming for vertex ai + request_str += ( + f"llm_model.predict_streaming({prompt}, **{optional_params})\n" + ) + ## LOGGING + logging_obj.pre_call( + input=prompt, + api_key=None, + additional_args={ + "complete_input_dict": optional_params, + "request_str": request_str, + }, + ) + model_response = llm_model.predict_streaming(prompt, **optional_params) + + 
return model_response + + request_str += f"llm_model.predict({prompt}, **{optional_params}).text\n" + ## LOGGING + logging_obj.pre_call( + input=prompt, + api_key=None, + additional_args={ + "complete_input_dict": optional_params, + "request_str": request_str, + }, + ) + completion_response = llm_model.predict(prompt, **optional_params).text + elif mode == "custom": + """ + Vertex AI Model Garden + """ + ## LOGGING + logging_obj.pre_call( + input=prompt, + api_key=None, + additional_args={ + "complete_input_dict": optional_params, + "request_str": request_str, + }, + ) + llm_model = aiplatform.gapic.PredictionServiceClient( + client_options=client_options + ) + request_str += f"llm_model = aiplatform.gapic.PredictionServiceClient(client_options={client_options})\n" + endpoint_path = llm_model.endpoint_path( + project=vertex_project, location=vertex_location, endpoint=model + ) + request_str += ( + f"llm_model.predict(endpoint={endpoint_path}, instances={instances})\n" + ) + response = llm_model.predict( + endpoint=endpoint_path, instances=instances + ).predictions + + completion_response = response[0] + if ( + isinstance(completion_response, str) + and "\nOutput:\n" in completion_response + ): + completion_response = completion_response.split("\nOutput:\n", 1)[1] + if "stream" in optional_params and optional_params["stream"] == True: + response = TextStreamer(completion_response) + return response + elif mode == "private": + """ + Vertex AI Model Garden deployed on private endpoint + """ + ## LOGGING + logging_obj.pre_call( + input=prompt, + api_key=None, + additional_args={ + "complete_input_dict": optional_params, + "request_str": request_str, + }, + ) + request_str += f"llm_model.predict(instances={instances})\n" + response = llm_model.predict(instances=instances).predictions + + completion_response = response[0] + if ( + isinstance(completion_response, str) + and "\nOutput:\n" in completion_response + ): + completion_response = completion_response.split("\nOutput:\n", 1)[1] + if "stream" in optional_params and optional_params["stream"] == True: + response = TextStreamer(completion_response) + return response + + ## LOGGING + logging_obj.post_call( + input=prompt, api_key=None, original_response=completion_response + ) + + ## RESPONSE OBJECT + if isinstance(completion_response, litellm.Message): + model_response["choices"][0]["message"] = completion_response + elif len(str(completion_response)) > 0: + model_response["choices"][0]["message"]["content"] = str( + completion_response + ) + model_response["created"] = int(time.time()) + model_response["model"] = model + ## CALCULATING USAGE + if model in litellm.vertex_language_models and response_obj is not None: + model_response["choices"][0].finish_reason = map_finish_reason( + response_obj.candidates[0].finish_reason.name + ) + usage = Usage( + prompt_tokens=response_obj.usage_metadata.prompt_token_count, + completion_tokens=response_obj.usage_metadata.candidates_token_count, + total_tokens=response_obj.usage_metadata.total_token_count, + ) + else: + # init prompt tokens + # this block attempts to get usage from response_obj if it exists, if not it uses the litellm token counter + prompt_tokens, completion_tokens, total_tokens = 0, 0, 0 + if response_obj is not None: + if hasattr(response_obj, "usage_metadata") and hasattr( + response_obj.usage_metadata, "prompt_token_count" + ): + prompt_tokens = response_obj.usage_metadata.prompt_token_count + completion_tokens = ( + response_obj.usage_metadata.candidates_token_count + ) + else: + 
prompt_tokens = len(encoding.encode(prompt)) + completion_tokens = len( + encoding.encode( + model_response["choices"][0]["message"].get("content", "") + ) + ) + + usage = Usage( + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + total_tokens=prompt_tokens + completion_tokens, + ) + setattr(model_response, "usage", usage) + return model_response + except Exception as e: + if isinstance(e, VertexAIError): + raise e +> raise VertexAIError(status_code=500, message=str(e)) +E litellm.llms.vertex_ai.VertexAIError: Parameter to MergeFrom() must be instance of same class: expected got . -../llms/predibase.py:412: KeyError +../llms/vertex_ai.py:971: VertexAIError During handling of the above exception, another exception occurred: -sync_mode = True - - @pytest.mark.parametrize("sync_mode", [True, False]) - @pytest.mark.asyncio - async def test_completion_predibase_streaming(sync_mode): - try: - litellm.set_verbose = True - - if sync_mode: -> response = completion( - model="predibase/llama-3-8b-instruct", - tenant_id="c4768f95", - api_base="https://serving.app.predibase.com", - api_key=os.getenv("PREDIBASE_API_KEY"), - messages=[{"role": "user", "content": "What is the meaning of life?"}], - stream=True, - ) - -test_streaming.py:317: -_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ - args = () -kwargs = {'api_base': 'https://serving.app.predibase.com', 'api_key': 'pb_Qg9YbQo7UqqHdu0ozxN_aw', 'litellm_call_id': 'cf0ea464-1b45-4473-8e55-6bf6809df7a7', 'litellm_logging_obj': , ...} -result = None, start_time = datetime.datetime(2024, 5, 9, 17, 37, 57, 884661) -logging_obj = -call_type = 'completion', model = 'predibase/llama-3-8b-instruct' +kwargs = {'litellm_call_id': '7f48b7ab-47b3-4beb-b2b5-fa298be49d3f', 'litellm_logging_obj': +call_type = 'completion', model = 'vertex_ai/gemini-1.5-flash-preview-0514' k = 'litellm_logging_obj' @wraps(original_function) @@ -1606,6 +2299,7 @@ k = 'litellm_logging_obj' ) else: return result + return result # Prints Exactly what was passed to litellm function - don't execute any logic here - it should just print @@ -1709,369 +2403,7 @@ k = 'litellm_logging_obj' model_response_object=ModelResponse(), stream=kwargs.get("stream", False), ) - if kwargs.get("stream", False) == True: - cached_result = CustomStreamWrapper( - completion_stream=cached_result, - model=model, - custom_llm_provider="cached_response", - logging_obj=logging_obj, - ) - elif call_type == CallTypes.embedding.value and isinstance( - cached_result, dict - ): - cached_result = convert_to_model_response_object( - response_object=cached_result, - response_type="embedding", - ) - # LOG SUCCESS - cache_hit = True - end_time = datetime.datetime.now() - ( - model, - custom_llm_provider, - dynamic_api_key, - api_base, - ) = litellm.get_llm_provider( - model=model, - custom_llm_provider=kwargs.get( - "custom_llm_provider", None - ), - api_base=kwargs.get("api_base", None), - api_key=kwargs.get("api_key", None), - ) - print_verbose( - f"Async Wrapper: Completed Call, calling async_success_handler: {logging_obj.async_success_handler}" - ) - logging_obj.update_environment_variables( - model=model, - user=kwargs.get("user", None), - optional_params={}, - litellm_params={ - "logger_fn": kwargs.get("logger_fn", None), - "acompletion": False, - "metadata": kwargs.get("metadata", {}), - "model_info": kwargs.get("model_info", {}), - "proxy_server_request": kwargs.get( - "proxy_server_request", None - ), - "preset_cache_key": kwargs.get( - "preset_cache_key", None - ), 
- "stream_response": kwargs.get( - "stream_response", {} - ), - }, - input=kwargs.get("messages", ""), - api_key=kwargs.get("api_key", None), - original_response=str(cached_result), - additional_args=None, - stream=kwargs.get("stream", False), - ) - threading.Thread( - target=logging_obj.success_handler, - args=(cached_result, start_time, end_time, cache_hit), - ).start() - return cached_result - - # CHECK MAX TOKENS - if ( - kwargs.get("max_tokens", None) is not None - and model is not None - and litellm.modify_params - == True # user is okay with params being modified - and ( - call_type == CallTypes.acompletion.value - or call_type == CallTypes.completion.value - ) - ): - try: - base_model = model - if kwargs.get("hf_model_name", None) is not None: - base_model = f"huggingface/{kwargs.get('hf_model_name')}" - max_output_tokens = ( - get_max_tokens(model=base_model) or 4096 - ) # assume min context window is 4k tokens - user_max_tokens = kwargs.get("max_tokens") - ## Scenario 1: User limit + prompt > model limit - messages = None - if len(args) > 1: - messages = args[1] - elif kwargs.get("messages", None): - messages = kwargs["messages"] - input_tokens = token_counter(model=base_model, messages=messages) - input_tokens += max( - 0.1 * input_tokens, 10 - ) # give at least a 10 token buffer. token counting can be imprecise. - if input_tokens > max_output_tokens: - pass # allow call to fail normally - elif user_max_tokens + input_tokens > max_output_tokens: - user_max_tokens = max_output_tokens - input_tokens - print_verbose(f"user_max_tokens: {user_max_tokens}") - kwargs["max_tokens"] = int( - round(user_max_tokens) - ) # make sure max tokens is always an int - except Exception as e: - print_verbose(f"Error while checking max token limit: {str(e)}") - # MODEL CALL - result = original_function(*args, **kwargs) - end_time = datetime.datetime.now() - if "stream" in kwargs and kwargs["stream"] == True: - if ( - "complete_response" in kwargs - and kwargs["complete_response"] == True - ): - chunks = [] - for idx, chunk in enumerate(result): - chunks.append(chunk) - return litellm.stream_chunk_builder( - chunks, messages=kwargs.get("messages", None) - ) - else: - return result - elif "acompletion" in kwargs and kwargs["acompletion"] == True: - return result - elif "aembedding" in kwargs and kwargs["aembedding"] == True: - return result - elif "aimg_generation" in kwargs and kwargs["aimg_generation"] == True: - return result - elif "atranscription" in kwargs and kwargs["atranscription"] == True: - return result - - ### POST-CALL RULES ### - post_call_processing(original_response=result, model=model or None) - - # [OPTIONAL] ADD TO CACHE - if ( - litellm.cache is not None - and str(original_function.__name__) - in litellm.cache.supported_call_types - ) and (kwargs.get("cache", {}).get("no-store", False) != True): - litellm.cache.add_cache(result, *args, **kwargs) - - # LOG SUCCESS - handle streaming success logging in the _next_ object, remove `handle_success` once it's deprecated - verbose_logger.info(f"Wrapper: Completed Call, calling success_handler") - threading.Thread( - target=logging_obj.success_handler, args=(result, start_time, end_time) - ).start() - # RETURN RESULT - if hasattr(result, "_hidden_params"): - result._hidden_params["model_id"] = kwargs.get("model_info", {}).get( - "id", None - ) - result._hidden_params["api_base"] = get_api_base( - model=model, - optional_params=getattr(logging_obj, "optional_params", {}), - ) - result._response_ms = ( - end_time - start_time - 
).total_seconds() * 1000 # return response latency in ms like openai - return result - except Exception as e: - call_type = original_function.__name__ - if call_type == CallTypes.completion.value: - num_retries = ( - kwargs.get("num_retries", None) or litellm.num_retries or None - ) - litellm.num_retries = ( - None # set retries to None to prevent infinite loops - ) - context_window_fallback_dict = kwargs.get( - "context_window_fallback_dict", {} - ) - - _is_litellm_router_call = "model_group" in kwargs.get( - "metadata", {} - ) # check if call from litellm.router/proxy - if ( - num_retries and not _is_litellm_router_call - ): # only enter this if call is not from litellm router/proxy. router has it's own logic for retrying - if ( - isinstance(e, openai.APIError) - or isinstance(e, openai.Timeout) - or isinstance(e, openai.APIConnectionError) - ): - kwargs["num_retries"] = num_retries - return litellm.completion_with_retries(*args, **kwargs) - elif ( - isinstance(e, litellm.exceptions.ContextWindowExceededError) - and context_window_fallback_dict - and model in context_window_fallback_dict - ): - if len(args) > 0: - args[0] = context_window_fallback_dict[model] - else: - kwargs["model"] = context_window_fallback_dict[model] - return original_function(*args, **kwargs) - traceback_exception = traceback.format_exc() - end_time = datetime.datetime.now() - # LOG FAILURE - handle streaming failure logging in the _next_ object, remove `handle_failure` once it's deprecated - if logging_obj: - logging_obj.failure_handler( - e, traceback_exception, start_time, end_time - ) # DO NOT MAKE THREADED - router retry fallback relies on this! - my_thread = threading.Thread( - target=handle_failure, - args=(e, traceback_exception, start_time, end_time, args, kwargs), - ) # don't interrupt execution of main thread - my_thread.start() - if hasattr(e, "message"): - if ( - liteDebuggerClient and liteDebuggerClient.dashboard_url != None - ): # make it easy to get to the debugger logs if you've initialized it - e.message += f"\n Check the log in your dashboard - {liteDebuggerClient.dashboard_url}" -> raise e - -../utils.py:3229: -_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ - -args = () -kwargs = {'api_base': 'https://serving.app.predibase.com', 'api_key': 'pb_Qg9YbQo7UqqHdu0ozxN_aw', 'litellm_call_id': 'cf0ea464-1b45-4473-8e55-6bf6809df7a7', 'litellm_logging_obj': , ...} -result = None, start_time = datetime.datetime(2024, 5, 9, 17, 37, 57, 884661) -logging_obj = -call_type = 'completion', model = 'predibase/llama-3-8b-instruct' -k = 'litellm_logging_obj' - - @wraps(original_function) - def wrapper(*args, **kwargs): - # DO NOT MOVE THIS. It always needs to run first - # Check if this is an async function. 
If so only execute the async function - if ( - kwargs.get("acompletion", False) == True - or kwargs.get("aembedding", False) == True - or kwargs.get("aimg_generation", False) == True - or kwargs.get("amoderation", False) == True - or kwargs.get("atext_completion", False) == True - or kwargs.get("atranscription", False) == True - ): - # [OPTIONAL] CHECK MAX RETRIES / REQUEST - if litellm.num_retries_per_request is not None: - # check if previous_models passed in as ['litellm_params']['metadata]['previous_models'] - previous_models = kwargs.get("metadata", {}).get( - "previous_models", None - ) - if previous_models is not None: - if litellm.num_retries_per_request <= len(previous_models): - raise Exception(f"Max retries per request hit!") - - # MODEL CALL - result = original_function(*args, **kwargs) - if "stream" in kwargs and kwargs["stream"] == True: - if ( - "complete_response" in kwargs - and kwargs["complete_response"] == True - ): - chunks = [] - for idx, chunk in enumerate(result): - chunks.append(chunk) - return litellm.stream_chunk_builder( - chunks, messages=kwargs.get("messages", None) - ) - else: - return result - return result - - # Prints Exactly what was passed to litellm function - don't execute any logic here - it should just print - print_args_passed_to_litellm(original_function, args, kwargs) - start_time = datetime.datetime.now() - result = None - logging_obj = kwargs.get("litellm_logging_obj", None) - - # only set litellm_call_id if its not in kwargs - call_type = original_function.__name__ - if "litellm_call_id" not in kwargs: - kwargs["litellm_call_id"] = str(uuid.uuid4()) - try: - model = args[0] if len(args) > 0 else kwargs["model"] - except: - model = None - if ( - call_type != CallTypes.image_generation.value - and call_type != CallTypes.text_completion.value - ): - raise ValueError("model param not passed in.") - - try: - if logging_obj is None: - logging_obj, kwargs = function_setup( - original_function.__name__, rules_obj, start_time, *args, **kwargs - ) - kwargs["litellm_logging_obj"] = logging_obj - - # CHECK FOR 'os.environ/' in kwargs - for k, v in kwargs.items(): - if v is not None and isinstance(v, str) and v.startswith("os.environ/"): - kwargs[k] = litellm.get_secret(v) - # [OPTIONAL] CHECK BUDGET - if litellm.max_budget: - if litellm._current_cost > litellm.max_budget: - raise BudgetExceededError( - current_cost=litellm._current_cost, - max_budget=litellm.max_budget, - ) - - # [OPTIONAL] CHECK MAX RETRIES / REQUEST - if litellm.num_retries_per_request is not None: - # check if previous_models passed in as ['litellm_params']['metadata]['previous_models'] - previous_models = kwargs.get("metadata", {}).get( - "previous_models", None - ) - if previous_models is not None: - if litellm.num_retries_per_request <= len(previous_models): - raise Exception(f"Max retries per request hit!") - - # [OPTIONAL] CHECK CACHE - print_verbose( - f"SYNC kwargs[caching]: {kwargs.get('caching', False)}; litellm.cache: {litellm.cache}; kwargs.get('cache')['no-cache']: {kwargs.get('cache', {}).get('no-cache', False)}" - ) - # if caching is false or cache["no-cache"]==True, don't run this - if ( - ( - ( - ( - kwargs.get("caching", None) is None - and litellm.cache is not None - ) - or kwargs.get("caching", False) == True - ) - and kwargs.get("cache", {}).get("no-cache", False) != True - ) - and kwargs.get("aembedding", False) != True - and kwargs.get("atext_completion", False) != True - and kwargs.get("acompletion", False) != True - and kwargs.get("aimg_generation", False) != 
True - and kwargs.get("atranscription", False) != True - ): # allow users to control returning cached responses from the completion function - # checking cache - print_verbose(f"INSIDE CHECKING CACHE") - if ( - litellm.cache is not None - and str(original_function.__name__) - in litellm.cache.supported_call_types - ): - print_verbose(f"Checking Cache") - preset_cache_key = litellm.cache.get_cache_key(*args, **kwargs) - kwargs["preset_cache_key"] = ( - preset_cache_key # for streaming calls, we need to pass the preset_cache_key - ) - cached_result = litellm.cache.get_cache(*args, **kwargs) - if cached_result != None: - if "detail" in cached_result: - # implies an error occurred - pass - else: - call_type = original_function.__name__ - print_verbose( - f"Cache Response Object routing: call_type - {call_type}; cached_result instace: {type(cached_result)}" - ) - if call_type == CallTypes.completion.value and isinstance( - cached_result, dict - ): - cached_result = convert_to_model_response_object( - response_object=cached_result, - model_response_object=ModelResponse(), - stream=kwargs.get("stream", False), - ) if kwargs.get("stream", False) == True: cached_result = CustomStreamWrapper( completion_stream=cached_result, @@ -2179,1925 +2511,83 @@ k = 'litellm_logging_obj' # MODEL CALL > result = original_function(*args, **kwargs) -../utils.py:3123: +../utils.py:3211: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../main.py:2368: in completion + raise exception_type( +../utils.py:9709: in exception_type + raise e _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ -model = 'llama-3-8b-instruct' -messages = [{'content': 'What is the meaning of life?', 'role': 'user'}] -timeout = 600.0, temperature = None, top_p = None, n = None, stream = True -stream_options = None, stop = None, max_tokens = None, presence_penalty = None -frequency_penalty = None, logit_bias = None, user = None, response_format = None -seed = None, tools = None, tool_choice = None, logprobs = None -top_logprobs = None, deployment_id = None, extra_headers = None -functions = None, function_call = None, base_url = None, api_version = None -api_key = 'pb_Qg9YbQo7UqqHdu0ozxN_aw', model_list = None -kwargs = {'api_base': 'https://serving.app.predibase.com', 'litellm_call_id': 'cf0ea464-1b45-4473-8e55-6bf6809df7a7', 'litellm_logging_obj': , 'tenant_id': 'c4768f95'} -args = {'acompletion': False, 'api_base': None, 'api_key': 'pb_Qg9YbQo7UqqHdu0ozxN_aw', 'api_version': None, ...} -api_base = None, mock_response = None, force_timeout = 600, logger_fn = None -verbose = False, custom_llm_provider = 'predibase' +model = 'gemini-1.5-flash-preview-0514' +original_exception = VertexAIError("Parameter to MergeFrom() must be instance of same class: expected got .") +custom_llm_provider = 'vertex_ai' +completion_kwargs = {'acompletion': False, 'api_base': None, 'api_key': None, 'api_version': None, ...} +extra_kwargs = {'litellm_call_id': '7f48b7ab-47b3-4beb-b2b5-fa298be49d3f', 'litellm_logging_obj': } - @client - def completion( - model: str, - # Optional OpenAI params: see https://platform.openai.com/docs/api-reference/chat/create - messages: List = [], - timeout: Optional[Union[float, str, httpx.Timeout]] = None, - temperature: Optional[float] = None, - top_p: Optional[float] = None, - n: Optional[int] = None, - stream: Optional[bool] = None, - stream_options: Optional[dict] = None, - stop=None, - max_tokens: Optional[int] = None, - presence_penalty: Optional[float] = None, - 
frequency_penalty: Optional[float] = None, - logit_bias: Optional[dict] = None, - user: Optional[str] = None, - # openai v1.0+ new params - response_format: Optional[dict] = None, - seed: Optional[int] = None, - tools: Optional[List] = None, - tool_choice: Optional[str] = None, - logprobs: Optional[bool] = None, - top_logprobs: Optional[int] = None, - deployment_id=None, - extra_headers: Optional[dict] = None, - # soon to be deprecated params by OpenAI - functions: Optional[List] = None, - function_call: Optional[str] = None, - # set api_base, api_version, api_key - base_url: Optional[str] = None, - api_version: Optional[str] = None, - api_key: Optional[str] = None, - model_list: Optional[list] = None, # pass in a list of api_base,keys, etc. - # Optional liteLLM function params - **kwargs, - ) -> Union[ModelResponse, CustomStreamWrapper]: - """ - Perform a completion() using any of litellm supported llms (example gpt-4, gpt-3.5-turbo, claude-2, command-nightly) - Parameters: - model (str): The name of the language model to use for text completion. see all supported LLMs: https://docs.litellm.ai/docs/providers/ - messages (List): A list of message objects representing the conversation context (default is an empty list). - - OPTIONAL PARAMS - functions (List, optional): A list of functions to apply to the conversation messages (default is an empty list). - function_call (str, optional): The name of the function to call within the conversation (default is an empty string). - temperature (float, optional): The temperature parameter for controlling the randomness of the output (default is 1.0). - top_p (float, optional): The top-p parameter for nucleus sampling (default is 1.0). - n (int, optional): The number of completions to generate (default is 1). - stream (bool, optional): If True, return a streaming response (default is False). - stream_options (dict, optional): A dictionary containing options for the streaming response. Only set this when you set stream: true. - stop(string/list, optional): - Up to 4 sequences where the LLM API will stop generating further tokens. - max_tokens (integer, optional): The maximum number of tokens in the generated completion (default is infinity). - presence_penalty (float, optional): It is used to penalize new tokens based on their existence in the text so far. - frequency_penalty: It is used to penalize new tokens based on their frequency in the text so far. - logit_bias (dict, optional): Used to modify the probability of specific tokens appearing in the completion. - user (str, optional): A unique identifier representing your end-user. This can help the LLM provider to monitor and detect abuse. - logprobs (bool, optional): Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the content of message - top_logprobs (int, optional): An integer between 0 and 5 specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to true if this parameter is used. - metadata (dict, optional): Pass in additional metadata to tag your completion calls - eg. prompt version, details, etc. - api_base (str, optional): Base URL for the API (default is None). - api_version (str, optional): API version (default is None). - api_key (str, optional): API key (default is None). - model_list (list, optional): List of api base, version, keys - extra_headers (dict, optional): Additional headers to include in the request. 
- - LITELLM Specific Params - mock_response (str, optional): If provided, return a mock completion response for testing or debugging purposes (default is None). - custom_llm_provider (str, optional): Used for Non-OpenAI LLMs, Example usage for bedrock, set model="amazon.titan-tg1-large" and custom_llm_provider="bedrock" - max_retries (int, optional): The number of retries to attempt (default is 0). - Returns: - ModelResponse: A response object containing the generated completion and associated metadata. - - Note: - - This function is used to perform completions() using the specified language model. - - It supports various optional parameters for customizing the completion behavior. - - If 'mock_response' is provided, a mock completion response is returned for testing or debugging. - """ - ######### unpacking kwargs ##################### - args = locals() - api_base = kwargs.get("api_base", None) - mock_response = kwargs.get("mock_response", None) - force_timeout = kwargs.get("force_timeout", 600) ## deprecated - logger_fn = kwargs.get("logger_fn", None) - verbose = kwargs.get("verbose", False) - custom_llm_provider = kwargs.get("custom_llm_provider", None) - litellm_logging_obj = kwargs.get("litellm_logging_obj", None) - id = kwargs.get("id", None) - metadata = kwargs.get("metadata", None) - model_info = kwargs.get("model_info", None) - proxy_server_request = kwargs.get("proxy_server_request", None) - fallbacks = kwargs.get("fallbacks", None) - headers = kwargs.get("headers", None) - num_retries = kwargs.get("num_retries", None) ## deprecated - max_retries = kwargs.get("max_retries", None) - context_window_fallback_dict = kwargs.get("context_window_fallback_dict", None) - organization = kwargs.get("organization", None) - ### CUSTOM MODEL COST ### - input_cost_per_token = kwargs.get("input_cost_per_token", None) - output_cost_per_token = kwargs.get("output_cost_per_token", None) - input_cost_per_second = kwargs.get("input_cost_per_second", None) - output_cost_per_second = kwargs.get("output_cost_per_second", None) - ### CUSTOM PROMPT TEMPLATE ### - initial_prompt_value = kwargs.get("initial_prompt_value", None) - roles = kwargs.get("roles", None) - final_prompt_value = kwargs.get("final_prompt_value", None) - bos_token = kwargs.get("bos_token", None) - eos_token = kwargs.get("eos_token", None) - preset_cache_key = kwargs.get("preset_cache_key", None) - hf_model_name = kwargs.get("hf_model_name", None) - supports_system_message = kwargs.get("supports_system_message", None) - ### TEXT COMPLETION CALLS ### - text_completion = kwargs.get("text_completion", False) - atext_completion = kwargs.get("atext_completion", False) - ### ASYNC CALLS ### - acompletion = kwargs.get("acompletion", False) - client = kwargs.get("client", None) - ### Admin Controls ### - no_log = kwargs.get("no-log", False) - ######## end of unpacking kwargs ########### - openai_params = [ - "functions", - "function_call", - "temperature", - "temperature", - "top_p", - "n", - "stream", - "stream_options", - "stop", - "max_tokens", - "presence_penalty", - "frequency_penalty", - "logit_bias", - "user", - "request_timeout", - "api_base", - "api_version", - "api_key", - "deployment_id", - "organization", - "base_url", - "default_headers", - "timeout", - "response_format", - "seed", - "tools", - "tool_choice", - "max_retries", - "logprobs", - "top_logprobs", - "extra_headers", - ] - litellm_params = [ - "metadata", - "acompletion", - "atext_completion", - "text_completion", - "caching", - "mock_response", - "api_key", - 
"api_version", - "api_base", - "force_timeout", - "logger_fn", - "verbose", - "custom_llm_provider", - "litellm_logging_obj", - "litellm_call_id", - "use_client", - "id", - "fallbacks", - "azure", - "headers", - "model_list", - "num_retries", - "context_window_fallback_dict", - "retry_policy", - "roles", - "final_prompt_value", - "bos_token", - "eos_token", - "request_timeout", - "complete_response", - "self", - "client", - "rpm", - "tpm", - "max_parallel_requests", - "input_cost_per_token", - "output_cost_per_token", - "input_cost_per_second", - "output_cost_per_second", - "hf_model_name", - "model_info", - "proxy_server_request", - "preset_cache_key", - "caching_groups", - "ttl", - "cache", - "no-log", - "base_model", - "stream_timeout", - "supports_system_message", - "region_name", - "allowed_model_region", - ] - default_params = openai_params + litellm_params - non_default_params = { - k: v for k, v in kwargs.items() if k not in default_params - } # model-specific params - pass them straight to the model/provider - - ### TIMEOUT LOGIC ### - timeout = timeout or kwargs.get("request_timeout", 600) or 600 - # set timeout for 10 minutes by default - - if ( - timeout is not None - and isinstance(timeout, httpx.Timeout) - and supports_httpx_timeout(custom_llm_provider) == False - ): - read_timeout = timeout.read or 600 - timeout = read_timeout # default 10 min timeout - elif timeout is not None and not isinstance(timeout, httpx.Timeout): - timeout = float(timeout) # type: ignore - + def exception_type( + model, + original_exception, + custom_llm_provider, + completion_kwargs={}, + extra_kwargs={}, + ): + global user_logger_fn, liteDebuggerClient + exception_mapping_worked = False + if litellm.suppress_debug_info is False: + print() # noqa + print( # noqa + "\033[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new\033[0m" # noqa + ) # noqa + print( # noqa + "LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'." 
# noqa + ) # noqa + print() # noqa try: - if base_url is not None: - api_base = base_url - if max_retries is not None: # openai allows openai.OpenAI(max_retries=3) - num_retries = max_retries - logging = litellm_logging_obj - fallbacks = fallbacks or litellm.model_fallbacks - if fallbacks is not None: - return completion_with_fallbacks(**args) - if model_list is not None: - deployments = [ - m["litellm_params"] for m in model_list if m["model_name"] == model - ] - return batch_completion_models(deployments=deployments, **args) - if litellm.model_alias_map and model in litellm.model_alias_map: - model = litellm.model_alias_map[ - model - ] # update the model to the actual value if an alias has been passed in - model_response = ModelResponse() - setattr(model_response, "usage", litellm.Usage()) - if ( - kwargs.get("azure", False) == True - ): # don't remove flag check, to remain backwards compatible for repos like Codium - custom_llm_provider = "azure" - if deployment_id != None: # azure llms - model = deployment_id - custom_llm_provider = "azure" - model, custom_llm_provider, dynamic_api_key, api_base = get_llm_provider( - model=model, - custom_llm_provider=custom_llm_provider, - api_base=api_base, - api_key=api_key, - ) - if model_response is not None and hasattr(model_response, "_hidden_params"): - model_response._hidden_params["custom_llm_provider"] = custom_llm_provider - model_response._hidden_params["region_name"] = kwargs.get( - "aws_region_name", None - ) # support region-based pricing for bedrock + if model: + error_str = str(original_exception) + if isinstance(original_exception, BaseException): + exception_type = type(original_exception).__name__ + else: + exception_type = "" - ### REGISTER CUSTOM MODEL PRICING -- IF GIVEN ### - if input_cost_per_token is not None and output_cost_per_token is not None: - print_verbose(f"Registering model={model} in model cost map") - litellm.register_model( - { - f"{custom_llm_provider}/{model}": { - "input_cost_per_token": input_cost_per_token, - "output_cost_per_token": output_cost_per_token, - "litellm_provider": custom_llm_provider, - }, - model: { - "input_cost_per_token": input_cost_per_token, - "output_cost_per_token": output_cost_per_token, - "litellm_provider": custom_llm_provider, - }, - } - ) - elif ( - input_cost_per_second is not None - ): # time based pricing just needs cost in place - output_cost_per_second = output_cost_per_second - litellm.register_model( - { - f"{custom_llm_provider}/{model}": { - "input_cost_per_second": input_cost_per_second, - "output_cost_per_second": output_cost_per_second, - "litellm_provider": custom_llm_provider, - }, - model: { - "input_cost_per_second": input_cost_per_second, - "output_cost_per_second": output_cost_per_second, - "litellm_provider": custom_llm_provider, - }, - } - ) - ### BUILD CUSTOM PROMPT TEMPLATE -- IF GIVEN ### - custom_prompt_dict = {} # type: ignore - if ( - initial_prompt_value - or roles - or final_prompt_value - or bos_token - or eos_token - ): - custom_prompt_dict = {model: {}} - if initial_prompt_value: - custom_prompt_dict[model]["initial_prompt_value"] = initial_prompt_value - if roles: - custom_prompt_dict[model]["roles"] = roles - if final_prompt_value: - custom_prompt_dict[model]["final_prompt_value"] = final_prompt_value - if bos_token: - custom_prompt_dict[model]["bos_token"] = bos_token - if eos_token: - custom_prompt_dict[model]["eos_token"] = eos_token - - if ( - supports_system_message is not None - and isinstance(supports_system_message, bool) - and 
supports_system_message == False - ): - messages = map_system_message_pt(messages=messages) - model_api_key = get_api_key( - llm_provider=custom_llm_provider, dynamic_api_key=api_key - ) # get the api key from the environment if required for the model - - if dynamic_api_key is not None: - api_key = dynamic_api_key - # check if user passed in any of the OpenAI optional params - optional_params = get_optional_params( - functions=functions, - function_call=function_call, - temperature=temperature, - top_p=top_p, - n=n, - stream=stream, - stream_options=stream_options, - stop=stop, - max_tokens=max_tokens, - presence_penalty=presence_penalty, - frequency_penalty=frequency_penalty, - logit_bias=logit_bias, - user=user, - # params to identify the model - model=model, - custom_llm_provider=custom_llm_provider, - response_format=response_format, - seed=seed, - tools=tools, - tool_choice=tool_choice, - max_retries=max_retries, - logprobs=logprobs, - top_logprobs=top_logprobs, - extra_headers=extra_headers, - **non_default_params, - ) - - if litellm.add_function_to_prompt and optional_params.get( - "functions_unsupported_model", None - ): # if user opts to add it to prompt, when API doesn't support function calling - functions_unsupported_model = optional_params.pop( - "functions_unsupported_model" - ) - messages = function_call_prompt( - messages=messages, functions=functions_unsupported_model - ) - - # For logging - save the values of the litellm-specific params passed in - litellm_params = get_litellm_params( - acompletion=acompletion, - api_key=api_key, - force_timeout=force_timeout, - logger_fn=logger_fn, - verbose=verbose, - custom_llm_provider=custom_llm_provider, - api_base=api_base, - litellm_call_id=kwargs.get("litellm_call_id", None), - model_alias_map=litellm.model_alias_map, - completion_call_id=id, - metadata=metadata, - model_info=model_info, - proxy_server_request=proxy_server_request, - preset_cache_key=preset_cache_key, - no_log=no_log, - ) - logging.update_environment_variables( - model=model, - user=user, - optional_params=optional_params, - litellm_params=litellm_params, - ) - if mock_response: - return mock_completion( - model, - messages, - stream=stream, - mock_response=mock_response, - logging=logging, - acompletion=acompletion, - ) - if custom_llm_provider == "azure": - # azure configs - api_type = get_secret("AZURE_API_TYPE") or "azure" - - api_base = api_base or litellm.api_base or get_secret("AZURE_API_BASE") - - api_version = ( - api_version or litellm.api_version or get_secret("AZURE_API_VERSION") - ) - - api_key = ( - api_key - or litellm.api_key - or litellm.azure_key - or get_secret("AZURE_OPENAI_API_KEY") - or get_secret("AZURE_API_KEY") - ) - - azure_ad_token = optional_params.get("extra_body", {}).pop( - "azure_ad_token", None - ) or get_secret("AZURE_AD_TOKEN") - - headers = headers or litellm.headers - - ## LOAD CONFIG - if set - config = litellm.AzureOpenAIConfig.get_config() - for k, v in config.items(): - if ( - k not in optional_params - ): # completion(top_k=3) > azure_config(top_k=3) <- allows for dynamic variables to be passed in - optional_params[k] = v - - ## COMPLETION CALL - response = azure_chat_completions.completion( - model=model, - messages=messages, - headers=headers, - api_key=api_key, - api_base=api_base, - api_version=api_version, - api_type=api_type, - azure_ad_token=azure_ad_token, - model_response=model_response, - print_verbose=print_verbose, - optional_params=optional_params, - litellm_params=litellm_params, - logger_fn=logger_fn, - 
logging_obj=logging, - acompletion=acompletion, - timeout=timeout, # type: ignore - client=client, # pass AsyncAzureOpenAI, AzureOpenAI client - ) - - if optional_params.get("stream", False) or acompletion == True: - ## LOGGING - logging.post_call( - input=messages, - api_key=api_key, - original_response=response, - additional_args={ - "headers": headers, - "api_version": api_version, - "api_base": api_base, - }, - ) - elif custom_llm_provider == "azure_text": - # azure configs - api_type = get_secret("AZURE_API_TYPE") or "azure" - - api_base = api_base or litellm.api_base or get_secret("AZURE_API_BASE") - - api_version = ( - api_version or litellm.api_version or get_secret("AZURE_API_VERSION") - ) - - api_key = ( - api_key - or litellm.api_key - or litellm.azure_key - or get_secret("AZURE_OPENAI_API_KEY") - or get_secret("AZURE_API_KEY") - ) - - azure_ad_token = optional_params.get("extra_body", {}).pop( - "azure_ad_token", None - ) or get_secret("AZURE_AD_TOKEN") - - headers = headers or litellm.headers - - ## LOAD CONFIG - if set - config = litellm.AzureOpenAIConfig.get_config() - for k, v in config.items(): - if ( - k not in optional_params - ): # completion(top_k=3) > azure_config(top_k=3) <- allows for dynamic variables to be passed in - optional_params[k] = v - - ## COMPLETION CALL - response = azure_text_completions.completion( - model=model, - messages=messages, - headers=headers, - api_key=api_key, - api_base=api_base, - api_version=api_version, - api_type=api_type, - azure_ad_token=azure_ad_token, - model_response=model_response, - print_verbose=print_verbose, - optional_params=optional_params, - litellm_params=litellm_params, - logger_fn=logger_fn, - logging_obj=logging, - acompletion=acompletion, - timeout=timeout, - client=client, # pass AsyncAzureOpenAI, AzureOpenAI client - ) - - if optional_params.get("stream", False) or acompletion == True: - ## LOGGING - logging.post_call( - input=messages, - api_key=api_key, - original_response=response, - additional_args={ - "headers": headers, - "api_version": api_version, - "api_base": api_base, - }, - ) - elif ( - model in litellm.open_ai_chat_completion_models - or custom_llm_provider == "custom_openai" - or custom_llm_provider == "deepinfra" - or custom_llm_provider == "perplexity" - or custom_llm_provider == "groq" - or custom_llm_provider == "deepseek" - or custom_llm_provider == "anyscale" - or custom_llm_provider == "mistral" - or custom_llm_provider == "openai" - or custom_llm_provider == "together_ai" - or custom_llm_provider in litellm.openai_compatible_providers - or "ft:gpt-3.5-turbo" in model # finetune gpt-3.5-turbo - ): # allow user to make an openai call with a custom base - # note: if a user sets a custom base - we should ensure this works - # allow for the setting of dynamic and stateful api-bases - api_base = ( - api_base # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there - or litellm.api_base - or get_secret("OPENAI_API_BASE") - or "https://api.openai.com/v1" - ) - openai.organization = ( - organization - or litellm.organization - or get_secret("OPENAI_ORGANIZATION") - or None # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105 - ) - # set API KEY - api_key = ( - api_key - or litellm.api_key # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there - or litellm.openai_key - or get_secret("OPENAI_API_KEY") - ) - - headers = headers or litellm.headers - - 
## LOAD CONFIG - if set - config = litellm.OpenAIConfig.get_config() - for k, v in config.items(): - if ( - k not in optional_params - ): # completion(top_k=3) > openai_config(top_k=3) <- allows for dynamic variables to be passed in - optional_params[k] = v - - ## COMPLETION CALL + ################################################################################ + # Common Extra information needed for all providers + # We pass num retries, api_base, vertex_deployment etc to the exception here + ################################################################################ + extra_information = "" try: - response = openai_chat_completions.completion( - model=model, - messages=messages, - headers=headers, - model_response=model_response, - print_verbose=print_verbose, - api_key=api_key, - api_base=api_base, - acompletion=acompletion, - logging_obj=logging, - optional_params=optional_params, - litellm_params=litellm_params, - logger_fn=logger_fn, - timeout=timeout, # type: ignore - custom_prompt_dict=custom_prompt_dict, - client=client, # pass AsyncOpenAI, OpenAI client - organization=organization, - custom_llm_provider=custom_llm_provider, + _api_base = litellm.get_api_base( + model=model, optional_params=extra_kwargs ) - except Exception as e: - ## LOGGING - log the original exception returned - logging.post_call( - input=messages, - api_key=api_key, - original_response=str(e), - additional_args={"headers": headers}, + messages = litellm.get_first_chars_messages(kwargs=completion_kwargs) + _vertex_project = extra_kwargs.get("vertex_project") + _vertex_location = extra_kwargs.get("vertex_location") + _metadata = extra_kwargs.get("metadata", {}) or {} + _model_group = _metadata.get("model_group") + _deployment = _metadata.get("deployment") + extra_information = f"\nModel: {model}" + if _api_base: + extra_information += f"\nAPI Base: {_api_base}" + if messages and len(messages) > 0: + extra_information += f"\nMessages: {messages}" + + if _model_group is not None: + extra_information += f"\nmodel_group: {_model_group}\n" + if _deployment is not None: + extra_information += f"\ndeployment: {_deployment}\n" + if _vertex_project is not None: + extra_information += f"\nvertex_project: {_vertex_project}\n" + if _vertex_location is not None: + extra_information += f"\nvertex_location: {_vertex_location}\n" + + # on litellm proxy add key name + team to exceptions + extra_information = _add_key_name_and_team_to_alert( + request_info=extra_information, metadata=_metadata ) - raise e - - if optional_params.get("stream", False): - ## LOGGING - logging.post_call( - input=messages, - api_key=api_key, - original_response=response, - additional_args={"headers": headers}, - ) - elif ( - custom_llm_provider == "text-completion-openai" - or "ft:babbage-002" in model - or "ft:davinci-002" in model # support for finetuned completion models - ): - openai.api_type = "openai" - - api_base = ( - api_base - or litellm.api_base - or get_secret("OPENAI_API_BASE") - or "https://api.openai.com/v1" - ) - - openai.api_version = None - # set API KEY - - api_key = ( - api_key - or litellm.api_key - or litellm.openai_key - or get_secret("OPENAI_API_KEY") - ) - - headers = headers or litellm.headers - - ## LOAD CONFIG - if set - config = litellm.OpenAITextCompletionConfig.get_config() - for k, v in config.items(): - if ( - k not in optional_params - ): # completion(top_k=3) > openai_text_config(top_k=3) <- allows for dynamic variables to be passed in - optional_params[k] = v - if litellm.organization: - openai.organization = 
litellm.organization - - if ( - len(messages) > 0 - and "content" in messages[0] - and type(messages[0]["content"]) == list - ): - # text-davinci-003 can accept a string or array, if it's an array, assume the array is set in messages[0]['content'] - # https://platform.openai.com/docs/api-reference/completions/create - prompt = messages[0]["content"] - else: - prompt = " ".join([message["content"] for message in messages]) # type: ignore - - ## COMPLETION CALL - _response = openai_text_completions.completion( - model=model, - messages=messages, - model_response=model_response, - print_verbose=print_verbose, - api_key=api_key, - api_base=api_base, - acompletion=acompletion, - client=client, # pass AsyncOpenAI, OpenAI client - logging_obj=logging, - optional_params=optional_params, - litellm_params=litellm_params, - logger_fn=logger_fn, - timeout=timeout, # type: ignore - ) - - if ( - optional_params.get("stream", False) == False - and acompletion == False - and text_completion == False - ): - # convert to chat completion response - _response = litellm.OpenAITextCompletionConfig().convert_to_chat_model_response_object( - response_object=_response, model_response_object=model_response - ) - - if optional_params.get("stream", False) or acompletion == True: - ## LOGGING - logging.post_call( - input=messages, - api_key=api_key, - original_response=_response, - additional_args={"headers": headers}, - ) - response = _response - elif ( - "replicate" in model - or custom_llm_provider == "replicate" - or model in litellm.replicate_models - ): - # Setting the relevant API KEY for replicate, replicate defaults to using os.environ.get("REPLICATE_API_TOKEN") - replicate_key = None - replicate_key = ( - api_key - or litellm.replicate_key - or litellm.api_key - or get_secret("REPLICATE_API_KEY") - or get_secret("REPLICATE_API_TOKEN") - ) - - api_base = ( - api_base - or litellm.api_base - or get_secret("REPLICATE_API_BASE") - or "https://api.replicate.com/v1" - ) - - custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict - - model_response = replicate.completion( - model=model, - messages=messages, - api_base=api_base, - model_response=model_response, - print_verbose=print_verbose, - optional_params=optional_params, - litellm_params=litellm_params, - logger_fn=logger_fn, - encoding=encoding, # for calculating input/output tokens - api_key=replicate_key, - logging_obj=logging, - custom_prompt_dict=custom_prompt_dict, - ) - if "stream" in optional_params and optional_params["stream"] == True: - # don't try to access stream object, - model_response = CustomStreamWrapper(model_response, model, logging_obj=logging, custom_llm_provider="replicate") # type: ignore - - if optional_params.get("stream", False) or acompletion == True: - ## LOGGING - logging.post_call( - input=messages, - api_key=replicate_key, - original_response=model_response, - ) - - response = model_response - - elif custom_llm_provider == "anthropic": - api_key = ( - api_key - or litellm.anthropic_key - or litellm.api_key - or os.environ.get("ANTHROPIC_API_KEY") - ) - custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict - - if (model == "claude-2") or (model == "claude-instant-1"): - # call anthropic /completion, only use this route for claude-2, claude-instant-1 - api_base = ( - api_base - or litellm.api_base - or get_secret("ANTHROPIC_API_BASE") - or "https://api.anthropic.com/v1/complete" - ) - response = anthropic_text_completions.completion( - model=model, - messages=messages, - api_base=api_base, - 
acompletion=acompletion, - custom_prompt_dict=litellm.custom_prompt_dict, - model_response=model_response, - print_verbose=print_verbose, - optional_params=optional_params, - litellm_params=litellm_params, - logger_fn=logger_fn, - encoding=encoding, # for calculating input/output tokens - api_key=api_key, - logging_obj=logging, - headers=headers, - ) - else: - # call /messages - # default route for all anthropic models - api_base = ( - api_base - or litellm.api_base - or get_secret("ANTHROPIC_API_BASE") - or "https://api.anthropic.com/v1/messages" - ) - response = anthropic_chat_completions.completion( - model=model, - messages=messages, - api_base=api_base, - acompletion=acompletion, - custom_prompt_dict=litellm.custom_prompt_dict, - model_response=model_response, - print_verbose=print_verbose, - optional_params=optional_params, - litellm_params=litellm_params, - logger_fn=logger_fn, - encoding=encoding, # for calculating input/output tokens - api_key=api_key, - logging_obj=logging, - headers=headers, - ) - if optional_params.get("stream", False) or acompletion == True: - ## LOGGING - logging.post_call( - input=messages, - api_key=api_key, - original_response=response, - ) - response = response - elif custom_llm_provider == "nlp_cloud": - nlp_cloud_key = ( - api_key - or litellm.nlp_cloud_key - or get_secret("NLP_CLOUD_API_KEY") - or litellm.api_key - ) - - api_base = ( - api_base - or litellm.api_base - or get_secret("NLP_CLOUD_API_BASE") - or "https://api.nlpcloud.io/v1/gpu/" - ) - - response = nlp_cloud.completion( - model=model, - messages=messages, - api_base=api_base, - model_response=model_response, - print_verbose=print_verbose, - optional_params=optional_params, - litellm_params=litellm_params, - logger_fn=logger_fn, - encoding=encoding, - api_key=nlp_cloud_key, - logging_obj=logging, - ) - - if "stream" in optional_params and optional_params["stream"] == True: - # don't try to access stream object, - response = CustomStreamWrapper( - response, - model, - custom_llm_provider="nlp_cloud", - logging_obj=logging, - ) - - if optional_params.get("stream", False) or acompletion == True: - ## LOGGING - logging.post_call( - input=messages, - api_key=api_key, - original_response=response, - ) - - response = response - elif custom_llm_provider == "aleph_alpha": - aleph_alpha_key = ( - api_key - or litellm.aleph_alpha_key - or get_secret("ALEPH_ALPHA_API_KEY") - or get_secret("ALEPHALPHA_API_KEY") - or litellm.api_key - ) - - api_base = ( - api_base - or litellm.api_base - or get_secret("ALEPH_ALPHA_API_BASE") - or "https://api.aleph-alpha.com/complete" - ) - - model_response = aleph_alpha.completion( - model=model, - messages=messages, - api_base=api_base, - model_response=model_response, - print_verbose=print_verbose, - optional_params=optional_params, - litellm_params=litellm_params, - logger_fn=logger_fn, - encoding=encoding, - default_max_tokens_to_sample=litellm.max_tokens, - api_key=aleph_alpha_key, - logging_obj=logging, # model call logging done inside the class as we make need to modify I/O to fit aleph alpha's requirements - ) - - if "stream" in optional_params and optional_params["stream"] == True: - # don't try to access stream object, - response = CustomStreamWrapper( - model_response, - model, - custom_llm_provider="aleph_alpha", - logging_obj=logging, - ) - return response - response = model_response - elif custom_llm_provider == "cohere": - cohere_key = ( - api_key - or litellm.cohere_key - or get_secret("COHERE_API_KEY") - or get_secret("CO_API_KEY") - or litellm.api_key - 
) - - api_base = ( - api_base - or litellm.api_base - or get_secret("COHERE_API_BASE") - or "https://api.cohere.ai/v1/generate" - ) - - model_response = cohere.completion( - model=model, - messages=messages, - api_base=api_base, - model_response=model_response, - print_verbose=print_verbose, - optional_params=optional_params, - litellm_params=litellm_params, - logger_fn=logger_fn, - encoding=encoding, - api_key=cohere_key, - logging_obj=logging, # model call logging done inside the class as we make need to modify I/O to fit aleph alpha's requirements - ) - - if "stream" in optional_params and optional_params["stream"] == True: - # don't try to access stream object, - response = CustomStreamWrapper( - model_response, - model, - custom_llm_provider="cohere", - logging_obj=logging, - ) - return response - response = model_response - elif custom_llm_provider == "cohere_chat": - cohere_key = ( - api_key - or litellm.cohere_key - or get_secret("COHERE_API_KEY") - or get_secret("CO_API_KEY") - or litellm.api_key - ) - - api_base = ( - api_base - or litellm.api_base - or get_secret("COHERE_API_BASE") - or "https://api.cohere.ai/v1/chat" - ) - - model_response = cohere_chat.completion( - model=model, - messages=messages, - api_base=api_base, - model_response=model_response, - print_verbose=print_verbose, - optional_params=optional_params, - litellm_params=litellm_params, - logger_fn=logger_fn, - encoding=encoding, - api_key=cohere_key, - logging_obj=logging, # model call logging done inside the class as we make need to modify I/O to fit aleph alpha's requirements - ) - - if "stream" in optional_params and optional_params["stream"] == True: - # don't try to access stream object, - response = CustomStreamWrapper( - model_response, - model, - custom_llm_provider="cohere_chat", - logging_obj=logging, - ) - return response - response = model_response - elif custom_llm_provider == "maritalk": - maritalk_key = ( - api_key - or litellm.maritalk_key - or get_secret("MARITALK_API_KEY") - or litellm.api_key - ) - - api_base = ( - api_base - or litellm.api_base - or get_secret("MARITALK_API_BASE") - or "https://chat.maritaca.ai/api/chat/inference" - ) - - model_response = maritalk.completion( - model=model, - messages=messages, - api_base=api_base, - model_response=model_response, - print_verbose=print_verbose, - optional_params=optional_params, - litellm_params=litellm_params, - logger_fn=logger_fn, - encoding=encoding, - api_key=maritalk_key, - logging_obj=logging, - ) - - if "stream" in optional_params and optional_params["stream"] == True: - # don't try to access stream object, - response = CustomStreamWrapper( - model_response, - model, - custom_llm_provider="maritalk", - logging_obj=logging, - ) - return response - response = model_response - elif custom_llm_provider == "huggingface": - custom_llm_provider = "huggingface" - huggingface_key = ( - api_key - or litellm.huggingface_key - or os.environ.get("HF_TOKEN") - or os.environ.get("HUGGINGFACE_API_KEY") - or litellm.api_key - ) - hf_headers = headers or litellm.headers - - custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict - model_response = huggingface.completion( - model=model, - messages=messages, - api_base=api_base, # type: ignore - headers=hf_headers, - model_response=model_response, - print_verbose=print_verbose, - optional_params=optional_params, - litellm_params=litellm_params, - logger_fn=logger_fn, - encoding=encoding, - api_key=huggingface_key, - acompletion=acompletion, - logging_obj=logging, - 
custom_prompt_dict=custom_prompt_dict, - timeout=timeout, # type: ignore - ) - if ( - "stream" in optional_params - and optional_params["stream"] == True - and acompletion is False - ): - # don't try to access stream object, - response = CustomStreamWrapper( - model_response, - model, - custom_llm_provider="huggingface", - logging_obj=logging, - ) - return response - response = model_response - elif custom_llm_provider == "oobabooga": - custom_llm_provider = "oobabooga" - model_response = oobabooga.completion( - model=model, - messages=messages, - model_response=model_response, - api_base=api_base, # type: ignore - print_verbose=print_verbose, - optional_params=optional_params, - litellm_params=litellm_params, - api_key=None, - logger_fn=logger_fn, - encoding=encoding, - logging_obj=logging, - ) - if "stream" in optional_params and optional_params["stream"] == True: - # don't try to access stream object, - response = CustomStreamWrapper( - model_response, - model, - custom_llm_provider="oobabooga", - logging_obj=logging, - ) - return response - response = model_response - elif custom_llm_provider == "openrouter": - api_base = api_base or litellm.api_base or "https://openrouter.ai/api/v1" - - api_key = ( - api_key - or litellm.api_key - or litellm.openrouter_key - or get_secret("OPENROUTER_API_KEY") - or get_secret("OR_API_KEY") - ) - - openrouter_site_url = get_secret("OR_SITE_URL") or "https://litellm.ai" - - openrouter_app_name = get_secret("OR_APP_NAME") or "liteLLM" - - headers = ( - headers - or litellm.headers - or { - "HTTP-Referer": openrouter_site_url, - "X-Title": openrouter_app_name, - } - ) - - ## Load Config - config = openrouter.OpenrouterConfig.get_config() - for k, v in config.items(): - if k == "extra_body": - # we use openai 'extra_body' to pass openrouter specific params - transforms, route, models - if "extra_body" in optional_params: - optional_params[k].update(v) - else: - optional_params[k] = v - elif k not in optional_params: - optional_params[k] = v - - data = {"model": model, "messages": messages, **optional_params} - - ## COMPLETION CALL - response = openai_chat_completions.completion( - model=model, - messages=messages, - headers=headers, - api_key=api_key, - api_base=api_base, - model_response=model_response, - print_verbose=print_verbose, - optional_params=optional_params, - litellm_params=litellm_params, - logger_fn=logger_fn, - logging_obj=logging, - acompletion=acompletion, - timeout=timeout, # type: ignore - ) - ## LOGGING - logging.post_call( - input=messages, api_key=openai.api_key, original_response=response - ) - elif ( - custom_llm_provider == "together_ai" - or ("togethercomputer" in model) - or (model in litellm.together_ai_models) - ): - """ - Deprecated. 
We now do together ai calls via the openai client - https://docs.together.ai/docs/openai-api-compatibility - """ - custom_llm_provider = "together_ai" - together_ai_key = ( - api_key - or litellm.togetherai_api_key - or get_secret("TOGETHER_AI_TOKEN") - or get_secret("TOGETHERAI_API_KEY") - or litellm.api_key - ) - - api_base = ( - api_base - or litellm.api_base - or get_secret("TOGETHERAI_API_BASE") - or "https://api.together.xyz/inference" - ) - - custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict - - model_response = together_ai.completion( - model=model, - messages=messages, - api_base=api_base, - model_response=model_response, - print_verbose=print_verbose, - optional_params=optional_params, - litellm_params=litellm_params, - logger_fn=logger_fn, - encoding=encoding, - api_key=together_ai_key, - logging_obj=logging, - custom_prompt_dict=custom_prompt_dict, - ) - if ( - "stream_tokens" in optional_params - and optional_params["stream_tokens"] == True - ): - # don't try to access stream object, - response = CustomStreamWrapper( - model_response, - model, - custom_llm_provider="together_ai", - logging_obj=logging, - ) - return response - response = model_response - elif custom_llm_provider == "palm": - palm_api_key = api_key or get_secret("PALM_API_KEY") or litellm.api_key - - # palm does not support streaming as yet :( - model_response = palm.completion( - model=model, - messages=messages, - model_response=model_response, - print_verbose=print_verbose, - optional_params=optional_params, - litellm_params=litellm_params, - logger_fn=logger_fn, - encoding=encoding, - api_key=palm_api_key, - logging_obj=logging, - ) - # fake palm streaming - if "stream" in optional_params and optional_params["stream"] == True: - # fake streaming for palm - resp_string = model_response["choices"][0]["message"]["content"] - response = CustomStreamWrapper( - resp_string, model, custom_llm_provider="palm", logging_obj=logging - ) - return response - response = model_response - elif custom_llm_provider == "gemini": - gemini_api_key = ( - api_key - or get_secret("GEMINI_API_KEY") - or get_secret("PALM_API_KEY") # older palm api key should also work - or litellm.api_key - ) - - # palm does not support streaming as yet :( - model_response = gemini.completion( - model=model, - messages=messages, - model_response=model_response, - print_verbose=print_verbose, - optional_params=optional_params, - litellm_params=litellm_params, - logger_fn=logger_fn, - encoding=encoding, - api_key=gemini_api_key, - logging_obj=logging, - acompletion=acompletion, - custom_prompt_dict=custom_prompt_dict, - ) - if ( - "stream" in optional_params - and optional_params["stream"] == True - and acompletion == False - ): - response = CustomStreamWrapper( - iter(model_response), - model, - custom_llm_provider="gemini", - logging_obj=logging, - ) - return response - response = model_response - elif custom_llm_provider == "vertex_ai": - vertex_ai_project = ( - optional_params.pop("vertex_project", None) - or optional_params.pop("vertex_ai_project", None) - or litellm.vertex_project - or get_secret("VERTEXAI_PROJECT") - ) - vertex_ai_location = ( - optional_params.pop("vertex_location", None) - or optional_params.pop("vertex_ai_location", None) - or litellm.vertex_location - or get_secret("VERTEXAI_LOCATION") - ) - vertex_credentials = ( - optional_params.pop("vertex_credentials", None) - or optional_params.pop("vertex_ai_credentials", None) - or get_secret("VERTEXAI_CREDENTIALS") - ) - new_params = deepcopy(optional_params) - 
if "claude-3" in model: - model_response = vertex_ai_anthropic.completion( - model=model, - messages=messages, - model_response=model_response, - print_verbose=print_verbose, - optional_params=new_params, - litellm_params=litellm_params, - logger_fn=logger_fn, - encoding=encoding, - vertex_location=vertex_ai_location, - vertex_project=vertex_ai_project, - vertex_credentials=vertex_credentials, - logging_obj=logging, - acompletion=acompletion, - ) - else: - model_response = vertex_ai.completion( - model=model, - messages=messages, - model_response=model_response, - print_verbose=print_verbose, - optional_params=new_params, - litellm_params=litellm_params, - logger_fn=logger_fn, - encoding=encoding, - vertex_location=vertex_ai_location, - vertex_project=vertex_ai_project, - vertex_credentials=vertex_credentials, - logging_obj=logging, - acompletion=acompletion, - ) - - if ( - "stream" in optional_params - and optional_params["stream"] == True - and acompletion == False - ): - response = CustomStreamWrapper( - model_response, - model, - custom_llm_provider="vertex_ai", - logging_obj=logging, - ) - return response - response = model_response - elif custom_llm_provider == "predibase": - tenant_id = ( - optional_params.pop("tenant_id", None) - or optional_params.pop("predibase_tenant_id", None) - or litellm.predibase_tenant_id - or get_secret("PREDIBASE_TENANT_ID") - ) - - api_base = ( - optional_params.pop("api_base", None) - or optional_params.pop("base_url", None) - or litellm.api_base - or get_secret("PREDIBASE_API_BASE") - ) - - api_key = ( - api_key - or litellm.api_key - or litellm.predibase_key - or get_secret("PREDIBASE_API_KEY") - ) - - model_response = predibase_chat_completions.completion( - model=model, - messages=messages, - model_response=model_response, - print_verbose=print_verbose, - optional_params=optional_params, - litellm_params=litellm_params, - logger_fn=logger_fn, - encoding=encoding, - logging_obj=logging, - acompletion=acompletion, - api_base=api_base, - custom_prompt_dict=custom_prompt_dict, - api_key=api_key, - tenant_id=tenant_id, - ) - - if ( - "stream" in optional_params - and optional_params["stream"] == True - and acompletion == False - ): - return response - response = model_response - elif custom_llm_provider == "ai21": - custom_llm_provider = "ai21" - ai21_key = ( - api_key - or litellm.ai21_key - or os.environ.get("AI21_API_KEY") - or litellm.api_key - ) - - api_base = ( - api_base - or litellm.api_base - or get_secret("AI21_API_BASE") - or "https://api.ai21.com/studio/v1/" - ) - - model_response = ai21.completion( - model=model, - messages=messages, - api_base=api_base, - model_response=model_response, - print_verbose=print_verbose, - optional_params=optional_params, - litellm_params=litellm_params, - logger_fn=logger_fn, - encoding=encoding, - api_key=ai21_key, - logging_obj=logging, - ) - - if "stream" in optional_params and optional_params["stream"] == True: - # don't try to access stream object, - response = CustomStreamWrapper( - model_response, - model, - custom_llm_provider="ai21", - logging_obj=logging, - ) - return response - - ## RESPONSE OBJECT - response = model_response - elif custom_llm_provider == "sagemaker": - # boto3 reads keys from .env - model_response = sagemaker.completion( - model=model, - messages=messages, - model_response=model_response, - print_verbose=print_verbose, - optional_params=optional_params, - litellm_params=litellm_params, - custom_prompt_dict=custom_prompt_dict, - hf_model_name=hf_model_name, - logger_fn=logger_fn, - 
encoding=encoding, - logging_obj=logging, - acompletion=acompletion, - ) - if ( - "stream" in optional_params and optional_params["stream"] == True - ): ## [BETA] - print_verbose(f"ENTERS SAGEMAKER CUSTOMSTREAMWRAPPER") - from .llms.sagemaker import TokenIterator - - tokenIterator = TokenIterator(model_response, acompletion=acompletion) - response = CustomStreamWrapper( - completion_stream=tokenIterator, - model=model, - custom_llm_provider="sagemaker", - logging_obj=logging, - ) - ## LOGGING - logging.post_call( - input=messages, - api_key=None, - original_response=response, - ) - return response - - ## RESPONSE OBJECT - response = model_response - elif custom_llm_provider == "bedrock": - # boto3 reads keys from .env - custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict - response = bedrock.completion( - model=model, - messages=messages, - custom_prompt_dict=litellm.custom_prompt_dict, - model_response=model_response, - print_verbose=print_verbose, - optional_params=optional_params, - litellm_params=litellm_params, - logger_fn=logger_fn, - encoding=encoding, - logging_obj=logging, - extra_headers=extra_headers, - timeout=timeout, - ) - - if ( - "stream" in optional_params - and optional_params["stream"] == True - and not isinstance(response, CustomStreamWrapper) - ): - # don't try to access stream object, - if "ai21" in model: - response = CustomStreamWrapper( - response, - model, - custom_llm_provider="bedrock", - logging_obj=logging, - ) - else: - response = CustomStreamWrapper( - iter(response), - model, - custom_llm_provider="bedrock", - logging_obj=logging, - ) - - if optional_params.get("stream", False): - ## LOGGING - logging.post_call( - input=messages, - api_key=None, - original_response=response, - ) - - ## RESPONSE OBJECT - response = response - elif custom_llm_provider == "watsonx": - custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict - response = watsonx.IBMWatsonXAI().completion( - model=model, - messages=messages, - custom_prompt_dict=custom_prompt_dict, - model_response=model_response, - print_verbose=print_verbose, - optional_params=optional_params, - litellm_params=litellm_params, # type: ignore - logger_fn=logger_fn, - encoding=encoding, - logging_obj=logging, - timeout=timeout, # type: ignore - ) - if ( - "stream" in optional_params - and optional_params["stream"] == True - and not isinstance(response, CustomStreamWrapper) - ): - # don't try to access stream object, - response = CustomStreamWrapper( - iter(response), - model, - custom_llm_provider="watsonx", - logging_obj=logging, - ) - - if optional_params.get("stream", False): - ## LOGGING - logging.post_call( - input=messages, - api_key=None, - original_response=response, - ) - ## RESPONSE OBJECT - response = response - elif custom_llm_provider == "vllm": - custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict - model_response = vllm.completion( - model=model, - messages=messages, - custom_prompt_dict=custom_prompt_dict, - model_response=model_response, - print_verbose=print_verbose, - optional_params=optional_params, - litellm_params=litellm_params, - logger_fn=logger_fn, - encoding=encoding, - logging_obj=logging, - ) - - if ( - "stream" in optional_params and optional_params["stream"] == True - ): ## [BETA] - # don't try to access stream object, - response = CustomStreamWrapper( - model_response, - model, - custom_llm_provider="vllm", - logging_obj=logging, - ) - return response - - ## RESPONSE OBJECT - response = model_response - elif custom_llm_provider == 
"ollama": - api_base = ( - litellm.api_base - or api_base - or get_secret("OLLAMA_API_BASE") - or "http://localhost:11434" - ) - custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict - if model in custom_prompt_dict: - # check if the model has a registered custom prompt - model_prompt_details = custom_prompt_dict[model] - prompt = custom_prompt( - role_dict=model_prompt_details["roles"], - initial_prompt_value=model_prompt_details["initial_prompt_value"], - final_prompt_value=model_prompt_details["final_prompt_value"], - messages=messages, - ) - else: - prompt = prompt_factory( - model=model, - messages=messages, - custom_llm_provider=custom_llm_provider, - ) - if isinstance(prompt, dict): - # for multimode models - ollama/llava prompt_factory returns a dict { - # "prompt": prompt, - # "images": images - # } - prompt, images = prompt["prompt"], prompt["images"] - optional_params["images"] = images - - ## LOGGING - generator = ollama.get_ollama_response( - api_base, - model, - prompt, - optional_params, - logging_obj=logging, - acompletion=acompletion, - model_response=model_response, - encoding=encoding, - ) - if acompletion is True or optional_params.get("stream", False) == True: - return generator - - response = generator - elif custom_llm_provider == "ollama_chat": - api_base = ( - litellm.api_base - or api_base - or get_secret("OLLAMA_API_BASE") - or "http://localhost:11434" - ) - - api_key = ( - api_key - or litellm.ollama_key - or os.environ.get("OLLAMA_API_KEY") - or litellm.api_key - ) - ## LOGGING - generator = ollama_chat.get_ollama_response( - api_base, - api_key, - model, - messages, - optional_params, - logging_obj=logging, - acompletion=acompletion, - model_response=model_response, - encoding=encoding, - ) - if acompletion is True or optional_params.get("stream", False) == True: - return generator - - response = generator - elif custom_llm_provider == "cloudflare": - api_key = ( - api_key - or litellm.cloudflare_api_key - or litellm.api_key - or get_secret("CLOUDFLARE_API_KEY") - ) - account_id = get_secret("CLOUDFLARE_ACCOUNT_ID") - api_base = ( - api_base - or litellm.api_base - or get_secret("CLOUDFLARE_API_BASE") - or f"https://api.cloudflare.com/client/v4/accounts/{account_id}/ai/run/" - ) - - custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict - response = cloudflare.completion( - model=model, - messages=messages, - api_base=api_base, - custom_prompt_dict=litellm.custom_prompt_dict, - model_response=model_response, - print_verbose=print_verbose, - optional_params=optional_params, - litellm_params=litellm_params, - logger_fn=logger_fn, - encoding=encoding, # for calculating input/output tokens - api_key=api_key, - logging_obj=logging, - ) - if "stream" in optional_params and optional_params["stream"] == True: - # don't try to access stream object, - response = CustomStreamWrapper( - response, - model, - custom_llm_provider="cloudflare", - logging_obj=logging, - ) - - if optional_params.get("stream", False) or acompletion == True: - ## LOGGING - logging.post_call( - input=messages, - api_key=api_key, - original_response=response, - ) - response = response - elif ( - custom_llm_provider == "baseten" - or litellm.api_base == "https://app.baseten.co" - ): - custom_llm_provider = "baseten" - baseten_key = ( - api_key - or litellm.baseten_key - or os.environ.get("BASETEN_API_KEY") - or litellm.api_key - ) - - model_response = baseten.completion( - model=model, - messages=messages, - model_response=model_response, - print_verbose=print_verbose, 
- optional_params=optional_params, - litellm_params=litellm_params, - logger_fn=logger_fn, - encoding=encoding, - api_key=baseten_key, - logging_obj=logging, - ) - if inspect.isgenerator(model_response) or ( - "stream" in optional_params and optional_params["stream"] == True - ): - # don't try to access stream object, - response = CustomStreamWrapper( - model_response, - model, - custom_llm_provider="baseten", - logging_obj=logging, - ) - return response - response = model_response - elif custom_llm_provider == "petals" or model in litellm.petals_models: - api_base = api_base or litellm.api_base - - custom_llm_provider = "petals" - stream = optional_params.pop("stream", False) - model_response = petals.completion( - model=model, - messages=messages, - api_base=api_base, - model_response=model_response, - print_verbose=print_verbose, - optional_params=optional_params, - litellm_params=litellm_params, - logger_fn=logger_fn, - encoding=encoding, - logging_obj=logging, - ) - if stream == True: ## [BETA] - # Fake streaming for petals - resp_string = model_response["choices"][0]["message"]["content"] - response = CustomStreamWrapper( - resp_string, - model, - custom_llm_provider="petals", - logging_obj=logging, - ) - return response - response = model_response - elif custom_llm_provider == "custom": - import requests - - url = litellm.api_base or api_base or "" - if url == None or url == "": - raise ValueError( - "api_base not set. Set api_base or litellm.api_base for custom endpoints" - ) - - """ - assume input to custom LLM api bases follow this format: - resp = requests.post( - api_base, - json={ - 'model': 'meta-llama/Llama-2-13b-hf', # model name - 'params': { - 'prompt': ["The capital of France is P"], - 'max_tokens': 32, - 'temperature': 0.7, - 'top_p': 1.0, - 'top_k': 40, - } - } - ) - - """ - prompt = " ".join([message["content"] for message in messages]) # type: ignore - resp = requests.post( - url, - json={ - "model": model, - "params": { - "prompt": [prompt], - "max_tokens": max_tokens, - "temperature": temperature, - "top_p": top_p, - "top_k": kwargs.get("top_k", 40), - }, - }, - ) - response_json = resp.json() - """ - assume all responses from custom api_bases of this format: - { - 'data': [ - { - 'prompt': 'The capital of France is P', - 'output': ['The capital of France is PARIS.\nThe capital of France is PARIS.\nThe capital of France is PARIS.\nThe capital of France is PARIS.\nThe capital of France is PARIS.\nThe capital of France is PARIS.\nThe capital of France is PARIS.\nThe capital of France is PARIS.\nThe capital of France is PARIS.\nThe capital of France is PARIS.\nThe capital of France is PARIS.\nThe capital of France is PARIS.\nThe capital of France is PARIS.\nThe capital of France'], - 'params': {'temperature': 0.7, 'top_k': 40, 'top_p': 1}}], - 'message': 'ok' - } - ] - } - """ - string_response = response_json["data"][0]["output"][0] - ## RESPONSE OBJECT - model_response["choices"][0]["message"]["content"] = string_response - model_response["created"] = int(time.time()) - model_response["model"] = model - response = model_response - else: - raise ValueError( - f"Unable to map your input to a model. 
Check your input - {args}" - ) - return response - except Exception as e: - ## Map to OpenAI Exception -> raise exception_type( - model=model, - custom_llm_provider=custom_llm_provider, - original_exception=e, - completion_kwargs=args, - extra_kwargs=kwargs, - ) - -../main.py:2287: -_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ - -model = 'llama-3-8b-instruct', original_exception = KeyError('stream') -custom_llm_provider = 'predibase' -completion_kwargs = {'acompletion': False, 'api_base': None, 'api_key': 'pb_Qg9YbQo7UqqHdu0ozxN_aw', 'api_version': None, ...} -extra_kwargs = {'api_base': 'https://serving.app.predibase.com', 'litellm_call_id': 'cf0ea464-1b45-4473-8e55-6bf6809df7a7', 'litellm_logging_obj': , 'tenant_id': 'c4768f95'} - - def exception_type( - model, - original_exception, - custom_llm_provider, - completion_kwargs={}, - extra_kwargs={}, - ): - global user_logger_fn, liteDebuggerClient - exception_mapping_worked = False - if litellm.suppress_debug_info is False: - print() # noqa - print( # noqa - "\033[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new\033[0m" # noqa - ) # noqa - print( # noqa - "LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'." # noqa - ) # noqa - print() # noqa - try: - if model: - error_str = str(original_exception) - if isinstance(original_exception, BaseException): - exception_type = type(original_exception).__name__ - else: - exception_type = "" - - ################################################################################ - # Common Extra information needed for all providers - # We pass num retries, api_base, vertex_deployment etc to the exception here - ################################################################################ - - _api_base = litellm.get_api_base(model=model, optional_params=extra_kwargs) - messages = litellm.get_first_chars_messages(kwargs=completion_kwargs) - _vertex_project = extra_kwargs.get("vertex_project") - _vertex_location = extra_kwargs.get("vertex_location") - _metadata = extra_kwargs.get("metadata", {}) or {} - _model_group = _metadata.get("model_group") - _deployment = _metadata.get("deployment") - extra_information = f"\nModel: {model}" - if _api_base: - extra_information += f"\nAPI Base: {_api_base}" - if messages and len(messages) > 0: - extra_information += f"\nMessages: {messages}" - - if _model_group is not None: - extra_information += f"\nmodel_group: {_model_group}\n" - if _deployment is not None: - extra_information += f"\ndeployment: {_deployment}\n" - if _vertex_project is not None: - extra_information += f"\nvertex_project: {_vertex_project}\n" - if _vertex_location is not None: - extra_information += f"\nvertex_location: {_vertex_location}\n" - - # on litellm proxy add key name + team to exceptions - extra_information = _add_key_name_and_team_to_alert( - request_info=extra_information, metadata=_metadata - ) + except: + # DO NOT LET this Block raising the original exception + pass ################################################################################ # End of Common Extra information Needed for all providers @@ -4110,9 +2600,10 @@ extra_kwargs = {'api_base': 'https://serving.app.predibase.com', 'litellm_call_i if "Request Timeout Error" in error_str or "Request timed out" in error_str: exception_mapping_worked = True raise Timeout( - message=f"APITimeoutError - Request timed out. {extra_information} \n error_str: {error_str}", + message=f"APITimeoutError - Request timed out. 
\nerror_str: {error_str}", model=model, llm_provider=custom_llm_provider, + litellm_debug_info=extra_information, ) if ( @@ -4139,16 +2630,14 @@ extra_kwargs = {'api_base': 'https://serving.app.predibase.com', 'litellm_call_i + "Exception" ) - if ( - "This model's maximum context length is" in error_str - or "Request too large" in error_str - ): + if "This model's maximum context length is" in error_str: exception_mapping_worked = True raise ContextWindowExceededError( - message=f"{exception_provider} - {message} {extra_information}", + message=f"{exception_provider} - {message}", llm_provider=custom_llm_provider, model=model, response=original_exception.response, + litellm_debug_info=extra_information, ) elif ( "invalid_request_error" in error_str @@ -4156,10 +2645,11 @@ extra_kwargs = {'api_base': 'https://serving.app.predibase.com', 'litellm_call_i ): exception_mapping_worked = True raise NotFoundError( - message=f"{exception_provider} - {message} {extra_information}", + message=f"{exception_provider} - {message}", llm_provider=custom_llm_provider, model=model, response=original_exception.response, + litellm_debug_info=extra_information, ) elif ( "invalid_request_error" in error_str @@ -4167,10 +2657,11 @@ extra_kwargs = {'api_base': 'https://serving.app.predibase.com', 'litellm_call_i ): exception_mapping_worked = True raise ContentPolicyViolationError( - message=f"{exception_provider} - {message} {extra_information}", + message=f"{exception_provider} - {message}", llm_provider=custom_llm_provider, model=model, response=original_exception.response, + litellm_debug_info=extra_information, ) elif ( "invalid_request_error" in error_str @@ -4178,10 +2669,19 @@ extra_kwargs = {'api_base': 'https://serving.app.predibase.com', 'litellm_call_i ): exception_mapping_worked = True raise BadRequestError( - message=f"{exception_provider} - {message} {extra_information}", + message=f"{exception_provider} - {message}", llm_provider=custom_llm_provider, model=model, response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif "Request too large" in error_str: + raise RateLimitError( + message=f"{exception_provider} - {message}", + model=model, + llm_provider=custom_llm_provider, + response=original_exception.response, + litellm_debug_info=extra_information, ) elif ( "The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable" @@ -4189,10 +2689,11 @@ extra_kwargs = {'api_base': 'https://serving.app.predibase.com', 'litellm_call_i ): exception_mapping_worked = True raise AuthenticationError( - message=f"{exception_provider} - {message} {extra_information}", + message=f"{exception_provider} - {message}", llm_provider=custom_llm_provider, model=model, response=original_exception.response, + litellm_debug_info=extra_information, ) elif "Mistral API raised a streaming error" in error_str: exception_mapping_worked = True @@ -4201,82 +2702,92 @@ extra_kwargs = {'api_base': 'https://serving.app.predibase.com', 'litellm_call_i ) raise APIError( status_code=500, - message=f"{exception_provider} - {message} {extra_information}", + message=f"{exception_provider} - {message}", llm_provider=custom_llm_provider, model=model, request=_request, + litellm_debug_info=extra_information, ) elif hasattr(original_exception, "status_code"): exception_mapping_worked = True if original_exception.status_code == 401: exception_mapping_worked = True raise AuthenticationError( - message=f"{exception_provider} - {message} 
{extra_information}", + message=f"{exception_provider} - {message}", llm_provider=custom_llm_provider, model=model, response=original_exception.response, + litellm_debug_info=extra_information, ) elif original_exception.status_code == 404: exception_mapping_worked = True raise NotFoundError( - message=f"{exception_provider} - {message} {extra_information}", + message=f"{exception_provider} - {message}", model=model, llm_provider=custom_llm_provider, response=original_exception.response, + litellm_debug_info=extra_information, ) elif original_exception.status_code == 408: exception_mapping_worked = True raise Timeout( - message=f"{exception_provider} - {message} {extra_information}", + message=f"{exception_provider} - {message}", model=model, llm_provider=custom_llm_provider, + litellm_debug_info=extra_information, ) elif original_exception.status_code == 422: exception_mapping_worked = True raise BadRequestError( - message=f"{exception_provider} - {message} {extra_information}", + message=f"{exception_provider} - {message}", model=model, llm_provider=custom_llm_provider, response=original_exception.response, + litellm_debug_info=extra_information, ) elif original_exception.status_code == 429: exception_mapping_worked = True raise RateLimitError( - message=f"{exception_provider} - {message} {extra_information}", + message=f"{exception_provider} - {message}", model=model, llm_provider=custom_llm_provider, response=original_exception.response, + litellm_debug_info=extra_information, ) elif original_exception.status_code == 503: exception_mapping_worked = True raise ServiceUnavailableError( - message=f"{exception_provider} - {message} {extra_information}", + message=f"{exception_provider} - {message}", model=model, llm_provider=custom_llm_provider, response=original_exception.response, + litellm_debug_info=extra_information, ) elif original_exception.status_code == 504: # gateway timeout error exception_mapping_worked = True raise Timeout( - message=f"{exception_provider} - {message} {extra_information}", + message=f"{exception_provider} - {message}", model=model, llm_provider=custom_llm_provider, + litellm_debug_info=extra_information, ) else: exception_mapping_worked = True raise APIError( status_code=original_exception.status_code, - message=f"{exception_provider} - {message} {extra_information}", + message=f"{exception_provider} - {message}", llm_provider=custom_llm_provider, model=model, request=original_exception.request, + litellm_debug_info=extra_information, ) else: # if no status code then it is an APIConnectionError: https://github.com/openai/openai-python#handling-errors raise APIConnectionError( - message=f"{exception_provider} - {message} {extra_information}", + message=f"{exception_provider} - {message}", llm_provider=custom_llm_provider, model=model, + litellm_debug_info=extra_information, request=httpx.Request( method="POST", url="https://api.openai.com/v1/" ), @@ -4430,8 +2941,42 @@ extra_kwargs = {'api_base': 'https://serving.app.predibase.com', 'litellm_call_i message=f"ReplicateException - {str(original_exception)}", llm_provider="replicate", model=model, - request=original_exception.request, + request=httpx.Request( + method="POST", + url="https://api.replicate.com/v1/deployments", + ), ) + elif custom_llm_provider == "watsonx": + if "token_quota_reached" in error_str: + exception_mapping_worked = True + raise RateLimitError( + message=f"WatsonxException: Rate Limit Errror - {error_str}", + llm_provider="watsonx", + model=model, + response=original_exception.response, + ) 
+            elif custom_llm_provider == "predibase":
+                if "authorization denied for" in error_str:
+                    exception_mapping_worked = True
+
+                    # Predibase returns the raw API Key in the response - this block ensures it's not returned in the exception
+                    if (
+                        error_str is not None
+                        and isinstance(error_str, str)
+                        and "bearer" in error_str.lower()
+                    ):
+                        # only keep the first 10 chars after the occurrence of "bearer"
+                        _bearer_token_start_index = error_str.lower().find("bearer")
+                        error_str = error_str[: _bearer_token_start_index + 14]
+                        error_str += "XXXXXXX" + '"'
+
+                    raise AuthenticationError(
+                        message=f"PredibaseException: Authentication Error - {error_str}",
+                        llm_provider="predibase",
+                        model=model,
+                        response=original_exception.response,
+                        litellm_debug_info=extra_information,
+                    )
            elif custom_llm_provider == "bedrock":
                if (
                    "too many tokens" in error_str
@@ -4447,7 +2992,7 @@ extra_kwargs = {'api_base': 'https://serving.app.predibase.com', 'litellm_call_i
                        llm_provider="bedrock",
                        response=original_exception.response,
                    )
-                if "Malformed input request" in error_str:
+                elif "Malformed input request" in error_str:
                    exception_mapping_worked = True
                    raise BadRequestError(
                        message=f"BedrockException - {error_str}",
@@ -4455,7 +3000,7 @@ extra_kwargs = {'api_base': 'https://serving.app.predibase.com', 'litellm_call_i
                        llm_provider="bedrock",
                        response=original_exception.response,
                    )
-                if (
+                elif (
                    "Unable to locate credentials" in error_str
                    or "The security token included in the request is invalid" in error_str
@@ -4467,7 +3012,7 @@ extra_kwargs = {'api_base': 'https://serving.app.predibase.com', 'litellm_call_i
                        llm_provider="bedrock",
                        response=original_exception.response,
                    )
-                if "AccessDeniedException" in error_str:
+                elif "AccessDeniedException" in error_str:
                    exception_mapping_worked = True
                    raise PermissionDeniedError(
                        message=f"BedrockException PermissionDeniedError - {error_str}",
@@ -4475,7 +3020,7 @@ extra_kwargs = {'api_base': 'https://serving.app.predibase.com', 'litellm_call_i
                        llm_provider="bedrock",
                        response=original_exception.response,
                    )
-                if (
+                elif (
                    "throttlingException" in error_str
                    or "ThrottlingException" in error_str
                ):
@@ -4486,14 +3031,17 @@ extra_kwargs = {'api_base': 'https://serving.app.predibase.com', 'litellm_call_i
                        llm_provider="bedrock",
                        response=original_exception.response,
                    )
-                if "Connect timeout on endpoint URL" in error_str:
+                elif (
+                    "Connect timeout on endpoint URL" in error_str
+                    or "timed out" in error_str
+                ):
                    exception_mapping_worked = True
                    raise Timeout(
                        message=f"BedrockException: Timeout Error - {error_str}",
                        model=model,
                        llm_provider="bedrock",
                    )
-                if hasattr(original_exception, "status_code"):
+                elif hasattr(original_exception, "status_code"):
                    if original_exception.status_code == 500:
                        exception_mapping_worked = True
                        raise ServiceUnavailableError(
@@ -4531,6 +3079,49 @@ extra_kwargs = {'api_base': 'https://serving.app.predibase.com', 'litellm_call_i
                            model=model,
                            response=original_exception.response,
                        )
+                    elif original_exception.status_code == 408:
+                        exception_mapping_worked = True
+                        raise Timeout(
+                            message=f"BedrockException - {original_exception.message}",
+                            model=model,
+                            llm_provider=custom_llm_provider,
+                            litellm_debug_info=extra_information,
+                        )
+                    elif original_exception.status_code == 422:
+                        exception_mapping_worked = True
+                        raise BadRequestError(
+                            message=f"BedrockException - {original_exception.message}",
+                            model=model,
+                            llm_provider=custom_llm_provider,
+                            response=original_exception.response,
+                            litellm_debug_info=extra_information,
+                        )
+                    elif
original_exception.status_code == 429: + exception_mapping_worked = True + raise RateLimitError( + message=f"BedrockException - {original_exception.message}", + model=model, + llm_provider=custom_llm_provider, + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 503: + exception_mapping_worked = True + raise ServiceUnavailableError( + message=f"BedrockException - {original_exception.message}", + model=model, + llm_provider=custom_llm_provider, + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 504: # gateway timeout error + exception_mapping_worked = True + raise Timeout( + message=f"BedrockException - {original_exception.message}", + model=model, + llm_provider=custom_llm_provider, + litellm_debug_info=extra_information, + ) elif custom_llm_provider == "sagemaker": if "Unable to locate credentials" in error_str: exception_mapping_worked = True @@ -4569,10 +3160,11 @@ extra_kwargs = {'api_base': 'https://serving.app.predibase.com', 'litellm_call_i ): exception_mapping_worked = True raise BadRequestError( - message=f"VertexAIException - {error_str} {extra_information}", + message=f"VertexAIException - {error_str}", model=model, llm_provider="vertex_ai", response=original_exception.response, + litellm_debug_info=extra_information, ) elif ( "None Unknown Error." in error_str @@ -4580,26 +3172,29 @@ extra_kwargs = {'api_base': 'https://serving.app.predibase.com', 'litellm_call_i ): exception_mapping_worked = True raise APIError( - message=f"VertexAIException - {error_str} {extra_information}", + message=f"VertexAIException - {error_str}", status_code=500, model=model, llm_provider="vertex_ai", request=original_exception.request, + litellm_debug_info=extra_information, ) elif "403" in error_str: exception_mapping_worked = True raise BadRequestError( - message=f"VertexAIException - {error_str} {extra_information}", + message=f"VertexAIException - {error_str}", model=model, llm_provider="vertex_ai", response=original_exception.response, + litellm_debug_info=extra_information, ) elif "The response was blocked." 
in error_str: exception_mapping_worked = True raise UnprocessableEntityError( - message=f"VertexAIException - {error_str} {extra_information}", + message=f"VertexAIException - {error_str}", model=model, llm_provider="vertex_ai", + litellm_debug_info=extra_information, response=httpx.Response( status_code=429, request=httpx.Request( @@ -4616,9 +3211,10 @@ extra_kwargs = {'api_base': 'https://serving.app.predibase.com', 'litellm_call_i ): exception_mapping_worked = True raise RateLimitError( - message=f"VertexAIException - {error_str} {extra_information}", + message=f"VertexAIException - {error_str}", model=model, llm_provider="vertex_ai", + litellm_debug_info=extra_information, response=httpx.Response( status_code=429, request=httpx.Request( @@ -4631,2243 +3227,977 @@ extra_kwargs = {'api_base': 'https://serving.app.predibase.com', 'litellm_call_i if original_exception.status_code == 400: exception_mapping_worked = True raise BadRequestError( - message=f"VertexAIException - {error_str} {extra_information}", + message=f"VertexAIException - {error_str}", model=model, llm_provider="vertex_ai", + litellm_debug_info=extra_information, response=original_exception.response, ) if original_exception.status_code == 500: exception_mapping_worked = True - raise APIError( - message=f"VertexAIException - {error_str} {extra_information}", +> raise APIError( + message=f"VertexAIException - {error_str}", status_code=500, model=model, llm_provider="vertex_ai", + litellm_debug_info=extra_information, request=original_exception.request, - ) - elif custom_llm_provider == "palm" or custom_llm_provider == "gemini": - if "503 Getting metadata" in error_str: - # auth errors look like this - # 503 Getting metadata from plugin failed with error: Reauthentication is needed. Please run `gcloud auth application-default login` to reauthenticate. - exception_mapping_worked = True - raise BadRequestError( - message=f"GeminiException - Invalid api key", - model=model, - llm_provider="palm", - response=original_exception.response, - ) - if ( - "504 Deadline expired before operation could complete." in error_str - or "504 Deadline Exceeded" in error_str - ): - exception_mapping_worked = True - raise Timeout( - message=f"GeminiException - {original_exception.message}", - model=model, - llm_provider="palm", - ) - if "400 Request payload size exceeds" in error_str: - exception_mapping_worked = True - raise ContextWindowExceededError( - message=f"GeminiException - {error_str}", - model=model, - llm_provider="palm", - response=original_exception.response, - ) - if ( - "500 An internal error has occurred." 
in error_str - or "list index out of range" in error_str - ): - exception_mapping_worked = True - raise APIError( - status_code=getattr(original_exception, "status_code", 500), - message=f"GeminiException - {original_exception.message}", - llm_provider="palm", - model=model, - request=httpx.Response( - status_code=429, - request=httpx.Request( - method="POST", - url=" https://cloud.google.com/vertex-ai/", - ), - ), - ) - if hasattr(original_exception, "status_code"): - if original_exception.status_code == 400: - exception_mapping_worked = True - raise BadRequestError( - message=f"GeminiException - {error_str}", - model=model, - llm_provider="palm", - response=original_exception.response, - ) - # Dailed: Error occurred: 400 Request payload size exceeds the limit: 20000 bytes - elif custom_llm_provider == "cloudflare": - if "Authentication error" in error_str: - exception_mapping_worked = True - raise AuthenticationError( - message=f"Cloudflare Exception - {original_exception.message}", - llm_provider="cloudflare", - model=model, - response=original_exception.response, - ) - if "must have required property" in error_str: - exception_mapping_worked = True - raise BadRequestError( - message=f"Cloudflare Exception - {original_exception.message}", - llm_provider="cloudflare", - model=model, - response=original_exception.response, - ) - elif ( - custom_llm_provider == "cohere" or custom_llm_provider == "cohere_chat" - ): # Cohere - if ( - "invalid api token" in error_str - or "No API key provided." in error_str - ): - exception_mapping_worked = True - raise AuthenticationError( - message=f"CohereException - {original_exception.message}", - llm_provider="cohere", - model=model, - response=original_exception.response, - ) - elif "too many tokens" in error_str: - exception_mapping_worked = True - raise ContextWindowExceededError( - message=f"CohereException - {original_exception.message}", - model=model, - llm_provider="cohere", - response=original_exception.response, - ) - elif hasattr(original_exception, "status_code"): - if ( - original_exception.status_code == 400 - or original_exception.status_code == 498 - ): - exception_mapping_worked = True - raise BadRequestError( - message=f"CohereException - {original_exception.message}", - llm_provider="cohere", - model=model, - response=original_exception.response, - ) - elif original_exception.status_code == 500: - exception_mapping_worked = True - raise ServiceUnavailableError( - message=f"CohereException - {original_exception.message}", - llm_provider="cohere", - model=model, - response=original_exception.response, - ) - elif ( - "CohereConnectionError" in exception_type - ): # cohere seems to fire these errors when we load test it (1k+ messages / min) - exception_mapping_worked = True - raise RateLimitError( - message=f"CohereException - {original_exception.message}", - llm_provider="cohere", - model=model, - response=original_exception.response, - ) - elif "invalid type:" in error_str: - exception_mapping_worked = True - raise BadRequestError( - message=f"CohereException - {original_exception.message}", - llm_provider="cohere", - model=model, - response=original_exception.response, - ) - elif "Unexpected server error" in error_str: - exception_mapping_worked = True - raise ServiceUnavailableError( - message=f"CohereException - {original_exception.message}", - llm_provider="cohere", - model=model, - response=original_exception.response, - ) - else: - if hasattr(original_exception, "status_code"): - exception_mapping_worked = True - raise APIError( - 
status_code=original_exception.status_code, - message=f"CohereException - {original_exception.message}", - llm_provider="cohere", - model=model, - request=original_exception.request, - ) - raise original_exception - elif custom_llm_provider == "huggingface": - if "length limit exceeded" in error_str: - exception_mapping_worked = True - raise ContextWindowExceededError( - message=error_str, - model=model, - llm_provider="huggingface", - response=original_exception.response, - ) - elif "A valid user token is required" in error_str: - exception_mapping_worked = True - raise BadRequestError( - message=error_str, - llm_provider="huggingface", - model=model, - response=original_exception.response, - ) - if hasattr(original_exception, "status_code"): - if original_exception.status_code == 401: - exception_mapping_worked = True - raise AuthenticationError( - message=f"HuggingfaceException - {original_exception.message}", - llm_provider="huggingface", - model=model, - response=original_exception.response, - ) - elif original_exception.status_code == 400: - exception_mapping_worked = True - raise BadRequestError( - message=f"HuggingfaceException - {original_exception.message}", - model=model, - llm_provider="huggingface", - response=original_exception.response, - ) - elif original_exception.status_code == 408: - exception_mapping_worked = True - raise Timeout( - message=f"HuggingfaceException - {original_exception.message}", - model=model, - llm_provider="huggingface", - ) - elif original_exception.status_code == 429: - exception_mapping_worked = True - raise RateLimitError( - message=f"HuggingfaceException - {original_exception.message}", - llm_provider="huggingface", - model=model, - response=original_exception.response, - ) - elif original_exception.status_code == 503: - exception_mapping_worked = True - raise ServiceUnavailableError( - message=f"HuggingfaceException - {original_exception.message}", - llm_provider="huggingface", - model=model, - response=original_exception.response, - ) - else: - exception_mapping_worked = True - raise APIError( - status_code=original_exception.status_code, - message=f"HuggingfaceException - {original_exception.message}", - llm_provider="huggingface", - model=model, - request=original_exception.request, - ) - elif custom_llm_provider == "ai21": - if hasattr(original_exception, "message"): - if "Prompt has too many tokens" in original_exception.message: - exception_mapping_worked = True - raise ContextWindowExceededError( - message=f"AI21Exception - {original_exception.message}", - model=model, - llm_provider="ai21", - response=original_exception.response, - ) - if "Bad or missing API token." 
in original_exception.message: - exception_mapping_worked = True - raise BadRequestError( - message=f"AI21Exception - {original_exception.message}", - model=model, - llm_provider="ai21", - response=original_exception.response, - ) - if hasattr(original_exception, "status_code"): - if original_exception.status_code == 401: - exception_mapping_worked = True - raise AuthenticationError( - message=f"AI21Exception - {original_exception.message}", - llm_provider="ai21", - model=model, - response=original_exception.response, - ) - elif original_exception.status_code == 408: - exception_mapping_worked = True - raise Timeout( - message=f"AI21Exception - {original_exception.message}", - model=model, - llm_provider="ai21", - ) - if original_exception.status_code == 422: - exception_mapping_worked = True - raise BadRequestError( - message=f"AI21Exception - {original_exception.message}", - model=model, - llm_provider="ai21", - response=original_exception.response, - ) - elif original_exception.status_code == 429: - exception_mapping_worked = True - raise RateLimitError( - message=f"AI21Exception - {original_exception.message}", - llm_provider="ai21", - model=model, - response=original_exception.response, - ) - else: - exception_mapping_worked = True - raise APIError( - status_code=original_exception.status_code, - message=f"AI21Exception - {original_exception.message}", - llm_provider="ai21", - model=model, - request=original_exception.request, - ) - elif custom_llm_provider == "nlp_cloud": - if "detail" in error_str: - if "Input text length should not exceed" in error_str: - exception_mapping_worked = True - raise ContextWindowExceededError( - message=f"NLPCloudException - {error_str}", - model=model, - llm_provider="nlp_cloud", - response=original_exception.response, - ) - elif "value is not a valid" in error_str: - exception_mapping_worked = True - raise BadRequestError( - message=f"NLPCloudException - {error_str}", - model=model, - llm_provider="nlp_cloud", - response=original_exception.response, - ) - else: - exception_mapping_worked = True - raise APIError( - status_code=500, - message=f"NLPCloudException - {error_str}", - model=model, - llm_provider="nlp_cloud", - request=original_exception.request, - ) - if hasattr( - original_exception, "status_code" - ): # https://docs.nlpcloud.com/?shell#errors - if ( - original_exception.status_code == 400 - or original_exception.status_code == 406 - or original_exception.status_code == 413 - or original_exception.status_code == 422 - ): - exception_mapping_worked = True - raise BadRequestError( - message=f"NLPCloudException - {original_exception.message}", - llm_provider="nlp_cloud", - model=model, - response=original_exception.response, - ) - elif ( - original_exception.status_code == 401 - or original_exception.status_code == 403 - ): - exception_mapping_worked = True - raise AuthenticationError( - message=f"NLPCloudException - {original_exception.message}", - llm_provider="nlp_cloud", - model=model, - response=original_exception.response, - ) - elif ( - original_exception.status_code == 522 - or original_exception.status_code == 524 - ): - exception_mapping_worked = True - raise Timeout( - message=f"NLPCloudException - {original_exception.message}", - model=model, - llm_provider="nlp_cloud", - ) - elif ( - original_exception.status_code == 429 - or original_exception.status_code == 402 - ): - exception_mapping_worked = True - raise RateLimitError( - message=f"NLPCloudException - {original_exception.message}", - llm_provider="nlp_cloud", - model=model, - 
response=original_exception.response, - ) - elif ( - original_exception.status_code == 500 - or original_exception.status_code == 503 - ): - exception_mapping_worked = True - raise APIError( - status_code=original_exception.status_code, - message=f"NLPCloudException - {original_exception.message}", - llm_provider="nlp_cloud", - model=model, - request=original_exception.request, - ) - elif ( - original_exception.status_code == 504 - or original_exception.status_code == 520 - ): - exception_mapping_worked = True - raise ServiceUnavailableError( - message=f"NLPCloudException - {original_exception.message}", - model=model, - llm_provider="nlp_cloud", - response=original_exception.response, - ) - else: - exception_mapping_worked = True - raise APIError( - status_code=original_exception.status_code, - message=f"NLPCloudException - {original_exception.message}", - llm_provider="nlp_cloud", - model=model, - request=original_exception.request, - ) - elif custom_llm_provider == "together_ai": - import json - - try: - error_response = json.loads(error_str) - except: - error_response = {"error": error_str} - if ( - "error" in error_response - and "`inputs` tokens + `max_new_tokens` must be <=" - in error_response["error"] - ): - exception_mapping_worked = True - raise ContextWindowExceededError( - message=f"TogetherAIException - {error_response['error']}", - model=model, - llm_provider="together_ai", - response=original_exception.response, - ) - elif ( - "error" in error_response - and "invalid private key" in error_response["error"] - ): - exception_mapping_worked = True - raise AuthenticationError( - message=f"TogetherAIException - {error_response['error']}", - llm_provider="together_ai", - model=model, - response=original_exception.response, - ) - elif ( - "error" in error_response - and "INVALID_ARGUMENT" in error_response["error"] - ): - exception_mapping_worked = True - raise BadRequestError( - message=f"TogetherAIException - {error_response['error']}", - model=model, - llm_provider="together_ai", - response=original_exception.response, - ) - - elif ( - "error" in error_response - and "API key doesn't match expected format." 
- in error_response["error"] - ): - exception_mapping_worked = True - raise BadRequestError( - message=f"TogetherAIException - {error_response['error']}", - model=model, - llm_provider="together_ai", - response=original_exception.response, - ) - elif ( - "error_type" in error_response - and error_response["error_type"] == "validation" - ): - exception_mapping_worked = True - raise BadRequestError( - message=f"TogetherAIException - {error_response['error']}", - model=model, - llm_provider="together_ai", - response=original_exception.response, - ) - if hasattr(original_exception, "status_code"): - if original_exception.status_code == 408: - exception_mapping_worked = True - raise Timeout( - message=f"TogetherAIException - {original_exception.message}", - model=model, - llm_provider="together_ai", - ) - elif original_exception.status_code == 422: - exception_mapping_worked = True - raise BadRequestError( - message=f"TogetherAIException - {error_response['error']}", - model=model, - llm_provider="together_ai", - response=original_exception.response, - ) - elif original_exception.status_code == 429: - exception_mapping_worked = True - raise RateLimitError( - message=f"TogetherAIException - {original_exception.message}", - llm_provider="together_ai", - model=model, - response=original_exception.response, - ) - elif original_exception.status_code == 524: - exception_mapping_worked = True - raise Timeout( - message=f"TogetherAIException - {original_exception.message}", - llm_provider="together_ai", - model=model, - ) - else: - exception_mapping_worked = True - raise APIError( - status_code=original_exception.status_code, - message=f"TogetherAIException - {original_exception.message}", - llm_provider="together_ai", - model=model, - request=original_exception.request, - ) - elif custom_llm_provider == "aleph_alpha": - if ( - "This is longer than the model's maximum context length" - in error_str - ): - exception_mapping_worked = True - raise ContextWindowExceededError( - message=f"AlephAlphaException - {original_exception.message}", - llm_provider="aleph_alpha", - model=model, - response=original_exception.response, - ) - elif "InvalidToken" in error_str or "No token provided" in error_str: - exception_mapping_worked = True - raise BadRequestError( - message=f"AlephAlphaException - {original_exception.message}", - llm_provider="aleph_alpha", - model=model, - response=original_exception.response, - ) - elif hasattr(original_exception, "status_code"): - print_verbose(f"status code: {original_exception.status_code}") - if original_exception.status_code == 401: - exception_mapping_worked = True - raise AuthenticationError( - message=f"AlephAlphaException - {original_exception.message}", - llm_provider="aleph_alpha", - model=model, - ) - elif original_exception.status_code == 400: - exception_mapping_worked = True - raise BadRequestError( - message=f"AlephAlphaException - {original_exception.message}", - llm_provider="aleph_alpha", - model=model, - response=original_exception.response, - ) - elif original_exception.status_code == 429: - exception_mapping_worked = True - raise RateLimitError( - message=f"AlephAlphaException - {original_exception.message}", - llm_provider="aleph_alpha", - model=model, - response=original_exception.response, - ) - elif original_exception.status_code == 500: - exception_mapping_worked = True - raise ServiceUnavailableError( - message=f"AlephAlphaException - {original_exception.message}", - llm_provider="aleph_alpha", - model=model, - response=original_exception.response, - ) 
- raise original_exception - raise original_exception - elif ( - custom_llm_provider == "ollama" or custom_llm_provider == "ollama_chat" - ): - if isinstance(original_exception, dict): - error_str = original_exception.get("error", "") - else: - error_str = str(original_exception) - if "no such file or directory" in error_str: - exception_mapping_worked = True - raise BadRequestError( - message=f"OllamaException: Invalid Model/Model not loaded - {original_exception}", - model=model, - llm_provider="ollama", - response=original_exception.response, - ) - elif "Failed to establish a new connection" in error_str: - exception_mapping_worked = True - raise ServiceUnavailableError( - message=f"OllamaException: {original_exception}", - llm_provider="ollama", - model=model, - response=original_exception.response, - ) - elif "Invalid response object from API" in error_str: - exception_mapping_worked = True - raise BadRequestError( - message=f"OllamaException: {original_exception}", - llm_provider="ollama", - model=model, - response=original_exception.response, - ) - elif "Read timed out" in error_str: - exception_mapping_worked = True - raise Timeout( - message=f"OllamaException: {original_exception}", - llm_provider="ollama", - model=model, - ) - elif custom_llm_provider == "vllm": - if hasattr(original_exception, "status_code"): - if original_exception.status_code == 0: - exception_mapping_worked = True - raise APIConnectionError( - message=f"VLLMException - {original_exception.message}", - llm_provider="vllm", - model=model, - request=original_exception.request, - ) - elif custom_llm_provider == "azure": - if "Internal server error" in error_str: - exception_mapping_worked = True - raise APIError( - status_code=500, - message=f"AzureException - {original_exception.message} {extra_information}", - llm_provider="azure", - model=model, - request=httpx.Request(method="POST", url="https://openai.com/"), - ) - elif "This model's maximum context length is" in error_str: - exception_mapping_worked = True - raise ContextWindowExceededError( - message=f"AzureException - {original_exception.message} {extra_information}", - llm_provider="azure", - model=model, - response=original_exception.response, - ) - elif "DeploymentNotFound" in error_str: - exception_mapping_worked = True - raise NotFoundError( - message=f"AzureException - {original_exception.message} {extra_information}", - llm_provider="azure", - model=model, - response=original_exception.response, - ) - elif ( - "invalid_request_error" in error_str - and "content_policy_violation" in error_str - ) or ( - "The response was filtered due to the prompt triggering Azure OpenAI's content management" - in error_str - ): - exception_mapping_worked = True - raise ContentPolicyViolationError( - message=f"AzureException - {original_exception.message} {extra_information}", - llm_provider="azure", - model=model, - response=original_exception.response, - ) - elif "invalid_request_error" in error_str: - exception_mapping_worked = True - raise BadRequestError( - message=f"AzureException - {original_exception.message} {extra_information}", - llm_provider="azure", - model=model, - response=original_exception.response, - ) - elif ( - "The api_key client option must be set either by passing api_key to the client or by setting" - in error_str - ): - exception_mapping_worked = True - raise AuthenticationError( - message=f"{exception_provider} - {original_exception.message} {extra_information}", - llm_provider=custom_llm_provider, - model=model, - 
response=original_exception.response, - ) - elif hasattr(original_exception, "status_code"): - exception_mapping_worked = True - if original_exception.status_code == 401: - exception_mapping_worked = True - raise AuthenticationError( - message=f"AzureException - {original_exception.message} {extra_information}", - llm_provider="azure", - model=model, - response=original_exception.response, - ) - elif original_exception.status_code == 408: - exception_mapping_worked = True - raise Timeout( - message=f"AzureException - {original_exception.message} {extra_information}", - model=model, - llm_provider="azure", - ) - if original_exception.status_code == 422: - exception_mapping_worked = True - raise BadRequestError( - message=f"AzureException - {original_exception.message} {extra_information}", - model=model, - llm_provider="azure", - response=original_exception.response, - ) - elif original_exception.status_code == 429: - exception_mapping_worked = True - raise RateLimitError( - message=f"AzureException - {original_exception.message} {extra_information}", - model=model, - llm_provider="azure", - response=original_exception.response, - ) - elif original_exception.status_code == 503: - exception_mapping_worked = True - raise ServiceUnavailableError( - message=f"AzureException - {original_exception.message} {extra_information}", - model=model, - llm_provider="azure", - response=original_exception.response, - ) - elif original_exception.status_code == 504: # gateway timeout error - exception_mapping_worked = True - raise Timeout( - message=f"AzureException - {original_exception.message} {extra_information}", - model=model, - llm_provider="azure", - ) - else: - exception_mapping_worked = True - raise APIError( - status_code=original_exception.status_code, - message=f"AzureException - {original_exception.message} {extra_information}", - llm_provider="azure", - model=model, - request=httpx.Request( - method="POST", url="https://openai.com/" - ), - ) - else: - # if no status code then it is an APIConnectionError: https://github.com/openai/openai-python#handling-errors - raise APIConnectionError( - message=f"{exception_provider} - {message} {extra_information}", - llm_provider="azure", - model=model, - request=httpx.Request(method="POST", url="https://openai.com/"), - ) - if ( - "BadRequestError.__init__() missing 1 required positional argument: 'param'" - in str(original_exception) - ): # deal with edge-case invalid request error bug in openai-python sdk - exception_mapping_worked = True - raise BadRequestError( - message=f"{exception_provider}: This can happen due to missing AZURE_API_VERSION: {str(original_exception)}", - model=model, - llm_provider=custom_llm_provider, - response=original_exception.response, - ) - else: # ensure generic errors always return APIConnectionError= - exception_mapping_worked = True - if hasattr(original_exception, "request"): - raise APIConnectionError( - message=f"{str(original_exception)}", - llm_provider=custom_llm_provider, - model=model, - request=original_exception.request, - ) - else: - raise APIConnectionError( - message=f"{str(original_exception)}", - llm_provider=custom_llm_provider, - model=model, - request=httpx.Request( - method="POST", url="https://api.openai.com/v1/" - ), # stub the request - ) - except Exception as e: - # LOGGING - exception_logging( - logger_fn=user_logger_fn, - additional_args={ - "exception_mapping_worked": exception_mapping_worked, - "original_exception": original_exception, - }, - exception=e, - ) - ## AUTH ERROR - if isinstance(e, 
AuthenticationError) and ( - litellm.email or "LITELLM_EMAIL" in os.environ - ): - threading.Thread(target=get_all_keys, args=(e.llm_provider,)).start() - # don't let an error with mapping interrupt the user from receiving an error from the llm api calls - if exception_mapping_worked: -> raise e +E litellm.exceptions.APIError: VertexAIException - Parameter to MergeFrom() must be instance of same class: expected got . -../utils.py:9353: -_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ - -model = 'llama-3-8b-instruct', original_exception = KeyError('stream') -custom_llm_provider = 'predibase' -completion_kwargs = {'acompletion': False, 'api_base': None, 'api_key': 'pb_Qg9YbQo7UqqHdu0ozxN_aw', 'api_version': None, ...} -extra_kwargs = {'api_base': 'https://serving.app.predibase.com', 'litellm_call_id': 'cf0ea464-1b45-4473-8e55-6bf6809df7a7', 'litellm_logging_obj': , 'tenant_id': 'c4768f95'} - - def exception_type( - model, - original_exception, - custom_llm_provider, - completion_kwargs={}, - extra_kwargs={}, - ): - global user_logger_fn, liteDebuggerClient - exception_mapping_worked = False - if litellm.suppress_debug_info is False: - print() # noqa - print( # noqa - "\033[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new\033[0m" # noqa - ) # noqa - print( # noqa - "LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'." # noqa - ) # noqa - print() # noqa - try: - if model: - error_str = str(original_exception) - if isinstance(original_exception, BaseException): - exception_type = type(original_exception).__name__ - else: - exception_type = "" - - ################################################################################ - # Common Extra information needed for all providers - # We pass num retries, api_base, vertex_deployment etc to the exception here - ################################################################################ - - _api_base = litellm.get_api_base(model=model, optional_params=extra_kwargs) - messages = litellm.get_first_chars_messages(kwargs=completion_kwargs) - _vertex_project = extra_kwargs.get("vertex_project") - _vertex_location = extra_kwargs.get("vertex_location") - _metadata = extra_kwargs.get("metadata", {}) or {} - _model_group = _metadata.get("model_group") - _deployment = _metadata.get("deployment") - extra_information = f"\nModel: {model}" - if _api_base: - extra_information += f"\nAPI Base: {_api_base}" - if messages and len(messages) > 0: - extra_information += f"\nMessages: {messages}" - - if _model_group is not None: - extra_information += f"\nmodel_group: {_model_group}\n" - if _deployment is not None: - extra_information += f"\ndeployment: {_deployment}\n" - if _vertex_project is not None: - extra_information += f"\nvertex_project: {_vertex_project}\n" - if _vertex_location is not None: - extra_information += f"\nvertex_location: {_vertex_location}\n" - - # on litellm proxy add key name + team to exceptions - extra_information = _add_key_name_and_team_to_alert( - request_info=extra_information, metadata=_metadata - ) - - ################################################################################ - # End of Common Extra information Needed for all providers - ################################################################################ - - ################################################################################ - #################### Start of Provider Exception mapping #################### - 
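The handler visible in the traceback above relies on an `exception_mapping_worked` flag: it is flipped to `True` immediately before a mapped exception is raised, and the enclosing `except` block uses it to decide whether to surface the mapped error or fall back to the original provider exception. A minimal, generic sketch of that pattern (not litellm's actual code; `map_provider_error` and `mapping_worked` are hypothetical names) is:

```python
# Illustrative sketch of the mapping-flag pattern shown in the traceback above.
def map_provider_error(original_exception: Exception) -> None:
    mapping_worked = False
    try:
        if "Request timed out" in str(original_exception):
            mapping_worked = True
            # stand-in for a mapped class such as litellm's Timeout
            raise TimeoutError(f"Mapped timeout - {original_exception}")
        raise original_exception
    except Exception as mapped_error:
        if mapping_worked:
            raise mapped_error        # surface the friendlier, provider-agnostic error
        raise original_exception      # never hide the real failure behind a mapping bug
```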
################################################################################ - - if "Request Timeout Error" in error_str or "Request timed out" in error_str: - exception_mapping_worked = True - raise Timeout( - message=f"APITimeoutError - Request timed out. {extra_information} \n error_str: {error_str}", - model=model, - llm_provider=custom_llm_provider, - ) - - if ( - custom_llm_provider == "openai" - or custom_llm_provider == "text-completion-openai" - or custom_llm_provider == "custom_openai" - or custom_llm_provider in litellm.openai_compatible_providers - ): - # custom_llm_provider is openai, make it OpenAI - if hasattr(original_exception, "message"): - message = original_exception.message - else: - message = str(original_exception) - if message is not None and isinstance(message, str): - message = message.replace("OPENAI", custom_llm_provider.upper()) - message = message.replace("openai", custom_llm_provider) - message = message.replace("OpenAI", custom_llm_provider) - if custom_llm_provider == "openai": - exception_provider = "OpenAI" + "Exception" - else: - exception_provider = ( - custom_llm_provider[0].upper() - + custom_llm_provider[1:] - + "Exception" - ) - - if ( - "This model's maximum context length is" in error_str - or "Request too large" in error_str - ): - exception_mapping_worked = True - raise ContextWindowExceededError( - message=f"{exception_provider} - {message} {extra_information}", - llm_provider=custom_llm_provider, - model=model, - response=original_exception.response, - ) - elif ( - "invalid_request_error" in error_str - and "model_not_found" in error_str - ): - exception_mapping_worked = True - raise NotFoundError( - message=f"{exception_provider} - {message} {extra_information}", - llm_provider=custom_llm_provider, - model=model, - response=original_exception.response, - ) - elif ( - "invalid_request_error" in error_str - and "content_policy_violation" in error_str - ): - exception_mapping_worked = True - raise ContentPolicyViolationError( - message=f"{exception_provider} - {message} {extra_information}", - llm_provider=custom_llm_provider, - model=model, - response=original_exception.response, - ) - elif ( - "invalid_request_error" in error_str - and "Incorrect API key provided" not in error_str - ): - exception_mapping_worked = True - raise BadRequestError( - message=f"{exception_provider} - {message} {extra_information}", - llm_provider=custom_llm_provider, - model=model, - response=original_exception.response, - ) - elif ( - "The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable" - in error_str - ): - exception_mapping_worked = True - raise AuthenticationError( - message=f"{exception_provider} - {message} {extra_information}", - llm_provider=custom_llm_provider, - model=model, - response=original_exception.response, - ) - elif "Mistral API raised a streaming error" in error_str: - exception_mapping_worked = True - _request = httpx.Request( - method="POST", url="https://api.openai.com/v1" - ) - raise APIError( - status_code=500, - message=f"{exception_provider} - {message} {extra_information}", - llm_provider=custom_llm_provider, - model=model, - request=_request, - ) - elif hasattr(original_exception, "status_code"): - exception_mapping_worked = True - if original_exception.status_code == 401: - exception_mapping_worked = True - raise AuthenticationError( - message=f"{exception_provider} - {message} {extra_information}", - llm_provider=custom_llm_provider, - model=model, - 
response=original_exception.response, - ) - elif original_exception.status_code == 404: - exception_mapping_worked = True - raise NotFoundError( - message=f"{exception_provider} - {message} {extra_information}", - model=model, - llm_provider=custom_llm_provider, - response=original_exception.response, - ) - elif original_exception.status_code == 408: - exception_mapping_worked = True - raise Timeout( - message=f"{exception_provider} - {message} {extra_information}", - model=model, - llm_provider=custom_llm_provider, - ) - elif original_exception.status_code == 422: - exception_mapping_worked = True - raise BadRequestError( - message=f"{exception_provider} - {message} {extra_information}", - model=model, - llm_provider=custom_llm_provider, - response=original_exception.response, - ) - elif original_exception.status_code == 429: - exception_mapping_worked = True - raise RateLimitError( - message=f"{exception_provider} - {message} {extra_information}", - model=model, - llm_provider=custom_llm_provider, - response=original_exception.response, - ) - elif original_exception.status_code == 503: - exception_mapping_worked = True - raise ServiceUnavailableError( - message=f"{exception_provider} - {message} {extra_information}", - model=model, - llm_provider=custom_llm_provider, - response=original_exception.response, - ) - elif original_exception.status_code == 504: # gateway timeout error - exception_mapping_worked = True - raise Timeout( - message=f"{exception_provider} - {message} {extra_information}", - model=model, - llm_provider=custom_llm_provider, - ) - else: - exception_mapping_worked = True - raise APIError( - status_code=original_exception.status_code, - message=f"{exception_provider} - {message} {extra_information}", - llm_provider=custom_llm_provider, - model=model, - request=original_exception.request, - ) - else: - # if no status code then it is an APIConnectionError: https://github.com/openai/openai-python#handling-errors - raise APIConnectionError( - message=f"{exception_provider} - {message} {extra_information}", - llm_provider=custom_llm_provider, - model=model, - request=httpx.Request( - method="POST", url="https://api.openai.com/v1/" - ), - ) - elif custom_llm_provider == "anthropic": # one of the anthropics - if hasattr(original_exception, "message"): - if ( - "prompt is too long" in original_exception.message - or "prompt: length" in original_exception.message - ): - exception_mapping_worked = True - raise ContextWindowExceededError( - message=original_exception.message, - model=model, - llm_provider="anthropic", - response=original_exception.response, - ) - if "Invalid API Key" in original_exception.message: - exception_mapping_worked = True - raise AuthenticationError( - message=original_exception.message, - model=model, - llm_provider="anthropic", - response=original_exception.response, - ) - if hasattr(original_exception, "status_code"): - print_verbose(f"status_code: {original_exception.status_code}") - if original_exception.status_code == 401: - exception_mapping_worked = True - raise AuthenticationError( - message=f"AnthropicException - {original_exception.message}", - llm_provider="anthropic", - model=model, - response=original_exception.response, - ) - elif ( - original_exception.status_code == 400 - or original_exception.status_code == 413 - ): - exception_mapping_worked = True - raise BadRequestError( - message=f"AnthropicException - {original_exception.message}", - model=model, - llm_provider="anthropic", - response=original_exception.response, - ) - elif 
original_exception.status_code == 408: - exception_mapping_worked = True - raise Timeout( - message=f"AnthropicException - {original_exception.message}", - model=model, - llm_provider="anthropic", - ) - elif original_exception.status_code == 429: - exception_mapping_worked = True - raise RateLimitError( - message=f"AnthropicException - {original_exception.message}", - llm_provider="anthropic", - model=model, - response=original_exception.response, - ) - elif original_exception.status_code == 500: - exception_mapping_worked = True - raise APIError( - status_code=500, - message=f"AnthropicException - {original_exception.message}. Handle with `litellm.APIError`.", - llm_provider="anthropic", - model=model, - request=original_exception.request, - ) - elif custom_llm_provider == "replicate": - if "Incorrect authentication token" in error_str: - exception_mapping_worked = True - raise AuthenticationError( - message=f"ReplicateException - {error_str}", - llm_provider="replicate", - model=model, - response=original_exception.response, - ) - elif "input is too long" in error_str: - exception_mapping_worked = True - raise ContextWindowExceededError( - message=f"ReplicateException - {error_str}", - model=model, - llm_provider="replicate", - response=original_exception.response, - ) - elif exception_type == "ModelError": - exception_mapping_worked = True - raise BadRequestError( - message=f"ReplicateException - {error_str}", - model=model, - llm_provider="replicate", - response=original_exception.response, - ) - elif "Request was throttled" in error_str: - exception_mapping_worked = True - raise RateLimitError( - message=f"ReplicateException - {error_str}", - llm_provider="replicate", - model=model, - response=original_exception.response, - ) - elif hasattr(original_exception, "status_code"): - if original_exception.status_code == 401: - exception_mapping_worked = True - raise AuthenticationError( - message=f"ReplicateException - {original_exception.message}", - llm_provider="replicate", - model=model, - response=original_exception.response, - ) - elif ( - original_exception.status_code == 400 - or original_exception.status_code == 422 - or original_exception.status_code == 413 - ): - exception_mapping_worked = True - raise BadRequestError( - message=f"ReplicateException - {original_exception.message}", - model=model, - llm_provider="replicate", - response=original_exception.response, - ) - elif original_exception.status_code == 408: - exception_mapping_worked = True - raise Timeout( - message=f"ReplicateException - {original_exception.message}", - model=model, - llm_provider="replicate", - ) - elif original_exception.status_code == 429: - exception_mapping_worked = True - raise RateLimitError( - message=f"ReplicateException - {original_exception.message}", - llm_provider="replicate", - model=model, - response=original_exception.response, - ) - elif original_exception.status_code == 500: - exception_mapping_worked = True - raise ServiceUnavailableError( - message=f"ReplicateException - {original_exception.message}", - llm_provider="replicate", - model=model, - response=original_exception.response, - ) - exception_mapping_worked = True - raise APIError( - status_code=500, - message=f"ReplicateException - {str(original_exception)}", - llm_provider="replicate", - model=model, - request=original_exception.request, - ) - elif custom_llm_provider == "bedrock": - if ( - "too many tokens" in error_str - or "expected maxLength:" in error_str - or "Input is too long" in error_str - or "prompt: length: 1.." 
in error_str - or "Too many input tokens" in error_str - ): - exception_mapping_worked = True - raise ContextWindowExceededError( - message=f"BedrockException: Context Window Error - {error_str}", - model=model, - llm_provider="bedrock", - response=original_exception.response, - ) - if "Malformed input request" in error_str: - exception_mapping_worked = True - raise BadRequestError( - message=f"BedrockException - {error_str}", - model=model, - llm_provider="bedrock", - response=original_exception.response, - ) - if ( - "Unable to locate credentials" in error_str - or "The security token included in the request is invalid" - in error_str - ): - exception_mapping_worked = True - raise AuthenticationError( - message=f"BedrockException Invalid Authentication - {error_str}", - model=model, - llm_provider="bedrock", - response=original_exception.response, - ) - if "AccessDeniedException" in error_str: - exception_mapping_worked = True - raise PermissionDeniedError( - message=f"BedrockException PermissionDeniedError - {error_str}", - model=model, - llm_provider="bedrock", - response=original_exception.response, - ) - if ( - "throttlingException" in error_str - or "ThrottlingException" in error_str - ): - exception_mapping_worked = True - raise RateLimitError( - message=f"BedrockException: Rate Limit Error - {error_str}", - model=model, - llm_provider="bedrock", - response=original_exception.response, - ) - if "Connect timeout on endpoint URL" in error_str: - exception_mapping_worked = True - raise Timeout( - message=f"BedrockException: Timeout Error - {error_str}", - model=model, - llm_provider="bedrock", - ) - if hasattr(original_exception, "status_code"): - if original_exception.status_code == 500: - exception_mapping_worked = True - raise ServiceUnavailableError( - message=f"BedrockException - {original_exception.message}", - llm_provider="bedrock", - model=model, - response=httpx.Response( - status_code=500, - request=httpx.Request( - method="POST", url="https://api.openai.com/v1/" - ), - ), - ) - elif original_exception.status_code == 401: - exception_mapping_worked = True - raise AuthenticationError( - message=f"BedrockException - {original_exception.message}", - llm_provider="bedrock", - model=model, - response=original_exception.response, - ) - elif original_exception.status_code == 400: - exception_mapping_worked = True - raise BadRequestError( - message=f"BedrockException - {original_exception.message}", - llm_provider="bedrock", - model=model, - response=original_exception.response, - ) - elif original_exception.status_code == 404: - exception_mapping_worked = True - raise NotFoundError( - message=f"BedrockException - {original_exception.message}", - llm_provider="bedrock", - model=model, - response=original_exception.response, - ) - elif custom_llm_provider == "sagemaker": - if "Unable to locate credentials" in error_str: - exception_mapping_worked = True - raise BadRequestError( - message=f"SagemakerException - {error_str}", - model=model, - llm_provider="sagemaker", - response=original_exception.response, - ) - elif ( - "Input validation error: `best_of` must be > 0 and <= 2" - in error_str - ): - exception_mapping_worked = True - raise BadRequestError( - message=f"SagemakerException - the value of 'n' must be > 0 and <= 2 for sagemaker endpoints", - model=model, - llm_provider="sagemaker", - response=original_exception.response, - ) - elif ( - "`inputs` tokens + `max_new_tokens` must be <=" in error_str - or "instance type with more CPU capacity or memory" in error_str - ): - 
exception_mapping_worked = True - raise ContextWindowExceededError( - message=f"SagemakerException - {error_str}", - model=model, - llm_provider="sagemaker", - response=original_exception.response, - ) - elif custom_llm_provider == "vertex_ai": - if ( - "Vertex AI API has not been used in project" in error_str - or "Unable to find your project" in error_str - ): - exception_mapping_worked = True - raise BadRequestError( - message=f"VertexAIException - {error_str} {extra_information}", - model=model, - llm_provider="vertex_ai", - response=original_exception.response, - ) - elif ( - "None Unknown Error." in error_str - or "Content has no parts." in error_str - ): - exception_mapping_worked = True - raise APIError( - message=f"VertexAIException - {error_str} {extra_information}", - status_code=500, - model=model, - llm_provider="vertex_ai", - request=original_exception.request, - ) - elif "403" in error_str: - exception_mapping_worked = True - raise BadRequestError( - message=f"VertexAIException - {error_str} {extra_information}", - model=model, - llm_provider="vertex_ai", - response=original_exception.response, - ) - elif "The response was blocked." in error_str: - exception_mapping_worked = True - raise UnprocessableEntityError( - message=f"VertexAIException - {error_str} {extra_information}", - model=model, - llm_provider="vertex_ai", - response=httpx.Response( - status_code=429, - request=httpx.Request( - method="POST", - url=" https://cloud.google.com/vertex-ai/", - ), - ), - ) - elif ( - "429 Quota exceeded" in error_str - or "IndexError: list index out of range" in error_str - or "429 Unable to submit request because the service is temporarily out of capacity." - in error_str - ): - exception_mapping_worked = True - raise RateLimitError( - message=f"VertexAIException - {error_str} {extra_information}", - model=model, - llm_provider="vertex_ai", - response=httpx.Response( - status_code=429, - request=httpx.Request( - method="POST", - url=" https://cloud.google.com/vertex-ai/", - ), - ), - ) - if hasattr(original_exception, "status_code"): - if original_exception.status_code == 400: - exception_mapping_worked = True - raise BadRequestError( - message=f"VertexAIException - {error_str} {extra_information}", - model=model, - llm_provider="vertex_ai", - response=original_exception.response, - ) - if original_exception.status_code == 500: - exception_mapping_worked = True - raise APIError( - message=f"VertexAIException - {error_str} {extra_information}", - status_code=500, - model=model, - llm_provider="vertex_ai", - request=original_exception.request, - ) - elif custom_llm_provider == "palm" or custom_llm_provider == "gemini": - if "503 Getting metadata" in error_str: - # auth errors look like this - # 503 Getting metadata from plugin failed with error: Reauthentication is needed. Please run `gcloud auth application-default login` to reauthenticate. - exception_mapping_worked = True - raise BadRequestError( - message=f"GeminiException - Invalid api key", - model=model, - llm_provider="palm", - response=original_exception.response, - ) - if ( - "504 Deadline expired before operation could complete." 
in error_str - or "504 Deadline Exceeded" in error_str - ): - exception_mapping_worked = True - raise Timeout( - message=f"GeminiException - {original_exception.message}", - model=model, - llm_provider="palm", - ) - if "400 Request payload size exceeds" in error_str: - exception_mapping_worked = True - raise ContextWindowExceededError( - message=f"GeminiException - {error_str}", - model=model, - llm_provider="palm", - response=original_exception.response, - ) - if ( - "500 An internal error has occurred." in error_str - or "list index out of range" in error_str - ): - exception_mapping_worked = True - raise APIError( - status_code=getattr(original_exception, "status_code", 500), - message=f"GeminiException - {original_exception.message}", - llm_provider="palm", - model=model, - request=httpx.Response( - status_code=429, - request=httpx.Request( - method="POST", - url=" https://cloud.google.com/vertex-ai/", - ), - ), - ) - if hasattr(original_exception, "status_code"): - if original_exception.status_code == 400: - exception_mapping_worked = True - raise BadRequestError( - message=f"GeminiException - {error_str}", - model=model, - llm_provider="palm", - response=original_exception.response, - ) - # Dailed: Error occurred: 400 Request payload size exceeds the limit: 20000 bytes - elif custom_llm_provider == "cloudflare": - if "Authentication error" in error_str: - exception_mapping_worked = True - raise AuthenticationError( - message=f"Cloudflare Exception - {original_exception.message}", - llm_provider="cloudflare", - model=model, - response=original_exception.response, - ) - if "must have required property" in error_str: - exception_mapping_worked = True - raise BadRequestError( - message=f"Cloudflare Exception - {original_exception.message}", - llm_provider="cloudflare", - model=model, - response=original_exception.response, - ) - elif ( - custom_llm_provider == "cohere" or custom_llm_provider == "cohere_chat" - ): # Cohere - if ( - "invalid api token" in error_str - or "No API key provided." 
in error_str - ): - exception_mapping_worked = True - raise AuthenticationError( - message=f"CohereException - {original_exception.message}", - llm_provider="cohere", - model=model, - response=original_exception.response, - ) - elif "too many tokens" in error_str: - exception_mapping_worked = True - raise ContextWindowExceededError( - message=f"CohereException - {original_exception.message}", - model=model, - llm_provider="cohere", - response=original_exception.response, - ) - elif hasattr(original_exception, "status_code"): - if ( - original_exception.status_code == 400 - or original_exception.status_code == 498 - ): - exception_mapping_worked = True - raise BadRequestError( - message=f"CohereException - {original_exception.message}", - llm_provider="cohere", - model=model, - response=original_exception.response, - ) - elif original_exception.status_code == 500: - exception_mapping_worked = True - raise ServiceUnavailableError( - message=f"CohereException - {original_exception.message}", - llm_provider="cohere", - model=model, - response=original_exception.response, - ) - elif ( - "CohereConnectionError" in exception_type - ): # cohere seems to fire these errors when we load test it (1k+ messages / min) - exception_mapping_worked = True - raise RateLimitError( - message=f"CohereException - {original_exception.message}", - llm_provider="cohere", - model=model, - response=original_exception.response, - ) - elif "invalid type:" in error_str: - exception_mapping_worked = True - raise BadRequestError( - message=f"CohereException - {original_exception.message}", - llm_provider="cohere", - model=model, - response=original_exception.response, - ) - elif "Unexpected server error" in error_str: - exception_mapping_worked = True - raise ServiceUnavailableError( - message=f"CohereException - {original_exception.message}", - llm_provider="cohere", - model=model, - response=original_exception.response, - ) - else: - if hasattr(original_exception, "status_code"): - exception_mapping_worked = True - raise APIError( - status_code=original_exception.status_code, - message=f"CohereException - {original_exception.message}", - llm_provider="cohere", - model=model, - request=original_exception.request, - ) - raise original_exception - elif custom_llm_provider == "huggingface": - if "length limit exceeded" in error_str: - exception_mapping_worked = True - raise ContextWindowExceededError( - message=error_str, - model=model, - llm_provider="huggingface", - response=original_exception.response, - ) - elif "A valid user token is required" in error_str: - exception_mapping_worked = True - raise BadRequestError( - message=error_str, - llm_provider="huggingface", - model=model, - response=original_exception.response, - ) - if hasattr(original_exception, "status_code"): - if original_exception.status_code == 401: - exception_mapping_worked = True - raise AuthenticationError( - message=f"HuggingfaceException - {original_exception.message}", - llm_provider="huggingface", - model=model, - response=original_exception.response, - ) - elif original_exception.status_code == 400: - exception_mapping_worked = True - raise BadRequestError( - message=f"HuggingfaceException - {original_exception.message}", - model=model, - llm_provider="huggingface", - response=original_exception.response, - ) - elif original_exception.status_code == 408: - exception_mapping_worked = True - raise Timeout( - message=f"HuggingfaceException - {original_exception.message}", - model=model, - llm_provider="huggingface", - ) - elif 
original_exception.status_code == 429: - exception_mapping_worked = True - raise RateLimitError( - message=f"HuggingfaceException - {original_exception.message}", - llm_provider="huggingface", - model=model, - response=original_exception.response, - ) - elif original_exception.status_code == 503: - exception_mapping_worked = True - raise ServiceUnavailableError( - message=f"HuggingfaceException - {original_exception.message}", - llm_provider="huggingface", - model=model, - response=original_exception.response, - ) - else: - exception_mapping_worked = True - raise APIError( - status_code=original_exception.status_code, - message=f"HuggingfaceException - {original_exception.message}", - llm_provider="huggingface", - model=model, - request=original_exception.request, - ) - elif custom_llm_provider == "ai21": - if hasattr(original_exception, "message"): - if "Prompt has too many tokens" in original_exception.message: - exception_mapping_worked = True - raise ContextWindowExceededError( - message=f"AI21Exception - {original_exception.message}", - model=model, - llm_provider="ai21", - response=original_exception.response, - ) - if "Bad or missing API token." in original_exception.message: - exception_mapping_worked = True - raise BadRequestError( - message=f"AI21Exception - {original_exception.message}", - model=model, - llm_provider="ai21", - response=original_exception.response, - ) - if hasattr(original_exception, "status_code"): - if original_exception.status_code == 401: - exception_mapping_worked = True - raise AuthenticationError( - message=f"AI21Exception - {original_exception.message}", - llm_provider="ai21", - model=model, - response=original_exception.response, - ) - elif original_exception.status_code == 408: - exception_mapping_worked = True - raise Timeout( - message=f"AI21Exception - {original_exception.message}", - model=model, - llm_provider="ai21", - ) - if original_exception.status_code == 422: - exception_mapping_worked = True - raise BadRequestError( - message=f"AI21Exception - {original_exception.message}", - model=model, - llm_provider="ai21", - response=original_exception.response, - ) - elif original_exception.status_code == 429: - exception_mapping_worked = True - raise RateLimitError( - message=f"AI21Exception - {original_exception.message}", - llm_provider="ai21", - model=model, - response=original_exception.response, - ) - else: - exception_mapping_worked = True - raise APIError( - status_code=original_exception.status_code, - message=f"AI21Exception - {original_exception.message}", - llm_provider="ai21", - model=model, - request=original_exception.request, - ) - elif custom_llm_provider == "nlp_cloud": - if "detail" in error_str: - if "Input text length should not exceed" in error_str: - exception_mapping_worked = True - raise ContextWindowExceededError( - message=f"NLPCloudException - {error_str}", - model=model, - llm_provider="nlp_cloud", - response=original_exception.response, - ) - elif "value is not a valid" in error_str: - exception_mapping_worked = True - raise BadRequestError( - message=f"NLPCloudException - {error_str}", - model=model, - llm_provider="nlp_cloud", - response=original_exception.response, - ) - else: - exception_mapping_worked = True - raise APIError( - status_code=500, - message=f"NLPCloudException - {error_str}", - model=model, - llm_provider="nlp_cloud", - request=original_exception.request, - ) - if hasattr( - original_exception, "status_code" - ): # https://docs.nlpcloud.com/?shell#errors - if ( - original_exception.status_code == 400 - 
or original_exception.status_code == 406 - or original_exception.status_code == 413 - or original_exception.status_code == 422 - ): - exception_mapping_worked = True - raise BadRequestError( - message=f"NLPCloudException - {original_exception.message}", - llm_provider="nlp_cloud", - model=model, - response=original_exception.response, - ) - elif ( - original_exception.status_code == 401 - or original_exception.status_code == 403 - ): - exception_mapping_worked = True - raise AuthenticationError( - message=f"NLPCloudException - {original_exception.message}", - llm_provider="nlp_cloud", - model=model, - response=original_exception.response, - ) - elif ( - original_exception.status_code == 522 - or original_exception.status_code == 524 - ): - exception_mapping_worked = True - raise Timeout( - message=f"NLPCloudException - {original_exception.message}", - model=model, - llm_provider="nlp_cloud", - ) - elif ( - original_exception.status_code == 429 - or original_exception.status_code == 402 - ): - exception_mapping_worked = True - raise RateLimitError( - message=f"NLPCloudException - {original_exception.message}", - llm_provider="nlp_cloud", - model=model, - response=original_exception.response, - ) - elif ( - original_exception.status_code == 500 - or original_exception.status_code == 503 - ): - exception_mapping_worked = True - raise APIError( - status_code=original_exception.status_code, - message=f"NLPCloudException - {original_exception.message}", - llm_provider="nlp_cloud", - model=model, - request=original_exception.request, - ) - elif ( - original_exception.status_code == 504 - or original_exception.status_code == 520 - ): - exception_mapping_worked = True - raise ServiceUnavailableError( - message=f"NLPCloudException - {original_exception.message}", - model=model, - llm_provider="nlp_cloud", - response=original_exception.response, - ) - else: - exception_mapping_worked = True - raise APIError( - status_code=original_exception.status_code, - message=f"NLPCloudException - {original_exception.message}", - llm_provider="nlp_cloud", - model=model, - request=original_exception.request, - ) - elif custom_llm_provider == "together_ai": - import json - - try: - error_response = json.loads(error_str) - except: - error_response = {"error": error_str} - if ( - "error" in error_response - and "`inputs` tokens + `max_new_tokens` must be <=" - in error_response["error"] - ): - exception_mapping_worked = True - raise ContextWindowExceededError( - message=f"TogetherAIException - {error_response['error']}", - model=model, - llm_provider="together_ai", - response=original_exception.response, - ) - elif ( - "error" in error_response - and "invalid private key" in error_response["error"] - ): - exception_mapping_worked = True - raise AuthenticationError( - message=f"TogetherAIException - {error_response['error']}", - llm_provider="together_ai", - model=model, - response=original_exception.response, - ) - elif ( - "error" in error_response - and "INVALID_ARGUMENT" in error_response["error"] - ): - exception_mapping_worked = True - raise BadRequestError( - message=f"TogetherAIException - {error_response['error']}", - model=model, - llm_provider="together_ai", - response=original_exception.response, - ) - - elif ( - "error" in error_response - and "API key doesn't match expected format." 
- in error_response["error"] - ): - exception_mapping_worked = True - raise BadRequestError( - message=f"TogetherAIException - {error_response['error']}", - model=model, - llm_provider="together_ai", - response=original_exception.response, - ) - elif ( - "error_type" in error_response - and error_response["error_type"] == "validation" - ): - exception_mapping_worked = True - raise BadRequestError( - message=f"TogetherAIException - {error_response['error']}", - model=model, - llm_provider="together_ai", - response=original_exception.response, - ) - if hasattr(original_exception, "status_code"): - if original_exception.status_code == 408: - exception_mapping_worked = True - raise Timeout( - message=f"TogetherAIException - {original_exception.message}", - model=model, - llm_provider="together_ai", - ) - elif original_exception.status_code == 422: - exception_mapping_worked = True - raise BadRequestError( - message=f"TogetherAIException - {error_response['error']}", - model=model, - llm_provider="together_ai", - response=original_exception.response, - ) - elif original_exception.status_code == 429: - exception_mapping_worked = True - raise RateLimitError( - message=f"TogetherAIException - {original_exception.message}", - llm_provider="together_ai", - model=model, - response=original_exception.response, - ) - elif original_exception.status_code == 524: - exception_mapping_worked = True - raise Timeout( - message=f"TogetherAIException - {original_exception.message}", - llm_provider="together_ai", - model=model, - ) - else: - exception_mapping_worked = True - raise APIError( - status_code=original_exception.status_code, - message=f"TogetherAIException - {original_exception.message}", - llm_provider="together_ai", - model=model, - request=original_exception.request, - ) - elif custom_llm_provider == "aleph_alpha": - if ( - "This is longer than the model's maximum context length" - in error_str - ): - exception_mapping_worked = True - raise ContextWindowExceededError( - message=f"AlephAlphaException - {original_exception.message}", - llm_provider="aleph_alpha", - model=model, - response=original_exception.response, - ) - elif "InvalidToken" in error_str or "No token provided" in error_str: - exception_mapping_worked = True - raise BadRequestError( - message=f"AlephAlphaException - {original_exception.message}", - llm_provider="aleph_alpha", - model=model, - response=original_exception.response, - ) - elif hasattr(original_exception, "status_code"): - print_verbose(f"status code: {original_exception.status_code}") - if original_exception.status_code == 401: - exception_mapping_worked = True - raise AuthenticationError( - message=f"AlephAlphaException - {original_exception.message}", - llm_provider="aleph_alpha", - model=model, - ) - elif original_exception.status_code == 400: - exception_mapping_worked = True - raise BadRequestError( - message=f"AlephAlphaException - {original_exception.message}", - llm_provider="aleph_alpha", - model=model, - response=original_exception.response, - ) - elif original_exception.status_code == 429: - exception_mapping_worked = True - raise RateLimitError( - message=f"AlephAlphaException - {original_exception.message}", - llm_provider="aleph_alpha", - model=model, - response=original_exception.response, - ) - elif original_exception.status_code == 500: - exception_mapping_worked = True - raise ServiceUnavailableError( - message=f"AlephAlphaException - {original_exception.message}", - llm_provider="aleph_alpha", - model=model, - response=original_exception.response, - ) 
- raise original_exception - raise original_exception - elif ( - custom_llm_provider == "ollama" or custom_llm_provider == "ollama_chat" - ): - if isinstance(original_exception, dict): - error_str = original_exception.get("error", "") - else: - error_str = str(original_exception) - if "no such file or directory" in error_str: - exception_mapping_worked = True - raise BadRequestError( - message=f"OllamaException: Invalid Model/Model not loaded - {original_exception}", - model=model, - llm_provider="ollama", - response=original_exception.response, - ) - elif "Failed to establish a new connection" in error_str: - exception_mapping_worked = True - raise ServiceUnavailableError( - message=f"OllamaException: {original_exception}", - llm_provider="ollama", - model=model, - response=original_exception.response, - ) - elif "Invalid response object from API" in error_str: - exception_mapping_worked = True - raise BadRequestError( - message=f"OllamaException: {original_exception}", - llm_provider="ollama", - model=model, - response=original_exception.response, - ) - elif "Read timed out" in error_str: - exception_mapping_worked = True - raise Timeout( - message=f"OllamaException: {original_exception}", - llm_provider="ollama", - model=model, - ) - elif custom_llm_provider == "vllm": - if hasattr(original_exception, "status_code"): - if original_exception.status_code == 0: - exception_mapping_worked = True - raise APIConnectionError( - message=f"VLLMException - {original_exception.message}", - llm_provider="vllm", - model=model, - request=original_exception.request, - ) - elif custom_llm_provider == "azure": - if "Internal server error" in error_str: - exception_mapping_worked = True - raise APIError( - status_code=500, - message=f"AzureException - {original_exception.message} {extra_information}", - llm_provider="azure", - model=model, - request=httpx.Request(method="POST", url="https://openai.com/"), - ) - elif "This model's maximum context length is" in error_str: - exception_mapping_worked = True - raise ContextWindowExceededError( - message=f"AzureException - {original_exception.message} {extra_information}", - llm_provider="azure", - model=model, - response=original_exception.response, - ) - elif "DeploymentNotFound" in error_str: - exception_mapping_worked = True - raise NotFoundError( - message=f"AzureException - {original_exception.message} {extra_information}", - llm_provider="azure", - model=model, - response=original_exception.response, - ) - elif ( - "invalid_request_error" in error_str - and "content_policy_violation" in error_str - ) or ( - "The response was filtered due to the prompt triggering Azure OpenAI's content management" - in error_str - ): - exception_mapping_worked = True - raise ContentPolicyViolationError( - message=f"AzureException - {original_exception.message} {extra_information}", - llm_provider="azure", - model=model, - response=original_exception.response, - ) - elif "invalid_request_error" in error_str: - exception_mapping_worked = True - raise BadRequestError( - message=f"AzureException - {original_exception.message} {extra_information}", - llm_provider="azure", - model=model, - response=original_exception.response, - ) - elif ( - "The api_key client option must be set either by passing api_key to the client or by setting" - in error_str - ): - exception_mapping_worked = True - raise AuthenticationError( - message=f"{exception_provider} - {original_exception.message} {extra_information}", - llm_provider=custom_llm_provider, - model=model, - 
response=original_exception.response, - ) - elif hasattr(original_exception, "status_code"): - exception_mapping_worked = True - if original_exception.status_code == 401: - exception_mapping_worked = True - raise AuthenticationError( - message=f"AzureException - {original_exception.message} {extra_information}", - llm_provider="azure", - model=model, - response=original_exception.response, - ) - elif original_exception.status_code == 408: - exception_mapping_worked = True - raise Timeout( - message=f"AzureException - {original_exception.message} {extra_information}", - model=model, - llm_provider="azure", - ) - if original_exception.status_code == 422: - exception_mapping_worked = True - raise BadRequestError( - message=f"AzureException - {original_exception.message} {extra_information}", - model=model, - llm_provider="azure", - response=original_exception.response, - ) - elif original_exception.status_code == 429: - exception_mapping_worked = True - raise RateLimitError( - message=f"AzureException - {original_exception.message} {extra_information}", - model=model, - llm_provider="azure", - response=original_exception.response, - ) - elif original_exception.status_code == 503: - exception_mapping_worked = True - raise ServiceUnavailableError( - message=f"AzureException - {original_exception.message} {extra_information}", - model=model, - llm_provider="azure", - response=original_exception.response, - ) - elif original_exception.status_code == 504: # gateway timeout error - exception_mapping_worked = True - raise Timeout( - message=f"AzureException - {original_exception.message} {extra_information}", - model=model, - llm_provider="azure", - ) - else: - exception_mapping_worked = True - raise APIError( - status_code=original_exception.status_code, - message=f"AzureException - {original_exception.message} {extra_information}", - llm_provider="azure", - model=model, - request=httpx.Request( - method="POST", url="https://openai.com/" - ), - ) - else: - # if no status code then it is an APIConnectionError: https://github.com/openai/openai-python#handling-errors - raise APIConnectionError( - message=f"{exception_provider} - {message} {extra_information}", - llm_provider="azure", - model=model, - request=httpx.Request(method="POST", url="https://openai.com/"), - ) - if ( - "BadRequestError.__init__() missing 1 required positional argument: 'param'" - in str(original_exception) - ): # deal with edge-case invalid request error bug in openai-python sdk - exception_mapping_worked = True - raise BadRequestError( - message=f"{exception_provider}: This can happen due to missing AZURE_API_VERSION: {str(original_exception)}", - model=model, - llm_provider=custom_llm_provider, - response=original_exception.response, - ) - else: # ensure generic errors always return APIConnectionError= - exception_mapping_worked = True - if hasattr(original_exception, "request"): - raise APIConnectionError( - message=f"{str(original_exception)}", - llm_provider=custom_llm_provider, - model=model, - request=original_exception.request, - ) - else: -> raise APIConnectionError( - message=f"{str(original_exception)}", - llm_provider=custom_llm_provider, - model=model, - request=httpx.Request( - method="POST", url="https://api.openai.com/v1/" - ), # stub the request - ) -E litellm.exceptions.APIConnectionError: 'stream' - -../utils.py:9328: APIConnectionError +../utils.py:8922: APIError During handling of the above exception, another exception occurred: -sync_mode = True - - @pytest.mark.parametrize("sync_mode", [True, False]) - 
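From the caller's side, the point of the mapping code shown above is that one set of `except` clauses works across providers. A small sketch, assuming the mapped classes are importable from `litellm.exceptions` (as the `litellm.exceptions.APIError` / `APIConnectionError` lines in this traceback suggest); the invalid API key below is a hypothetical value used only to force the authentication path:

```python
import litellm
from litellm.exceptions import (
    APIConnectionError,
    AuthenticationError,
    RateLimitError,
)

try:
    litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "ping"}],
        api_key="sk-invalid",  # hypothetical bad key, just to trigger the auth mapping
    )
except AuthenticationError as e:
    # llm_provider / model are attached by the mapping, as seen in the handler above
    print("auth failed:", e.llm_provider, e.model)
except RateLimitError:
    print("rate limited - retry/backoff would go here")
except APIConnectionError as e:
    # generic fallback when no specific mapping applied
    print("connection-level failure:", e)
```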
@pytest.mark.asyncio - async def test_completion_predibase_streaming(sync_mode): + def test_gemini_pro_vision(): try: + load_vertex_ai_credentials() litellm.set_verbose = True + litellm.num_retries = 3 +> resp = litellm.completion( + model="vertex_ai/gemini-1.5-flash-preview-0514", + messages=[ + { + "role": "user", + "content": [ + {"type": "text", "text": "Whats in this image?"}, + { + "type": "image_url", + "image_url": { + "url": "gs://cloud-samples-data/generative-ai/image/boats.jpeg" + }, + }, + ], + } + ], + ) + +test_amazing_vertex_completion.py:510: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../utils.py:3289: in wrapper + return litellm.completion_with_retries(*args, **kwargs) +../main.py:2401: in completion_with_retries + return retryer(original_function, *args, **kwargs) +../proxy/myenv/lib/python3.11/site-packages/tenacity/__init__.py:379: in __call__ + do = self.iter(retry_state=retry_state) +../proxy/myenv/lib/python3.11/site-packages/tenacity/__init__.py:325: in iter + raise retry_exc.reraise() +../proxy/myenv/lib/python3.11/site-packages/tenacity/__init__.py:158: in reraise + raise self.last_attempt.result() +/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/concurrent/futures/_base.py:449: in result + return self.__get_result() +/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/concurrent/futures/_base.py:401: in __get_result + raise self._exception +../proxy/myenv/lib/python3.11/site-packages/tenacity/__init__.py:382: in __call__ + result = fn(*args, **kwargs) +../utils.py:3317: in wrapper + raise e +../utils.py:3211: in wrapper + result = original_function(*args, **kwargs) +../main.py:2368: in completion + raise exception_type( +../utils.py:9709: in exception_type + raise e +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +model = 'gemini-1.5-flash-preview-0514' +original_exception = VertexAIError("Parameter to MergeFrom() must be instance of same class: expected got .") +custom_llm_provider = 'vertex_ai' +completion_kwargs = {'acompletion': False, 'api_base': None, 'api_key': None, 'api_version': None, ...} +extra_kwargs = {'litellm_call_id': '7f48b7ab-47b3-4beb-b2b5-fa298be49d3f', 'litellm_logging_obj': } + + def exception_type( + model, + original_exception, + custom_llm_provider, + completion_kwargs={}, + extra_kwargs={}, + ): + global user_logger_fn, liteDebuggerClient + exception_mapping_worked = False + if litellm.suppress_debug_info is False: + print() # noqa + print( # noqa + "\033[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new\033[0m" # noqa + ) # noqa + print( # noqa + "LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'." 
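For readability, this is the multimodal Vertex AI call made by the new `test_gemini_pro_vision` test above, pulled out of the diff and reflowed. Credentials, project and location setup (handled in the test by `load_vertex_ai_credentials()`) are assumed to already be configured in the environment:

```python
import litellm

litellm.set_verbose = True
litellm.num_retries = 3

resp = litellm.completion(
    model="vertex_ai/gemini-1.5-flash-preview-0514",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Whats in this image?"},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "gs://cloud-samples-data/generative-ai/image/boats.jpeg"
                    },
                },
            ],
        }
    ],
)
print(resp)
```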
# noqa + ) # noqa + print() # noqa + try: + if model: + error_str = str(original_exception) + if isinstance(original_exception, BaseException): + exception_type = type(original_exception).__name__ + else: + exception_type = "" - if sync_mode: - response = completion( - model="predibase/llama-3-8b-instruct", - tenant_id="c4768f95", - api_base="https://serving.app.predibase.com", - api_key=os.getenv("PREDIBASE_API_KEY"), - messages=[{"role": "user", "content": "What is the meaning of life?"}], - stream=True, - ) + ################################################################################ + # Common Extra information needed for all providers + # We pass num retries, api_base, vertex_deployment etc to the exception here + ################################################################################ + extra_information = "" + try: + _api_base = litellm.get_api_base( + model=model, optional_params=extra_kwargs + ) + messages = litellm.get_first_chars_messages(kwargs=completion_kwargs) + _vertex_project = extra_kwargs.get("vertex_project") + _vertex_location = extra_kwargs.get("vertex_location") + _metadata = extra_kwargs.get("metadata", {}) or {} + _model_group = _metadata.get("model_group") + _deployment = _metadata.get("deployment") + extra_information = f"\nModel: {model}" + if _api_base: + extra_information += f"\nAPI Base: {_api_base}" + if messages and len(messages) > 0: + extra_information += f"\nMessages: {messages}" - complete_response = "" - for idx, init_chunk in enumerate(response): - chunk, finished = streaming_format_tests(idx, init_chunk) - complete_response += chunk - custom_llm_provider = init_chunk._hidden_params["custom_llm_provider"] - print(f"custom_llm_provider: {custom_llm_provider}") - assert custom_llm_provider == "predibase" - if finished: - assert isinstance( - init_chunk.choices[0], litellm.utils.StreamingChoices + if _model_group is not None: + extra_information += f"\nmodel_group: {_model_group}\n" + if _deployment is not None: + extra_information += f"\ndeployment: {_deployment}\n" + if _vertex_project is not None: + extra_information += f"\nvertex_project: {_vertex_project}\n" + if _vertex_location is not None: + extra_information += f"\nvertex_location: {_vertex_location}\n" + + # on litellm proxy add key name + team to exceptions + extra_information = _add_key_name_and_team_to_alert( + request_info=extra_information, metadata=_metadata + ) + except: + # DO NOT LET this Block raising the original exception + pass + + ################################################################################ + # End of Common Extra information Needed for all providers + ################################################################################ + + ################################################################################ + #################### Start of Provider Exception mapping #################### + ################################################################################ + + if "Request Timeout Error" in error_str or "Request timed out" in error_str: + exception_mapping_worked = True + raise Timeout( + message=f"APITimeoutError - Request timed out. 
\nerror_str: {error_str}", + model=model, + llm_provider=custom_llm_provider, + litellm_debug_info=extra_information, + ) + + if ( + custom_llm_provider == "openai" + or custom_llm_provider == "text-completion-openai" + or custom_llm_provider == "custom_openai" + or custom_llm_provider in litellm.openai_compatible_providers + ): + # custom_llm_provider is openai, make it OpenAI + if hasattr(original_exception, "message"): + message = original_exception.message + else: + message = str(original_exception) + if message is not None and isinstance(message, str): + message = message.replace("OPENAI", custom_llm_provider.upper()) + message = message.replace("openai", custom_llm_provider) + message = message.replace("OpenAI", custom_llm_provider) + if custom_llm_provider == "openai": + exception_provider = "OpenAI" + "Exception" + else: + exception_provider = ( + custom_llm_provider[0].upper() + + custom_llm_provider[1:] + + "Exception" ) - break - if complete_response.strip() == "": - raise Exception("Empty response received") - else: - response = await litellm.acompletion( - model="predibase/llama-3-8b-instruct", - tenant_id="c4768f95", - api_base="https://serving.app.predibase.com", - api_key=os.getenv("PREDIBASE_API_KEY"), - messages=[{"role": "user", "content": "What is the meaning of life?"}], - stream=True, - ) - # await response - - complete_response = "" - idx = 0 - async for init_chunk in response: - chunk, finished = streaming_format_tests(idx, init_chunk) - complete_response += chunk - custom_llm_provider = init_chunk._hidden_params["custom_llm_provider"] - print(f"custom_llm_provider: {custom_llm_provider}") - assert custom_llm_provider == "predibase" - idx += 1 - if finished: - assert isinstance( - init_chunk.choices[0], litellm.utils.StreamingChoices + if "This model's maximum context length is" in error_str: + exception_mapping_worked = True + raise ContextWindowExceededError( + message=f"{exception_provider} - {message}", + llm_provider=custom_llm_provider, + model=model, + response=original_exception.response, + litellm_debug_info=extra_information, ) - break - if complete_response.strip() == "": - raise Exception("Empty response received") + elif ( + "invalid_request_error" in error_str + and "model_not_found" in error_str + ): + exception_mapping_worked = True + raise NotFoundError( + message=f"{exception_provider} - {message}", + llm_provider=custom_llm_provider, + model=model, + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif ( + "invalid_request_error" in error_str + and "content_policy_violation" in error_str + ): + exception_mapping_worked = True + raise ContentPolicyViolationError( + message=f"{exception_provider} - {message}", + llm_provider=custom_llm_provider, + model=model, + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif ( + "invalid_request_error" in error_str + and "Incorrect API key provided" not in error_str + ): + exception_mapping_worked = True + raise BadRequestError( + message=f"{exception_provider} - {message}", + llm_provider=custom_llm_provider, + model=model, + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif "Request too large" in error_str: + raise RateLimitError( + message=f"{exception_provider} - {message}", + model=model, + llm_provider=custom_llm_provider, + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif ( + "The api_key client option must be set either by passing api_key to the client 
or by setting the OPENAI_API_KEY environment variable" + in error_str + ): + exception_mapping_worked = True + raise AuthenticationError( + message=f"{exception_provider} - {message}", + llm_provider=custom_llm_provider, + model=model, + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif "Mistral API raised a streaming error" in error_str: + exception_mapping_worked = True + _request = httpx.Request( + method="POST", url="https://api.openai.com/v1" + ) + raise APIError( + status_code=500, + message=f"{exception_provider} - {message}", + llm_provider=custom_llm_provider, + model=model, + request=_request, + litellm_debug_info=extra_information, + ) + elif hasattr(original_exception, "status_code"): + exception_mapping_worked = True + if original_exception.status_code == 401: + exception_mapping_worked = True + raise AuthenticationError( + message=f"{exception_provider} - {message}", + llm_provider=custom_llm_provider, + model=model, + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 404: + exception_mapping_worked = True + raise NotFoundError( + message=f"{exception_provider} - {message}", + model=model, + llm_provider=custom_llm_provider, + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 408: + exception_mapping_worked = True + raise Timeout( + message=f"{exception_provider} - {message}", + model=model, + llm_provider=custom_llm_provider, + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 422: + exception_mapping_worked = True + raise BadRequestError( + message=f"{exception_provider} - {message}", + model=model, + llm_provider=custom_llm_provider, + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 429: + exception_mapping_worked = True + raise RateLimitError( + message=f"{exception_provider} - {message}", + model=model, + llm_provider=custom_llm_provider, + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 503: + exception_mapping_worked = True + raise ServiceUnavailableError( + message=f"{exception_provider} - {message}", + model=model, + llm_provider=custom_llm_provider, + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 504: # gateway timeout error + exception_mapping_worked = True + raise Timeout( + message=f"{exception_provider} - {message}", + model=model, + llm_provider=custom_llm_provider, + litellm_debug_info=extra_information, + ) + else: + exception_mapping_worked = True + raise APIError( + status_code=original_exception.status_code, + message=f"{exception_provider} - {message}", + llm_provider=custom_llm_provider, + model=model, + request=original_exception.request, + litellm_debug_info=extra_information, + ) + else: + # if no status code then it is an APIConnectionError: https://github.com/openai/openai-python#handling-errors + raise APIConnectionError( + message=f"{exception_provider} - {message}", + llm_provider=custom_llm_provider, + model=model, + litellm_debug_info=extra_information, + request=httpx.Request( + method="POST", url="https://api.openai.com/v1/" + ), + ) + elif custom_llm_provider == "anthropic": # one of the anthropics + if hasattr(original_exception, "message"): + if ( + "prompt is too long" in original_exception.message + 
or "prompt: length" in original_exception.message + ): + exception_mapping_worked = True + raise ContextWindowExceededError( + message=original_exception.message, + model=model, + llm_provider="anthropic", + response=original_exception.response, + ) + if "Invalid API Key" in original_exception.message: + exception_mapping_worked = True + raise AuthenticationError( + message=original_exception.message, + model=model, + llm_provider="anthropic", + response=original_exception.response, + ) + if hasattr(original_exception, "status_code"): + print_verbose(f"status_code: {original_exception.status_code}") + if original_exception.status_code == 401: + exception_mapping_worked = True + raise AuthenticationError( + message=f"AnthropicException - {original_exception.message}", + llm_provider="anthropic", + model=model, + response=original_exception.response, + ) + elif ( + original_exception.status_code == 400 + or original_exception.status_code == 413 + ): + exception_mapping_worked = True + raise BadRequestError( + message=f"AnthropicException - {original_exception.message}", + model=model, + llm_provider="anthropic", + response=original_exception.response, + ) + elif original_exception.status_code == 408: + exception_mapping_worked = True + raise Timeout( + message=f"AnthropicException - {original_exception.message}", + model=model, + llm_provider="anthropic", + ) + elif original_exception.status_code == 429: + exception_mapping_worked = True + raise RateLimitError( + message=f"AnthropicException - {original_exception.message}", + llm_provider="anthropic", + model=model, + response=original_exception.response, + ) + elif original_exception.status_code == 500: + exception_mapping_worked = True + raise APIError( + status_code=500, + message=f"AnthropicException - {original_exception.message}. 
Handle with `litellm.APIError`.", + llm_provider="anthropic", + model=model, + request=original_exception.request, + ) + elif custom_llm_provider == "replicate": + if "Incorrect authentication token" in error_str: + exception_mapping_worked = True + raise AuthenticationError( + message=f"ReplicateException - {error_str}", + llm_provider="replicate", + model=model, + response=original_exception.response, + ) + elif "input is too long" in error_str: + exception_mapping_worked = True + raise ContextWindowExceededError( + message=f"ReplicateException - {error_str}", + model=model, + llm_provider="replicate", + response=original_exception.response, + ) + elif exception_type == "ModelError": + exception_mapping_worked = True + raise BadRequestError( + message=f"ReplicateException - {error_str}", + model=model, + llm_provider="replicate", + response=original_exception.response, + ) + elif "Request was throttled" in error_str: + exception_mapping_worked = True + raise RateLimitError( + message=f"ReplicateException - {error_str}", + llm_provider="replicate", + model=model, + response=original_exception.response, + ) + elif hasattr(original_exception, "status_code"): + if original_exception.status_code == 401: + exception_mapping_worked = True + raise AuthenticationError( + message=f"ReplicateException - {original_exception.message}", + llm_provider="replicate", + model=model, + response=original_exception.response, + ) + elif ( + original_exception.status_code == 400 + or original_exception.status_code == 422 + or original_exception.status_code == 413 + ): + exception_mapping_worked = True + raise BadRequestError( + message=f"ReplicateException - {original_exception.message}", + model=model, + llm_provider="replicate", + response=original_exception.response, + ) + elif original_exception.status_code == 408: + exception_mapping_worked = True + raise Timeout( + message=f"ReplicateException - {original_exception.message}", + model=model, + llm_provider="replicate", + ) + elif original_exception.status_code == 429: + exception_mapping_worked = True + raise RateLimitError( + message=f"ReplicateException - {original_exception.message}", + llm_provider="replicate", + model=model, + response=original_exception.response, + ) + elif original_exception.status_code == 500: + exception_mapping_worked = True + raise ServiceUnavailableError( + message=f"ReplicateException - {original_exception.message}", + llm_provider="replicate", + model=model, + response=original_exception.response, + ) + exception_mapping_worked = True + raise APIError( + status_code=500, + message=f"ReplicateException - {str(original_exception)}", + llm_provider="replicate", + model=model, + request=httpx.Request( + method="POST", + url="https://api.replicate.com/v1/deployments", + ), + ) + elif custom_llm_provider == "watsonx": + if "token_quota_reached" in error_str: + exception_mapping_worked = True + raise RateLimitError( + message=f"WatsonxException: Rate Limit Errror - {error_str}", + llm_provider="watsonx", + model=model, + response=original_exception.response, + ) + elif custom_llm_provider == "predibase": + if "authorization denied for" in error_str: + exception_mapping_worked = True - print(f"complete_response: {complete_response}") - except litellm.Timeout as e: + # Predibase returns the raw API Key in the response - this block ensures it's not returned in the exception + if ( + error_str is not None + and isinstance(error_str, str) + and "bearer" in error_str.lower() + ): + # only keep the first 10 chars after the occurnence of 
"bearer" + _bearer_token_start_index = error_str.lower().find("bearer") + error_str = error_str[: _bearer_token_start_index + 14] + error_str += "XXXXXXX" + '"' + + raise AuthenticationError( + message=f"PredibaseException: Authentication Error - {error_str}", + llm_provider="predibase", + model=model, + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif custom_llm_provider == "bedrock": + if ( + "too many tokens" in error_str + or "expected maxLength:" in error_str + or "Input is too long" in error_str + or "prompt: length: 1.." in error_str + or "Too many input tokens" in error_str + ): + exception_mapping_worked = True + raise ContextWindowExceededError( + message=f"BedrockException: Context Window Error - {error_str}", + model=model, + llm_provider="bedrock", + response=original_exception.response, + ) + elif "Malformed input request" in error_str: + exception_mapping_worked = True + raise BadRequestError( + message=f"BedrockException - {error_str}", + model=model, + llm_provider="bedrock", + response=original_exception.response, + ) + elif ( + "Unable to locate credentials" in error_str + or "The security token included in the request is invalid" + in error_str + ): + exception_mapping_worked = True + raise AuthenticationError( + message=f"BedrockException Invalid Authentication - {error_str}", + model=model, + llm_provider="bedrock", + response=original_exception.response, + ) + elif "AccessDeniedException" in error_str: + exception_mapping_worked = True + raise PermissionDeniedError( + message=f"BedrockException PermissionDeniedError - {error_str}", + model=model, + llm_provider="bedrock", + response=original_exception.response, + ) + elif ( + "throttlingException" in error_str + or "ThrottlingException" in error_str + ): + exception_mapping_worked = True + raise RateLimitError( + message=f"BedrockException: Rate Limit Error - {error_str}", + model=model, + llm_provider="bedrock", + response=original_exception.response, + ) + elif ( + "Connect timeout on endpoint URL" in error_str + or "timed out" in error_str + ): + exception_mapping_worked = True + raise Timeout( + message=f"BedrockException: Timeout Error - {error_str}", + model=model, + llm_provider="bedrock", + ) + elif hasattr(original_exception, "status_code"): + if original_exception.status_code == 500: + exception_mapping_worked = True + raise ServiceUnavailableError( + message=f"BedrockException - {original_exception.message}", + llm_provider="bedrock", + model=model, + response=httpx.Response( + status_code=500, + request=httpx.Request( + method="POST", url="https://api.openai.com/v1/" + ), + ), + ) + elif original_exception.status_code == 401: + exception_mapping_worked = True + raise AuthenticationError( + message=f"BedrockException - {original_exception.message}", + llm_provider="bedrock", + model=model, + response=original_exception.response, + ) + elif original_exception.status_code == 400: + exception_mapping_worked = True + raise BadRequestError( + message=f"BedrockException - {original_exception.message}", + llm_provider="bedrock", + model=model, + response=original_exception.response, + ) + elif original_exception.status_code == 404: + exception_mapping_worked = True + raise NotFoundError( + message=f"BedrockException - {original_exception.message}", + llm_provider="bedrock", + model=model, + response=original_exception.response, + ) + elif original_exception.status_code == 408: + exception_mapping_worked = True + raise Timeout( + message=f"BedrockException - 
{original_exception.message}", + model=model, + llm_provider=custom_llm_provider, + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 422: + exception_mapping_worked = True + raise BadRequestError( + message=f"BedrockException - {original_exception.message}", + model=model, + llm_provider=custom_llm_provider, + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 429: + exception_mapping_worked = True + raise RateLimitError( + message=f"BedrockException - {original_exception.message}", + model=model, + llm_provider=custom_llm_provider, + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 503: + exception_mapping_worked = True + raise ServiceUnavailableError( + message=f"BedrockException - {original_exception.message}", + model=model, + llm_provider=custom_llm_provider, + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 504: # gateway timeout error + exception_mapping_worked = True + raise Timeout( + message=f"BedrockException - {original_exception.message}", + model=model, + llm_provider=custom_llm_provider, + litellm_debug_info=extra_information, + ) + elif custom_llm_provider == "sagemaker": + if "Unable to locate credentials" in error_str: + exception_mapping_worked = True + raise BadRequestError( + message=f"SagemakerException - {error_str}", + model=model, + llm_provider="sagemaker", + response=original_exception.response, + ) + elif ( + "Input validation error: `best_of` must be > 0 and <= 2" + in error_str + ): + exception_mapping_worked = True + raise BadRequestError( + message=f"SagemakerException - the value of 'n' must be > 0 and <= 2 for sagemaker endpoints", + model=model, + llm_provider="sagemaker", + response=original_exception.response, + ) + elif ( + "`inputs` tokens + `max_new_tokens` must be <=" in error_str + or "instance type with more CPU capacity or memory" in error_str + ): + exception_mapping_worked = True + raise ContextWindowExceededError( + message=f"SagemakerException - {error_str}", + model=model, + llm_provider="sagemaker", + response=original_exception.response, + ) + elif custom_llm_provider == "vertex_ai": + if ( + "Vertex AI API has not been used in project" in error_str + or "Unable to find your project" in error_str + ): + exception_mapping_worked = True + raise BadRequestError( + message=f"VertexAIException - {error_str}", + model=model, + llm_provider="vertex_ai", + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif ( + "None Unknown Error." in error_str + or "Content has no parts." in error_str + ): + exception_mapping_worked = True + raise APIError( + message=f"VertexAIException - {error_str}", + status_code=500, + model=model, + llm_provider="vertex_ai", + request=original_exception.request, + litellm_debug_info=extra_information, + ) + elif "403" in error_str: + exception_mapping_worked = True + raise BadRequestError( + message=f"VertexAIException - {error_str}", + model=model, + llm_provider="vertex_ai", + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif "The response was blocked." 
in error_str: + exception_mapping_worked = True + raise UnprocessableEntityError( + message=f"VertexAIException - {error_str}", + model=model, + llm_provider="vertex_ai", + litellm_debug_info=extra_information, + response=httpx.Response( + status_code=429, + request=httpx.Request( + method="POST", + url=" https://cloud.google.com/vertex-ai/", + ), + ), + ) + elif ( + "429 Quota exceeded" in error_str + or "IndexError: list index out of range" in error_str + or "429 Unable to submit request because the service is temporarily out of capacity." + in error_str + ): + exception_mapping_worked = True + raise RateLimitError( + message=f"VertexAIException - {error_str}", + model=model, + llm_provider="vertex_ai", + litellm_debug_info=extra_information, + response=httpx.Response( + status_code=429, + request=httpx.Request( + method="POST", + url=" https://cloud.google.com/vertex-ai/", + ), + ), + ) + if hasattr(original_exception, "status_code"): + if original_exception.status_code == 400: + exception_mapping_worked = True + raise BadRequestError( + message=f"VertexAIException - {error_str}", + model=model, + llm_provider="vertex_ai", + litellm_debug_info=extra_information, + response=original_exception.response, + ) + if original_exception.status_code == 500: + exception_mapping_worked = True +> raise APIError( + message=f"VertexAIException - {error_str}", + status_code=500, + model=model, + llm_provider="vertex_ai", + litellm_debug_info=extra_information, + request=original_exception.request, +E litellm.exceptions.APIError: VertexAIException - Parameter to MergeFrom() must be instance of same class: expected got . + +../utils.py:8922: APIError + +During handling of the above exception, another exception occurred: + + def test_gemini_pro_vision(): + try: + load_vertex_ai_credentials() + litellm.set_verbose = True + litellm.num_retries = 3 + resp = litellm.completion( + model="vertex_ai/gemini-1.5-flash-preview-0514", + messages=[ + { + "role": "user", + "content": [ + {"type": "text", "text": "Whats in this image?"}, + { + "type": "image_url", + "image_url": { + "url": "gs://cloud-samples-data/generative-ai/image/boats.jpeg" + }, + }, + ], + } + ], + ) + print(resp) + + prompt_tokens = resp.usage.prompt_tokens + + # DO Not DELETE this ASSERT + # Google counts the prompt tokens for us, we should ensure we use the tokens from the orignal response + assert prompt_tokens == 263 # the gemini api returns 263 to us + except litellm.RateLimitError as e: pass except Exception as e: -> pytest.fail(f"Error occurred: {e}") -E Failed: Error occurred: 'stream' + if "500 Internal error encountered.'" in str(e): + pass + else: +> pytest.fail(f"An exception occurred - {str(e)}") +E Failed: An exception occurred - VertexAIException - Parameter to MergeFrom() must be instance of same class: expected got . 
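(Editorial note, not part of the patch: the `exception_type()` changes above standardize how provider failures surface to callers. As a quick illustration of what that buys downstream code, here is a minimal caller-side sketch, assuming only the exception classes that appear in this traceback — `RateLimitError`, `Timeout`, `APIConnectionError`, `APIError` from `litellm.exceptions` — and the `litellm.completion` entry point.)

```python
# Minimal sketch (assumes only the litellm.exceptions classes visible in the traceback above).
# exception_type() re-raises provider failures as these classes, so callers can branch on the
# mapped type instead of parsing provider-specific error strings.
import litellm
from litellm.exceptions import APIConnectionError, APIError, RateLimitError, Timeout


def robust_completion(model: str, prompt: str):
    try:
        return litellm.completion(
            model=model,
            messages=[{"role": "user", "content": prompt}],
        )
    except RateLimitError:
        # mapped from provider 429s / quota and throttling error strings -> safe to back off and retry
        raise
    except Timeout:
        # mapped from 408 / 504 responses and "Request timed out" error strings
        raise
    except APIConnectionError:
        # raised when the underlying error carried no HTTP status code at all
        raise
    except APIError:
        # catch-all the mapping falls back to; the original status code is preserved when available
        raise
```

(The more specific classes are caught first; the generic `APIError` is left for last since it is the fallback the mapping raises when nothing else matches.)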
-test_streaming.py:373: Failed +test_amazing_vertex_completion.py:540: Failed ---------------------------- Captured stdout setup ----------------------------- ----------------------------- Captured stdout call ----------------------------- +loading vertex ai credentials +Read vertexai file path [92mRequest to litellm:[0m -[92mlitellm.completion(model='predibase/llama-3-8b-instruct', tenant_id='c4768f95', api_base='https://serving.app.predibase.com', api_key='pb_Qg9YbQo7UqqHdu0ozxN_aw', messages=[{'role': 'user', 'content': 'What is the meaning of life?'}], stream=True)[0m +[92mlitellm.completion(model='vertex_ai/gemini-1.5-flash-preview-0514', messages=[{'role': 'user', 'content': [{'type': 'text', 'text': 'Whats in this image?'}, {'type': 'image_url', 'image_url': {'url': 'gs://cloud-samples-data/generative-ai/image/boats.jpeg'}}]}])[0m self.optional_params: {} SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False -UNMAPPED PROVIDER, ASSUMING IT'S OPENAI/AZURE - model=llama-3-8b-instruct, custom_llm_provider=predibase -Final returned optional params: {'stream': True, 'tenant_id': 'c4768f95'} -self.optional_params: {'stream': True, 'tenant_id': 'c4768f95'} -[92m +(start) INSIDE THE VERTEX AI OPTIONAL PARAM BLOCK +(end) INSIDE THE VERTEX AI OPTIONAL PARAM BLOCK - optional_params: {} +Final returned optional params: {} +self.optional_params: {} +VERTEX AI: vertex_project=None; vertex_location=None +VERTEX AI: creds=; google application credentials: /var/folders/gf/5h3fnlwx40sdrycs4y5qzqx40000gn/T/tmpolsest5s -POST Request Sent from LiteLLM: -curl -X POST \ -https://serving.app.predibase.com/c4768f95/deployments/v2/llms/llama-3-8b-instruct/generate_stream \ --H 'content-type: application/json' -H 'Authorization: Bearer pb_Qg********************' \ --d '{'inputs': 'What is the meaning of life?', 'parameters': {'details': True, 'max_new_tokens': 256, 'return_full_text': False}}' +Making VertexAI Gemini Pro / Pro Vision Call + +Processing input messages = [{'role': 'user', 'content': [{'type': 'text', 'text': 'Whats in this image?'}, {'type': 'image_url', 'image_url': {'url': 'gs://cloud-samples-data/generative-ai/image/boats.jpeg'}}]}] +[92m +Request Sent from LiteLLM: +llm_model = GenerativeModel(gemini-1.5-flash-preview-0514) +response = llm_model.generate_content([{'role': 'user', 'parts': [{'text': 'Whats in this image?'}, file_data { + mime_type: "image/jpeg" + file_uri: "gs://cloud-samples-data/generative-ai/image/boats.jpeg" +} +]}]) +[0m + + +[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m +LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'. 
+ +Logging Details: logger_fn - None | callable(logger_fn) - False + + +[92mRequest to litellm:[0m +[92mlitellm.completion(model='vertex_ai/gemini-1.5-flash-preview-0514', messages=[{'role': 'user', 'content': [{'type': 'text', 'text': 'Whats in this image?'}, {'type': 'image_url', 'image_url': {'url': 'gs://cloud-samples-data/generative-ai/image/boats.jpeg'}}]}], litellm_call_id='7f48b7ab-47b3-4beb-b2b5-fa298be49d3f', litellm_logging_obj=)[0m + + +SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False +(start) INSIDE THE VERTEX AI OPTIONAL PARAM BLOCK +(end) INSIDE THE VERTEX AI OPTIONAL PARAM BLOCK - optional_params: {} +Final returned optional params: {} +self.optional_params: {} +VERTEX AI: vertex_project=None; vertex_location=None +VERTEX AI: creds=; google application credentials: /var/folders/gf/5h3fnlwx40sdrycs4y5qzqx40000gn/T/tmpolsest5s + +Making VertexAI Gemini Pro / Pro Vision Call + +Processing input messages = [{'role': 'user', 'content': [{'type': 'text', 'text': 'Whats in this image?'}, {'type': 'image_url', 'image_url': {'url': 'gs://cloud-samples-data/generative-ai/image/boats.jpeg'}}]}] +[92m +Request Sent from LiteLLM: +llm_model = GenerativeModel(gemini-1.5-flash-preview-0514) +response = llm_model.generate_content([{'role': 'user', 'parts': [{'text': 'Whats in this image?'}, file_data { + mime_type: "image/jpeg" + file_uri: "gs://cloud-samples-data/generative-ai/image/boats.jpeg" +} +]}]) +[0m + + +[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m +LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'. + +Logging Details: logger_fn - None | callable(logger_fn) - False +Logging Details LiteLLM-Failure Call +self.failure_callback: [] + + +[92mRequest to litellm:[0m +[92mlitellm.completion(model='vertex_ai/gemini-1.5-flash-preview-0514', messages=[{'role': 'user', 'content': [{'type': 'text', 'text': 'Whats in this image?'}, {'type': 'image_url', 'image_url': {'url': 'gs://cloud-samples-data/generative-ai/image/boats.jpeg'}}]}], litellm_call_id='7f48b7ab-47b3-4beb-b2b5-fa298be49d3f', litellm_logging_obj=)[0m + + +SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False +(start) INSIDE THE VERTEX AI OPTIONAL PARAM BLOCK +(end) INSIDE THE VERTEX AI OPTIONAL PARAM BLOCK - optional_params: {} +Final returned optional params: {} +self.optional_params: {} +VERTEX AI: vertex_project=None; vertex_location=None +VERTEX AI: creds=; google application credentials: /var/folders/gf/5h3fnlwx40sdrycs4y5qzqx40000gn/T/tmpolsest5s + +Making VertexAI Gemini Pro / Pro Vision Call + +Processing input messages = [{'role': 'user', 'content': [{'type': 'text', 'text': 'Whats in this image?'}, {'type': 'image_url', 'image_url': {'url': 'gs://cloud-samples-data/generative-ai/image/boats.jpeg'}}]}] +[92m +Request Sent from LiteLLM: +llm_model = GenerativeModel(gemini-1.5-flash-preview-0514) +response = llm_model.generate_content([{'role': 'user', 'parts': [{'text': 'Whats in this image?'}, file_data { + mime_type: "image/jpeg" + file_uri: "gs://cloud-samples-data/generative-ai/image/boats.jpeg" +} +]}]) +[0m + + +[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m +LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'. 
+ +Logging Details: logger_fn - None | callable(logger_fn) - False +Logging Details LiteLLM-Failure Call +self.failure_callback: [] + + +[92mRequest to litellm:[0m +[92mlitellm.completion(model='vertex_ai/gemini-1.5-flash-preview-0514', messages=[{'role': 'user', 'content': [{'type': 'text', 'text': 'Whats in this image?'}, {'type': 'image_url', 'image_url': {'url': 'gs://cloud-samples-data/generative-ai/image/boats.jpeg'}}]}], litellm_call_id='7f48b7ab-47b3-4beb-b2b5-fa298be49d3f', litellm_logging_obj=)[0m + + +SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False +(start) INSIDE THE VERTEX AI OPTIONAL PARAM BLOCK +(end) INSIDE THE VERTEX AI OPTIONAL PARAM BLOCK - optional_params: {} +Final returned optional params: {} +self.optional_params: {} +VERTEX AI: vertex_project=None; vertex_location=None +VERTEX AI: creds=; google application credentials: /var/folders/gf/5h3fnlwx40sdrycs4y5qzqx40000gn/T/tmpolsest5s + +Making VertexAI Gemini Pro / Pro Vision Call + +Processing input messages = [{'role': 'user', 'content': [{'type': 'text', 'text': 'Whats in this image?'}, {'type': 'image_url', 'image_url': {'url': 'gs://cloud-samples-data/generative-ai/image/boats.jpeg'}}]}] +[92m +Request Sent from LiteLLM: +llm_model = GenerativeModel(gemini-1.5-flash-preview-0514) +response = llm_model.generate_content([{'role': 'user', 'parts': [{'text': 'Whats in this image?'}, file_data { + mime_type: "image/jpeg" + file_uri: "gs://cloud-samples-data/generative-ai/image/boats.jpeg" +} +]}]) [0m @@ -6878,118 +4208,67 @@ Logging Details: logger_fn - None | callable(logger_fn) - False Logging Details LiteLLM-Failure Call self.failure_callback: [] =============================== warnings summary =============================== -../../../../../../opt/homebrew/lib/python3.11/site-packages/pydantic/_internal/_config.py:284: 25 warnings - /opt/homebrew/lib/python3.11/site-packages/pydantic/_internal/_config.py:284: PydanticDeprecatedSince20: Support for class-based `config` is deprecated, use ConfigDict instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/ +../proxy/myenv/lib/python3.11/site-packages/pydantic/_internal/_config.py:284: 25 warnings + /Users/krrishdholakia/Documents/litellm/litellm/proxy/myenv/lib/python3.11/site-packages/pydantic/_internal/_config.py:284: PydanticDeprecatedSince20: Support for class-based `config` is deprecated, use ConfigDict instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/ warnings.warn(DEPRECATION_MESSAGE, DeprecationWarning) -../proxy/_types.py:219 - /Users/krrishdholakia/Documents/litellm/litellm/proxy/_types.py:219: PydanticDeprecatedSince20: Pydantic V1 style `@root_validator` validators are deprecated. You should migrate to Pydantic V2 style `@model_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/ +../proxy/_types.py:255 + /Users/krrishdholakia/Documents/litellm/litellm/proxy/_types.py:255: PydanticDeprecatedSince20: Pydantic V1 style `@root_validator` validators are deprecated. You should migrate to Pydantic V2 style `@model_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. 
See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/ @root_validator(pre=True) -../proxy/_types.py:306 - /Users/krrishdholakia/Documents/litellm/litellm/proxy/_types.py:306: PydanticDeprecatedSince20: `pydantic.config.Extra` is deprecated, use literal values instead (e.g. `extra='allow'`). Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/ +../proxy/_types.py:342 + /Users/krrishdholakia/Documents/litellm/litellm/proxy/_types.py:342: PydanticDeprecatedSince20: `pydantic.config.Extra` is deprecated, use literal values instead (e.g. `extra='allow'`). Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/ extra = Extra.allow # Allow extra fields -../proxy/_types.py:309 - /Users/krrishdholakia/Documents/litellm/litellm/proxy/_types.py:309: PydanticDeprecatedSince20: Pydantic V1 style `@root_validator` validators are deprecated. You should migrate to Pydantic V2 style `@model_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/ +../proxy/_types.py:345 + /Users/krrishdholakia/Documents/litellm/litellm/proxy/_types.py:345: PydanticDeprecatedSince20: Pydantic V1 style `@root_validator` validators are deprecated. You should migrate to Pydantic V2 style `@model_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/ @root_validator(pre=True) -../proxy/_types.py:338 - /Users/krrishdholakia/Documents/litellm/litellm/proxy/_types.py:338: PydanticDeprecatedSince20: Pydantic V1 style `@root_validator` validators are deprecated. You should migrate to Pydantic V2 style `@model_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/ +../proxy/_types.py:374 + /Users/krrishdholakia/Documents/litellm/litellm/proxy/_types.py:374: PydanticDeprecatedSince20: Pydantic V1 style `@root_validator` validators are deprecated. You should migrate to Pydantic V2 style `@model_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/ @root_validator(pre=True) -../proxy/_types.py:385 - /Users/krrishdholakia/Documents/litellm/litellm/proxy/_types.py:385: PydanticDeprecatedSince20: Pydantic V1 style `@root_validator` validators are deprecated. You should migrate to Pydantic V2 style `@model_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/ +../proxy/_types.py:421 + /Users/krrishdholakia/Documents/litellm/litellm/proxy/_types.py:421: PydanticDeprecatedSince20: Pydantic V1 style `@root_validator` validators are deprecated. You should migrate to Pydantic V2 style `@model_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. 
See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/ @root_validator(pre=True) -../proxy/_types.py:454 - /Users/krrishdholakia/Documents/litellm/litellm/proxy/_types.py:454: PydanticDeprecatedSince20: Pydantic V1 style `@root_validator` validators are deprecated. You should migrate to Pydantic V2 style `@model_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/ +../proxy/_types.py:490 + /Users/krrishdholakia/Documents/litellm/litellm/proxy/_types.py:490: PydanticDeprecatedSince20: Pydantic V1 style `@root_validator` validators are deprecated. You should migrate to Pydantic V2 style `@model_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/ @root_validator(pre=True) -../proxy/_types.py:474 - /Users/krrishdholakia/Documents/litellm/litellm/proxy/_types.py:474: PydanticDeprecatedSince20: Pydantic V1 style `@root_validator` validators are deprecated. You should migrate to Pydantic V2 style `@model_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/ +../proxy/_types.py:510 + /Users/krrishdholakia/Documents/litellm/litellm/proxy/_types.py:510: PydanticDeprecatedSince20: Pydantic V1 style `@root_validator` validators are deprecated. You should migrate to Pydantic V2 style `@model_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/ @root_validator(pre=True) -../proxy/_types.py:487 - /Users/krrishdholakia/Documents/litellm/litellm/proxy/_types.py:487: PydanticDeprecatedSince20: Pydantic V1 style `@root_validator` validators are deprecated. You should migrate to Pydantic V2 style `@model_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/ +../proxy/_types.py:523 + /Users/krrishdholakia/Documents/litellm/litellm/proxy/_types.py:523: PydanticDeprecatedSince20: Pydantic V1 style `@root_validator` validators are deprecated. You should migrate to Pydantic V2 style `@model_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/ @root_validator(pre=True) -../proxy/_types.py:532 - /Users/krrishdholakia/Documents/litellm/litellm/proxy/_types.py:532: PydanticDeprecatedSince20: Pydantic V1 style `@root_validator` validators are deprecated. You should migrate to Pydantic V2 style `@model_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/ +../proxy/_types.py:568 + /Users/krrishdholakia/Documents/litellm/litellm/proxy/_types.py:568: PydanticDeprecatedSince20: Pydantic V1 style `@root_validator` validators are deprecated. You should migrate to Pydantic V2 style `@model_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. 
See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/ @root_validator(pre=True) -../proxy/_types.py:569 - /Users/krrishdholakia/Documents/litellm/litellm/proxy/_types.py:569: PydanticDeprecatedSince20: Pydantic V1 style `@root_validator` validators are deprecated. You should migrate to Pydantic V2 style `@model_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/ +../proxy/_types.py:605 + /Users/krrishdholakia/Documents/litellm/litellm/proxy/_types.py:605: PydanticDeprecatedSince20: Pydantic V1 style `@root_validator` validators are deprecated. You should migrate to Pydantic V2 style `@model_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/ @root_validator(pre=True) -../proxy/_types.py:864 - /Users/krrishdholakia/Documents/litellm/litellm/proxy/_types.py:864: PydanticDeprecatedSince20: Pydantic V1 style `@root_validator` validators are deprecated. You should migrate to Pydantic V2 style `@model_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/ +../proxy/_types.py:923 + /Users/krrishdholakia/Documents/litellm/litellm/proxy/_types.py:923: PydanticDeprecatedSince20: Pydantic V1 style `@root_validator` validators are deprecated. You should migrate to Pydantic V2 style `@model_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/ @root_validator(pre=True) -../proxy/_types.py:891 - /Users/krrishdholakia/Documents/litellm/litellm/proxy/_types.py:891: PydanticDeprecatedSince20: Pydantic V1 style `@root_validator` validators are deprecated. You should migrate to Pydantic V2 style `@model_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/ +../proxy/_types.py:950 + /Users/krrishdholakia/Documents/litellm/litellm/proxy/_types.py:950: PydanticDeprecatedSince20: Pydantic V1 style `@root_validator` validators are deprecated. You should migrate to Pydantic V2 style `@model_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/ @root_validator(pre=True) -../proxy/_types.py:912 - /Users/krrishdholakia/Documents/litellm/litellm/proxy/_types.py:912: PydanticDeprecatedSince20: Pydantic V1 style `@root_validator` validators are deprecated. You should migrate to Pydantic V2 style `@model_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/ +../proxy/_types.py:971 + /Users/krrishdholakia/Documents/litellm/litellm/proxy/_types.py:971: PydanticDeprecatedSince20: Pydantic V1 style `@root_validator` validators are deprecated. You should migrate to Pydantic V2 style `@model_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. 
See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/ @root_validator(pre=True) -../utils.py:39 - /Users/krrishdholakia/Documents/litellm/litellm/utils.py:39: DeprecationWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html - import pkg_resources # type: ignore - -../../../../../../opt/homebrew/lib/python3.11/site-packages/pkg_resources/__init__.py:2832: 10 warnings - /opt/homebrew/lib/python3.11/site-packages/pkg_resources/__init__.py:2832: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('google')`. - Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages - declare_namespace(pkg) - -../../../../../../opt/homebrew/lib/python3.11/site-packages/pkg_resources/__init__.py:2832 -../../../../../../opt/homebrew/lib/python3.11/site-packages/pkg_resources/__init__.py:2832 -../../../../../../opt/homebrew/lib/python3.11/site-packages/pkg_resources/__init__.py:2832 -../../../../../../opt/homebrew/lib/python3.11/site-packages/pkg_resources/__init__.py:2832 -../../../../../../opt/homebrew/lib/python3.11/site-packages/pkg_resources/__init__.py:2832 - /opt/homebrew/lib/python3.11/site-packages/pkg_resources/__init__.py:2832: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('google.cloud')`. - Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages - declare_namespace(pkg) - -../../../../../../opt/homebrew/lib/python3.11/site-packages/pkg_resources/__init__.py:2317 -../../../../../../opt/homebrew/lib/python3.11/site-packages/pkg_resources/__init__.py:2317 -../../../../../../opt/homebrew/lib/python3.11/site-packages/pkg_resources/__init__.py:2317 - /opt/homebrew/lib/python3.11/site-packages/pkg_resources/__init__.py:2317: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('google')`. - Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages - declare_namespace(parent) - -../../../../../../opt/homebrew/lib/python3.11/site-packages/pkg_resources/__init__.py:2832 - /opt/homebrew/lib/python3.11/site-packages/pkg_resources/__init__.py:2832: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('google.logging')`. - Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages - declare_namespace(pkg) - -../../../../../../opt/homebrew/lib/python3.11/site-packages/pkg_resources/__init__.py:2832 - /opt/homebrew/lib/python3.11/site-packages/pkg_resources/__init__.py:2832: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('google.iam')`. - Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. 
See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages - declare_namespace(pkg) - -../../../../../../opt/homebrew/lib/python3.11/site-packages/pkg_resources/__init__.py:2832 - /opt/homebrew/lib/python3.11/site-packages/pkg_resources/__init__.py:2832: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('mpl_toolkits')`. - Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages - declare_namespace(pkg) - -../../../../../../opt/homebrew/lib/python3.11/site-packages/pkg_resources/__init__.py:2832 - /opt/homebrew/lib/python3.11/site-packages/pkg_resources/__init__.py:2832: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('sphinxcontrib')`. - Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages - declare_namespace(pkg) - -../../../../../../opt/homebrew/lib/python3.11/site-packages/pkg_resources/__init__.py:2832 -../../../../../../opt/homebrew/lib/python3.11/site-packages/pkg_resources/__init__.py:2832 - /opt/homebrew/lib/python3.11/site-packages/pkg_resources/__init__.py:2832: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('zope')`. - Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages - declare_namespace(pkg) - -test_streaming.py::test_completion_predibase_streaming[False] - /opt/homebrew/lib/python3.11/site-packages/httpx/_content.py:204: DeprecationWarning: Use 'content=<...>' to upload raw bytes/text content. - warnings.warn(message, DeprecationWarning) +../utils.py:60 + /Users/krrishdholakia/Documents/litellm/litellm/utils.py:60: DeprecationWarning: open_text is deprecated. Use files() instead. Refer to https://importlib-resources.readthedocs.io/en/latest/using.html#migrating-from-legacy for migration advice. + with resources.open_text("litellm.llms.tokenizers", "anthropic_tokenizer.json") as f: -- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html =========================== short test summary info ============================ -FAILED test_streaming.py::test_completion_predibase_streaming[True] - Failed:... -=================== 1 failed, 1 passed, 64 warnings in 5.28s =================== +FAILED test_amazing_vertex_completion.py::test_gemini_pro_vision - Failed: An... 
+======================== 1 failed, 39 warnings in 2.09s ======================== diff --git a/litellm/tests/test_alangfuse.py b/litellm/tests/test_alangfuse.py index 31f1f7bf86..97d6baaaee 100644 --- a/litellm/tests/test_alangfuse.py +++ b/litellm/tests/test_alangfuse.py @@ -242,12 +242,24 @@ async def test_langfuse_masked_input_output(langfuse_client): response = await create_async_task( model="gpt-3.5-turbo", messages=[{"role": "user", "content": "This is a test"}], - metadata={"trace_id": _unique_trace_name, "mask_input": mask_value, "mask_output": mask_value}, - mock_response="This is a test response" + metadata={ + "trace_id": _unique_trace_name, + "mask_input": mask_value, + "mask_output": mask_value, + }, + mock_response="This is a test response", ) print(response) - expected_input = "redacted-by-litellm" if mask_value else {'messages': [{'content': 'This is a test', 'role': 'user'}]} - expected_output = "redacted-by-litellm" if mask_value else {'content': 'This is a test response', 'role': 'assistant'} + expected_input = ( + "redacted-by-litellm" + if mask_value + else {"messages": [{"content": "This is a test", "role": "user"}]} + ) + expected_output = ( + "redacted-by-litellm" + if mask_value + else {"content": "This is a test response", "role": "assistant"} + ) langfuse_client.flush() await asyncio.sleep(2) @@ -262,6 +274,7 @@ async def test_langfuse_masked_input_output(langfuse_client): assert generations[0].input == expected_input assert generations[0].output == expected_output + @pytest.mark.asyncio async def test_langfuse_logging_metadata(langfuse_client): """ @@ -523,7 +536,7 @@ def test_langfuse_logging_function_calling(): # test_langfuse_logging_function_calling() -def test_langfuse_existing_trace_id(): +def test_aaalangfuse_existing_trace_id(): """ When existing trace id is passed, don't set trace params -> prevents overwriting the trace @@ -577,7 +590,7 @@ def test_langfuse_existing_trace_id(): "verbose": False, "custom_llm_provider": "openai", "api_base": "https://api.openai.com/v1/", - "litellm_call_id": "508113a1-c6f1-48ce-a3e1-01c6cce9330e", + "litellm_call_id": None, "model_alias_map": {}, "completion_call_id": None, "metadata": None, @@ -593,7 +606,7 @@ def test_langfuse_existing_trace_id(): "stream": False, "user": None, "call_type": "completion", - "litellm_call_id": "508113a1-c6f1-48ce-a3e1-01c6cce9330e", + "litellm_call_id": None, "completion_start_time": "2024-05-01 07:31:29.903685", "temperature": 0.1, "extra_body": {}, @@ -633,6 +646,8 @@ def test_langfuse_existing_trace_id(): trace_id = langfuse_response_object["trace_id"] + assert trace_id is not None + langfuse_client.flush() time.sleep(2) diff --git a/litellm/tests/test_alerting.py b/litellm/tests/test_alerting.py index 6770618423..2f8d7f3efd 100644 --- a/litellm/tests/test_alerting.py +++ b/litellm/tests/test_alerting.py @@ -1,7 +1,7 @@ # What is this? 
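(Editorial note, not part of the patch: for the `test_langfuse_masked_input_output` changes above, a usage-level sketch of what the test asserts may help. It assumes only what the test itself shows — the `langfuse` success callback plus the `trace_id` / `mask_input` / `mask_output` metadata keys, with masked values logged as `"redacted-by-litellm"`; the env var names are the standard Langfuse credentials and should be treated as assumptions here.)

```python
# Sketch based on the test above, not a full integration guide.
import os
import litellm

os.environ["LANGFUSE_PUBLIC_KEY"] = ""  # assumed standard Langfuse credential env vars
os.environ["LANGFUSE_SECRET_KEY"] = ""

litellm.success_callback = ["langfuse"]  # log successful calls to Langfuse

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "This is a test"}],
    metadata={
        "trace_id": "my-existing-trace-id",  # reuse an existing trace instead of creating a new one
        "mask_input": True,   # logged input becomes "redacted-by-litellm"
        "mask_output": True,  # logged output becomes "redacted-by-litellm"
    },
    mock_response="This is a test response",  # avoids a real provider call, as in the test
)
print(response)
```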
## Tests slack alerting on proxy logging object -import sys, json +import sys, json, uuid, random import os import io, asyncio from datetime import datetime, timedelta @@ -22,6 +22,7 @@ import unittest.mock from unittest.mock import AsyncMock import pytest from litellm.router import AlertingConfig, Router +from litellm.proxy._types import CallInfo @pytest.mark.parametrize( @@ -123,7 +124,9 @@ from datetime import datetime, timedelta @pytest.fixture def slack_alerting(): - return SlackAlerting(alerting_threshold=1, internal_usage_cache=DualCache()) + return SlackAlerting( + alerting_threshold=1, internal_usage_cache=DualCache(), alerting=["slack"] + ) # Test for hanging LLM responses @@ -161,7 +164,10 @@ async def test_budget_alerts_crossed(slack_alerting): user_current_spend = 101 with patch.object(slack_alerting, "send_alert", new=AsyncMock()) as mock_send_alert: await slack_alerting.budget_alerts( - "user_budget", user_max_budget, user_current_spend + "user_budget", + user_info=CallInfo( + token="", spend=user_current_spend, max_budget=user_max_budget + ), ) mock_send_alert.assert_awaited_once() @@ -173,12 +179,18 @@ async def test_budget_alerts_crossed_again(slack_alerting): user_current_spend = 101 with patch.object(slack_alerting, "send_alert", new=AsyncMock()) as mock_send_alert: await slack_alerting.budget_alerts( - "user_budget", user_max_budget, user_current_spend + "user_budget", + user_info=CallInfo( + token="", spend=user_current_spend, max_budget=user_max_budget + ), ) mock_send_alert.assert_awaited_once() mock_send_alert.reset_mock() await slack_alerting.budget_alerts( - "user_budget", user_max_budget, user_current_spend + "user_budget", + user_info=CallInfo( + token="", spend=user_current_spend, max_budget=user_max_budget + ), ) mock_send_alert.assert_not_awaited() @@ -365,27 +377,29 @@ async def test_send_llm_exception_to_slack(): @pytest.mark.asyncio async def test_send_daily_reports_ignores_zero_values(): router = MagicMock() - router.get_model_ids.return_value = ['model1', 'model2', 'model3'] - + router.get_model_ids.return_value = ["model1", "model2", "model3"] + slack_alerting = SlackAlerting(internal_usage_cache=MagicMock()) # model1:failed=None, model2:failed=0, model3:failed=10, model1:latency=0; model2:latency=0; model3:latency=None - slack_alerting.internal_usage_cache.async_batch_get_cache = AsyncMock(return_value=[None, 0, 10, 0, 0, None]) + slack_alerting.internal_usage_cache.async_batch_get_cache = AsyncMock( + return_value=[None, 0, 10, 0, 0, None] + ) slack_alerting.internal_usage_cache.async_batch_set_cache = AsyncMock() router.get_model_info.side_effect = lambda x: {"litellm_params": {"model": x}} - - with patch.object(slack_alerting, 'send_alert', new=AsyncMock()) as mock_send_alert: + + with patch.object(slack_alerting, "send_alert", new=AsyncMock()) as mock_send_alert: result = await slack_alerting.send_daily_reports(router) - + # Check that the send_alert method was called mock_send_alert.assert_called_once() - message = mock_send_alert.call_args[1]['message'] - + message = mock_send_alert.call_args[1]["message"] + # Ensure the message includes only the non-zero, non-None metrics assert "model3" in message assert "model2" not in message assert "model1" not in message - + assert result == True @@ -393,15 +407,91 @@ async def test_send_daily_reports_ignores_zero_values(): @pytest.mark.asyncio async def test_send_daily_reports_all_zero_or_none(): router = MagicMock() - router.get_model_ids.return_value = ['model1', 'model2', 'model3'] - + 
router.get_model_ids.return_value = ["model1", "model2", "model3"] + slack_alerting = SlackAlerting(internal_usage_cache=MagicMock()) - slack_alerting.internal_usage_cache.async_batch_get_cache = AsyncMock(return_value=[None, 0, None, 0, None, 0]) - - with patch.object(slack_alerting, 'send_alert', new=AsyncMock()) as mock_send_alert: + slack_alerting.internal_usage_cache.async_batch_get_cache = AsyncMock( + return_value=[None, 0, None, 0, None, 0] + ) + + with patch.object(slack_alerting, "send_alert", new=AsyncMock()) as mock_send_alert: result = await slack_alerting.send_daily_reports(router) - + # Check that the send_alert method was not called mock_send_alert.assert_not_called() - + assert result == False + + +# test user budget crossed alert sent only once, even if user makes multiple calls +@pytest.mark.parametrize( + "alerting_type", + [ + "token_budget", + "user_budget", + "team_budget", + "proxy_budget", + "projected_limit_exceeded", + ], +) +@pytest.mark.asyncio +async def test_send_token_budget_crossed_alerts(alerting_type): + slack_alerting = SlackAlerting() + + with patch.object(slack_alerting, "send_alert", new=AsyncMock()) as mock_send_alert: + user_info = { + "token": "50e55ca5bfbd0759697538e8d23c0cd5031f52d9e19e176d7233b20c7c4d3403", + "spend": 86, + "max_budget": 100, + "user_id": "ishaan@berri.ai", + "user_email": "ishaan@berri.ai", + "key_alias": "my-test-key", + "projected_exceeded_date": "10/20/2024", + "projected_spend": 200, + } + + user_info = CallInfo(**user_info) + + for _ in range(50): + await slack_alerting.budget_alerts( + type=alerting_type, + user_info=user_info, + ) + mock_send_alert.assert_awaited_once() + + +@pytest.mark.parametrize( + "alerting_type", + [ + "token_budget", + "user_budget", + "team_budget", + "proxy_budget", + "projected_limit_exceeded", + ], +) +@pytest.mark.asyncio +async def test_webhook_alerting(alerting_type): + slack_alerting = SlackAlerting(alerting=["webhook"]) + + with patch.object( + slack_alerting, "send_webhook_alert", new=AsyncMock() + ) as mock_send_alert: + user_info = { + "token": "50e55ca5bfbd0759697538e8d23c0cd5031f52d9e19e176d7233b20c7c4d3403", + "spend": 1, + "max_budget": 0, + "user_id": "ishaan@berri.ai", + "user_email": "ishaan@berri.ai", + "key_alias": "my-test-key", + "projected_exceeded_date": "10/20/2024", + "projected_spend": 200, + } + + user_info = CallInfo(**user_info) + for _ in range(50): + await slack_alerting.budget_alerts( + type=alerting_type, + user_info=user_info, + ) + mock_send_alert.assert_awaited_once() diff --git a/litellm/tests/test_amazing_vertex_completion.py b/litellm/tests/test_amazing_vertex_completion.py index ad3fb3cc3e..4df39fec08 100644 --- a/litellm/tests/test_amazing_vertex_completion.py +++ b/litellm/tests/test_amazing_vertex_completion.py @@ -16,6 +16,7 @@ from litellm.tests.test_streaming import streaming_format_tests import json import os import tempfile +from litellm.llms.vertex_ai import _gemini_convert_messages_with_history litellm.num_retries = 3 litellm.cache = None @@ -98,7 +99,7 @@ def load_vertex_ai_credentials(): @pytest.mark.asyncio -async def get_response(): +async def test_get_response(): load_vertex_ai_credentials() prompt = '\ndef count_nums(arr):\n """\n Write a function count_nums which takes an array of integers and returns\n the number of elements which has a sum of digits > 0.\n If a number is negative, then its first signed digit will be negative:\n e.g. 
-123 has signed digits -1, 2, and 3.\n >>> count_nums([]) == 0\n >>> count_nums([-1, 11, -11]) == 1\n >>> count_nums([1, 1, 2]) == 3\n """\n' try: @@ -371,14 +372,13 @@ def test_vertex_ai_stream(): "gemini-1.5-pro", "gemini-1.5-pro-preview-0215", ]: - # our account does not have access to this model + # ouraccount does not have access to this model continue print("making request", model) response = completion( model=model, - messages=[ - {"role": "user", "content": "write 10 line code code for saying hi"} - ], + messages=[{"role": "user", "content": "hello tell me a short story"}], + max_tokens=15, stream=True, ) completed_str = "" @@ -389,7 +389,7 @@ def test_vertex_ai_stream(): completed_str += content assert type(content) == str # pass - assert len(completed_str) > 4 + assert len(completed_str) > 1 except litellm.RateLimitError as e: pass except Exception as e: @@ -595,30 +595,68 @@ def test_gemini_pro_vision_base64(): async def test_gemini_pro_function_calling(sync_mode): try: load_vertex_ai_credentials() - data = { - "model": "vertex_ai/gemini-pro", - "messages": [ - { - "role": "user", - "content": "Call the submit_cities function with San Francisco and New York", - } - ], - "tools": [ - { - "type": "function", - "function": { - "name": "submit_cities", - "description": "Submits a list of cities", - "parameters": { - "type": "object", - "properties": { - "cities": {"type": "array", "items": {"type": "string"}} - }, - "required": ["cities"], + litellm.set_verbose = True + + messages = [ + { + "role": "system", + "content": "Your name is Litellm Bot, you are a helpful assistant", + }, + # User asks for their name and weather in San Francisco + { + "role": "user", + "content": "Hello, what is your name and can you tell me the weather?", + }, + # Assistant replies with a tool call + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_123", + "type": "function", + "index": 0, + "function": { + "name": "get_weather", + "arguments": '{"location":"San Francisco, CA"}', }, + } + ], + }, + # The result of the tool call is added to the history + { + "role": "tool", + "tool_call_id": "call_123", + "name": "get_weather", + "content": "27 degrees celsius and clear in San Francisco, CA", + }, + # Now the assistant can reply with the result of the tool call. + ] + + tools = [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. 
San Francisco, CA", + } + }, + "required": ["location"], }, - } - ], + }, + } + ] + + data = { + "model": "vertex_ai/gemini-1.5-pro-preview-0514", + "messages": messages, + "tools": tools, } if sync_mode: response = litellm.completion(**data) @@ -638,7 +676,7 @@ async def test_gemini_pro_function_calling(sync_mode): # gemini_pro_function_calling() -@pytest.mark.parametrize("sync_mode", [False, True]) +@pytest.mark.parametrize("sync_mode", [True]) @pytest.mark.asyncio async def test_gemini_pro_function_calling_streaming(sync_mode): load_vertex_ai_credentials() @@ -713,7 +751,7 @@ async def test_gemini_pro_async_function_calling(): "type": "function", "function": { "name": "get_current_weather", - "description": "Get the current weather in a given location", + "description": "Get the current weather in a given location.", "parameters": { "type": "object", "properties": { @@ -743,8 +781,9 @@ async def test_gemini_pro_async_function_calling(): print(f"completion: {completion}") assert completion.choices[0].message.content is None assert len(completion.choices[0].message.tool_calls) == 1 - except litellm.APIError as e: - pass + + # except litellm.APIError as e: + # pass except litellm.RateLimitError as e: pass except Exception as e: @@ -894,3 +933,45 @@ async def test_vertexai_aembedding(): # traceback.print_exc() # raise e # test_gemini_pro_vision_async() + + +def test_prompt_factory(): + messages = [ + { + "role": "system", + "content": "Your name is Litellm Bot, you are a helpful assistant", + }, + # User asks for their name and weather in San Francisco + { + "role": "user", + "content": "Hello, what is your name and can you tell me the weather?", + }, + # Assistant replies with a tool call + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_123", + "type": "function", + "index": 0, + "function": { + "name": "get_weather", + "arguments": '{"location":"San Francisco, CA"}', + }, + } + ], + }, + # The result of the tool call is added to the history + { + "role": "tool", + "tool_call_id": "call_123", + "name": "get_weather", + "content": "27 degrees celsius and clear in San Francisco, CA", + }, + # Now the assistant can reply with the result of the tool call. 
+ ] + + translated_messages = _gemini_convert_messages_with_history(messages=messages) + + print(f"\n\ntranslated_messages: {translated_messages}\ntranslated_messages") diff --git a/litellm/tests/test_bedrock_completion.py b/litellm/tests/test_bedrock_completion.py index b8b08003c5..8d7f692dc2 100644 --- a/litellm/tests/test_bedrock_completion.py +++ b/litellm/tests/test_bedrock_completion.py @@ -206,6 +206,7 @@ def test_completion_bedrock_claude_sts_client_auth(): # test_completion_bedrock_claude_sts_client_auth() + @pytest.mark.skip(reason="We don't have Circle CI OIDC credentials as yet") def test_completion_bedrock_claude_sts_oidc_auth(): print("\ncalling bedrock claude with oidc auth") @@ -244,7 +245,7 @@ def test_bedrock_extra_headers(): messages=messages, max_tokens=10, temperature=0.78, - extra_headers={"x-key": "x_key_value"} + extra_headers={"x-key": "x_key_value"}, ) # Add any assertions here to check the response assert len(response.choices) > 0 @@ -259,7 +260,7 @@ def test_bedrock_claude_3(): try: litellm.set_verbose = True data = { - "max_tokens": 2000, + "max_tokens": 100, "stream": False, "temperature": 0.3, "messages": [ @@ -282,6 +283,7 @@ def test_bedrock_claude_3(): } response: ModelResponse = completion( model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0", + num_retries=3, # messages=messages, # max_tokens=10, # temperature=0.78, diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index 4441ddf29a..8ab3805e8e 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -1,4 +1,4 @@ -import sys, os +import sys, os, json import traceback from dotenv import load_dotenv @@ -7,7 +7,7 @@ import os, io sys.path.insert( 0, os.path.abspath("../..") -) # Adds the parent directory to the, system path +) # Adds the parent directory to the system path import pytest import litellm from litellm import embedding, completion, completion_cost, Timeout @@ -38,7 +38,7 @@ def reset_callbacks(): @pytest.mark.skip(reason="Local test") def test_response_model_none(): """ - Addresses - https://github.com/BerriAI/litellm/issues/2972 + Addresses:https://github.com/BerriAI/litellm/issues/2972 """ x = completion( model="mymodel", @@ -278,7 +278,8 @@ def test_completion_claude_3_function_call(): model="anthropic/claude-3-opus-20240229", messages=messages, tools=tools, - tool_choice="auto", + tool_choice={"type": "tool", "name": "get_weather"}, + extra_headers={"anthropic-beta": "tools-2024-05-16"}, ) # Add any assertions, here to check response args print(response) @@ -1053,6 +1054,25 @@ def test_completion_azure_gpt4_vision(): # test_completion_azure_gpt4_vision() +@pytest.mark.parametrize("model", ["gpt-3.5-turbo", "gpt-4", "gpt-4o"]) +def test_completion_openai_params(model): + litellm.drop_params = True + messages = [ + { + "role": "user", + "content": """Generate JSON about Bill Gates: { "full_name": "", "title": "" }""", + } + ] + + response = completion( + model=model, + messages=messages, + response_format={"type": "json_object"}, + ) + + print(f"response: {response}") + + def test_completion_fireworks_ai(): try: litellm.set_verbose = True @@ -1161,28 +1181,28 @@ HF Tests we should pass # Test util to sort models to TGI, conv, None def test_get_hf_task_for_model(): model = "glaiveai/glaive-coder-7b" - model_type = litellm.llms.huggingface_restapi.get_hf_task_for_model(model) + model_type, _ = litellm.llms.huggingface_restapi.get_hf_task_for_model(model) print(f"model:{model}, model type: {model_type}") assert model_type == 
"text-generation-inference" model = "meta-llama/Llama-2-7b-hf" - model_type = litellm.llms.huggingface_restapi.get_hf_task_for_model(model) + model_type, _ = litellm.llms.huggingface_restapi.get_hf_task_for_model(model) print(f"model:{model}, model type: {model_type}") assert model_type == "text-generation-inference" model = "facebook/blenderbot-400M-distill" - model_type = litellm.llms.huggingface_restapi.get_hf_task_for_model(model) + model_type, _ = litellm.llms.huggingface_restapi.get_hf_task_for_model(model) print(f"model:{model}, model type: {model_type}") assert model_type == "conversational" model = "facebook/blenderbot-3B" - model_type = litellm.llms.huggingface_restapi.get_hf_task_for_model(model) + model_type, _ = litellm.llms.huggingface_restapi.get_hf_task_for_model(model) print(f"model:{model}, model type: {model_type}") assert model_type == "conversational" # neither Conv or None model = "roneneldan/TinyStories-3M" - model_type = litellm.llms.huggingface_restapi.get_hf_task_for_model(model) + model_type, _ = litellm.llms.huggingface_restapi.get_hf_task_for_model(model) print(f"model:{model}, model type: {model_type}") assert model_type == "text-generation" @@ -2300,36 +2320,30 @@ def test_completion_azure_deployment_id(): # test_completion_azure_deployment_id() -# Only works for local endpoint -# def test_completion_anthropic_openai_proxy(): -# try: -# response = completion( -# model="custom_openai/claude-2", -# messages=messages, -# api_base="http://0.0.0.0:8000" -# ) -# # Add any assertions here to check the response -# print(response) -# except Exception as e: -# pytest.fail(f"Error occurred: {e}") - -# test_completion_anthropic_openai_proxy() +import asyncio -def test_completion_replicate_llama3(): +@pytest.mark.parametrize("sync_mode", [False, True]) +@pytest.mark.asyncio +async def test_completion_replicate_llama3(sync_mode): litellm.set_verbose = True model_name = "replicate/meta/meta-llama-3-8b-instruct" try: - response = completion( - model=model_name, - messages=messages, - ) + if sync_mode: + response = completion( + model=model_name, + messages=messages, + ) + else: + response = await litellm.acompletion( + model=model_name, + messages=messages, + ) + print(f"ASYNC REPLICATE RESPONSE - {response}") print(response) # Add any assertions here to check the response - response_str = response["choices"][0]["message"]["content"] - print("RESPONSE STRING\n", response_str) - if type(response_str) != str: - pytest.fail(f"Error occurred: {e}") + assert isinstance(response, litellm.ModelResponse) + response_format_tests(response=response) except Exception as e: pytest.fail(f"Error occurred: {e}") @@ -2670,14 +2684,29 @@ def response_format_tests(response: litellm.ModelResponse): @pytest.mark.parametrize("sync_mode", [True, False]) +@pytest.mark.parametrize( + "model", + [ + "bedrock/cohere.command-r-plus-v1:0", + "anthropic.claude-3-sonnet-20240229-v1:0", + "anthropic.claude-instant-v1", + "bedrock/ai21.j2-mid", + "mistral.mistral-7b-instruct-v0:2", + "bedrock/amazon.titan-tg1-large", + "meta.llama3-8b-instruct-v1:0", + "cohere.command-text-v14", + ], +) @pytest.mark.asyncio -async def test_completion_bedrock_command_r(sync_mode): +async def test_completion_bedrock_httpx_models(sync_mode, model): litellm.set_verbose = True if sync_mode: response = completion( - model="bedrock/cohere.command-r-plus-v1:0", + model=model, messages=[{"role": "user", "content": "Hey! 
how's it going?"}], + temperature=0.2, + max_tokens=200, ) assert isinstance(response, litellm.ModelResponse) @@ -2685,8 +2714,10 @@ async def test_completion_bedrock_command_r(sync_mode): response_format_tests(response=response) else: response = await litellm.acompletion( - model="bedrock/cohere.command-r-plus-v1:0", + model=model, messages=[{"role": "user", "content": "Hey! how's it going?"}], + temperature=0.2, + max_tokens=100, ) assert isinstance(response, litellm.ModelResponse) @@ -2722,69 +2753,12 @@ def test_completion_bedrock_titan_null_response(): pytest.fail(f"An error occurred - {str(e)}") -def test_completion_bedrock_titan(): - try: - response = completion( - model="bedrock/amazon.titan-tg1-large", - messages=messages, - temperature=0.2, - max_tokens=200, - top_p=0.8, - logger_fn=logger_fn, - ) - # Add any assertions here to check the response - print(response) - except RateLimitError: - pass - except Exception as e: - pytest.fail(f"Error occurred: {e}") - - # test_completion_bedrock_titan() -def test_completion_bedrock_claude(): - print("calling claude") - try: - response = completion( - model="anthropic.claude-instant-v1", - messages=messages, - max_tokens=10, - temperature=0.1, - logger_fn=logger_fn, - ) - # Add any assertions here to check the response - print(response) - except RateLimitError: - pass - except Exception as e: - pytest.fail(f"Error occurred: {e}") - - # test_completion_bedrock_claude() -def test_completion_bedrock_cohere(): - print("calling bedrock cohere") - litellm.set_verbose = True - try: - response = completion( - model="bedrock/cohere.command-text-v14", - messages=[{"role": "user", "content": "hi"}], - temperature=0.1, - max_tokens=10, - stream=True, - ) - # Add any assertions here to check the response - print(response) - for chunk in response: - print(chunk) - except RateLimitError: - pass - except Exception as e: - pytest.fail(f"Error occurred: {e}") - - # test_completion_bedrock_cohere() @@ -2807,23 +2781,6 @@ def test_completion_bedrock_cohere(): # pytest.fail(f"Error occurred: {e}") # test_completion_bedrock_claude_stream() -# def test_completion_bedrock_ai21(): -# try: -# litellm.set_verbose = False -# response = completion( -# model="bedrock/ai21.j2-mid", -# messages=messages, -# temperature=0.2, -# top_p=0.2, -# max_tokens=20 -# ) -# # Add any assertions here to check the response -# print(response) -# except RateLimitError: -# pass -# except Exception as e: -# pytest.fail(f"Error occurred: {e}") - ######## Test VLLM ######## # def test_completion_vllm(): @@ -3096,7 +3053,6 @@ def test_mistral_anyscale_stream(): print(chunk["choices"][0]["delta"].get("content", ""), end="") -# test_mistral_anyscale_stream() # test_completion_anyscale_2() # def test_completion_with_fallbacks_multiple_keys(): # print(f"backup key 1: {os.getenv('BACKUP_OPENAI_API_KEY_1')}") @@ -3246,6 +3202,7 @@ def test_completion_gemini(): response = completion(model=model_name, messages=messages) # Add any assertions,here to check the response print(response) + assert response.choices[0]["index"] == 0 except litellm.APIError as e: pass except Exception as e: diff --git a/litellm/tests/test_completion_cost.py b/litellm/tests/test_completion_cost.py index 8b817eb3cf..1e1d271011 100644 --- a/litellm/tests/test_completion_cost.py +++ b/litellm/tests/test_completion_cost.py @@ -65,6 +65,42 @@ async def test_custom_pricing(sync_mode): assert new_handler.response_cost == 0 +def test_custom_pricing_as_completion_cost_param(): + from litellm import ModelResponse, Choices, Message + 
from litellm.utils import Usage + + resp = ModelResponse( + id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac", + choices=[ + Choices( + finish_reason=None, + index=0, + message=Message( + content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a", + role="assistant", + ), + ) + ], + created=1700775391, + model="ft:gpt-3.5-turbo:my-org:custom_suffix:id", + object="chat.completion", + system_fingerprint=None, + usage=Usage(prompt_tokens=21, completion_tokens=17, total_tokens=38), + ) + + cost = litellm.completion_cost( + completion_response=resp, + custom_cost_per_token={ + "input_cost_per_token": 1000, + "output_cost_per_token": 20, + }, + ) + + expected_cost = 1000 * 21 + 17 * 20 + + assert round(cost, 5) == round(expected_cost, 5) + + def test_get_gpt3_tokens(): max_tokens = get_max_tokens("gpt-3.5-turbo") print(max_tokens) diff --git a/litellm/tests/test_config.py b/litellm/tests/test_config.py index 96c766919e..47f632b96c 100644 --- a/litellm/tests/test_config.py +++ b/litellm/tests/test_config.py @@ -5,7 +5,6 @@ import sys, os import traceback from dotenv import load_dotenv -from pydantic import ConfigDict load_dotenv() import os, io @@ -14,36 +13,21 @@ sys.path.insert( 0, os.path.abspath("../..") ) # Adds the parent directory to the, system path import pytest, litellm -from pydantic import BaseModel, VERSION +from pydantic import BaseModel from litellm.proxy.proxy_server import ProxyConfig from litellm.proxy.utils import encrypt_value, ProxyLogging, DualCache from litellm.types.router import Deployment, LiteLLM_Params, ModelInfo from typing import Literal -# Function to get Pydantic version -def is_pydantic_v2() -> int: - return int(VERSION.split(".")[0]) - - -def get_model_config(arbitrary_types_allowed: bool = False) -> ConfigDict: - # Version-specific configuration - if is_pydantic_v2() >= 2: - model_config = ConfigDict(extra="allow", arbitrary_types_allowed=arbitrary_types_allowed, protected_namespaces=()) # type: ignore - else: - from pydantic import Extra - - model_config = ConfigDict(extra=Extra.allow, arbitrary_types_allowed=arbitrary_types_allowed) # type: ignore - - return model_config - - class DBModel(BaseModel): model_id: str model_name: str model_info: dict litellm_params: dict - model_config = get_model_config() + + class Config: + protected_namespaces = () @pytest.mark.asyncio @@ -118,7 +102,7 @@ async def test_delete_deployment(): pc = ProxyConfig() db_model = DBModel( - model_id="12340523", + model_id=deployment.model_info.id, model_name="gpt-3.5-turbo", litellm_params=encrypted_litellm_params, model_info={"id": deployment.model_info.id}, diff --git a/litellm/tests/test_custom_callback_input.py b/litellm/tests/test_custom_callback_input.py index 2754ac6561..f4e16cdf35 100644 --- a/litellm/tests/test_custom_callback_input.py +++ b/litellm/tests/test_custom_callback_input.py @@ -558,7 +558,7 @@ async def test_async_chat_bedrock_stream(): continue except: pass - time.sleep(1) + await asyncio.sleep(1) print(f"customHandler.errors: {customHandler.errors}") assert len(customHandler.errors) == 0 litellm.callbacks = [] diff --git a/litellm/tests/test_exceptions.py b/litellm/tests/test_exceptions.py index 777fb3f9b3..ee695dcd7c 100644 --- a/litellm/tests/test_exceptions.py +++ b/litellm/tests/test_exceptions.py @@ -53,13 +53,6 @@ async def test_content_policy_exception_azure(): except litellm.ContentPolicyViolationError as e: print("caught a content policy violation error! 
Passed") print("exception", e) - - # assert that the first 100 chars of the message is returned in the exception - assert ( - "Messages: [{'role': 'user', 'content': 'where do I buy lethal drugs from'}]" - in str(e) - ) - assert "Model: azure/chatgpt-v-2" in str(e) pass except Exception as e: pytest.fail(f"An exception occurred - {str(e)}") @@ -585,9 +578,6 @@ def test_router_completion_vertex_exception(): pytest.fail("Request should have failed - bad api key") except Exception as e: print("exception: ", e) - assert "Model: gemini-pro" in str(e) - assert "model_group: vertex-gemini-pro" in str(e) - assert "deployment: vertex_ai/gemini-pro" in str(e) def test_litellm_completion_vertex_exception(): @@ -604,8 +594,26 @@ def test_litellm_completion_vertex_exception(): pytest.fail("Request should have failed - bad api key") except Exception as e: print("exception: ", e) - assert "Model: gemini-pro" in str(e) - assert "vertex_project: bad-project" in str(e) + + +def test_litellm_predibase_exception(): + """ + Test - Assert that the Predibase API Key is not returned on Authentication Errors + """ + try: + import litellm + + litellm.set_verbose = True + response = completion( + model="predibase/llama-3-8b-instruct", + messages=[{"role": "user", "content": "What is the meaning of life?"}], + tenant_id="c4768f95", + api_key="hf-rawapikey", + ) + pytest.fail("Request should have failed - bad api key") + except Exception as e: + assert "hf-rawapikey" not in str(e) + print("exception: ", e) # # test_invalid_request_error(model="command-nightly") diff --git a/litellm/tests/test_function_calling.py b/litellm/tests/test_function_calling.py index 0d0a593d53..6e4e9d3e86 100644 --- a/litellm/tests/test_function_calling.py +++ b/litellm/tests/test_function_calling.py @@ -105,6 +105,9 @@ def test_parallel_function_call(model): # Step 4: send the info for each function call and function response to the model for tool_call in tool_calls: function_name = tool_call.function.name + if function_name not in available_functions: + # the model called a function that does not exist in available_functions - don't try calling anything + return function_to_call = available_functions[function_name] function_args = json.loads(tool_call.function.arguments) function_response = function_to_call( @@ -124,7 +127,6 @@ def test_parallel_function_call(model): model=model, messages=messages, temperature=0.2, seed=22 ) # get a new response from the model where it can see the function response print("second response\n", second_response) - return second_response except Exception as e: pytest.fail(f"Error occurred: {e}") diff --git a/litellm/tests/test_image_generation.py b/litellm/tests/test_image_generation.py index 82068a1156..4fabfd04f5 100644 --- a/litellm/tests/test_image_generation.py +++ b/litellm/tests/test_image_generation.py @@ -162,6 +162,39 @@ async def test_aimage_generation_bedrock_with_optional_params(): print(f"response: {response}") except litellm.RateLimitError as e: pass + except litellm.ContentPolicyViolationError: + pass # Azure randomly raises these errors skip when they occur + except Exception as e: + if "Your task failed as a result of our safety system." 
in str(e): + pass + else: + pytest.fail(f"An exception occurred - {str(e)}") + + +@pytest.mark.asyncio +async def test_aimage_generation_vertex_ai(): + from test_amazing_vertex_completion import load_vertex_ai_credentials + + litellm.set_verbose = True + + load_vertex_ai_credentials() + try: + response = await litellm.aimage_generation( + prompt="An olympic size swimming pool", + model="vertex_ai/imagegeneration@006", + vertex_ai_project="adroit-crow-413218", + vertex_ai_location="us-central1", + n=1, + ) + assert response.data is not None + assert len(response.data) > 0 + + for d in response.data: + assert isinstance(d, litellm.ImageObject) + print("data in response.data", d) + assert d.b64_json is not None + except litellm.RateLimitError as e: + pass except litellm.ContentPolicyViolationError: pass # Azure randomly raises these errors - skip when they occur except Exception as e: diff --git a/litellm/tests/test_jwt.py b/litellm/tests/test_jwt.py index b3af9913f6..45f4616290 100644 --- a/litellm/tests/test_jwt.py +++ b/litellm/tests/test_jwt.py @@ -1,7 +1,7 @@ #### What this tests #### # Unit tests for JWT-Auth -import sys, os, asyncio, time, random +import sys, os, asyncio, time, random, uuid import traceback from dotenv import load_dotenv @@ -24,6 +24,7 @@ public_key = { "alg": "RS256", } + def test_load_config_with_custom_role_names(): config = { "general_settings": { @@ -77,7 +78,8 @@ async def test_token_single_public_key(): == "qIgOQfEVrrErJC0E7gsHXi6rs_V0nyFY5qPFui2-tv0o4CwpwDzgfBtLO7o_wLiguq0lnu54sMT2eLNoRiiPuLvv6bg7Iy1H9yc5_4Jf5oYEOrqN5o9ZBOoYp1q68Pv0oNJYyZdGu5ZJfd7V4y953vB2XfEKgXCsAkhVhlvIUMiDNKWoMDWsyb2xela5tRURZ2mJAXcHfSC_sYdZxIA2YYrIHfoevq_vTlaz0qVSe_uOKjEpgOAS08UUrgda4CQL11nzICiIQzc6qmjIQt2cjzB2D_9zb4BYndzEtfl0kwAT0z_I85S3mkwTqHU-1BvKe_4MG4VG3dAAeffLPXJyXQ" ) -@pytest.mark.parametrize('audience', [None, "litellm-proxy"]) + +@pytest.mark.parametrize("audience", [None, "litellm-proxy"]) @pytest.mark.asyncio async def test_valid_invalid_token(audience): """ @@ -90,7 +92,7 @@ async def test_valid_invalid_token(audience): from cryptography.hazmat.primitives.asymmetric import rsa from cryptography.hazmat.backends import default_backend - os.environ.pop('JWT_AUDIENCE', None) + os.environ.pop("JWT_AUDIENCE", None) if audience: os.environ["JWT_AUDIENCE"] = audience @@ -138,7 +140,7 @@ async def test_valid_invalid_token(audience): "sub": "user123", "exp": expiration_time, # set the token to expire in 10 minutes "scope": "litellm-proxy-admin", - "aud": audience + "aud": audience, } # Generate the JWT token @@ -166,7 +168,7 @@ async def test_valid_invalid_token(audience): "sub": "user123", "exp": expiration_time, # set the token to expire in 10 minutes "scope": "litellm-NO-SCOPE", - "aud": audience + "aud": audience, } # Generate the JWT token @@ -183,6 +185,7 @@ async def test_valid_invalid_token(audience): except Exception as e: pytest.fail(f"An exception occurred - {str(e)}") + @pytest.fixture def prisma_client(): import litellm @@ -205,7 +208,7 @@ def prisma_client(): return prisma_client -@pytest.mark.parametrize('audience', [None, "litellm-proxy"]) +@pytest.mark.parametrize("audience", [None, "litellm-proxy"]) @pytest.mark.asyncio async def test_team_token_output(prisma_client, audience): import jwt, json @@ -222,7 +225,7 @@ async def test_team_token_output(prisma_client, audience): setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) await litellm.proxy.proxy_server.prisma_client.connect() - os.environ.pop('JWT_AUDIENCE', None) + os.environ.pop("JWT_AUDIENCE", 
None) if audience: os.environ["JWT_AUDIENCE"] = audience @@ -261,7 +264,7 @@ async def test_team_token_output(prisma_client, audience): jwt_handler.user_api_key_cache = cache - jwt_handler.litellm_jwtauth = LiteLLM_JWTAuth() + jwt_handler.litellm_jwtauth = LiteLLM_JWTAuth(team_id_jwt_field="client_id") # VALID TOKEN ## GENERATE A TOKEN @@ -274,7 +277,7 @@ async def test_team_token_output(prisma_client, audience): "exp": expiration_time, # set the token to expire in 10 minutes "scope": "litellm_team", "client_id": team_id, - "aud": audience + "aud": audience, } # Generate the JWT token @@ -289,7 +292,7 @@ async def test_team_token_output(prisma_client, audience): "sub": "user123", "exp": expiration_time, # set the token to expire in 10 minutes "scope": "litellm_proxy_admin", - "aud": audience + "aud": audience, } admin_token = jwt.encode(payload, private_key_str, algorithm="RS256") @@ -315,7 +318,13 @@ async def test_team_token_output(prisma_client, audience): ## 1. INITIAL TEAM CALL - should fail # use generated key to auth in - setattr(litellm.proxy.proxy_server, "general_settings", {"enable_jwt_auth": True}) + setattr( + litellm.proxy.proxy_server, + "general_settings", + { + "enable_jwt_auth": True, + }, + ) setattr(litellm.proxy.proxy_server, "jwt_handler", jwt_handler) try: result = await user_api_key_auth(request=request, api_key=bearer_token) @@ -358,9 +367,22 @@ async def test_team_token_output(prisma_client, audience): assert team_result.team_models == ["gpt-3.5-turbo", "gpt-4"] -@pytest.mark.parametrize('audience', [None, "litellm-proxy"]) +@pytest.mark.parametrize("audience", [None, "litellm-proxy"]) +@pytest.mark.parametrize( + "team_id_set, default_team_id", + [(True, False), (False, True)], +) +@pytest.mark.parametrize("user_id_upsert", [True, False]) @pytest.mark.asyncio -async def test_user_token_output(prisma_client, audience): +async def test_user_token_output( + prisma_client, audience, team_id_set, default_team_id, user_id_upsert +): + import uuid + + args = locals() + print(f"received args - {args}") + if default_team_id: + default_team_id = "team_id_12344_{}".format(uuid.uuid4()) """ - If user required, check if it exists - fail initial request (when user doesn't exist) @@ -373,7 +395,12 @@ async def test_user_token_output(prisma_client, audience): from cryptography.hazmat.backends import default_backend from fastapi import Request from starlette.datastructures import URL - from litellm.proxy.proxy_server import user_api_key_auth, new_team, new_user + from litellm.proxy.proxy_server import ( + user_api_key_auth, + new_team, + new_user, + user_info, + ) from litellm.proxy._types import NewTeamRequest, UserAPIKeyAuth, NewUserRequest import litellm import uuid @@ -381,7 +408,7 @@ async def test_user_token_output(prisma_client, audience): setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) await litellm.proxy.proxy_server.prisma_client.connect() - os.environ.pop('JWT_AUDIENCE', None) + os.environ.pop("JWT_AUDIENCE", None) if audience: os.environ["JWT_AUDIENCE"] = audience @@ -423,6 +450,11 @@ async def test_user_token_output(prisma_client, audience): jwt_handler.litellm_jwtauth = LiteLLM_JWTAuth() jwt_handler.litellm_jwtauth.user_id_jwt_field = "sub" + jwt_handler.litellm_jwtauth.team_id_default = default_team_id + jwt_handler.litellm_jwtauth.user_id_upsert = user_id_upsert + + if team_id_set: + jwt_handler.litellm_jwtauth.team_id_jwt_field = "client_id" # VALID TOKEN ## GENERATE A TOKEN @@ -436,7 +468,7 @@ async def test_user_token_output(prisma_client, 
audience): "exp": expiration_time, # set the token to expire in 10 minutes "scope": "litellm_team", "client_id": team_id, - "aud": audience + "aud": audience, } # Generate the JWT token @@ -451,7 +483,7 @@ async def test_user_token_output(prisma_client, audience): "sub": user_id, "exp": expiration_time, # set the token to expire in 10 minutes "scope": "litellm_proxy_admin", - "aud": audience + "aud": audience, } admin_token = jwt.encode(payload, private_key_str, algorithm="RS256") @@ -503,6 +535,16 @@ async def test_user_token_output(prisma_client, audience): ), user_api_key_dict=result, ) + if default_team_id: + await new_team( + data=NewTeamRequest( + team_id=default_team_id, + tpm_limit=100, + rpm_limit=99, + models=["gpt-3.5-turbo", "gpt-4"], + ), + user_api_key_dict=result, + ) except Exception as e: pytest.fail(f"This should not fail - {str(e)}") @@ -513,23 +555,35 @@ async def test_user_token_output(prisma_client, audience): team_result: UserAPIKeyAuth = await user_api_key_auth( request=request, api_key=bearer_token ) - pytest.fail(f"User doesn't exist. this should fail") + if user_id_upsert == False: + pytest.fail(f"User doesn't exist. this should fail") except Exception as e: pass ## 4. Create user - try: - bearer_token = "Bearer " + admin_token + if user_id_upsert: + ## check if user already exists + try: + bearer_token = "Bearer " + admin_token - request._url = URL(url="/team/new") - result = await user_api_key_auth(request=request, api_key=bearer_token) - await new_user( - data=NewUserRequest( - user_id=user_id, - ), - ) - except Exception as e: - pytest.fail(f"This should not fail - {str(e)}") + request._url = URL(url="/team/new") + result = await user_api_key_auth(request=request, api_key=bearer_token) + await user_info(user_id=user_id) + except Exception as e: + pytest.fail(f"This should not fail - {str(e)}") + else: + try: + bearer_token = "Bearer " + admin_token + + request._url = URL(url="/team/new") + result = await user_api_key_auth(request=request, api_key=bearer_token) + await new_user( + data=NewUserRequest( + user_id=user_id, + ), + ) + except Exception as e: + pytest.fail(f"This should not fail - {str(e)}") ## 5. 3rd call w/ same team, same user -> call should succeed bearer_token = "Bearer " + token @@ -543,7 +597,8 @@ async def test_user_token_output(prisma_client, audience): ## 6. 
ASSERT USER_API_KEY_AUTH format (used for tpm/rpm limiting in parallel_request_limiter.py AND cost tracking) - assert team_result.team_tpm_limit == 100 - assert team_result.team_rpm_limit == 99 - assert team_result.team_models == ["gpt-3.5-turbo", "gpt-4"] + if team_id_set or default_team_id is not None: + assert team_result.team_tpm_limit == 100 + assert team_result.team_rpm_limit == 99 + assert team_result.team_models == ["gpt-3.5-turbo", "gpt-4"] assert team_result.user_id == user_id diff --git a/litellm/tests/test_key_generate_prisma.py b/litellm/tests/test_key_generate_prisma.py index 2eb693cf45..1d3a487da5 100644 --- a/litellm/tests/test_key_generate_prisma.py +++ b/litellm/tests/test_key_generate_prisma.py @@ -23,6 +23,7 @@ import sys, os import traceback from dotenv import load_dotenv from fastapi import Request +from fastapi.routing import APIRoute from datetime import datetime load_dotenv() @@ -51,6 +52,13 @@ from litellm.proxy.proxy_server import ( user_info, info_key_fn, new_team, + chat_completion, + completion, + embeddings, + image_generation, + audio_transcriptions, + moderations, + model_list, ) from litellm.proxy.utils import PrismaClient, ProxyLogging, hash_token, update_spend from litellm._logging import verbose_proxy_logger @@ -146,7 +154,38 @@ async def test_new_user_response(prisma_client): pytest.fail(f"Got exception {e}") -def test_generate_and_call_with_valid_key(prisma_client): +@pytest.mark.parametrize( + "api_route", [ + # chat_completion + APIRoute(path="/engines/{model}/chat/completions", endpoint=chat_completion), + APIRoute(path="/openai/deployments/{model}/chat/completions", endpoint=chat_completion), + APIRoute(path="/chat/completions", endpoint=chat_completion), + APIRoute(path="/v1/chat/completions", endpoint=chat_completion), + # completion + APIRoute(path="/completions", endpoint=completion), + APIRoute(path="/v1/completions", endpoint=completion), + APIRoute(path="/engines/{model}/completions", endpoint=completion), + APIRoute(path="/openai/deployments/{model}/completions", endpoint=completion), + # embeddings + APIRoute(path="/v1/embeddings", endpoint=embeddings), + APIRoute(path="/embeddings", endpoint=embeddings), + APIRoute(path="/openai/deployments/{model}/embeddings", endpoint=embeddings), + # image generation + APIRoute(path="/v1/images/generations", endpoint=image_generation), + APIRoute(path="/images/generations", endpoint=image_generation), + # audio transcriptions + APIRoute(path="/v1/audio/transcriptions", endpoint=audio_transcriptions), + APIRoute(path="/audio/transcriptions", endpoint=audio_transcriptions), + # moderations + APIRoute(path="/v1/moderations", endpoint=moderations), + APIRoute(path="/moderations", endpoint=moderations), + # model_list + APIRoute(path= "/v1/models", endpoint=model_list), + APIRoute(path= "/models", endpoint=model_list), + ], + ids=lambda route: str(dict(route=route.endpoint.__name__, path=route.path)), +) +def test_generate_and_call_with_valid_key(prisma_client, api_route): # 1. 
Generate a Key, and use it to make a call print("prisma client=", prisma_client) @@ -181,8 +220,12 @@ def test_generate_and_call_with_valid_key(prisma_client): ) print("token from prisma", value_from_prisma) - request = Request(scope={"type": "http"}) - request._url = URL(url="/chat/completions") + request = Request({ + "type": "http", + "route": api_route, + "path": api_route.path, + "headers": [("Authorization", bearer_token)] + }) # use generated key to auth in result = await user_api_key_auth(request=request, api_key=bearer_token) diff --git a/litellm/tests/test_lowest_latency_routing.py b/litellm/tests/test_lowest_latency_routing.py index 4da8792087..3d79e5c8f7 100644 --- a/litellm/tests/test_lowest_latency_routing.py +++ b/litellm/tests/test_lowest_latency_routing.py @@ -705,7 +705,7 @@ async def test_lowest_latency_routing_first_pick(): ) # type: ignore deployments = {} - for _ in range(5): + for _ in range(10): response = await router.acompletion( model="azure-model", messages=[{"role": "user", "content": "hello"}] ) diff --git a/litellm/tests/test_parallel_request_limiter.py b/litellm/tests/test_parallel_request_limiter.py index d0a28926e8..00da199d99 100644 --- a/litellm/tests/test_parallel_request_limiter.py +++ b/litellm/tests/test_parallel_request_limiter.py @@ -28,6 +28,37 @@ from datetime import datetime ## On Request failure +@pytest.mark.asyncio +async def test_global_max_parallel_requests(): + """ + Test if ParallelRequestHandler respects 'global_max_parallel_requests' + + data["metadata"]["global_max_parallel_requests"] + """ + global_max_parallel_requests = 0 + _api_key = "sk-12345" + _api_key = hash_token("sk-12345") + user_api_key_dict = UserAPIKeyAuth(api_key=_api_key, max_parallel_requests=100) + local_cache = DualCache() + parallel_request_handler = MaxParallelRequestsHandler() + + for _ in range(3): + try: + await parallel_request_handler.async_pre_call_hook( + user_api_key_dict=user_api_key_dict, + cache=local_cache, + data={ + "metadata": { + "global_max_parallel_requests": global_max_parallel_requests + } + }, + call_type="", + ) + pytest.fail("Expected call to fail") + except Exception as e: + pass + + @pytest.mark.asyncio async def test_pre_call_hook(): """ diff --git a/litellm/tests/test_proxy_token_counter.py b/litellm/tests/test_proxy_token_counter.py new file mode 100644 index 0000000000..859ddf5c74 --- /dev/null +++ b/litellm/tests/test_proxy_token_counter.py @@ -0,0 +1,138 @@ +# Test the following scenarios: +# 1. 
Generate a Key, and use it to make a call + + +import sys, os +import traceback +from dotenv import load_dotenv +from fastapi import Request +from datetime import datetime + +load_dotenv() +import os, io, time + +# this file is to test litellm/proxy + +sys.path.insert( + 0, os.path.abspath("../..") +) # Adds the parent directory to the system path +import pytest, logging, asyncio +import litellm, asyncio +from litellm.proxy.proxy_server import token_counter +from litellm.proxy.utils import PrismaClient, ProxyLogging, hash_token, update_spend +from litellm._logging import verbose_proxy_logger + +verbose_proxy_logger.setLevel(level=logging.DEBUG) + +from litellm.proxy._types import TokenCountRequest, TokenCountResponse + + +from litellm import Router + + +@pytest.mark.asyncio +async def test_vLLM_token_counting(): + """ + Test Token counter for vLLM models + - User passes model="special-alias" + - token_counter should infer that special_alias -> maps to wolfram/miquliz-120b-v2.0 + -> token counter should use hugging face tokenizer + """ + + llm_router = Router( + model_list=[ + { + "model_name": "special-alias", + "litellm_params": { + "model": "openai/wolfram/miquliz-120b-v2.0", + "api_base": "https://exampleopenaiendpoint-production.up.railway.app/", + }, + } + ] + ) + + setattr(litellm.proxy.proxy_server, "llm_router", llm_router) + + response = await token_counter( + request=TokenCountRequest( + model="special-alias", + messages=[{"role": "user", "content": "hello"}], + ) + ) + + print("response: ", response) + + assert ( + response.tokenizer_type == "huggingface_tokenizer" + ) # SHOULD use the hugging face tokenizer + assert response.model_used == "wolfram/miquliz-120b-v2.0" + + +@pytest.mark.asyncio +async def test_token_counting_model_not_in_model_list(): + """ + Test Token counter - when a model is not in model_list + -> should use the default OpenAI tokenizer + """ + + llm_router = Router( + model_list=[ + { + "model_name": "gpt-4", + "litellm_params": { + "model": "gpt-4", + }, + } + ] + ) + + setattr(litellm.proxy.proxy_server, "llm_router", llm_router) + + response = await token_counter( + request=TokenCountRequest( + model="special-alias", + messages=[{"role": "user", "content": "hello"}], + ) + ) + + print("response: ", response) + + assert ( + response.tokenizer_type == "openai_tokenizer" + ) # SHOULD use the OpenAI tokenizer + assert response.model_used == "special-alias" + + +@pytest.mark.asyncio +async def test_gpt_token_counting(): + """ + Test Token counter + -> should work for gpt-4 + """ + + llm_router = Router( + model_list=[ + { + "model_name": "gpt-4", + "litellm_params": { + "model": "gpt-4", + }, + } + ] + ) + + setattr(litellm.proxy.proxy_server, "llm_router", llm_router) + + response = await token_counter( + request=TokenCountRequest( + model="gpt-4", + messages=[{"role": "user", "content": "hello"}], + ) + ) + + print("response: ", response) + + assert ( + response.tokenizer_type == "openai_tokenizer" + ) # SHOULD use the OpenAI tokenizer + assert response.request_model == "gpt-4" diff --git a/litellm/tests/test_router_cooldowns.py b/litellm/tests/test_router_cooldowns.py new file mode 100644 index 0000000000..35095bb2cf --- /dev/null +++ b/litellm/tests/test_router_cooldowns.py @@ -0,0 +1,64 @@ +#### What this tests #### +# This tests calling router with fallback models + +import sys, os, time +import traceback, asyncio +import pytest + +sys.path.insert( + 0, os.path.abspath("../..") +) # Adds the parent directory to the system path + +import litellm +from 
litellm import Router +from litellm.integrations.custom_logger import CustomLogger +import openai, httpx + + +@pytest.mark.asyncio +async def test_cooldown_badrequest_error(): + """ + Test 1. It SHOULD NOT cooldown a deployment on a BadRequestError + """ + + router = litellm.Router( + model_list=[ + { + "model_name": "gpt-3.5-turbo", + "litellm_params": { + "model": "azure/chatgpt-v-2", + "api_key": os.getenv("AZURE_API_KEY"), + "api_version": os.getenv("AZURE_API_VERSION"), + "api_base": os.getenv("AZURE_API_BASE"), + }, + } + ], + debug_level="DEBUG", + set_verbose=True, + cooldown_time=300, + num_retries=0, + allowed_fails=0, + ) + + # Act & Assert + try: + + response = await router.acompletion( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "gm"}], + bad_param=200, + ) + except: + pass + + await asyncio.sleep(3) # wait for deployment to get cooled-down + + response = await router.acompletion( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "gm"}], + mock_response="hello", + ) + + assert response is not None + + print(response) diff --git a/litellm/tests/test_router_debug_logs.py b/litellm/tests/test_router_debug_logs.py index 1d908abe81..cde9a8edd1 100644 --- a/litellm/tests/test_router_debug_logs.py +++ b/litellm/tests/test_router_debug_logs.py @@ -82,7 +82,7 @@ def test_async_fallbacks(caplog): # Define the expected log messages # - error request, falling back notice, success notice expected_logs = [ - "litellm.acompletion(model=gpt-3.5-turbo)\x1b[31m Exception OpenAIException - Error code: 401 - {'error': {'message': 'Incorrect API key provided: bad-key. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}} \nModel: gpt-3.5-turbo\nAPI Base: https://api.openai.com\nMessages: [{'content': 'Hello, how are you?', 'role': 'user'}]\nmodel_group: gpt-3.5-turbo\n\ndeployment: gpt-3.5-turbo\n\x1b[0m", + "litellm.acompletion(model=gpt-3.5-turbo)\x1b[31m Exception OpenAIException - Error code: 401 - {'error': {'message': 'Incorrect API key provided: bad-key. 
You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}\x1b[0m", "Falling back to model_group = azure/gpt-3.5-turbo", "litellm.acompletion(model=azure/chatgpt-v-2)\x1b[32m 200 OK\x1b[0m", "Successful fallback b/w models.", diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py index 6dcdbeb177..580adcba23 100644 --- a/litellm/tests/test_streaming.py +++ b/litellm/tests/test_streaming.py @@ -950,7 +950,63 @@ def test_vertex_ai_stream(): # test_completion_vertexai_stream_bad_key() -# def test_completion_replicate_stream(): + +@pytest.mark.parametrize("sync_mode", [False, True]) +@pytest.mark.asyncio +async def test_completion_replicate_llama3_streaming(sync_mode): + litellm.set_verbose = True + model_name = "replicate/meta/meta-llama-3-8b-instruct" + try: + if sync_mode: + final_chunk: Optional[litellm.ModelResponse] = None + response: litellm.CustomStreamWrapper = completion( # type: ignore + model=model_name, + messages=messages, + max_tokens=10, # type: ignore + stream=True, + ) + complete_response = "" + # Add any assertions here to check the response + has_finish_reason = False + for idx, chunk in enumerate(response): + final_chunk = chunk + chunk, finished = streaming_format_tests(idx, chunk) + if finished: + has_finish_reason = True + break + complete_response += chunk + if has_finish_reason == False: + raise Exception("finish reason not set") + if complete_response.strip() == "": + raise Exception("Empty response received") + else: + response: litellm.CustomStreamWrapper = await litellm.acompletion( # type: ignore + model=model_name, + messages=messages, + max_tokens=100, # type: ignore + stream=True, + ) + complete_response = "" + # Add any assertions here to check the response + has_finish_reason = False + idx = 0 + final_chunk: Optional[litellm.ModelResponse] = None + async for chunk in response: + final_chunk = chunk + chunk, finished = streaming_format_tests(idx, chunk) + if finished: + has_finish_reason = True + break + complete_response += chunk + idx += 1 + if has_finish_reason == False: + raise Exception("finish reason not set") + if complete_response.strip() == "": + raise Exception("Empty response received") + except Exception as e: + pytest.fail(f"Error occurred: {e}") + + # TEMP Commented out - replicate throwing an auth error # try: # litellm.set_verbose = True @@ -984,15 +1040,28 @@ def test_vertex_ai_stream(): # pytest.fail(f"Error occurred: {e}") -@pytest.mark.parametrize("sync_mode", [True]) +@pytest.mark.parametrize("sync_mode", [True, False]) +@pytest.mark.parametrize( + "model", + [ + # "bedrock/cohere.command-r-plus-v1:0", + # "anthropic.claude-3-sonnet-20240229-v1:0", + # "anthropic.claude-instant-v1", + # "bedrock/ai21.j2-mid", + # "mistral.mistral-7b-instruct-v0:2", + # "bedrock/amazon.titan-tg1-large", + # "meta.llama3-8b-instruct-v1:0", + "cohere.command-text-v14" + ], +) @pytest.mark.asyncio -async def test_bedrock_cohere_command_r_streaming(sync_mode): +async def test_bedrock_httpx_streaming(sync_mode, model): try: litellm.set_verbose = True if sync_mode: final_chunk: Optional[litellm.ModelResponse] = None response: litellm.CustomStreamWrapper = completion( # type: ignore - model="bedrock/cohere.command-r-plus-v1:0", + model=model, messages=messages, max_tokens=10, # type: ignore stream=True, @@ -1013,7 +1082,7 @@ async def test_bedrock_cohere_command_r_streaming(sync_mode): raise Exception("Empty response received") else: response: 
litellm.CustomStreamWrapper = await litellm.acompletion( # type: ignore - model="bedrock/cohere.command-r-plus-v1:0", + model=model, messages=messages, max_tokens=100, # type: ignore stream=True, diff --git a/litellm/tests/test_token_counter.py b/litellm/tests/test_token_counter.py index 78e276a85c..194dfb8af3 100644 --- a/litellm/tests/test_token_counter.py +++ b/litellm/tests/test_token_counter.py @@ -174,7 +174,6 @@ def test_load_test_token_counter(model): """ import tiktoken - enc = tiktoken.get_encoding("cl100k_base") messages = [{"role": "user", "content": text}] * 10 start_time = time.time() @@ -186,4 +185,4 @@ def test_load_test_token_counter(model): total_time = end_time - start_time print("model={}, total test time={}".format(model, total_time)) - assert total_time < 2, f"Total encoding time > 1.5s, {total_time}" + assert total_time < 10, f"Total encoding time > 10s, {total_time}" diff --git a/litellm/types/completion.py b/litellm/types/completion.py index 87a7629daf..78af7667ba 100644 --- a/litellm/types/completion.py +++ b/litellm/types/completion.py @@ -1,27 +1,10 @@ -from typing import List, Optional, Union, Iterable, cast +from typing import List, Optional, Union, Iterable -from pydantic import ConfigDict, BaseModel, validator, VERSION +from pydantic import BaseModel, validator from typing_extensions import Literal, Required, TypedDict -# Function to get Pydantic version -def is_pydantic_v2() -> int: - return int(VERSION.split(".")[0]) - - -def get_model_config() -> ConfigDict: - # Version-specific configuration - if is_pydantic_v2() >= 2: - model_config = ConfigDict(extra="allow", protected_namespaces=()) # type: ignore - else: - from pydantic import Extra - - model_config = ConfigDict(extra=Extra.allow) # type: ignore - - return model_config - - class ChatCompletionSystemMessageParam(TypedDict, total=False): content: Required[str] """The contents of the system message.""" @@ -208,4 +191,6 @@ class CompletionRequest(BaseModel): api_key: Optional[str] = None model_list: Optional[List[str]] = None - model_config = get_model_config() + class Config: + extra = "allow" + protected_namespaces = () diff --git a/litellm/types/embedding.py b/litellm/types/embedding.py index 831c4266c3..9db0ef2907 100644 --- a/litellm/types/embedding.py +++ b/litellm/types/embedding.py @@ -1,23 +1,6 @@ from typing import List, Optional, Union -from pydantic import ConfigDict, BaseModel, validator, VERSION - - -# Function to get Pydantic version -def is_pydantic_v2() -> int: - return int(VERSION.split(".")[0]) - - -def get_model_config(arbitrary_types_allowed: bool = False) -> ConfigDict: - # Version-specific configuration - if is_pydantic_v2() >= 2: - model_config = ConfigDict(extra="allow", arbitrary_types_allowed=arbitrary_types_allowed, protected_namespaces=()) # type: ignore - else: - from pydantic import Extra - - model_config = ConfigDict(extra=Extra.allow, arbitrary_types_allowed=arbitrary_types_allowed) # type: ignore - - return model_config +from pydantic import BaseModel, validator class EmbeddingRequest(BaseModel): @@ -34,4 +17,7 @@ class EmbeddingRequest(BaseModel): litellm_call_id: Optional[str] = None litellm_logging_obj: Optional[dict] = None logger_fn: Optional[str] = None - model_config = get_model_config() + + class Config: + # allow kwargs + extra = "allow" diff --git a/litellm/types/llms/__init__.py b/litellm/types/llms/__init__.py deleted file mode 100644 index 14952c9aec..0000000000 --- a/litellm/types/llms/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -__all__ = ["openai"] - -from 
. import openai diff --git a/litellm/types/llms/vertex_ai.py b/litellm/types/llms/vertex_ai.py new file mode 100644 index 0000000000..3ad3e62c46 --- /dev/null +++ b/litellm/types/llms/vertex_ai.py @@ -0,0 +1,53 @@ +from typing import TypedDict, Any, Union, Optional, List, Literal, Dict +import json +from typing_extensions import ( + Self, + Protocol, + TypeGuard, + override, + get_origin, + runtime_checkable, + Required, +) + + +class Field(TypedDict): + key: str + value: Dict[str, Any] + + +class FunctionCallArgs(TypedDict): + fields: Field + + +class FunctionResponse(TypedDict): + name: str + response: FunctionCallArgs + + +class FunctionCall(TypedDict): + name: str + args: FunctionCallArgs + + +class FileDataType(TypedDict): + mime_type: str + file_uri: str # the cloud storage uri of storing this file + + +class BlobType(TypedDict): + mime_type: Required[str] + data: Required[bytes] + + +class PartType(TypedDict, total=False): + text: str + inline_data: BlobType + file_data: FileDataType + function_call: FunctionCall + function_response: FunctionResponse + + +class ContentType(TypedDict, total=False): + role: Literal["user", "model"] + parts: Required[List[PartType]] diff --git a/litellm/types/router.py b/litellm/types/router.py index 0cc84d8c30..a61e551a70 100644 --- a/litellm/types/router.py +++ b/litellm/types/router.py @@ -1,42 +1,19 @@ from typing import List, Optional, Union, Dict, Tuple, Literal, TypedDict import httpx -from pydantic import ( - ConfigDict, - BaseModel, - validator, - Field, - __version__ as pydantic_version, - VERSION, -) +from pydantic import BaseModel, validator, Field from .completion import CompletionRequest from .embedding import EmbeddingRequest import uuid, enum -# Function to get Pydantic version -def is_pydantic_v2() -> int: - return int(VERSION.split(".")[0]) - - -def get_model_config(arbitrary_types_allowed: bool = False) -> ConfigDict: - # Version-specific configuration - if is_pydantic_v2() >= 2: - model_config = ConfigDict(extra="allow", arbitrary_types_allowed=arbitrary_types_allowed, protected_namespaces=()) # type: ignore - else: - from pydantic import Extra - - model_config = ConfigDict(extra=Extra.allow, arbitrary_types_allowed=arbitrary_types_allowed) # type: ignore - - return model_config - - class ModelConfig(BaseModel): model_name: str litellm_params: Union[CompletionRequest, EmbeddingRequest] tpm: int rpm: int - model_config = get_model_config() + class Config: + protected_namespaces = () class RouterConfig(BaseModel): @@ -67,7 +44,8 @@ class RouterConfig(BaseModel): "latency-based-routing", ] = "simple-shuffle" - model_config = get_model_config() + class Config: + protected_namespaces = () class UpdateRouterConfig(BaseModel): @@ -87,7 +65,8 @@ class UpdateRouterConfig(BaseModel): fallbacks: Optional[List[dict]] = None context_window_fallbacks: Optional[List[dict]] = None - model_config = get_model_config() + class Config: + protected_namespaces = () class ModelInfo(BaseModel): @@ -97,6 +76,9 @@ class ModelInfo(BaseModel): db_model: bool = ( False # used for proxy - to separate models which are stored in the db vs. config. 
) + base_model: Optional[str] = ( + None # specify if the base model is azure/gpt-3.5-turbo etc for accurate cost tracking + ) def __init__(self, id: Optional[Union[str, int]] = None, **params): if id is None: @@ -105,7 +87,8 @@ class ModelInfo(BaseModel): id = str(id) super().__init__(id=id, **params) - model_config = get_model_config() + class Config: + extra = "allow" def __contains__(self, key): # Define custom behavior for the 'in' operator @@ -200,15 +183,9 @@ class GenericLiteLLMParams(BaseModel): max_retries = int(max_retries) # cast to int super().__init__(max_retries=max_retries, **args, **params) - model_config = get_model_config(arbitrary_types_allowed=True) - if pydantic_version.startswith("1"): - # pydantic v2 warns about using a Config class. - # But without this, pydantic v1 will raise an error: - # RuntimeError: no validator found for , - # see `arbitrary_types_allowed` in Config - # Putting arbitrary_types_allowed = True in the ConfigDict doesn't work in pydantic v1. - class Config: - arbitrary_types_allowed = True + class Config: + extra = "allow" + arbitrary_types_allowed = True def __contains__(self, key): # Define custom behavior for the 'in' operator @@ -267,16 +244,9 @@ class LiteLLM_Params(GenericLiteLLMParams): max_retries = int(max_retries) # cast to int super().__init__(max_retries=max_retries, **args, **params) - model_config = get_model_config(arbitrary_types_allowed=True) - - if pydantic_version.startswith("1"): - # pydantic v2 warns about using a Config class. - # But without this, pydantic v1 will raise an error: - # RuntimeError: no validator found for , - # see `arbitrary_types_allowed` in Config - # Putting arbitrary_types_allowed = True in the ConfigDict doesn't work in pydantic v1. - class Config: - arbitrary_types_allowed = True + class Config: + extra = "allow" + arbitrary_types_allowed = True def __contains__(self, key): # Define custom behavior for the 'in' operator @@ -306,7 +276,8 @@ class updateDeployment(BaseModel): litellm_params: Optional[updateLiteLLMParams] = None model_info: Optional[ModelInfo] = None - model_config = get_model_config() + class Config: + protected_namespaces = () class LiteLLMParamsTypedDict(TypedDict, total=False): @@ -380,7 +351,9 @@ class Deployment(BaseModel): # if using pydantic v1 return self.dict(**kwargs) - model_config = get_model_config() + class Config: + extra = "allow" + protected_namespaces = () def __contains__(self, key): # Define custom behavior for the 'in' operator diff --git a/litellm/types/utils.py b/litellm/types/utils.py new file mode 100644 index 0000000000..10272c6294 --- /dev/null +++ b/litellm/types/utils.py @@ -0,0 +1,6 @@ +from typing import List, Optional, Union, Dict, Tuple, Literal, TypedDict + + +class CostPerToken(TypedDict): + input_cost_per_token: float + output_cost_per_token: float diff --git a/litellm/utils.py b/litellm/utils.py index 7df79f3736..30411ef03c 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -19,7 +19,7 @@ from functools import wraps, lru_cache import datetime, time import tiktoken import uuid -from pydantic import ConfigDict, BaseModel, VERSION +from pydantic import BaseModel import aiohttp import textwrap import logging @@ -35,6 +35,7 @@ from dataclasses import ( import litellm._service_logger # for storing API inputs, outputs, and metadata from litellm.llms.custom_httpx.http_handler import HTTPHandler from litellm.caching import DualCache +from litellm.types.utils import CostPerToken oidc_cache = DualCache() @@ -76,6 +77,7 @@ from .integrations.weights_biases 
import WeightsBiasesLogger from .integrations.custom_logger import CustomLogger from .integrations.langfuse import LangFuseLogger from .integrations.openmeter import OpenMeterLogger +from .integrations.lago import LagoLogger from .integrations.datadog import DataDogLogger from .integrations.prometheus import PrometheusLogger from .integrations.prometheus_services import PrometheusServicesLogger @@ -123,6 +125,7 @@ from typing import ( BinaryIO, Iterable, Tuple, + Callable, ) from .caching import Cache from concurrent.futures import ThreadPoolExecutor @@ -147,6 +150,7 @@ weightsBiasesLogger = None customLogger = None langFuseLogger = None openMeterLogger = None +lagoLogger = None dataDogLogger = None prometheusLogger = None dynamoLogger = None @@ -185,23 +189,6 @@ last_fetched_at_keys = None # } -# Function to get Pydantic version -def is_pydantic_v2() -> int: - return int(VERSION.split(".")[0]) - - -def get_model_config(arbitrary_types_allowed: bool = False) -> ConfigDict: - # Version-specific configuration - if is_pydantic_v2() >= 2: - model_config = ConfigDict(extra="allow", arbitrary_types_allowed=arbitrary_types_allowed, protected_namespaces=()) # type: ignore - else: - from pydantic import Extra - - model_config = ConfigDict(extra=Extra.allow, arbitrary_types_allowed=arbitrary_types_allowed) # type: ignore - - return model_config - - class UnsupportedParamsError(Exception): def __init__(self, status_code, message): self.status_code = status_code @@ -348,7 +335,10 @@ class HiddenParams(OpenAIObject): original_response: Optional[str] = None model_id: Optional[str] = None # used in Router for individual deployments api_base: Optional[str] = None # returns api base used for making completion call - model_config = get_model_config() + + class Config: + extra = "allow" + protected_namespaces = () def get(self, key, default=None): # Custom .get() method to access attributes with a default value if the attribute doesn't exist @@ -975,10 +965,54 @@ class TextCompletionResponse(OpenAIObject): setattr(self, key, value) +class ImageObject(OpenAIObject): + """ + Represents the url or the content of an image generated by the OpenAI API. + + Attributes: + b64_json: The base64-encoded JSON of the generated image, if response_format is b64_json. + url: The URL of the generated image, if response_format is url (default). + revised_prompt: The prompt that was used to generate the image, if there was any revision to the prompt. 
+ + https://platform.openai.com/docs/api-reference/images/object + """ + + b64_json: Optional[str] = None + url: Optional[str] = None + revised_prompt: Optional[str] = None + + def __init__(self, b64_json=None, url=None, revised_prompt=None): + + super().__init__(b64_json=b64_json, url=url, revised_prompt=revised_prompt) + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + def json(self, **kwargs): + try: + return self.model_dump() # noqa + except: + # if using pydantic v1 + return self.dict() + + class ImageResponse(OpenAIObject): created: Optional[int] = None - data: Optional[list] = None + data: Optional[List[ImageObject]] = None usage: Optional[dict] = None @@ -2125,7 +2159,7 @@ class Logging: """ Implementing async callbacks, to handle asyncio event loop issues when custom integrations need to use async functions. """ - print_verbose(f"Logging Details LiteLLM-Async Success Call: {cache_hit}") + print_verbose(f"Logging Details LiteLLM-Async Success Call") start_time, end_time, result = self._success_handler_helper_fn( start_time=start_time, end_time=end_time, result=result, cache_hit=cache_hit ) @@ -2347,8 +2381,8 @@ class Logging: end_time=end_time, print_verbose=print_verbose, ) - except: - print_verbose( + except Exception as e: + verbose_logger.error( f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {traceback.format_exc()}" ) pass @@ -2579,7 +2613,7 @@ class Logging: response_obj=result, start_time=start_time, end_time=end_time, - ) + ) # type: ignore if callable(callback): # custom logger functions await customLogger.async_log_event( kwargs=self.model_call_details, @@ -2706,6 +2740,15 @@ class Rules: return True +def _init_custom_logger_compatible_class( + logging_integration: litellm._custom_logger_compatible_callbacks_literal, +) -> Callable: + if logging_integration == "lago": + return LagoLogger() # type: ignore + elif logging_integration == "openmeter": + return OpenMeterLogger() # type: ignore + + ####### CLIENT ################### # make it easy to log if completion/embedding runs succeeded or failed + see what happened | Non-Blocking def function_setup( @@ -2716,16 +2759,24 @@ def function_setup( function_id = kwargs["id"] if "id" in kwargs else None if len(litellm.callbacks) > 0: for callback in litellm.callbacks: + # check if callback is a string - e.g. 
"lago", "openmeter" + if isinstance(callback, str): + callback = _init_custom_logger_compatible_class(callback) + if any( + isinstance(cb, type(callback)) + for cb in litellm._async_success_callback + ): # don't double add a callback + continue if callback not in litellm.input_callback: - litellm.input_callback.append(callback) + litellm.input_callback.append(callback) # type: ignore if callback not in litellm.success_callback: - litellm.success_callback.append(callback) + litellm.success_callback.append(callback) # type: ignore if callback not in litellm.failure_callback: - litellm.failure_callback.append(callback) + litellm.failure_callback.append(callback) # type: ignore if callback not in litellm._async_success_callback: - litellm._async_success_callback.append(callback) + litellm._async_success_callback.append(callback) # type: ignore if callback not in litellm._async_failure_callback: - litellm._async_failure_callback.append(callback) + litellm._async_failure_callback.append(callback) # type: ignore print_verbose( f"Initialized litellm callbacks, Async Success Callbacks: {litellm._async_success_callback}" ) @@ -3846,7 +3897,7 @@ def get_replicate_completion_pricing(completion_response=None, total_time=0.0): ) if total_time == 0.0: # total time is in ms start_time = completion_response["created"] - end_time = completion_response["ended"] + end_time = getattr(completion_response, "ended", time.time()) total_time = end_time - start_time return a100_80gb_price_per_second_public * total_time / 1000 @@ -3874,7 +3925,12 @@ def _select_tokenizer(model: str): return {"type": "huggingface_tokenizer", "tokenizer": tokenizer} # default - tiktoken else: - return {"type": "openai_tokenizer", "tokenizer": encoding} + tokenizer = None + try: + tokenizer = Tokenizer.from_pretrained(model) + return {"type": "huggingface_tokenizer", "tokenizer": tokenizer} + except: + return {"type": "openai_tokenizer", "tokenizer": encoding} def encode(model="", text="", custom_tokenizer: Optional[dict] = None): @@ -4111,7 +4167,7 @@ def token_counter( text: Optional[Union[str, List[str]]] = None, messages: Optional[List] = None, count_response_tokens: Optional[bool] = False, -): +) -> int: """ Count the number of tokens in a given text using a specified model. 
@@ -4203,10 +4259,32 @@ def token_counter( ) else: num_tokens = len(encoding.encode(text, disallowed_special=())) # type: ignore - return num_tokens +def _cost_per_token_custom_pricing_helper( + prompt_tokens=0, + completion_tokens=0, + response_time_ms=None, + ### CUSTOM PRICING ### + custom_cost_per_token: Optional[CostPerToken] = None, + custom_cost_per_second: Optional[float] = None, +) -> Optional[Tuple[float, float]]: + """Internal helper function for calculating cost, if custom pricing given""" + if custom_cost_per_token is None and custom_cost_per_second is None: + return None + + if custom_cost_per_token is not None: + input_cost = custom_cost_per_token["input_cost_per_token"] * prompt_tokens + output_cost = custom_cost_per_token["output_cost_per_token"] * completion_tokens + return input_cost, output_cost + elif custom_cost_per_second is not None: + output_cost = custom_cost_per_second * response_time_ms / 1000 # type: ignore + return 0, output_cost + + return None + + def cost_per_token( model="", prompt_tokens=0, @@ -4214,7 +4292,10 @@ def cost_per_token( response_time_ms=None, custom_llm_provider=None, region_name=None, -): + ### CUSTOM PRICING ### + custom_cost_per_token: Optional[CostPerToken] = None, + custom_cost_per_second: Optional[float] = None, +) -> Tuple[float, float]: """ Calculates the cost per token for a given model, prompt tokens, and completion tokens. @@ -4222,13 +4303,28 @@ def cost_per_token( model (str): The name of the model to use. Default is "" prompt_tokens (int): The number of tokens in the prompt. completion_tokens (int): The number of tokens in the completion. + response_time (float): The amount of time, in milliseconds, it took the call to complete. + custom_llm_provider (str): The llm provider to whom the call was made (see init.py for full list) + custom_cost_per_token: Optional[CostPerToken]: the cost per input + output token for the llm api call. + custom_cost_per_second: Optional[float]: the cost per second for the llm api call. Returns: tuple: A tuple containing the cost in USD dollars for prompt tokens and completion tokens, respectively. 
""" + ## CUSTOM PRICING ## + response_cost = _cost_per_token_custom_pricing_helper( + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + response_time_ms=response_time_ms, + custom_cost_per_second=custom_cost_per_second, + custom_cost_per_token=custom_cost_per_token, + ) + if response_cost is not None: + return response_cost[0], response_cost[1] + # given - prompt_tokens_cost_usd_dollar = 0 - completion_tokens_cost_usd_dollar = 0 + prompt_tokens_cost_usd_dollar: float = 0 + completion_tokens_cost_usd_dollar: float = 0 model_cost_ref = litellm.model_cost model_with_provider = model if custom_llm_provider is not None: @@ -4324,6 +4420,28 @@ def cost_per_token( * completion_tokens ) return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar + elif "ft:davinci-002" in model: + print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM") + # fuzzy match ft:davinci-002:abcd-id-cool-litellm + prompt_tokens_cost_usd_dollar = ( + model_cost_ref["ft:davinci-002"]["input_cost_per_token"] * prompt_tokens + ) + completion_tokens_cost_usd_dollar = ( + model_cost_ref["ft:davinci-002"]["output_cost_per_token"] + * completion_tokens + ) + return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar + elif "ft:babbage-002" in model: + print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM") + # fuzzy match ft:babbage-002:abcd-id-cool-litellm + prompt_tokens_cost_usd_dollar = ( + model_cost_ref["ft:babbage-002"]["input_cost_per_token"] * prompt_tokens + ) + completion_tokens_cost_usd_dollar = ( + model_cost_ref["ft:babbage-002"]["output_cost_per_token"] + * completion_tokens + ) + return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar elif model in litellm.azure_llms: verbose_logger.debug(f"Cost Tracking: {model} is an Azure LLM") model = litellm.azure_llms[model] @@ -4391,6 +4509,9 @@ def completion_cost( size=None, quality=None, n=None, # number of images + ### CUSTOM PRICING ### + custom_cost_per_token: Optional[CostPerToken] = None, + custom_cost_per_second: Optional[float] = None, ) -> float: """ Calculate the cost of a given completion call fot GPT-3.5-turbo, llama2, any litellm supported llm. @@ -4403,19 +4524,22 @@ def completion_cost( prompt (str): Optional. The input prompt passed to the llm completion (str): Optional. The output completion text from the llm total_time (float): Optional. (Only used for Replicate LLMs) The total time used for the request in seconds + custom_cost_per_token: Optional[CostPerToken]: the cost per input + output token for the llm api call. + custom_cost_per_second: Optional[float]: the cost per second for the llm api call. Returns: float: The cost in USD dollars for the completion based on the provided parameters. + Exceptions: + Raises exception if model not in the litellm model cost map. Register model, via custom pricing or PR - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json + + Note: - If completion_response is provided, the function extracts token information and the model name from it. - If completion_response is not provided, the function calculates token counts based on the model and input text. - The cost is calculated based on the model, prompt tokens, and completion tokens. - For certain models containing "togethercomputer" in the name, prices are based on the model size. - For un-mapped Replicate models, the cost is calculated based on the total time used for the request. 
- - Exceptions: - - If an error occurs during execution, the error is raised """ try: if ( @@ -4545,6 +4669,8 @@ def completion_cost( custom_llm_provider=custom_llm_provider, response_time_ms=total_time, region_name=region_name, + custom_cost_per_second=custom_cost_per_second, + custom_cost_per_token=custom_cost_per_token, ) _final_cost = prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar print_verbose( @@ -4820,6 +4946,14 @@ def get_optional_params_image_gen( width, height = size.split("x") optional_params["width"] = int(width) optional_params["height"] = int(height) + elif custom_llm_provider == "vertex_ai": + supported_params = ["n"] + """ + All params here: https://console.cloud.google.com/vertex-ai/publishers/google/model-garden/imagegeneration?project=adroit-crow-413218 + """ + _check_valid_arg(supported_params=supported_params) + if n is not None: + optional_params["sampleCount"] = int(n) for k in passed_params.keys(): if k not in default_params.keys(): @@ -5040,7 +5174,7 @@ def get_optional_params( or "tools" in non_default_params ): if ( - custom_llm_provider != "openai" + custom_llm_provider == "ollama" and custom_llm_provider != "text-completion-openai" and custom_llm_provider != "azure" and custom_llm_provider != "vertex_ai" @@ -5054,6 +5188,8 @@ def get_optional_params( and custom_llm_provider != "cohere" and custom_llm_provider != "bedrock" and custom_llm_provider != "ollama_chat" + and custom_llm_provider != "openrouter" + and custom_llm_provider not in litellm.openai_compatible_providers ): if custom_llm_provider == "ollama": # ollama actually supports json output @@ -5111,7 +5247,7 @@ def get_optional_params( if unsupported_params and not litellm.drop_params: raise UnsupportedParamsError( status_code=500, - message=f"{custom_llm_provider} does not support parameters: {unsupported_params}. To drop these, set `litellm.drop_params=True` or for proxy:\n\n`litellm_settings:\n drop_params: true`\n", + message=f"{custom_llm_provider} does not support parameters: {unsupported_params}, for model={model}. 
To drop these, set `litellm.drop_params=True` or for proxy:\n\n`litellm_settings:\n drop_params: true`\n", ) def _map_and_modify_arg(supported_params: dict, provider: str, model: str): @@ -5800,12 +5936,21 @@ def get_optional_params( optional_params["extra_body"] = ( extra_body # openai client supports `extra_body` param ) - else: # assume passing in params for openai/azure openai - + elif custom_llm_provider == "openai": supported_params = get_supported_openai_params( model=model, custom_llm_provider="openai" ) _check_valid_arg(supported_params=supported_params) + optional_params = litellm.OpenAIConfig().map_openai_params( + non_default_params=non_default_params, + optional_params=optional_params, + model=model, + ) + else: # assume passing in params for azure openai + supported_params = get_supported_openai_params( + model=model, custom_llm_provider="azure" + ) + _check_valid_arg(supported_params=supported_params) if functions is not None: optional_params["functions"] = functions if function_call is not None: @@ -6048,7 +6193,7 @@ def get_api_base(model: str, optional_params: dict) -> Optional[str]: ) ) except Exception as e: - verbose_logger.error("Error occurred in getting api base - {}".format(str(e))) + verbose_logger.debug("Error occurred in getting api base - {}".format(str(e))) custom_llm_provider = None dynamic_api_key = None dynamic_api_base = None @@ -6179,7 +6324,9 @@ def get_supported_openai_params(model: str, custom_llm_provider: str): "presence_penalty", "stop", ] - elif custom_llm_provider == "openai" or custom_llm_provider == "azure": + elif custom_llm_provider == "openai": + return litellm.OpenAIConfig().get_supported_openai_params(model=model) + elif custom_llm_provider == "azure": return [ "functions", "function_call", @@ -6345,6 +6492,7 @@ def get_formatted_prompt( "image_generation", "audio_transcription", "moderation", + "text_completion", ], ) -> str: """ @@ -6357,6 +6505,8 @@ def get_formatted_prompt( for m in data["messages"]: if "content" in m and isinstance(m["content"], str): prompt += m["content"] + elif call_type == "text_completion": + prompt = data["prompt"] elif call_type == "embedding" or call_type == "moderation": if isinstance(data["input"], str): prompt = data["input"] @@ -6778,7 +6928,7 @@ def get_max_tokens(model: str): raise Exception() except: raise Exception( - "This model isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json" + f"Model {model} isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json" ) @@ -7924,11 +8074,8 @@ def _should_retry(status_code: int): return False -def _calculate_retry_after( - remaining_retries: int, - max_retries: int, +def _get_retry_after_from_exception_header( response_headers: Optional[httpx.Headers] = None, - min_timeout: int = 0, ): """ Reimplementation of openai's calculate retry after, since that one can't be imported. @@ -7954,10 +8101,20 @@ def _calculate_retry_after( retry_after = int(retry_date - time.time()) else: retry_after = -1 + return retry_after - except Exception: + except Exception as e: retry_after = -1 + +def _calculate_retry_after( + remaining_retries: int, + max_retries: int, + response_headers: Optional[httpx.Headers] = None, + min_timeout: int = 0, +): + retry_after = _get_retry_after_from_exception_header(response_headers) + # If the API asks us to wait a certain amount of time (and it's a reasonable amount), just do what it says. 
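# Note: the helper above returns the server-requested wait (taken from the
# response headers) in seconds, or -1 when that information is missing or
# unparseable, so the check below only honors positive waits of at most 60 seconds.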
if 0 < retry_after <= 60: return retry_after @@ -8155,33 +8312,39 @@ def exception_type( # Common Extra information needed for all providers # We pass num retries, api_base, vertex_deployment etc to the exception here ################################################################################ + extra_information = "" + try: + _api_base = litellm.get_api_base( + model=model, optional_params=extra_kwargs + ) + messages = litellm.get_first_chars_messages(kwargs=completion_kwargs) + _vertex_project = extra_kwargs.get("vertex_project") + _vertex_location = extra_kwargs.get("vertex_location") + _metadata = extra_kwargs.get("metadata", {}) or {} + _model_group = _metadata.get("model_group") + _deployment = _metadata.get("deployment") + extra_information = f"\nModel: {model}" + if _api_base: + extra_information += f"\nAPI Base: `{_api_base}`" + if messages and len(messages) > 0: + extra_information += f"\nMessages: `{messages}`" - _api_base = litellm.get_api_base(model=model, optional_params=extra_kwargs) - messages = litellm.get_first_chars_messages(kwargs=completion_kwargs) - _vertex_project = extra_kwargs.get("vertex_project") - _vertex_location = extra_kwargs.get("vertex_location") - _metadata = extra_kwargs.get("metadata", {}) or {} - _model_group = _metadata.get("model_group") - _deployment = _metadata.get("deployment") - extra_information = f"\nModel: {model}" - if _api_base: - extra_information += f"\nAPI Base: {_api_base}" - if messages and len(messages) > 0: - extra_information += f"\nMessages: {messages}" + if _model_group is not None: + extra_information += f"\nmodel_group: `{_model_group}`\n" + if _deployment is not None: + extra_information += f"\ndeployment: `{_deployment}`\n" + if _vertex_project is not None: + extra_information += f"\nvertex_project: `{_vertex_project}`\n" + if _vertex_location is not None: + extra_information += f"\nvertex_location: `{_vertex_location}`\n" - if _model_group is not None: - extra_information += f"\nmodel_group: {_model_group}\n" - if _deployment is not None: - extra_information += f"\ndeployment: {_deployment}\n" - if _vertex_project is not None: - extra_information += f"\nvertex_project: {_vertex_project}\n" - if _vertex_location is not None: - extra_information += f"\nvertex_location: {_vertex_location}\n" - - # on litellm proxy add key name + team to exceptions - extra_information = _add_key_name_and_team_to_alert( - request_info=extra_information, metadata=_metadata - ) + # on litellm proxy add key name + team to exceptions + extra_information = _add_key_name_and_team_to_alert( + request_info=extra_information, metadata=_metadata + ) + except: + # DO NOT LET this Block raising the original exception + pass ################################################################################ # End of Common Extra information Needed for all providers @@ -8194,9 +8357,10 @@ def exception_type( if "Request Timeout Error" in error_str or "Request timed out" in error_str: exception_mapping_worked = True raise Timeout( - message=f"APITimeoutError - Request timed out. {extra_information} \n error_str: {error_str}", + message=f"APITimeoutError - Request timed out. 
\nerror_str: {error_str}", model=model, llm_provider=custom_llm_provider, + litellm_debug_info=extra_information, ) if ( @@ -8226,10 +8390,11 @@ def exception_type( if "This model's maximum context length is" in error_str: exception_mapping_worked = True raise ContextWindowExceededError( - message=f"{exception_provider} - {message} {extra_information}", + message=f"{exception_provider} - {message}", llm_provider=custom_llm_provider, model=model, response=original_exception.response, + litellm_debug_info=extra_information, ) elif ( "invalid_request_error" in error_str @@ -8237,10 +8402,11 @@ def exception_type( ): exception_mapping_worked = True raise NotFoundError( - message=f"{exception_provider} - {message} {extra_information}", + message=f"{exception_provider} - {message}", llm_provider=custom_llm_provider, model=model, response=original_exception.response, + litellm_debug_info=extra_information, ) elif ( "invalid_request_error" in error_str @@ -8248,10 +8414,11 @@ def exception_type( ): exception_mapping_worked = True raise ContentPolicyViolationError( - message=f"{exception_provider} - {message} {extra_information}", + message=f"{exception_provider} - {message}", llm_provider=custom_llm_provider, model=model, response=original_exception.response, + litellm_debug_info=extra_information, ) elif ( "invalid_request_error" in error_str @@ -8259,17 +8426,19 @@ def exception_type( ): exception_mapping_worked = True raise BadRequestError( - message=f"{exception_provider} - {message} {extra_information}", + message=f"{exception_provider} - {message}", llm_provider=custom_llm_provider, model=model, response=original_exception.response, + litellm_debug_info=extra_information, ) elif "Request too large" in error_str: raise RateLimitError( - message=f"{exception_provider} - {message} {extra_information}", + message=f"{exception_provider} - {message}", model=model, llm_provider=custom_llm_provider, response=original_exception.response, + litellm_debug_info=extra_information, ) elif ( "The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable" @@ -8277,10 +8446,11 @@ def exception_type( ): exception_mapping_worked = True raise AuthenticationError( - message=f"{exception_provider} - {message} {extra_information}", + message=f"{exception_provider} - {message}", llm_provider=custom_llm_provider, model=model, response=original_exception.response, + litellm_debug_info=extra_information, ) elif "Mistral API raised a streaming error" in error_str: exception_mapping_worked = True @@ -8289,82 +8459,92 @@ def exception_type( ) raise APIError( status_code=500, - message=f"{exception_provider} - {message} {extra_information}", + message=f"{exception_provider} - {message}", llm_provider=custom_llm_provider, model=model, request=_request, + litellm_debug_info=extra_information, ) elif hasattr(original_exception, "status_code"): exception_mapping_worked = True if original_exception.status_code == 401: exception_mapping_worked = True raise AuthenticationError( - message=f"{exception_provider} - {message} {extra_information}", + message=f"{exception_provider} - {message}", llm_provider=custom_llm_provider, model=model, response=original_exception.response, + litellm_debug_info=extra_information, ) elif original_exception.status_code == 404: exception_mapping_worked = True raise NotFoundError( - message=f"{exception_provider} - {message} {extra_information}", + message=f"{exception_provider} - {message}", model=model, 
llm_provider=custom_llm_provider, response=original_exception.response, + litellm_debug_info=extra_information, ) elif original_exception.status_code == 408: exception_mapping_worked = True raise Timeout( - message=f"{exception_provider} - {message} {extra_information}", + message=f"{exception_provider} - {message}", model=model, llm_provider=custom_llm_provider, + litellm_debug_info=extra_information, ) elif original_exception.status_code == 422: exception_mapping_worked = True raise BadRequestError( - message=f"{exception_provider} - {message} {extra_information}", + message=f"{exception_provider} - {message}", model=model, llm_provider=custom_llm_provider, response=original_exception.response, + litellm_debug_info=extra_information, ) elif original_exception.status_code == 429: exception_mapping_worked = True raise RateLimitError( - message=f"{exception_provider} - {message} {extra_information}", + message=f"{exception_provider} - {message}", model=model, llm_provider=custom_llm_provider, response=original_exception.response, + litellm_debug_info=extra_information, ) elif original_exception.status_code == 503: exception_mapping_worked = True raise ServiceUnavailableError( - message=f"{exception_provider} - {message} {extra_information}", + message=f"{exception_provider} - {message}", model=model, llm_provider=custom_llm_provider, response=original_exception.response, + litellm_debug_info=extra_information, ) elif original_exception.status_code == 504: # gateway timeout error exception_mapping_worked = True raise Timeout( - message=f"{exception_provider} - {message} {extra_information}", + message=f"{exception_provider} - {message}", model=model, llm_provider=custom_llm_provider, + litellm_debug_info=extra_information, ) else: exception_mapping_worked = True raise APIError( status_code=original_exception.status_code, - message=f"{exception_provider} - {message} {extra_information}", + message=f"{exception_provider} - {message}", llm_provider=custom_llm_provider, model=model, request=original_exception.request, + litellm_debug_info=extra_information, ) else: # if no status code then it is an APIConnectionError: https://github.com/openai/openai-python#handling-errors raise APIConnectionError( - message=f"{exception_provider} - {message} {extra_information}", + message=f"{exception_provider} - {message}", llm_provider=custom_llm_provider, model=model, + litellm_debug_info=extra_information, request=httpx.Request( method="POST", url="https://api.openai.com/v1/" ), @@ -8518,7 +8698,10 @@ def exception_type( message=f"ReplicateException - {str(original_exception)}", llm_provider="replicate", model=model, - request=original_exception.request, + request=httpx.Request( + method="POST", + url="https://api.replicate.com/v1/deployments", + ), ) elif custom_llm_provider == "watsonx": if "token_quota_reached" in error_str: @@ -8529,6 +8712,28 @@ def exception_type( model=model, response=original_exception.response, ) + elif custom_llm_provider == "predibase": + if "authorization denied for" in error_str: + exception_mapping_worked = True + + # Predibase returns the raw API Key in the response - this block ensures it's not returned in the exception + if ( + error_str is not None + and isinstance(error_str, str) + and "bearer" in error_str.lower() + ): + # only keep the first 10 chars after the occurnence of "bearer" + _bearer_token_start_index = error_str.lower().find("bearer") + error_str = error_str[: _bearer_token_start_index + 14] + error_str += "XXXXXXX" + '"' + + raise AuthenticationError( + 
message=f"PredibaseException: Authentication Error - {error_str}", + llm_provider="predibase", + model=model, + response=original_exception.response, + litellm_debug_info=extra_information, + ) elif custom_llm_provider == "bedrock": if ( "too many tokens" in error_str @@ -8544,7 +8749,7 @@ def exception_type( llm_provider="bedrock", response=original_exception.response, ) - if "Malformed input request" in error_str: + elif "Malformed input request" in error_str: exception_mapping_worked = True raise BadRequestError( message=f"BedrockException - {error_str}", @@ -8552,7 +8757,7 @@ def exception_type( llm_provider="bedrock", response=original_exception.response, ) - if ( + elif ( "Unable to locate credentials" in error_str or "The security token included in the request is invalid" in error_str @@ -8564,7 +8769,7 @@ def exception_type( llm_provider="bedrock", response=original_exception.response, ) - if "AccessDeniedException" in error_str: + elif "AccessDeniedException" in error_str: exception_mapping_worked = True raise PermissionDeniedError( message=f"BedrockException PermissionDeniedError - {error_str}", @@ -8572,7 +8777,7 @@ def exception_type( llm_provider="bedrock", response=original_exception.response, ) - if ( + elif ( "throttlingException" in error_str or "ThrottlingException" in error_str ): @@ -8583,14 +8788,17 @@ def exception_type( llm_provider="bedrock", response=original_exception.response, ) - if "Connect timeout on endpoint URL" in error_str: + elif ( + "Connect timeout on endpoint URL" in error_str + or "timed out" in error_str + ): exception_mapping_worked = True raise Timeout( message=f"BedrockException: Timeout Error - {error_str}", model=model, llm_provider="bedrock", ) - if hasattr(original_exception, "status_code"): + elif hasattr(original_exception, "status_code"): if original_exception.status_code == 500: exception_mapping_worked = True raise ServiceUnavailableError( @@ -8628,6 +8836,49 @@ def exception_type( model=model, response=original_exception.response, ) + elif original_exception.status_code == 408: + exception_mapping_worked = True + raise Timeout( + message=f"BedrockException - {original_exception.message}", + model=model, + llm_provider=custom_llm_provider, + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 422: + exception_mapping_worked = True + raise BadRequestError( + message=f"BedrockException - {original_exception.message}", + model=model, + llm_provider=custom_llm_provider, + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 429: + exception_mapping_worked = True + raise RateLimitError( + message=f"BedrockException - {original_exception.message}", + model=model, + llm_provider=custom_llm_provider, + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 503: + exception_mapping_worked = True + raise ServiceUnavailableError( + message=f"BedrockException - {original_exception.message}", + model=model, + llm_provider=custom_llm_provider, + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 504: # gateway timeout error + exception_mapping_worked = True + raise Timeout( + message=f"BedrockException - {original_exception.message}", + model=model, + llm_provider=custom_llm_provider, + litellm_debug_info=extra_information, + ) elif custom_llm_provider == "sagemaker": if "Unable to locate credentials" in error_str: 
exception_mapping_worked = True @@ -8666,10 +8917,11 @@ def exception_type( ): exception_mapping_worked = True raise BadRequestError( - message=f"VertexAIException - {error_str} {extra_information}", + message=f"VertexAIException - {error_str}", model=model, llm_provider="vertex_ai", response=original_exception.response, + litellm_debug_info=extra_information, ) elif ( "None Unknown Error." in error_str @@ -8677,26 +8929,29 @@ def exception_type( ): exception_mapping_worked = True raise APIError( - message=f"VertexAIException - {error_str} {extra_information}", + message=f"VertexAIException - {error_str}", status_code=500, model=model, llm_provider="vertex_ai", request=original_exception.request, + litellm_debug_info=extra_information, ) elif "403" in error_str: exception_mapping_worked = True raise BadRequestError( - message=f"VertexAIException - {error_str} {extra_information}", + message=f"VertexAIException - {error_str}", model=model, llm_provider="vertex_ai", response=original_exception.response, + litellm_debug_info=extra_information, ) elif "The response was blocked." in error_str: exception_mapping_worked = True raise UnprocessableEntityError( - message=f"VertexAIException - {error_str} {extra_information}", + message=f"VertexAIException - {error_str}", model=model, llm_provider="vertex_ai", + litellm_debug_info=extra_information, response=httpx.Response( status_code=429, request=httpx.Request( @@ -8713,9 +8968,10 @@ def exception_type( ): exception_mapping_worked = True raise RateLimitError( - message=f"VertexAIException - {error_str} {extra_information}", + message=f"VertexAIException - {error_str}", model=model, llm_provider="vertex_ai", + litellm_debug_info=extra_information, response=httpx.Response( status_code=429, request=httpx.Request( @@ -8728,18 +8984,20 @@ def exception_type( if original_exception.status_code == 400: exception_mapping_worked = True raise BadRequestError( - message=f"VertexAIException - {error_str} {extra_information}", + message=f"VertexAIException - {error_str}", model=model, llm_provider="vertex_ai", + litellm_debug_info=extra_information, response=original_exception.response, ) if original_exception.status_code == 500: exception_mapping_worked = True raise APIError( - message=f"VertexAIException - {error_str} {extra_information}", + message=f"VertexAIException - {error_str}", status_code=500, model=model, llm_provider="vertex_ai", + litellm_debug_info=extra_information, request=original_exception.request, ) elif custom_llm_provider == "palm" or custom_llm_provider == "gemini": @@ -9340,25 +9598,28 @@ def exception_type( exception_mapping_worked = True raise APIError( status_code=500, - message=f"AzureException - {original_exception.message} {extra_information}", + message=f"AzureException - {original_exception.message}", llm_provider="azure", model=model, + litellm_debug_info=extra_information, request=httpx.Request(method="POST", url="https://openai.com/"), ) elif "This model's maximum context length is" in error_str: exception_mapping_worked = True raise ContextWindowExceededError( - message=f"AzureException - {original_exception.message} {extra_information}", + message=f"AzureException - {original_exception.message}", llm_provider="azure", model=model, + litellm_debug_info=extra_information, response=original_exception.response, ) elif "DeploymentNotFound" in error_str: exception_mapping_worked = True raise NotFoundError( - message=f"AzureException - {original_exception.message} {extra_information}", + message=f"AzureException - 
{original_exception.message}", llm_provider="azure", model=model, + litellm_debug_info=extra_information, response=original_exception.response, ) elif ( @@ -9370,17 +9631,19 @@ def exception_type( ): exception_mapping_worked = True raise ContentPolicyViolationError( - message=f"AzureException - {original_exception.message} {extra_information}", + message=f"AzureException - {original_exception.message}", llm_provider="azure", model=model, + litellm_debug_info=extra_information, response=original_exception.response, ) elif "invalid_request_error" in error_str: exception_mapping_worked = True raise BadRequestError( - message=f"AzureException - {original_exception.message} {extra_information}", + message=f"AzureException - {original_exception.message}", llm_provider="azure", model=model, + litellm_debug_info=extra_information, response=original_exception.response, ) elif ( @@ -9389,9 +9652,10 @@ def exception_type( ): exception_mapping_worked = True raise AuthenticationError( - message=f"{exception_provider} - {original_exception.message} {extra_information}", + message=f"{exception_provider} - {original_exception.message}", llm_provider=custom_llm_provider, model=model, + litellm_debug_info=extra_information, response=original_exception.response, ) elif hasattr(original_exception, "status_code"): @@ -9399,55 +9663,62 @@ def exception_type( if original_exception.status_code == 401: exception_mapping_worked = True raise AuthenticationError( - message=f"AzureException - {original_exception.message} {extra_information}", + message=f"AzureException - {original_exception.message}", llm_provider="azure", model=model, + litellm_debug_info=extra_information, response=original_exception.response, ) elif original_exception.status_code == 408: exception_mapping_worked = True raise Timeout( - message=f"AzureException - {original_exception.message} {extra_information}", + message=f"AzureException - {original_exception.message}", model=model, + litellm_debug_info=extra_information, llm_provider="azure", ) if original_exception.status_code == 422: exception_mapping_worked = True raise BadRequestError( - message=f"AzureException - {original_exception.message} {extra_information}", + message=f"AzureException - {original_exception.message}", model=model, llm_provider="azure", + litellm_debug_info=extra_information, response=original_exception.response, ) elif original_exception.status_code == 429: exception_mapping_worked = True raise RateLimitError( - message=f"AzureException - {original_exception.message} {extra_information}", + message=f"AzureException - {original_exception.message}", model=model, llm_provider="azure", + litellm_debug_info=extra_information, response=original_exception.response, ) elif original_exception.status_code == 503: exception_mapping_worked = True raise ServiceUnavailableError( - message=f"AzureException - {original_exception.message} {extra_information}", + message=f"AzureException - {original_exception.message}", model=model, llm_provider="azure", + litellm_debug_info=extra_information, response=original_exception.response, ) elif original_exception.status_code == 504: # gateway timeout error exception_mapping_worked = True raise Timeout( - message=f"AzureException - {original_exception.message} {extra_information}", + message=f"AzureException - {original_exception.message}", model=model, + litellm_debug_info=extra_information, llm_provider="azure", ) else: exception_mapping_worked = True raise APIError( status_code=original_exception.status_code, - message=f"AzureException - 
{original_exception.message} {extra_information}", + message=f"AzureException - {original_exception.message}", llm_provider="azure", + litellm_debug_info=extra_information, model=model, request=httpx.Request( method="POST", url="https://openai.com/" @@ -9456,9 +9727,10 @@ def exception_type( else: # if no status code then it is an APIConnectionError: https://github.com/openai/openai-python#handling-errors raise APIConnectionError( - message=f"{exception_provider} - {message} {extra_information}", + message=f"{exception_provider} - {message}", llm_provider="azure", model=model, + litellm_debug_info=extra_information, request=httpx.Request(method="POST", url="https://openai.com/"), ) if ( @@ -10486,75 +10758,11 @@ class CustomStreamWrapper: raise e def handle_bedrock_stream(self, chunk): - if "cohere" in self.model: - return { - "text": chunk["text"], - "is_finished": chunk["is_finished"], - "finish_reason": chunk["finish_reason"], - } - if hasattr(chunk, "get"): - chunk = chunk.get("chunk") - chunk_data = json.loads(chunk.get("bytes").decode()) - else: - chunk_data = json.loads(chunk.decode()) - if chunk_data: - text = "" - is_finished = False - finish_reason = "" - if "outputText" in chunk_data: - text = chunk_data["outputText"] - # ai21 mapping - if "ai21" in self.model: # fake ai21 streaming - text = chunk_data.get("completions")[0].get("data").get("text") - is_finished = True - finish_reason = "stop" - ######## bedrock.anthropic mappings ############### - elif "completion" in chunk_data: # not claude-3 - text = chunk_data["completion"] # bedrock.anthropic - stop_reason = chunk_data.get("stop_reason", None) - if stop_reason != None: - is_finished = True - finish_reason = stop_reason - elif "delta" in chunk_data: - if chunk_data["delta"].get("text", None) is not None: - text = chunk_data["delta"]["text"] - stop_reason = chunk_data["delta"].get("stop_reason", None) - if stop_reason != None: - is_finished = True - finish_reason = stop_reason - ######## bedrock.mistral mappings ############### - elif "outputs" in chunk_data: - if ( - len(chunk_data["outputs"]) == 1 - and chunk_data["outputs"][0].get("text", None) is not None - ): - text = chunk_data["outputs"][0]["text"] - stop_reason = chunk_data.get("stop_reason", None) - if stop_reason != None: - is_finished = True - finish_reason = stop_reason - ######## bedrock.cohere mappings ############### - # meta mapping - elif "generation" in chunk_data: - text = chunk_data["generation"] # bedrock.meta - # cohere mapping - elif "text" in chunk_data: - text = chunk_data["text"] # bedrock.cohere - # cohere mapping for finish reason - elif "finish_reason" in chunk_data: - finish_reason = chunk_data["finish_reason"] - is_finished = True - elif chunk_data.get("completionReason", None): - is_finished = True - finish_reason = chunk_data["completionReason"] - elif chunk.get("error", None): - raise Exception(chunk["error"]) - return { - "text": text, - "is_finished": is_finished, - "finish_reason": finish_reason, - } - return "" + return { + "text": chunk["text"], + "is_finished": chunk["is_finished"], + "finish_reason": chunk["finish_reason"], + } def handle_sagemaker_stream(self, chunk): if "data: [DONE]" in chunk: @@ -11354,9 +11562,10 @@ class CustomStreamWrapper: or self.custom_llm_provider == "vertex_ai" or self.custom_llm_provider == "sagemaker" or self.custom_llm_provider == "gemini" + or self.custom_llm_provider == "replicate" or self.custom_llm_provider == "cached_response" or self.custom_llm_provider == "predibase" - or (self.custom_llm_provider 
== "bedrock" and "cohere" in self.model) + or self.custom_llm_provider == "bedrock" or self.custom_llm_provider in litellm.openai_compatible_endpoints ): async for chunk in self.completion_stream: @@ -12040,3 +12249,34 @@ def _add_key_name_and_team_to_alert(request_info: str, metadata: dict) -> str: return request_info except: return request_info + + +class ModelResponseIterator: + def __init__(self, model_response: ModelResponse, convert_to_delta: bool = False): + if convert_to_delta == True: + self.model_response = ModelResponse(stream=True) + _delta = self.model_response.choices[0].delta # type: ignore + _delta.content = model_response.choices[0].message.content # type: ignore + else: + self.model_response = model_response + self.is_done = False + + # Sync iterator + def __iter__(self): + return self + + def __next__(self): + if self.is_done: + raise StopIteration + self.is_done = True + return self.model_response + + # Async iterator + def __aiter__(self): + return self + + async def __anext__(self): + if self.is_done: + raise StopAsyncIteration + self.is_done = True + return self.model_response diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index a88d6875ca..f3db33c601 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -234,6 +234,24 @@ "litellm_provider": "openai", "mode": "chat" }, + "ft:davinci-002": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000002, + "litellm_provider": "text-completion-openai", + "mode": "completion" + }, + "ft:babbage-002": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000004, + "output_cost_per_token": 0.0000004, + "litellm_provider": "text-completion-openai", + "mode": "completion" + }, "text-embedding-3-large": { "max_tokens": 8191, "max_input_tokens": 8191, @@ -1385,6 +1403,24 @@ "mode": "completion", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, + "gemini/gemini-1.5-flash-latest": { + "max_tokens": 8192, + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_images_per_prompt": 3000, + "max_videos_per_prompt": 10, + "max_video_length": 1, + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_pdf_size_mb": 30, + "input_cost_per_token": 0, + "output_cost_per_token": 0, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, "gemini/gemini-pro": { "max_tokens": 8192, "max_input_tokens": 32760, @@ -1744,6 +1780,30 @@ "litellm_provider": "openrouter", "mode": "chat" }, + "openrouter/openai/gpt-4o": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000005, + "output_cost_per_token": 0.000015, + "litellm_provider": "openrouter", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, + "openrouter/openai/gpt-4o-2024-05-13": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000005, + "output_cost_per_token": 0.000015, + "litellm_provider": "openrouter", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + 
"supports_vision": true + }, "openrouter/openai/gpt-4-vision-preview": { "max_tokens": 130000, "input_cost_per_token": 0.00001, @@ -2943,6 +3003,24 @@ "litellm_provider": "ollama", "mode": "completion" }, + "ollama/llama3": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "ollama", + "mode": "chat" + }, + "ollama/llama3:70b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "ollama", + "mode": "chat" + }, "ollama/mistral": { "max_tokens": 8192, "max_input_tokens": 8192, @@ -2952,6 +3030,42 @@ "litellm_provider": "ollama", "mode": "completion" }, + "ollama/mistral-7B-Instruct-v0.1": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "ollama", + "mode": "chat" + }, + "ollama/mistral-7B-Instruct-v0.2": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "ollama", + "mode": "chat" + }, + "ollama/mixtral-8x7B-Instruct-v0.1": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "ollama", + "mode": "chat" + }, + "ollama/mixtral-8x22B-Instruct-v0.1": { + "max_tokens": 65536, + "max_input_tokens": 65536, + "max_output_tokens": 65536, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "ollama", + "mode": "chat" + }, "ollama/codellama": { "max_tokens": 4096, "max_input_tokens": 4096, diff --git a/proxy_server_config.yaml b/proxy_server_config.yaml index 10f0d4a751..f9f77c05ae 100644 --- a/proxy_server_config.yaml +++ b/proxy_server_config.yaml @@ -89,6 +89,7 @@ model_list: litellm_params: model: text-completion-openai/gpt-3.5-turbo-instruct litellm_settings: + # set_verbose: True # Uncomment this if you want to see verbose logs; not recommended in production drop_params: True # max_budget: 100 # budget_duration: 30d diff --git a/pyproject.toml b/pyproject.toml index 3250eed82f..acc449436f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "1.37.10" +version = "1.37.19" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT" @@ -79,7 +79,7 @@ requires = ["poetry-core", "wheel"] build-backend = "poetry.core.masonry.api" [tool.commitizen] -version = "1.37.10" +version = "1.37.19" version_files = [ "pyproject.toml:^version" ] diff --git a/requirements.txt b/requirements.txt index 88f7174b3e..3c5abae5a8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ # LITELLM PROXY DEPENDENCIES # anyio==4.2.0 # openai + http req. -openai==1.14.3 # openai req. +openai==1.27.0 # openai req. 
fastapi==0.111.0 # server dep backoff==2.2.1 # server dep pyyaml==6.0.0 # server dep diff --git a/tests/test_callbacks_on_proxy.py b/tests/test_callbacks_on_proxy.py index ea15af6b0d..42665c35bc 100644 --- a/tests/test_callbacks_on_proxy.py +++ b/tests/test_callbacks_on_proxy.py @@ -129,7 +129,7 @@ async def test_check_num_callbacks(): set(all_litellm_callbacks_1) - set(all_litellm_callbacks_2), ) - assert num_callbacks_1 == num_callbacks_2 + assert abs(num_callbacks_1 - num_callbacks_2) <= 4 await asyncio.sleep(30) @@ -142,7 +142,7 @@ async def test_check_num_callbacks(): set(all_litellm_callbacks_3) - set(all_litellm_callbacks_2), ) - assert num_callbacks_1 == num_callbacks_2 == num_callbacks_3 + assert abs(num_callbacks_3 - num_callbacks_2) <= 4 @pytest.mark.asyncio @@ -183,7 +183,7 @@ async def test_check_num_callbacks_on_lowest_latency(): set(all_litellm_callbacks_2) - set(all_litellm_callbacks_1), ) - assert num_callbacks_1 == num_callbacks_2 + assert abs(num_callbacks_1 - num_callbacks_2) <= 4 await asyncio.sleep(30) @@ -196,7 +196,7 @@ async def test_check_num_callbacks_on_lowest_latency(): set(all_litellm_callbacks_3) - set(all_litellm_callbacks_2), ) - assert num_callbacks_1 == num_callbacks_2 == num_callbacks_3 + assert abs(num_callbacks_2 - num_callbacks_3) <= 4 assert num_alerts_1 == num_alerts_2 == num_alerts_3 diff --git a/ui/litellm-dashboard/out/404.html b/ui/litellm-dashboard/out/404.html index b70559084d..fa19572edc 100644 --- a/ui/litellm-dashboard/out/404.html +++ b/ui/litellm-dashboard/out/404.html @@ -1 +1 @@ -404: This page could not be found.LiteLLM Dashboard404This page could not be found. \ No newline at end of file +404: This page could not be found.LiteLLM Dashboard404This page could not be found. \ No newline at end of file diff --git a/ui/litellm-dashboard/out/_next/static/chunks/app/page-6a39771cacf75ea6.js b/ui/litellm-dashboard/out/_next/static/chunks/app/page-6a39771cacf75ea6.js deleted file mode 100644 index 7d08a80c96..0000000000 --- a/ui/litellm-dashboard/out/_next/static/chunks/app/page-6a39771cacf75ea6.js +++ /dev/null @@ -1 +0,0 @@ -(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[931],{20661:function(e,l,t){Promise.resolve().then(t.bind(t,7926))},7926:function(e,l,t){"use strict";t.r(l),t.d(l,{default:function(){return lb}});var s,a,r=t(3827),n=t(64090),o=t(47907),i=t(8792),c=t(40491),d=t(65270),m=e=>{let{userID:l,userRole:t,userEmail:s,showSSOBanner:a}=e;console.log("User ID:",l),console.log("userEmail:",s),console.log("showSSOBanner:",a);let n=[{key:"1",label:(0,r.jsxs)(r.Fragment,{children:[(0,r.jsxs)("p",{children:["Role: ",t]}),(0,r.jsxs)("p",{children:["ID: ",l]})]})}];return(0,r.jsxs)("nav",{className:"left-0 right-0 top-0 flex justify-between items-center h-12 mb-4",children:[(0,r.jsx)("div",{className:"text-left my-2 absolute top-0 left-0",children:(0,r.jsx)("div",{className:"flex flex-col items-center",children:(0,r.jsx)(i.default,{href:"/",children:(0,r.jsx)("button",{className:"text-gray-800 rounded text-center",children:(0,r.jsx)("img",{src:"/get_image",width:160,height:160,alt:"LiteLLM Brand",className:"mr-2"})})})})}),(0,r.jsxs)("div",{className:"text-right mx-4 my-2 absolute top-0 right-0 flex items-center justify-end space-x-2",children:[a?(0,r.jsx)("div",{style:{padding:"6px",borderRadius:"8px"},children:(0,r.jsx)("a",{href:"https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat",target:"_blank",style:{fontSize:"14px",textDecoration:"underline"},children:"Request hosted 
proxy"})}):null,(0,r.jsx)("div",{style:{border:"1px solid #391085",padding:"6px",borderRadius:"8px"},children:(0,r.jsx)(c.Z,{menu:{items:n},children:(0,r.jsx)(d.Z,{children:s})})})]})]})},u=t(80588);let h=async()=>{try{let e=await fetch("https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"),l=await e.json();return console.log("received data: ".concat(l)),l}catch(e){throw console.error("Failed to get model cost map:",e),e}},x=async(e,l)=>{try{let t=await fetch("/model/new",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...l})});if(!t.ok){let e=await t.text();throw u.ZP.error("Failed to create key: "+e,10),console.error("Error response from the server:",e),Error("Network response was not ok")}let s=await t.json();return console.log("API Response:",s),u.ZP.success("Model created successfully. Wait 60s and refresh on 'All Models' page"),s}catch(e){throw console.error("Failed to create key:",e),e}},p=async(e,l)=>{console.log("model_id in model delete call: ".concat(l));try{let t=await fetch("/model/delete",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({id:l})});if(!t.ok){let e=await t.text();throw u.ZP.error("Failed to create key: "+e,10),console.error("Error response from the server:",e),Error("Network response was not ok")}let s=await t.json();return console.log("API Response:",s),u.ZP.success("Model deleted successfully. Restart server to see this."),s}catch(e){throw console.error("Failed to create key:",e),e}},j=async(e,l,t)=>{try{if(console.log("Form Values in keyCreateCall:",t),t.description&&(t.metadata||(t.metadata={}),t.metadata.description=t.description,delete t.description,t.metadata=JSON.stringify(t.metadata)),t.metadata){console.log("formValues.metadata:",t.metadata);try{t.metadata=JSON.parse(t.metadata)}catch(e){throw u.ZP.error("Failed to parse metadata: "+e,10),Error("Failed to parse metadata: "+e)}}console.log("Form Values after check:",t);let s=await fetch("/key/generate",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({user_id:l,...t})});if(!s.ok){let e=await s.text();throw u.ZP.error("Failed to create key: "+e,10),console.error("Error response from the server:",e),Error("Network response was not ok")}let a=await s.json();return console.log("API Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},g=async(e,l,t)=>{try{if(console.log("Form Values in keyCreateCall:",t),t.description&&(t.metadata||(t.metadata={}),t.metadata.description=t.description,delete t.description,t.metadata=JSON.stringify(t.metadata)),t.metadata){console.log("formValues.metadata:",t.metadata);try{t.metadata=JSON.parse(t.metadata)}catch(e){throw u.ZP.error("Failed to parse metadata: "+e,10),Error("Failed to parse metadata: "+e)}}console.log("Form Values after check:",t);let s=await fetch("/user/new",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({user_id:l,...t})});if(!s.ok){let e=await s.text();throw u.ZP.error("Failed to create key: "+e,10),console.error("Error response from the server:",e),Error("Network response was not ok")}let a=await s.json();return console.log("API Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},y=async(e,l)=>{try{console.log("in keyDeleteCall:",l);let t=await fetch("/key/delete",{method:"POST",headers:{Authorization:"Bearer 
".concat(e),"Content-Type":"application/json"},body:JSON.stringify({keys:[l]})});if(!t.ok){let e=await t.text();throw u.ZP.error("Failed to delete key: "+e,10),Error("Network response was not ok")}let s=await t.json();return console.log(s),s}catch(e){throw console.error("Failed to create key:",e),e}},f=async(e,l)=>{try{console.log("in teamDeleteCall:",l);let t=await fetch("/team/delete",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({team_ids:[l]})});if(!t.ok){let e=await t.text();throw u.ZP.error("Failed to delete team: "+e,10),Error("Network response was not ok")}let s=await t.json();return console.log(s),s}catch(e){throw console.error("Failed to delete key:",e),e}},Z=async function(e,l,t){let s=arguments.length>3&&void 0!==arguments[3]&&arguments[3],a=arguments.length>4?arguments[4]:void 0,r=arguments.length>5?arguments[5]:void 0;try{let n="/user/info";"App Owner"==t&&l&&(n="".concat(n,"?user_id=").concat(l)),"App User"==t&&l&&(n="".concat(n,"?user_id=").concat(l)),console.log("in userInfoCall viewAll=",s),s&&r&&null!=a&&void 0!=a&&(n="".concat(n,"?view_all=true&page=").concat(a,"&page_size=").concat(r));let o=await fetch(n,{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let i=await o.json();return console.log("API Response:",i),i}catch(e){throw console.error("Failed to create key:",e),e}},_=async(e,l)=>{try{let t="/team/info";l&&(t="".concat(t,"?team_id=").concat(l)),console.log("in teamInfoCall");let s=await fetch(t,{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!s.ok){let e=await s.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let a=await s.json();return console.log("API Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},w=async e=>{try{let l=await fetch("/global/spend",{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!l.ok){let e=await l.text();throw u.ZP.error(e,10),Error("Network response was not ok")}return await l.json()}catch(e){throw console.error("Failed to create key:",e),e}},b=async(e,l,t)=>{try{let l=await fetch("/v2/model/info",{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!l.ok){let e=await l.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let t=await l.json();return console.log("modelInfoCall:",t),t}catch(e){throw console.error("Failed to create key:",e),e}},k=async(e,l,t,s,a,r)=>{try{let l="/model/metrics";s&&(l="".concat(l,"?_selected_model_group=").concat(s,"&startTime=").concat(a,"&endTime=").concat(r));let t=await fetch(l,{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!t.ok){let e=await t.text();throw u.ZP.error(e,10),Error("Network response was not ok")}return await t.json()}catch(e){throw console.error("Failed to create key:",e),e}},v=async(e,l,t,s,a,r)=>{try{let l="/model/metrics/slow_responses";s&&(l="".concat(l,"?_selected_model_group=").concat(s,"&startTime=").concat(a,"&endTime=").concat(r));let t=await fetch(l,{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!t.ok){let e=await t.text();throw u.ZP.error(e,10),Error("Network response was not ok")}return await t.json()}catch(e){throw console.error("Failed to create 
key:",e),e}},S=async(e,l,t,s,a,r)=>{try{let l="/model/metrics/exceptions";s&&(l="".concat(l,"?_selected_model_group=").concat(s,"&startTime=").concat(a,"&endTime=").concat(r));let t=await fetch(l,{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!t.ok){let e=await t.text();throw u.ZP.error(e,10),Error("Network response was not ok")}return await t.json()}catch(e){throw console.error("Failed to create key:",e),e}},N=async(e,l,t)=>{try{let l=await fetch("/models",{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!l.ok){let e=await l.text();throw u.ZP.error(e,10),Error("Network response was not ok")}return await l.json()}catch(e){throw console.error("Failed to create key:",e),e}},A=async e=>{try{let l="/global/spend/teams";console.log("in teamSpendLogsCall:",l);let t=await fetch("".concat(l),{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!t.ok){let e=await t.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let s=await t.json();return console.log(s),s}catch(e){throw console.error("Failed to create key:",e),e}},E=async(e,l,t)=>{try{let s="/global/spend/tags";l&&t&&(s="".concat(s,"?start_date=").concat(l,"&end_date=").concat(t)),console.log("in tagsSpendLogsCall:",s);let a=await fetch("".concat(s),{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!a.ok){let e=await a.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let r=await a.json();return console.log(r),r}catch(e){throw console.error("Failed to create key:",e),e}},I=async(e,l,t,s,a,r)=>{try{console.log("user role in spend logs call: ".concat(t));let l="/spend/logs";l="App Owner"==t?"".concat(l,"?user_id=").concat(s,"&start_date=").concat(a,"&end_date=").concat(r):"".concat(l,"?start_date=").concat(a,"&end_date=").concat(r);let n=await fetch(l,{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!n.ok){let e=await n.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let o=await n.json();return console.log(o),o}catch(e){throw console.error("Failed to create key:",e),e}},C=async e=>{try{let l=await fetch("/global/spend/logs",{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!l.ok){let e=await l.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let t=await l.json();return console.log(t),t}catch(e){throw console.error("Failed to create key:",e),e}},P=async e=>{try{let l=await fetch("/global/spend/keys?limit=5",{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!l.ok){let e=await l.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let t=await l.json();return console.log(t),t}catch(e){throw console.error("Failed to create key:",e),e}},T=async(e,l,t,s)=>{try{let a="";a=l?JSON.stringify({api_key:l,startTime:t,endTime:s}):JSON.stringify({startTime:t,endTime:s});let r={method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}};r.body=a;let n=await fetch("/global/spend/end_users",r);if(!n.ok){let e=await n.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let o=await n.json();return console.log(o),o}catch(e){throw console.error("Failed to create key:",e),e}},O=async e=>{try{let l=await fetch("/global/spend/models?limit=5",{method:"GET",headers:{Authorization:"Bearer 
".concat(e),"Content-Type":"application/json"}});if(!l.ok){let e=await l.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let t=await l.json();return console.log(t),t}catch(e){throw console.error("Failed to create key:",e),e}},F=async(e,l)=>{try{let t=await fetch("/v2/key/info",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({keys:l})});if(!t.ok){let e=await t.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let s=await t.json();return console.log(s),s}catch(e){throw console.error("Failed to create key:",e),e}},R=async(e,l)=>{try{let t="/user/get_users?role=".concat(l);console.log("in userGetAllUsersCall:",t);let s=await fetch(t,{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!s.ok){let e=await s.text();throw u.ZP.error("Failed to delete key: "+e,10),Error("Network response was not ok")}let a=await s.json();return console.log(a),a}catch(e){throw console.error("Failed to get requested models:",e),e}},M=async(e,l)=>{try{console.log("Form Values in teamCreateCall:",l);let t=await fetch("/team/new",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...l})});if(!t.ok){let e=await t.text();throw u.ZP.error("Failed to create key: "+e,10),console.error("Error response from the server:",e),Error("Network response was not ok")}let s=await t.json();return console.log("API Response:",s),s}catch(e){throw console.error("Failed to create key:",e),e}},L=async(e,l)=>{try{console.log("Form Values in keyUpdateCall:",l);let t=await fetch("/key/update",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...l})});if(!t.ok){let e=await t.text();throw u.ZP.error("Failed to update key: "+e,10),console.error("Error response from the server:",e),Error("Network response was not ok")}let s=await t.json();return console.log("Update key Response:",s),s}catch(e){throw console.error("Failed to create key:",e),e}},U=async(e,l)=>{try{console.log("Form Values in teamUpateCall:",l);let t=await fetch("/team/update",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...l})});if(!t.ok){let e=await t.text();throw u.ZP.error("Failed to update team: "+e,10),console.error("Error response from the server:",e),Error("Network response was not ok")}let s=await t.json();return console.log("Update Team Response:",s),s}catch(e){throw console.error("Failed to create key:",e),e}},D=async(e,l)=>{try{console.log("Form Values in modelUpateCall:",l);let t=await fetch("/model/update",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...l})});if(!t.ok){let e=await t.text();throw u.ZP.error("Failed to update model: "+e,10),console.error("Error update from the server:",e),Error("Network response was not ok")}let s=await t.json();return console.log("Update model Response:",s),s}catch(e){throw console.error("Failed to update model:",e),e}},K=async(e,l,t)=>{try{console.log("Form Values in teamMemberAddCall:",t);let s=await fetch("/team/member_add",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({team_id:l,member:t})});if(!s.ok){let e=await s.text();throw u.ZP.error("Failed to create key: "+e,10),console.error("Error response from the server:",e),Error("Network response was not ok")}let a=await 
s.json();return console.log("API Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},B=async(e,l,t)=>{try{console.log("Form Values in userUpdateUserCall:",l);let s={...l};null!==t&&(s.user_role=t),s=JSON.stringify(s);let a=await fetch("/user/update",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:s});if(!a.ok){let e=await a.text();throw u.ZP.error("Failed to create key: "+e,10),console.error("Error response from the server:",e),Error("Network response was not ok")}let r=await a.json();return console.log("API Response:",r),r}catch(e){throw console.error("Failed to create key:",e),e}},z=async(e,l)=>{try{let t="/health/services?service=".concat(l);console.log("Checking Slack Budget Alerts service health");let s=await fetch(t,{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!s.ok){let e=await s.text();throw u.ZP.error("Failed ".concat(l," service health check ")+e),Error(e)}let a=await s.json();return u.ZP.success("Test request to ".concat(l," made - check logs/alerts on ").concat(l," to verify")),a}catch(e){throw console.error("Failed to perform health check:",e),e}},q=async(e,l,t)=>{try{let l=await fetch("/get/config/callbacks",{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!l.ok){let e=await l.text();throw u.ZP.error(e,10),Error("Network response was not ok")}return await l.json()}catch(e){throw console.error("Failed to get callbacks:",e),e}},V=async(e,l)=>{try{let t=await fetch("/config/update",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...l})});if(!t.ok){let e=await t.text();throw u.ZP.error(e,10),Error("Network response was not ok")}return await t.json()}catch(e){throw console.error("Failed to set callbacks:",e),e}},G=async e=>{try{let l=await fetch("/health",{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!l.ok){let e=await l.text();throw u.ZP.error(e),Error("Network response was not ok")}return await l.json()}catch(e){throw console.error("Failed to call /health:",e),e}};var Y=t(10384),W=t(46453),J=t(16450),H=t(52273),$=t(26780),X=t(15595),Q=t(6698),ee=t(71801),el=t(42440),et=t(42308),es=t(50670),ea=t(81583),er=t(99129),en=t(44839),eo=t(88707),ei=t(1861);let{Option:ec}=es.default;var ed=e=>{let{userID:l,team:t,userRole:s,accessToken:a,data:o,setData:i}=e,[c]=ea.Z.useForm(),[d,m]=(0,n.useState)(!1),[h,x]=(0,n.useState)(null),[p,g]=(0,n.useState)(null),[y,f]=(0,n.useState)([]),[Z,_]=(0,n.useState)([]),w=()=>{m(!1),c.resetFields()},b=()=>{m(!1),x(null),c.resetFields()};(0,n.useEffect)(()=>{(async()=>{try{if(null===l||null===s)return;if(null!==a){let e=(await N(a,l,s)).data.map(e=>e.id);console.log("available_model_names:",e),f(e)}}catch(e){console.error("Error fetching user models:",e)}})()},[a,l,s]);let k=async e=>{try{var t,s,r;let n=null!==(t=null==e?void 0:e.key_alias)&&void 0!==t?t:"",d=null!==(s=null==e?void 0:e.team_id)&&void 0!==s?s:null;if((null!==(r=null==o?void 0:o.filter(e=>e.team_id===d).map(e=>e.key_alias))&&void 0!==r?r:[]).includes(n))throw Error("Key alias ".concat(n," already exists for team with ID ").concat(d,", please provide another key alias"));u.ZP.info("Making API Call"),m(!0);let h=await j(a,l,e);console.log("key create Response:",h),i(e=>e?[...e,h]:[h]),x(h.key),g(h.soft_budget),u.ZP.success("API Key 
Created"),c.resetFields(),localStorage.removeItem("userData"+l)}catch(e){console.error("Error creating the key:",e),u.ZP.error("Error creating the key: ".concat(e),20)}};return(0,n.useEffect)(()=>{_(t&&t.models.length>0?t.models.includes("all-proxy-models")?y:t.models:y)},[t,y]),(0,r.jsxs)("div",{children:[(0,r.jsx)(J.Z,{className:"mx-auto",onClick:()=>m(!0),children:"+ Create New Key"}),(0,r.jsx)(er.Z,{title:"Create Key",visible:d,width:800,footer:null,onOk:w,onCancel:b,children:(0,r.jsxs)(ea.Z,{form:c,onFinish:k,labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(ea.Z.Item,{label:"Key Name",name:"key_alias",rules:[{required:!0,message:"Please input a key name"}],help:"required",children:(0,r.jsx)(H.Z,{placeholder:""})}),(0,r.jsx)(ea.Z.Item,{label:"Team ID",name:"team_id",hidden:!0,initialValue:t?t.team_id:null,valuePropName:"team_id",className:"mt-8",children:(0,r.jsx)(en.Z,{value:t?t.team_alias:"",disabled:!0})}),(0,r.jsx)(ea.Z.Item,{label:"Models",name:"models",rules:[{required:!0,message:"Please select a model"}],help:"required",children:(0,r.jsxs)(es.default,{mode:"multiple",placeholder:"Select models",style:{width:"100%"},onChange:e=>{e.includes("all-team-models")&&c.setFieldsValue({models:["all-team-models"]})},children:[(0,r.jsx)(ec,{value:"all-team-models",children:"All Team Models"},"all-team-models"),Z.map(e=>(0,r.jsx)(ec,{value:e,children:e},e))]})}),(0,r.jsxs)($.Z,{className:"mt-20 mb-8",children:[(0,r.jsx)(Q.Z,{children:(0,r.jsx)("b",{children:"Optional Settings"})}),(0,r.jsxs)(X.Z,{children:[(0,r.jsx)(ea.Z.Item,{className:"mt-8",label:"Max Budget (USD)",name:"max_budget",help:"Budget cannot exceed team max budget: $".concat((null==t?void 0:t.max_budget)!==null&&(null==t?void 0:t.max_budget)!==void 0?null==t?void 0:t.max_budget:"unlimited"),rules:[{validator:async(e,l)=>{if(l&&t&&null!==t.max_budget&&l>t.max_budget)throw Error("Budget cannot exceed team max budget: $".concat(t.max_budget))}}],children:(0,r.jsx)(eo.Z,{step:.01,precision:2,width:200})}),(0,r.jsx)(ea.Z.Item,{className:"mt-8",label:"Reset Budget",name:"budget_duration",help:"Team Reset Budget: ".concat((null==t?void 0:t.budget_duration)!==null&&(null==t?void 0:t.budget_duration)!==void 0?null==t?void 0:t.budget_duration:"None"),children:(0,r.jsxs)(es.default,{defaultValue:null,placeholder:"n/a",children:[(0,r.jsx)(es.default.Option,{value:"24h",children:"daily"}),(0,r.jsx)(es.default.Option,{value:"30d",children:"monthly"})]})}),(0,r.jsx)(ea.Z.Item,{className:"mt-8",label:"Tokens per minute Limit (TPM)",name:"tpm_limit",help:"TPM cannot exceed team TPM limit: ".concat((null==t?void 0:t.tpm_limit)!==null&&(null==t?void 0:t.tpm_limit)!==void 0?null==t?void 0:t.tpm_limit:"unlimited"),rules:[{validator:async(e,l)=>{if(l&&t&&null!==t.tpm_limit&&l>t.tpm_limit)throw Error("TPM limit cannot exceed team TPM limit: ".concat(t.tpm_limit))}}],children:(0,r.jsx)(eo.Z,{step:1,width:400})}),(0,r.jsx)(ea.Z.Item,{className:"mt-8",label:"Requests per minute Limit (RPM)",name:"rpm_limit",help:"RPM cannot exceed team RPM limit: ".concat((null==t?void 0:t.rpm_limit)!==null&&(null==t?void 0:t.rpm_limit)!==void 0?null==t?void 0:t.rpm_limit:"unlimited"),rules:[{validator:async(e,l)=>{if(l&&t&&null!==t.rpm_limit&&l>t.rpm_limit)throw Error("RPM limit cannot exceed team RPM limit: ".concat(t.rpm_limit))}}],children:(0,r.jsx)(eo.Z,{step:1,width:400})}),(0,r.jsx)(ea.Z.Item,{label:"Expire Key (eg: 30s, 30h, 
30d)",name:"duration",className:"mt-8",children:(0,r.jsx)(H.Z,{placeholder:""})}),(0,r.jsx)(ea.Z.Item,{label:"Metadata",name:"metadata",children:(0,r.jsx)(en.Z.TextArea,{rows:4,placeholder:"Enter metadata as JSON"})})]})]})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(ei.ZP,{htmlType:"submit",children:"Create Key"})})]})}),h&&(0,r.jsx)(er.Z,{visible:d,onOk:w,onCancel:b,footer:null,children:(0,r.jsxs)(W.Z,{numItems:1,className:"gap-2 w-full",children:[(0,r.jsx)(el.Z,{children:"Save your Key"}),(0,r.jsx)(Y.Z,{numColSpan:1,children:(0,r.jsxs)("p",{children:["Please save this secret key somewhere safe and accessible. For security reasons, ",(0,r.jsx)("b",{children:"you will not be able to view it again"})," ","through your LiteLLM account. If you lose this secret key, you will need to generate a new one."]})}),(0,r.jsx)(Y.Z,{numColSpan:1,children:null!=h?(0,r.jsxs)("div",{children:[(0,r.jsx)(ee.Z,{className:"mt-3",children:"API Key:"}),(0,r.jsx)("div",{style:{background:"#f8f8f8",padding:"10px",borderRadius:"5px",marginBottom:"10px"},children:(0,r.jsx)("pre",{style:{wordWrap:"break-word",whiteSpace:"normal"},children:h})}),(0,r.jsx)(et.CopyToClipboard,{text:h,onCopy:()=>{u.ZP.success("API Key copied to clipboard")},children:(0,r.jsx)(J.Z,{className:"mt-3",children:"Copy API Key"})})]}):(0,r.jsx)(ee.Z,{children:"Key being created, this might take 30s"})})]})})]})},em=t(9454),eu=t(98941),eh=t(33393),ex=t(5),ep=t(13810),ej=t(61244),eg=t(10827),ey=t(3851),ef=t(2044),eZ=t(64167),e_=t(74480),ew=t(7178),eb=t(95093),ek=t(27166);let{Option:ev}=es.default;var eS=e=>{let{userID:l,userRole:t,accessToken:s,selectedTeam:a,data:o,setData:i,teams:c}=e,[d,m]=(0,n.useState)(!1),[h,x]=(0,n.useState)(!1),[p,j]=(0,n.useState)(null),[g,f]=(0,n.useState)(null),[Z,_]=(0,n.useState)(null),[w,b]=(0,n.useState)(""),[k,v]=(0,n.useState)(!1),[S,A]=(0,n.useState)(!1),[E,I]=(0,n.useState)(null),[C,P]=(0,n.useState)([]),T=new Set,[O,F]=(0,n.useState)(T);(0,n.useEffect)(()=>{(async()=>{try{if(null===l)return;if(null!==s&&null!==t){let e=(await N(s,l,t)).data.map(e=>e.id);console.log("available_model_names:",e),P(e)}}catch(e){console.error("Error fetching user models:",e)}})()},[s,l,t]),(0,n.useEffect)(()=>{if(c){let e=new Set;c.forEach((l,t)=>{let s=l.team_id;e.add(s)}),F(e)}},[c]);let R=e=>{console.log("handleEditClick:",e),null==e.token&&null!==e.token_id&&(e.token=e.token_id),I(e),v(!0)},M=async e=>{if(null==s)return;let l=e.token;e.key=l,console.log("handleEditSubmit:",e);let t=await L(s,e);console.log("handleEditSubmit: newKeyValues",t),o&&i(o.map(e=>e.token===l?t:e)),u.ZP.success("Key updated successfully"),v(!1),I(null)},U=async e=>{console.log("handleDelete:",e),null==e.token&&null!==e.token_id&&(e.token=e.token_id),null!=o&&(j(e.token),localStorage.removeItem("userData"+l),x(!0))},D=async()=>{if(null!=p&&null!=o){try{await y(s,p);let e=o.filter(e=>e.token!==p);i(e)}catch(e){console.error("Error deleting the key:",e)}x(!1),j(null)}};if(null!=o)return console.log("RERENDER TRIGGERED"),(0,r.jsxs)("div",{children:[(0,r.jsxs)(ep.Z,{className:"w-full mx-auto flex-auto overflow-y-auto max-h-[50vh] mb-4 mt-2",children:[(0,r.jsxs)(eg.Z,{className:"mt-5 max-h-[300px] min-h-[300px]",children:[(0,r.jsx)(eZ.Z,{children:(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(e_.Z,{children:"Key Alias"}),(0,r.jsx)(e_.Z,{children:"Secret Key"}),(0,r.jsx)(e_.Z,{children:"Spend (USD)"}),(0,r.jsx)(e_.Z,{children:"Budget (USD)"}),(0,r.jsx)(e_.Z,{children:"Models"}),(0,r.jsx)(e_.Z,{children:"TPM / RPM 
Limits"})]})}),(0,r.jsx)(ey.Z,{children:o.map(e=>{if(console.log(e),"litellm-dashboard"===e.team_id)return null;if(a){if(console.log("item team id: ".concat(e.team_id,", knownTeamIDs.has(item.team_id): ").concat(O.has(e.team_id),", selectedTeam id: ").concat(a.team_id)),(null!=a.team_id||null===e.team_id||O.has(e.team_id))&&e.team_id!=a.team_id)return null;console.log("item team id: ".concat(e.team_id,", is returned"))}return(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(ef.Z,{style:{maxWidth:"2px",whiteSpace:"pre-wrap",overflow:"hidden"},children:null!=e.key_alias?(0,r.jsx)(ee.Z,{children:e.key_alias}):(0,r.jsx)(ee.Z,{children:"Not Set"})}),(0,r.jsx)(ef.Z,{children:(0,r.jsx)(ee.Z,{children:e.key_name})}),(0,r.jsx)(ef.Z,{children:(0,r.jsx)(ee.Z,{children:(()=>{try{return parseFloat(e.spend).toFixed(4)}catch(l){return e.spend}})()})}),(0,r.jsx)(ef.Z,{children:null!=e.max_budget?(0,r.jsx)(ee.Z,{children:e.max_budget}):(0,r.jsx)(ee.Z,{children:"Unlimited"})}),(0,r.jsx)(ef.Z,{children:Array.isArray(e.models)?(0,r.jsx)("div",{style:{display:"flex",flexDirection:"column"},children:0===e.models.length?(0,r.jsx)(r.Fragment,{children:a&&a.models&&a.models.length>0?a.models.map((e,l)=>"all-proxy-models"===e?(0,r.jsx)(ex.Z,{size:"xs",className:"mb-1",color:"red",children:(0,r.jsx)(ee.Z,{children:"All Proxy Models"})},l):"all-team-models"===e?(0,r.jsx)(ex.Z,{size:"xs",className:"mb-1",color:"red",children:(0,r.jsx)(ee.Z,{children:"All Team Models"})},l):(0,r.jsx)(ex.Z,{size:"xs",className:"mb-1",color:"blue",children:(0,r.jsx)(ee.Z,{children:e.length>30?"".concat(e.slice(0,30),"..."):e})},l)):(0,r.jsx)(ex.Z,{size:"xs",className:"mb-1",color:"blue",children:(0,r.jsx)(ee.Z,{children:"all-proxy-models"})})}):e.models.map((e,l)=>"all-proxy-models"===e?(0,r.jsx)(ex.Z,{size:"xs",className:"mb-1",color:"red",children:(0,r.jsx)(ee.Z,{children:"All Proxy Models"})},l):"all-team-models"===e?(0,r.jsx)(ex.Z,{size:"xs",className:"mb-1",color:"red",children:(0,r.jsx)(ee.Z,{children:"All Team Models"})},l):(0,r.jsx)(ex.Z,{size:"xs",className:"mb-1",color:"blue",children:(0,r.jsx)(ee.Z,{children:e.length>30?"".concat(e.slice(0,30),"..."):e})},l))}):null}),(0,r.jsx)(ef.Z,{children:(0,r.jsxs)(ee.Z,{children:["TPM: ",e.tpm_limit?e.tpm_limit:"Unlimited"," ",(0,r.jsx)("br",{})," RPM:"," ",e.rpm_limit?e.rpm_limit:"Unlimited"]})}),(0,r.jsxs)(ef.Z,{children:[(0,r.jsx)(ej.Z,{onClick:()=>{I(e),A(!0)},icon:em.Z,size:"sm"}),(0,r.jsx)(er.Z,{open:S,onCancel:()=>{A(!1),I(null)},footer:null,width:800,children:E&&(0,r.jsxs)(r.Fragment,{children:[(0,r.jsxs)("div",{className:"grid grid-cols-1 gap-6 sm:grid-cols-2 lg:grid-cols-3 mt-8",children:[(0,r.jsxs)(ep.Z,{children:[(0,r.jsx)("p",{className:"text-tremor-default font-medium text-tremor-content dark:text-dark-tremor-content",children:"Spend"}),(0,r.jsx)("div",{className:"mt-2 flex items-baseline space-x-2.5",children:(0,r.jsx)("p",{className:"text-tremor font-semibold text-tremor-content-strong dark:text-dark-tremor-content-strong",children:(()=>{try{return parseFloat(E.spend).toFixed(4)}catch(e){return E.spend}})()})})]}),(0,r.jsxs)(ep.Z,{children:[(0,r.jsx)("p",{className:"text-tremor-default font-medium text-tremor-content dark:text-dark-tremor-content",children:"Budget"}),(0,r.jsx)("div",{className:"mt-2 flex items-baseline space-x-2.5",children:(0,r.jsx)("p",{className:"text-tremor font-semibold text-tremor-content-strong 
dark:text-dark-tremor-content-strong",children:null!=E.max_budget?(0,r.jsx)(r.Fragment,{children:E.max_budget}):(0,r.jsx)(r.Fragment,{children:"Unlimited"})})})]},e.name),(0,r.jsxs)(ep.Z,{children:[(0,r.jsx)("p",{className:"text-tremor-default font-medium text-tremor-content dark:text-dark-tremor-content",children:"Expires"}),(0,r.jsx)("div",{className:"mt-2 flex items-baseline space-x-2.5",children:(0,r.jsx)("p",{className:"text-tremor-default font-small text-tremor-content-strong dark:text-dark-tremor-content-strong",children:null!=E.expires?(0,r.jsx)(r.Fragment,{children:new Date(E.expires).toLocaleString(void 0,{day:"numeric",month:"long",year:"numeric",hour:"numeric",minute:"numeric",second:"numeric"})}):(0,r.jsx)(r.Fragment,{children:"Never"})})})]},e.name)]}),(0,r.jsxs)(ep.Z,{className:"my-4",children:[(0,r.jsx)(el.Z,{children:"Token Name"}),(0,r.jsx)(ee.Z,{className:"my-1",children:E.key_alias?E.key_alias:E.key_name}),(0,r.jsx)(el.Z,{children:"Token ID"}),(0,r.jsx)(ee.Z,{className:"my-1 text-[12px]",children:E.token}),(0,r.jsx)(el.Z,{children:"Metadata"}),(0,r.jsx)(ee.Z,{className:"my-1",children:(0,r.jsxs)("pre",{children:[JSON.stringify(E.metadata)," "]})})]}),(0,r.jsx)(J.Z,{className:"mx-auto flex items-center",onClick:()=>{A(!1),I(null)},children:"Close"})]})}),(0,r.jsx)(ej.Z,{icon:eu.Z,size:"sm",onClick:()=>R(e)}),(0,r.jsx)(ej.Z,{onClick:()=>U(e),icon:eh.Z,size:"sm"})]})]},e.token)})})]}),h&&(0,r.jsx)("div",{className:"fixed z-10 inset-0 overflow-y-auto",children:(0,r.jsxs)("div",{className:"flex items-end justify-center min-h-screen pt-4 px-4 pb-20 text-center sm:block sm:p-0",children:[(0,r.jsx)("div",{className:"fixed inset-0 transition-opacity","aria-hidden":"true",children:(0,r.jsx)("div",{className:"absolute inset-0 bg-gray-500 opacity-75"})}),(0,r.jsx)("span",{className:"hidden sm:inline-block sm:align-middle sm:h-screen","aria-hidden":"true",children:"β"}),(0,r.jsxs)("div",{className:"inline-block align-bottom bg-white rounded-lg text-left overflow-hidden shadow-xl transform transition-all sm:my-8 sm:align-middle sm:max-w-lg sm:w-full",children:[(0,r.jsx)("div",{className:"bg-white px-4 pt-5 pb-4 sm:p-6 sm:pb-4",children:(0,r.jsx)("div",{className:"sm:flex sm:items-start",children:(0,r.jsxs)("div",{className:"mt-3 text-center sm:mt-0 sm:ml-4 sm:text-left",children:[(0,r.jsx)("h3",{className:"text-lg leading-6 font-medium text-gray-900",children:"Delete Key"}),(0,r.jsx)("div",{className:"mt-2",children:(0,r.jsx)("p",{className:"text-sm text-gray-500",children:"Are you sure you want to delete this key ?"})})]})})}),(0,r.jsxs)("div",{className:"bg-gray-50 px-4 py-3 sm:px-6 sm:flex sm:flex-row-reverse",children:[(0,r.jsx)(J.Z,{onClick:D,color:"red",className:"ml-2",children:"Delete"}),(0,r.jsx)(J.Z,{onClick:()=>{x(!1),j(null)},children:"Cancel"})]})]})]})})]}),E&&(0,r.jsx)(e=>{let{visible:l,onCancel:t,token:s,onSubmit:o}=e,[i]=ea.Z.useForm(),[d,m]=(0,n.useState)(a),[u,h]=(0,n.useState)([]),[x,p]=(0,n.useState)(!1);return(0,r.jsx)(er.Z,{title:"Edit Key",visible:l,width:800,footer:null,onOk:()=>{i.validateFields().then(e=>{i.resetFields()}).catch(e=>{console.error("Validation failed:",e)})},onCancel:t,children:(0,r.jsxs)(ea.Z,{form:i,onFinish:M,initialValues:s,labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(ea.Z.Item,{label:"Key Name",name:"key_alias",rules:[{required:!0,message:"Please input a key 
name"}],help:"required",children:(0,r.jsx)(en.Z,{})}),(0,r.jsx)(ea.Z.Item,{label:"Models",name:"models",rules:[{validator:(e,l)=>{let t=l.filter(e=>!d.models.includes(e)&&"all-team-models"!==e&&"all-proxy-models"!==e&&!d.models.includes("all-proxy-models"));return(console.log("errorModels: ".concat(t)),t.length>0)?Promise.reject("Some models are not part of the new team's models - ".concat(t,"Team models: ").concat(d.models)):Promise.resolve()}}],children:(0,r.jsxs)(es.default,{mode:"multiple",placeholder:"Select models",style:{width:"100%"},children:[(0,r.jsx)(ev,{value:"all-team-models",children:"All Team Models"},"all-team-models"),d&&d.models?d.models.includes("all-proxy-models")?C.filter(e=>"all-proxy-models"!==e).map(e=>(0,r.jsx)(ev,{value:e,children:e},e)):d.models.map(e=>(0,r.jsx)(ev,{value:e,children:e},e)):C.map(e=>(0,r.jsx)(ev,{value:e,children:e},e))]})}),(0,r.jsx)(ea.Z.Item,{className:"mt-8",label:"Max Budget (USD)",name:"max_budget",help:"Budget cannot exceed team max budget: ".concat((null==d?void 0:d.max_budget)!==null&&(null==d?void 0:d.max_budget)!==void 0?null==d?void 0:d.max_budget:"unlimited"),rules:[{validator:async(e,l)=>{if(l&&d&&null!==d.max_budget&&l>d.max_budget)throw console.log("keyTeam.max_budget: ".concat(d.max_budget)),Error("Budget cannot exceed team max budget: $".concat(d.max_budget))}}],children:(0,r.jsx)(eo.Z,{step:.01,precision:2,width:200})}),(0,r.jsx)(ea.Z.Item,{label:"token",name:"token",hidden:!0}),(0,r.jsx)(ea.Z.Item,{label:"Team",name:"team_id",help:"the team this key belongs to",children:(0,r.jsx)(eb.Z,{value:s.team_alias,children:null==c?void 0:c.map((e,l)=>(0,r.jsx)(ek.Z,{value:e.team_id,onClick:()=>m(e),children:e.team_alias},l))})})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(ei.ZP,{htmlType:"submit",children:"Edit Key"})})]})})},{visible:k,onCancel:()=>{v(!1),I(null)},token:E,onSubmit:M})]})},eN=t(76032),eA=t(35152),eE=e=>{let{userID:l,userRole:t,accessToken:s,userSpend:a,selectedTeam:o}=e;console.log("userSpend: ".concat(a));let[i,c]=(0,n.useState)(null!==a?a:0),[d,m]=(0,n.useState)(0),[u,h]=(0,n.useState)([]);(0,n.useEffect)(()=>{let e=async()=>{if(s&&l&&t&&"Admin"===t&&null==a)try{let e=await w(s);e&&(e.spend?c(e.spend):c(0),e.max_budget?m(e.max_budget):m(0))}catch(e){console.error("Error fetching global spend data:",e)}};(async()=>{try{if(null===l||null===t)return;if(null!==s){let e=(await N(s,l,t)).data.map(e=>e.id);console.log("available_model_names:",e),h(e)}}catch(e){console.error("Error fetching user models:",e)}})(),e()},[t,s,l]),(0,n.useEffect)(()=>{null!==a&&c(a)},[a]);let x=[];o&&o.models&&(x=o.models),x&&x.includes("all-proxy-models")?(console.log("user models:",u),x=u):x&&x.includes("all-team-models")?x=o.models:x&&0===x.length&&(x=u);let p=void 0!==i?i.toFixed(4):null;return console.log("spend in view user spend: ".concat(i)),(0,r.jsxs)("div",{className:"flex items-center",children:[(0,r.jsxs)("div",{children:[(0,r.jsxs)("p",{className:"text-tremor-default text-tremor-content dark:text-dark-tremor-content",children:["Total Spend"," "]}),(0,r.jsxs)("p",{className:"text-2xl text-tremor-content-strong dark:text-dark-tremor-content-strong font-semibold",children:["$",p]})]}),(0,r.jsx)("div",{className:"ml-auto",children:(0,r.jsxs)($.Z,{children:[(0,r.jsx)(Q.Z,{children:(0,r.jsx)(ee.Z,{children:"Team Models"})}),(0,r.jsx)(X.Z,{className:"absolute right-0 z-10 bg-white p-2 shadow-lg 
max-w-xs",children:(0,r.jsx)(eN.Z,{children:x.map(e=>(0,r.jsx)(eA.Z,{children:(0,r.jsx)(ee.Z,{children:e})},e))})})]})})]})},eI=e=>{let{userID:l,userRole:t,selectedTeam:s,accessToken:a}=e,[o,i]=(0,n.useState)([]);(0,n.useEffect)(()=>{(async()=>{try{if(null===l||null===t)return;if(null!==a){let e=(await N(a,l,t)).data.map(e=>e.id);console.log("available_model_names:",e),i(e)}}catch(e){console.error("Error fetching user models:",e)}})()},[a,l,t]);let c=[];return s&&s.models&&(c=s.models),c&&c.includes("all-proxy-models")&&(console.log("user models:",o),c=o),(0,r.jsx)(r.Fragment,{children:(0,r.jsx)("div",{className:"mb-5",children:(0,r.jsx)("p",{className:"text-3xl text-tremor-content-strong dark:text-dark-tremor-content-strong font-semibold",children:null==s?void 0:s.team_alias})})})},eC=e=>{let l,{teams:t,setSelectedTeam:s,userRole:a}=e,o={models:[],team_id:null,team_alias:"Default Team"},[i,c]=(0,n.useState)(o);return(l="App User"===a?t:t?[...t,o]:[o],"App User"===a)?null:(0,r.jsxs)("div",{className:"mt-5 mb-5",children:[(0,r.jsx)(el.Z,{children:"Select Team"}),(0,r.jsx)(ee.Z,{children:"If you belong to multiple teams, this setting controls which team is used by default when creating new API Keys."}),(0,r.jsxs)(ee.Z,{className:"mt-3 mb-3",children:[(0,r.jsx)("b",{children:"Default Team:"})," If no team_id is set for a key, it will be grouped under here."]}),l&&l.length>0?(0,r.jsx)(eb.Z,{defaultValue:"0",children:l.map((e,l)=>(0,r.jsx)(ek.Z,{value:String(l),onClick:()=>s(e),children:e.team_alias},l))}):(0,r.jsxs)(ee.Z,{children:["No team created. ",(0,r.jsx)("b",{children:"Defaulting to personal account."})]})]})},eP=t(37963),eT=t(36083);console.log("isLocal:",!1);var eO=e=>{let{userID:l,userRole:t,teams:s,keys:a,setUserRole:i,userEmail:c,setUserEmail:d,setTeams:m,setKeys:u}=e,[h,x]=(0,n.useState)(null),p=(0,o.useSearchParams)();p.get("viewSpend"),(0,o.useRouter)();let j=p.get("token"),[g,y]=(0,n.useState)(null),[f,_]=(0,n.useState)(null),[b,k]=(0,n.useState)([]),v={models:[],team_alias:"Default Team",team_id:null},[S,A]=(0,n.useState)(s?s[0]:v);if(window.addEventListener("beforeunload",function(){sessionStorage.clear()}),(0,n.useEffect)(()=>{if(j){let e=(0,eP.o)(j);if(e){if(console.log("Decoded token:",e),console.log("Decoded key:",e.key),y(e.key),e.user_role){let l=function(e){if(!e)return"Undefined Role";switch(console.log("Received user role: ".concat(e)),e.toLowerCase()){case"app_owner":case"demo_app_owner":return"App Owner";case"app_admin":case"proxy_admin":return"Admin";case"proxy_admin_viewer":return"Admin Viewer";case"app_user":return"App User";default:return"Unknown Role"}}(e.user_role);console.log("Decoded user_role:",l),i(l)}else console.log("User role not defined");e.user_email?d(e.user_email):console.log("User Email is not set ".concat(e))}}if(l&&g&&t&&!a&&!h){let e=sessionStorage.getItem("userModels"+l);e?k(JSON.parse(e)):(async()=>{try{let e=await Z(g,l,t,!1,null,null);if(console.log("received teams in user dashboard: ".concat(Object.keys(e),"; team values: ").concat(Object.entries(e.teams))),"Admin"==t){let e=await w(g);x(e),console.log("globalSpend:",e)}else x(e.user_info);u(e.keys),m(e.teams);let s=[...e.teams];s.length>0?(console.log("response['teams']: ".concat(s)),A(s[0])):A(v),sessionStorage.setItem("userData"+l,JSON.stringify(e.keys)),sessionStorage.setItem("userSpendData"+l,JSON.stringify(e.user_info));let a=(await 
N(g,l,t)).data.map(e=>e.id);console.log("available_model_names:",a),k(a),console.log("userModels:",b),sessionStorage.setItem("userModels"+l,JSON.stringify(a))}catch(e){console.error("There was an error fetching the data",e)}})()}},[l,j,g,a,t]),(0,n.useEffect)(()=>{if(null!==a&&null!=S){let e=0;for(let l of a)S.hasOwnProperty("team_id")&&null!==l.team_id&&l.team_id===S.team_id&&(e+=l.spend);_(e)}else if(null!==a){let e=0;for(let l of a)e+=l.spend;_(e)}},[S]),null==l||null==j){let e="/sso/key/generate";return console.log("Full URL:",e),window.location.href=e,null}if(null==g)return null;if(null==t&&i("App Owner"),t&&"Admin Viewer"==t){let{Title:e,Paragraph:l}=eT.default;return(0,r.jsxs)("div",{children:[(0,r.jsx)(e,{level:1,children:"Access Denied"}),(0,r.jsx)(l,{children:"Ask your proxy admin for access to create keys"})]})}return console.log("inside user dashboard, selected team",S),console.log("teamSpend: ".concat(f)),(0,r.jsx)("div",{className:"w-full mx-4",children:(0,r.jsx)(W.Z,{numItems:1,className:"gap-2 p-8 h-[75vh] w-full mt-2",children:(0,r.jsxs)(Y.Z,{numColSpan:1,children:[(0,r.jsx)(eI,{userID:l,userRole:t,selectedTeam:S||null,accessToken:g}),(0,r.jsx)(eE,{userID:l,userRole:t,accessToken:g,userSpend:f,selectedTeam:S||null}),(0,r.jsx)(eS,{userID:l,userRole:t,accessToken:g,selectedTeam:S||null,data:a,setData:u,teams:s}),(0,r.jsx)(ed,{userID:l,team:S||null,userRole:t,accessToken:g,data:a,setData:u},S?S.team_id:null),(0,r.jsx)(eC,{teams:s,setSelectedTeam:A,userRole:t})]})})})},eF=t(35087),eR=t(92836),eM=t(26734),eL=t(41608),eU=t(32126),eD=t(23682),eK=t(47047),eB=t(76628),ez=t(57750),eq=t(44041),eV=t(38302),eG=t(28683),eY=t(1460),eW=t(78578),eJ=t(63954),eH=t(90252),e$=t(7905),eX=e=>{let{modelID:l,accessToken:t}=e,[s,a]=(0,n.useState)(!1),o=async()=>{try{u.ZP.info("Making API Call"),a(!0);let e=await p(t,l);console.log("model delete Response:",e),u.ZP.success("Model ".concat(l," deleted successfully")),a(!1)}catch(e){console.error("Error deleting the model:",e)}};return(0,r.jsxs)("div",{children:[(0,r.jsx)(ej.Z,{onClick:()=>a(!0),icon:eh.Z,size:"sm"}),(0,r.jsx)(er.Z,{open:s,onOk:o,okType:"danger",onCancel:()=>a(!1),children:(0,r.jsxs)(W.Z,{numItems:1,className:"gap-2 w-full",children:[(0,r.jsx)(el.Z,{children:"Delete Model"}),(0,r.jsx)(Y.Z,{numColSpan:1,children:(0,r.jsx)("p",{children:"Are you sure you want to delete this model? 
This action is irreversible."})}),(0,r.jsx)(Y.Z,{numColSpan:1,children:(0,r.jsxs)("p",{children:["Model ID: ",(0,r.jsx)("b",{children:l})]})})]})})]})},eQ=t(97766),e0=t(46495);let{Title:e1,Link:e2}=eT.default;(s=a||(a={})).OpenAI="OpenAI",s.Azure="Azure",s.Anthropic="Anthropic",s.Google_AI_Studio="Gemini (Google AI Studio)",s.Bedrock="Amazon Bedrock",s.OpenAI_Compatible="OpenAI-Compatible Endpoints (Groq, Together AI, Mistral AI, etc.)",s.Vertex_AI="Vertex AI (Anthropic, Gemini, etc.)";let e4={OpenAI:"openai",Azure:"azure",Anthropic:"anthropic",Google_AI_Studio:"gemini",Bedrock:"bedrock",OpenAI_Compatible:"openai",Vertex_AI:"vertex_ai"},e8={"BadRequestError (400)":"BadRequestErrorRetries","AuthenticationError (401)":"AuthenticationErrorRetries","TimeoutError (408)":"TimeoutErrorRetries","RateLimitError (429)":"RateLimitErrorRetries","ContentPolicyViolationError (400)":"ContentPolicyViolationErrorRetries","InternalServerError (500)":"InternalServerErrorRetries"},e5=async(e,l,t)=>{try{let s=Array.isArray(e.model)?e.model:[e.model];console.log("received deployments: ".concat(s)),console.log("received type of deployments: ".concat(typeof s)),s.forEach(async t=>{console.log("litellm_model: ".concat(t));let s={},a={};s.model=t;let r="";for(let[l,t]of Object.entries(e))if(""!==t){if("model_name"==l)r+=t;else if("custom_llm_provider"==l)continue;else if("model"==l)continue;else if("base_model"===l)a[l]=t;else if("litellm_extra_params"==l){console.log("litellm_extra_params:",t);let e={};if(t&&void 0!=t){try{e=JSON.parse(t)}catch(e){throw u.ZP.error("Failed to parse LiteLLM Extra Params: "+e,10),Error("Failed to parse litellm_extra_params: "+e)}for(let[l,t]of Object.entries(e))s[l]=t}}else s[l]=t}let n={model_name:r,litellm_params:s,model_info:a},o=await x(l,n);console.log("response for model create call: ".concat(o.data))}),t.resetFields()}catch(e){u.ZP.error("Failed to create model: "+e,10)}};var e3=e=>{var l,t,s;let{accessToken:o,token:i,userRole:c,userID:d,modelData:m={data:[]},setModelData:x}=e,[p,j]=(0,n.useState)([]),[g]=ea.Z.useForm(),[y,f]=(0,n.useState)(null),[Z,_]=(0,n.useState)(""),[w,N]=(0,n.useState)([]),A=Object.values(a).filter(e=>isNaN(Number(e))),[E,I]=(0,n.useState)("OpenAI"),[C,P]=(0,n.useState)(""),[T,O]=(0,n.useState)(!1),[F,R]=(0,n.useState)(null),[M,L]=(0,n.useState)([]),[U,K]=(0,n.useState)(null),[B,z]=(0,n.useState)([]),[Y,et]=(0,n.useState)([]),[es,en]=(0,n.useState)([]),[ec,ed]=(0,n.useState)([]),[em,eh]=(0,n.useState)([]),[ev,eS]=(0,n.useState)([]),[eN,eA]=(0,n.useState)([]),[eE,eI]=(0,n.useState)({from:new Date(Date.now()-6048e5),to:new Date}),[eC,eP]=(0,n.useState)(null),[eO,e3]=(0,n.useState)(0),e6=e=>{R(e),O(!0)},e7=async e=>{if(console.log("handleEditSubmit:",e),null==o)return;let l={},t=null;for(let[s,a]of Object.entries(e))"model_id"!==s?l[s]=a:t=a;let s={litellm_params:l,model_info:{id:t}};console.log("handleEditSubmit payload:",s);try{await D(o,s),u.ZP.success("Model updated successfully, restart server to see updates"),O(!1),R(null)}catch(e){console.log("Error occurred")}},e9=()=>{_(new Date().toLocaleString())},le=async()=>{if(!o){console.error("Access token is missing");return}console.log("new modelGroupRetryPolicy:",eC);try{await V(o,{router_settings:{model_group_retry_policy:eC}}),u.ZP.success("Retry settings saved successfully")}catch(e){console.error("Failed to save retry settings:",e),u.ZP.error("Failed to save retry settings")}};if((0,n.useEffect)(()=>{if(!o||!i||!c||!d)return;let e=async()=>{try{var e,l,t,s,a,r;let n=await b(o,d,c);console.log("Model 
data response:",n.data),x(n);let i=new Set;for(let e=0;e0&&(u=m[m.length-1],console.log("_initial_model_group:",u),K(u)),console.log("selectedModelGroup:",U);let h=await k(o,d,c,u,null===(e=eE.from)||void 0===e?void 0:e.toISOString(),null===(l=eE.to)||void 0===l?void 0:l.toISOString());console.log("Model metrics response:",h),et(h.data),en(h.all_api_bases);let p=await S(o,d,c,u,null===(t=eE.from)||void 0===t?void 0:t.toISOString(),null===(s=eE.to)||void 0===s?void 0:s.toISOString());console.log("Model exceptions response:",p),ed(p.data),eh(p.exception_types);let j=await v(o,d,c,u,null===(a=eE.from)||void 0===a?void 0:a.toISOString(),null===(r=eE.to)||void 0===r?void 0:r.toISOString());console.log("slowResponses:",j),eA(j);let g=(await q(o,d,c)).router_settings;console.log("routerSettingsInfo:",g);let y=g.model_group_retry_policy,f=g.num_retries;console.log("model_group_retry_policy:",y),console.log("default_retries:",f),eP(y),e3(f)}catch(e){console.error("There was an error fetching the model data",e)}};o&&i&&c&&d&&e();let l=async()=>{let e=await h();console.log("received model cost map data: ".concat(Object.keys(e))),f(e)};null==y&&l(),e9()},[o,i,c,d,y,Z]),!m||!o||!i||!c||!d)return(0,r.jsx)("div",{children:"Loading..."});let ll=[];for(let e=0;e(console.log("GET PROVIDER CALLED! - ".concat(y)),null!=y&&"object"==typeof y&&e in y)?y[e].litellm_provider:"openai";if(a){let e=a.split("/"),l=e[0];n=1===e.length?u(a):l}else n="openai";r&&(o=null==r?void 0:r.input_cost_per_token,i=null==r?void 0:r.output_cost_per_token,c=null==r?void 0:r.max_tokens),(null==s?void 0:s.litellm_params)&&(d=Object.fromEntries(Object.entries(null==s?void 0:s.litellm_params).filter(e=>{let[l]=e;return"model"!==l&&"api_base"!==l}))),m.data[e].provider=n,m.data[e].input_cost=o,m.data[e].output_cost=i,m.data[e].max_tokens=c,m.data[e].api_base=null==s?void 0:null===(t=s.litellm_params)||void 0===t?void 0:t.api_base,m.data[e].cleanedLitellmParams=d,ll.push(s.model_name),console.log(m.data[e])}if(c&&"Admin Viewer"==c){let{Title:e,Paragraph:l}=eT.default;return(0,r.jsxs)("div",{children:[(0,r.jsx)(e,{level:1,children:"Access Denied"}),(0,r.jsx)(l,{children:"Ask your proxy admin for access to view all models"})]})}let lt=e=>{console.log("received provider string: ".concat(e));let l=Object.keys(a).find(l=>a[l]===e);if(l){let e=e4[l];console.log("mappingResult: ".concat(e));let t=[];"object"==typeof y&&Object.entries(y).forEach(l=>{let[s,a]=l;null!==a&&"object"==typeof a&&"litellm_provider"in a&&(a.litellm_provider===e||a.litellm_provider.includes(e))&&t.push(s)}),N(t),console.log("providerModels: ".concat(w))}},ls=async()=>{try{u.ZP.info("Running health check..."),P("");let e=await G(o);P(e)}catch(e){console.error("Error running health check:",e),P("Error running health check")}},la=async(e,l,t)=>{if(console.log("Updating model metrics for group:",e),o&&d&&c&&l&&t){console.log("inside updateModelMetrics - startTime:",l,"endTime:",t),K(e);try{let s=await k(o,d,c,e,l.toISOString(),t.toISOString());console.log("Model metrics response:",s),et(s.data),en(s.all_api_bases);let a=await S(o,d,c,e,l.toISOString(),t.toISOString());console.log("Model exceptions response:",a),ed(a.data),eh(a.exception_types);let r=await v(o,d,c,e,l.toISOString(),t.toISOString());console.log("slowResponses:",r),eA(r)}catch(e){console.error("Failed to fetch model metrics",e)}}};return console.log("selectedProvider: ".concat(E)),console.log("providerModels.length: 
".concat(w.length)),(0,r.jsx)("div",{style:{width:"100%",height:"100%"},children:(0,r.jsxs)(eM.Z,{className:"gap-2 p-8 h-[75vh] w-full mt-2",children:[(0,r.jsxs)(eL.Z,{className:"flex justify-between mt-2 w-full items-center",children:[(0,r.jsxs)("div",{className:"flex",children:[(0,r.jsx)(eR.Z,{children:"All Models"}),(0,r.jsx)(eR.Z,{children:"Add Model"}),(0,r.jsx)(eR.Z,{children:(0,r.jsx)("pre",{children:"/health Models"})}),(0,r.jsx)(eR.Z,{children:"Model Analytics"}),(0,r.jsx)(eR.Z,{children:"Model Retry Settings"})]}),(0,r.jsxs)("div",{className:"flex items-center space-x-2",children:[Z&&(0,r.jsxs)(ee.Z,{children:["Last Refreshed: ",Z]}),(0,r.jsx)(ej.Z,{icon:eJ.Z,variant:"shadow",size:"xs",className:"self-center",onClick:e9})]})]}),(0,r.jsxs)(eD.Z,{children:[(0,r.jsxs)(eU.Z,{children:[(0,r.jsxs)(W.Z,{children:[(0,r.jsxs)("div",{className:"flex items-center",children:[(0,r.jsx)(ee.Z,{children:"Filter by Public Model Name"}),(0,r.jsxs)(eb.Z,{className:"mb-4 mt-2 ml-2 w-50",defaultValue:"all",onValueChange:e=>K("all"===e?"all":e),children:[(0,r.jsx)(ek.Z,{value:"all",children:"All Models"}),M.map((e,l)=>(0,r.jsx)(ek.Z,{value:e,onClick:()=>K(e),children:e},l))]})]}),(0,r.jsx)(ep.Z,{children:(0,r.jsxs)(eg.Z,{className:"mt-5",children:[(0,r.jsx)(eZ.Z,{children:(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(e_.Z,{children:"Public Model Name "}),(0,r.jsx)(e_.Z,{children:"Provider"}),"Admin"===c&&(0,r.jsx)(e_.Z,{children:"API Base"}),(0,r.jsx)(e_.Z,{children:"Extra litellm Params"}),(0,r.jsx)(e_.Z,{children:"Input Price per token ($)"}),(0,r.jsx)(e_.Z,{children:"Output Price per token ($)"}),(0,r.jsx)(e_.Z,{children:"Max Tokens"}),(0,r.jsx)(e_.Z,{children:"Status"})]})}),(0,r.jsx)(ey.Z,{children:m.data.filter(e=>"all"===U||e.model_name===U||null==U||""===U).map((e,l)=>(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(ef.Z,{children:(0,r.jsx)(ee.Z,{children:e.model_name})}),(0,r.jsx)(ef.Z,{children:e.provider}),"Admin"===c&&(0,r.jsx)(ef.Z,{children:e.api_base}),(0,r.jsx)(ef.Z,{children:(0,r.jsxs)($.Z,{children:[(0,r.jsx)(Q.Z,{children:(0,r.jsx)(ee.Z,{children:"Litellm params"})}),(0,r.jsx)(X.Z,{children:(0,r.jsx)("pre",{children:JSON.stringify(e.cleanedLitellmParams,null,2)})})]})}),(0,r.jsx)(ef.Z,{children:e.input_cost||e.litellm_params.input_cost_per_token||null}),(0,r.jsx)(ef.Z,{children:e.output_cost||e.litellm_params.output_cost_per_token||null}),(0,r.jsx)(ef.Z,{children:e.max_tokens}),(0,r.jsx)(ef.Z,{children:e.model_info.db_model?(0,r.jsx)(ex.Z,{icon:eH.Z,className:"text-white",children:"DB Model"}):(0,r.jsx)(ex.Z,{icon:e$.Z,className:"text-black",children:"Config Model"})}),(0,r.jsxs)(ef.Z,{children:[(0,r.jsx)(ej.Z,{icon:eu.Z,size:"sm",onClick:()=>e6(e)}),(0,r.jsx)(eX,{modelID:e.model_info.id,accessToken:o})]})]},l))})]})})]}),(0,r.jsx)(e=>{let{visible:l,onCancel:t,model:s,onSubmit:a}=e,[n]=ea.Z.useForm(),o={},i="",c="";if(s){o=s.litellm_params,i=s.model_name;let e=s.model_info;e&&(c=e.id,console.log("model_id: ".concat(c)),o.model_id=c)}return(0,r.jsx)(er.Z,{title:"Edit Model "+i,visible:l,width:800,footer:null,onOk:()=>{n.validateFields().then(e=>{a(e),n.resetFields()}).catch(e=>{console.error("Validation failed:",e)})},onCancel:t,children:(0,r.jsxs)(ea.Z,{form:n,onFinish:e7,initialValues:o,labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(ea.Z.Item,{className:"mt-8",label:"api_base",name:"api_base",children:(0,r.jsx)(H.Z,{})}),(0,r.jsx)(ea.Z.Item,{label:"tpm",name:"tpm",tooltip:"int (optional) - Tokens limit for this deployment: in tokens per 
minute (tpm). Find this information on your model/providers website",children:(0,r.jsx)(eo.Z,{min:0,step:1})}),(0,r.jsx)(ea.Z.Item,{label:"rpm",name:"rpm",tooltip:"int (optional) - Rate limit for this deployment: in requests per minute (rpm). Find this information on your model/providers website",children:(0,r.jsx)(eo.Z,{min:0,step:1})}),(0,r.jsx)(ea.Z.Item,{label:"max_retries",name:"max_retries",children:(0,r.jsx)(eo.Z,{min:0,step:1})}),(0,r.jsx)(ea.Z.Item,{label:"timeout",name:"timeout",tooltip:"int (optional) - Timeout in seconds for LLM requests (Defaults to 600 seconds)",children:(0,r.jsx)(eo.Z,{min:0,step:1})}),(0,r.jsx)(ea.Z.Item,{label:"stream_timeout",name:"stream_timeout",tooltip:"int (optional) - Timeout for stream requests (seconds)",children:(0,r.jsx)(eo.Z,{min:0,step:1})}),(0,r.jsx)(ea.Z.Item,{label:"input_cost_per_token",name:"input_cost_per_token",tooltip:"float (optional) - Input cost per token",children:(0,r.jsx)(eo.Z,{min:0,step:1e-4})}),(0,r.jsx)(ea.Z.Item,{label:"output_cost_per_token",name:"output_cost_per_token",tooltip:"float (optional) - Output cost per token",children:(0,r.jsx)(eo.Z,{min:0,step:1e-4})}),(0,r.jsx)(ea.Z.Item,{label:"model_id",name:"model_id",hidden:!0})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(ei.ZP,{htmlType:"submit",children:"Save"})})]})})},{visible:T,onCancel:()=>{O(!1),R(null)},model:F,onSubmit:e7})]}),(0,r.jsxs)(eU.Z,{className:"h-full",children:[(0,r.jsx)(e1,{level:2,children:"Add new model"}),(0,r.jsx)(ep.Z,{children:(0,r.jsxs)(ea.Z,{form:g,onFinish:()=>{g.validateFields().then(e=>{e5(e,o,g)}).catch(e=>{console.error("Validation failed:",e)})},labelCol:{span:10},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(ea.Z.Item,{rules:[{required:!0,message:"Required"}],label:"Provider:",name:"custom_llm_provider",tooltip:"E.g. OpenAI, Azure OpenAI, Anthropic, Bedrock, etc.",labelCol:{span:10},labelAlign:"left",children:(0,r.jsx)(eb.Z,{value:E.toString(),children:A.map((e,l)=>(0,r.jsx)(ek.Z,{value:e,onClick:()=>{lt(e),I(e)},children:e},l))})}),(0,r.jsx)(ea.Z.Item,{rules:[{required:!0,message:"Required"}],label:"Public Model Name",name:"model_name",tooltip:"Model name your users will pass in. Also used for load-balancing, LiteLLM will load balance between all models with this public name.",className:"mb-0",children:(0,r.jsx)(H.Z,{placeholder:"Vertex AI (Anthropic, Gemini, etc.)"===(s=E.toString())?"gemini-pro":"Anthropic"==s?"claude-3-opus":"Amazon Bedrock"==s?"claude-3-opus":"Gemini (Google AI Studio)"==s?"gemini-pro":"gpt-3.5-turbo"})}),(0,r.jsxs)(eV.Z,{children:[(0,r.jsx)(eG.Z,{span:10}),(0,r.jsx)(eG.Z,{span:10,children:(0,r.jsx)(ee.Z,{className:"mb-3 mt-1",children:"Model name your users will pass in."})})]}),(0,r.jsx)(ea.Z.Item,{rules:[{required:!0,message:"Required"}],label:"LiteLLM Model Name(s)",name:"model",tooltip:"Actual model name used for making litellm.completion() call.",className:"mb-0",children:"Azure"===E?(0,r.jsx)(H.Z,{placeholder:"Enter model name"}):w.length>0?(0,r.jsx)(eK.Z,{value:w,children:w.map((e,l)=>(0,r.jsx)(eB.Z,{value:e,children:e},l))}):(0,r.jsx)(H.Z,{placeholder:"gpt-3.5-turbo-0125"})}),(0,r.jsxs)(eV.Z,{children:[(0,r.jsx)(eG.Z,{span:10}),(0,r.jsx)(eG.Z,{span:10,children:(0,r.jsxs)(ee.Z,{className:"mb-3 mt-1",children:["Actual model name used for making ",(0,r.jsx)(e2,{href:"https://docs.litellm.ai/docs/providers",target:"_blank",children:"litellm.completion() call"}),". 
We'll ",(0,r.jsx)(e2,{href:"https://docs.litellm.ai/docs/proxy/reliability#step-1---set-deployments-on-config",target:"_blank",children:"loadbalance"})," models with the same 'public name'"]})})]}),"Amazon Bedrock"!=E&&"Vertex AI (Anthropic, Gemini, etc.)"!=E&&(0,r.jsx)(ea.Z.Item,{rules:[{required:!0,message:"Required"}],label:"API Key",name:"api_key",children:(0,r.jsx)(H.Z,{placeholder:"sk-",type:"password"})}),"OpenAI"==E&&(0,r.jsx)(ea.Z.Item,{label:"Organization ID",name:"organization_id",children:(0,r.jsx)(H.Z,{placeholder:"[OPTIONAL] my-unique-org"})}),"Vertex AI (Anthropic, Gemini, etc.)"==E&&(0,r.jsx)(ea.Z.Item,{rules:[{required:!0,message:"Required"}],label:"Vertex Project",name:"vertex_project",children:(0,r.jsx)(H.Z,{placeholder:"adroit-cadet-1234.."})}),"Vertex AI (Anthropic, Gemini, etc.)"==E&&(0,r.jsx)(ea.Z.Item,{rules:[{required:!0,message:"Required"}],label:"Vertex Location",name:"vertex_location",children:(0,r.jsx)(H.Z,{placeholder:"us-east-1"})}),"Vertex AI (Anthropic, Gemini, etc.)"==E&&(0,r.jsx)(ea.Z.Item,{rules:[{required:!0,message:"Required"}],label:"Vertex Credentials",name:"vertex_credentials",className:"mb-0",children:(0,r.jsx)(e0.Z,{name:"file",accept:".json",beforeUpload:e=>{if("application/json"===e.type){let l=new FileReader;l.onload=e=>{if(e.target){let l=e.target.result;g.setFieldsValue({vertex_credentials:l})}},l.readAsText(e)}return!1},onChange(e){"uploading"!==e.file.status&&console.log(e.file,e.fileList),"done"===e.file.status?u.ZP.success("".concat(e.file.name," file uploaded successfully")):"error"===e.file.status&&u.ZP.error("".concat(e.file.name," file upload failed."))},children:(0,r.jsx)(ei.ZP,{icon:(0,r.jsx)(eQ.Z,{}),children:"Click to Upload"})})}),"Vertex AI (Anthropic, Gemini, etc.)"==E&&(0,r.jsxs)(eV.Z,{children:[(0,r.jsx)(eG.Z,{span:10}),(0,r.jsx)(eG.Z,{span:10,children:(0,r.jsx)(ee.Z,{className:"mb-3 mt-1",children:"Give litellm a gcp service account(.json file), so it can make the relevant calls"})})]}),("Azure"==E||"OpenAI-Compatible Endpoints (Groq, Together AI, Mistral AI, etc.)"==E)&&(0,r.jsx)(ea.Z.Item,{rules:[{required:!0,message:"Required"}],label:"API Base",name:"api_base",children:(0,r.jsx)(H.Z,{placeholder:"https://..."})}),"Azure"==E&&(0,r.jsx)(ea.Z.Item,{rules:[{required:!0,message:"Required"}],label:"API Version",name:"api_version",children:(0,r.jsx)(H.Z,{placeholder:"2023-07-01-preview"})}),"Azure"==E&&(0,r.jsxs)(ea.Z.Item,{label:"Base Model",name:"base_model",children:[(0,r.jsx)(H.Z,{placeholder:"azure/gpt-3.5-turbo"}),(0,r.jsxs)(ee.Z,{children:["The actual model your azure deployment uses. Used for accurate cost tracking. Select name from ",(0,r.jsx)(e2,{href:"https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json",target:"_blank",children:"here"})]})]}),"Amazon Bedrock"==E&&(0,r.jsx)(ea.Z.Item,{rules:[{required:!0,message:"Required"}],label:"AWS Access Key ID",name:"aws_access_key_id",tooltip:"You can provide the raw key or the environment variable (e.g. `os.environ/MY_SECRET_KEY`).",children:(0,r.jsx)(H.Z,{placeholder:""})}),"Amazon Bedrock"==E&&(0,r.jsx)(ea.Z.Item,{rules:[{required:!0,message:"Required"}],label:"AWS Secret Access Key",name:"aws_secret_access_key",tooltip:"You can provide the raw key or the environment variable (e.g. 
`os.environ/MY_SECRET_KEY`).",children:(0,r.jsx)(H.Z,{placeholder:""})}),"Amazon Bedrock"==E&&(0,r.jsx)(ea.Z.Item,{rules:[{required:!0,message:"Required"}],label:"AWS Region Name",name:"aws_region_name",tooltip:"You can provide the raw key or the environment variable (e.g. `os.environ/MY_SECRET_KEY`).",children:(0,r.jsx)(H.Z,{placeholder:"us-east-1"})}),(0,r.jsx)(ea.Z.Item,{label:"LiteLLM Params",name:"litellm_extra_params",tooltip:"Optional litellm params used for making a litellm.completion() call.",className:"mb-0",children:(0,r.jsx)(eW.Z,{rows:4,placeholder:'{ "rpm": 100, "timeout": 0, "stream_timeout": 0 }'})}),(0,r.jsxs)(eV.Z,{children:[(0,r.jsx)(eG.Z,{span:10}),(0,r.jsx)(eG.Z,{span:10,children:(0,r.jsxs)(ee.Z,{className:"mb-3 mt-1",children:["Pass JSON of litellm supported params ",(0,r.jsx)(e2,{href:"https://docs.litellm.ai/docs/completion/input",target:"_blank",children:"litellm.completion() call"})]})})]})]}),(0,r.jsx)("div",{style:{textAlign:"center",marginTop:"10px"},children:(0,r.jsx)(ei.ZP,{htmlType:"submit",children:"Add Model"})}),(0,r.jsx)(eY.Z,{title:"Get help on our github",children:(0,r.jsx)(eT.default.Link,{href:"https://github.com/BerriAI/litellm/issues",children:"Need Help?"})})]})})]}),(0,r.jsx)(eU.Z,{children:(0,r.jsxs)(ep.Z,{children:[(0,r.jsx)(ee.Z,{children:"`/health` will run a very small request through your models configured on litellm"}),(0,r.jsx)(J.Z,{onClick:ls,children:"Run `/health`"}),C&&(0,r.jsx)("pre",{children:JSON.stringify(C,null,2)})]})}),(0,r.jsxs)(eU.Z,{children:[(0,r.jsxs)(W.Z,{numItems:2,className:"mt-2",children:[(0,r.jsxs)(eG.Z,{children:[(0,r.jsx)(ee.Z,{children:"Select Time Range"}),(0,r.jsx)(eF.Z,{enableSelect:!0,value:eE,onValueChange:e=>{eI(e),la(U,e.from,e.to)}})]}),(0,r.jsxs)(eG.Z,{children:[(0,r.jsx)(ee.Z,{children:"Select Model Group"}),(0,r.jsx)(eb.Z,{className:"mb-4 mt-2",defaultValue:U||M[0],value:U||M[0],children:M.map((e,l)=>(0,r.jsx)(ek.Z,{value:e,onClick:()=>la(e,eE.from,eE.to),children:e},l))})]})]}),(0,r.jsxs)(W.Z,{numItems:2,children:[(0,r.jsx)(eG.Z,{children:(0,r.jsxs)(ep.Z,{className:"mr-2 max-h-[400px] min-h-[400px]",children:[(0,r.jsx)(el.Z,{children:"Avg Latency per Token"}),(0,r.jsx)("p",{className:"text-gray-500 italic",children:" (seconds/token)"}),(0,r.jsx)(ee.Z,{className:"text-gray-500 italic mt-1 mb-1",children:"average Latency for successfull requests divided by the total tokens"}),Y&&es&&(0,r.jsx)(ez.Z,{title:"Model Latency",className:"h-72",data:Y,showLegend:!1,index:"date",categories:es,connectNulls:!0,customTooltip:e=>{var l,t;let{payload:s,active:a}=e;if(!a||!s)return null;let n=null===(t=s[0])||void 0===t?void 0:null===(l=t.payload)||void 0===l?void 0:l.date,o=s.sort((e,l)=>l.value-e.value);if(o.length>5){let e=o.length-5;(o=o.slice(0,5)).push({dataKey:"".concat(e," other deployments"),value:s.slice(5).reduce((e,l)=>e+l.value,0),color:"gray"})}return(0,r.jsxs)("div",{className:"w-150 rounded-tremor-default border border-tremor-border bg-tremor-background p-2 text-tremor-default shadow-tremor-dropdown",children:[n&&(0,r.jsxs)("p",{className:"text-tremor-content-emphasis mb-2",children:["Date: ",n]}),o.map((e,l)=>{let t=parseFloat(e.value.toFixed(5)),s=0===t&&e.value>0?"<0.00001":t.toFixed(5);return(0,r.jsxs)("div",{className:"flex justify-between",children:[(0,r.jsxs)("div",{className:"flex items-center space-x-2",children:[(0,r.jsx)("div",{className:"w-2 h-2 mt-1 rounded-full 
bg-".concat(e.color,"-500")}),(0,r.jsx)("p",{className:"text-tremor-content",children:e.dataKey})]}),(0,r.jsx)("p",{className:"font-medium text-tremor-content-emphasis text-righ ml-2",children:s})]},l)})]})}})]})}),(0,r.jsx)(eG.Z,{children:(0,r.jsx)(ep.Z,{className:"ml-2 max-h-[400px] min-h-[400px] overflow-y-auto",children:(0,r.jsxs)(eg.Z,{children:[(0,r.jsx)(eZ.Z,{children:(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(e_.Z,{children:"Deployment"}),(0,r.jsx)(e_.Z,{children:"Success Responses"}),(0,r.jsxs)(e_.Z,{children:["Slow Responses ",(0,r.jsx)("p",{children:"Success Responses taking 600+s"})]})]})}),(0,r.jsx)(ey.Z,{children:eN.map((e,l)=>(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(ef.Z,{children:e.api_base}),(0,r.jsx)(ef.Z,{children:e.total_count}),(0,r.jsx)(ef.Z,{children:e.slow_count})]},l))})]})})})]}),(0,r.jsxs)(ep.Z,{className:"mt-4",children:[(0,r.jsx)(el.Z,{children:"Exceptions per Model"}),(0,r.jsx)(eq.Z,{className:"h-72",data:ec,index:"model",categories:em,stack:!0,colors:["indigo-300","rose-200","#ffcc33"],yAxisWidth:30})]})]}),(0,r.jsxs)(eU.Z,{children:[(0,r.jsxs)("div",{className:"flex items-center",children:[(0,r.jsx)(ee.Z,{children:"Filter by Public Model Name"}),(0,r.jsx)(eb.Z,{className:"mb-4 mt-2 ml-2 w-50",defaultValue:U||M[0],value:U||M[0],onValueChange:e=>K(e),children:M.map((e,l)=>(0,r.jsx)(ek.Z,{value:e,onClick:()=>K(e),children:e},l))})]}),(0,r.jsxs)(el.Z,{children:["Retry Policy for ",U]}),(0,r.jsx)(ee.Z,{className:"mb-6",children:"How many retries should be attempted based on the Exception"}),e8&&(0,r.jsx)("table",{children:(0,r.jsx)("tbody",{children:Object.entries(e8).map((e,l)=>{var t;let[s,a]=e,n=null==eC?void 0:null===(t=eC[U])||void 0===t?void 0:t[a];return null==n&&(n=eO),(0,r.jsxs)("tr",{className:"flex justify-between items-center mt-2",children:[(0,r.jsx)("td",{children:(0,r.jsx)(ee.Z,{children:s})}),(0,r.jsx)("td",{children:(0,r.jsx)(eo.Z,{className:"ml-5",value:n,min:0,step:1,onChange:e=>{eP(l=>{var t;let s=null!==(t=null==l?void 0:l[U])&&void 0!==t?t:{};return{...null!=l?l:{},[U]:{...s,[a]:e}}})}})})]},l)})})}),(0,r.jsx)(J.Z,{className:"mt-6 mr-8",onClick:le,children:"Save"})]})]})]})})};let{Option:e6}=es.default;var e7=e=>{let{userID:l,accessToken:t,teams:s}=e,[a]=ea.Z.useForm(),[o,i]=(0,n.useState)(!1),[c,d]=(0,n.useState)(null),[m,h]=(0,n.useState)([]);(0,n.useEffect)(()=>{(async()=>{try{let e=await N(t,l,"any"),s=[];for(let l=0;l{i(!1),a.resetFields()},p=()=>{i(!1),d(null),a.resetFields()},j=async e=>{try{u.ZP.info("Making API Call"),i(!0),console.log("formValues in create user:",e);let s=await g(t,null,e);console.log("user create Response:",s),d(s.key),u.ZP.success("API user Created"),a.resetFields(),localStorage.removeItem("userData"+l)}catch(e){console.error("Error creating the user:",e)}};return(0,r.jsxs)("div",{children:[(0,r.jsx)(J.Z,{className:"mx-auto",onClick:()=>i(!0),children:"+ Invite User"}),(0,r.jsxs)(er.Z,{title:"Invite User",visible:o,width:800,footer:null,onOk:x,onCancel:p,children:[(0,r.jsx)(ee.Z,{className:"mb-1",children:"Invite a user to login to the Admin UI and create Keys"}),(0,r.jsx)(ee.Z,{className:"mb-6",children:(0,r.jsx)("b",{children:"Note: SSO Setup Required for this"})}),(0,r.jsxs)(ea.Z,{form:a,onFinish:j,labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsx)(ea.Z.Item,{label:"User Email",name:"user_email",children:(0,r.jsx)(H.Z,{placeholder:""})}),(0,r.jsx)(ea.Z.Item,{label:"Team ID",name:"team_id",children:(0,r.jsx)(es.default,{placeholder:"Select Team 
ID",style:{width:"100%"},children:s?s.map(e=>(0,r.jsx)(e6,{value:e.team_id,children:e.team_alias},e.team_id)):(0,r.jsx)(e6,{value:null,children:"Default Team"},"default")})}),(0,r.jsx)(ea.Z.Item,{label:"Metadata",name:"metadata",children:(0,r.jsx)(en.Z.TextArea,{rows:4,placeholder:"Enter metadata as JSON"})}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(ei.ZP,{htmlType:"submit",children:"Create User"})})]})]}),c&&(0,r.jsxs)(er.Z,{title:"User Created Successfully",visible:o,onOk:x,onCancel:p,footer:null,children:[(0,r.jsx)("p",{children:"User has been created to access your proxy. Please Ask them to Log In."}),(0,r.jsx)("br",{}),(0,r.jsx)("p",{children:(0,r.jsx)("b",{children:"Note: This Feature is only supported through SSO on the Admin UI"})})]})]})},e9=e=>{let{accessToken:l,token:t,keys:s,userRole:a,userID:o,teams:i,setKeys:c}=e,[d,m]=(0,n.useState)(null),[u,h]=(0,n.useState)(null),[x,p]=(0,n.useState)(0),[j,g]=n.useState(null),[y,f]=(0,n.useState)(null);return((0,n.useEffect)(()=>{if(!l||!t||!a||!o)return;let e=async()=>{try{let e=await Z(l,null,a,!0,x,25);console.log("user data response:",e),m(e)}catch(e){console.error("There was an error fetching the model data",e)}};l&&t&&a&&o&&e()},[l,t,a,o,x]),d&&l&&t&&a&&o)?(0,r.jsx)("div",{style:{width:"100%"},children:(0,r.jsxs)(W.Z,{className:"gap-2 p-2 h-[80vh] w-full mt-8",children:[(0,r.jsx)(e7,{userID:o,accessToken:l,teams:i}),(0,r.jsxs)(ep.Z,{className:"w-full mx-auto flex-auto overflow-y-auto max-h-[80vh] mb-4",children:[(0,r.jsx)("div",{className:"mb-4 mt-1",children:(0,r.jsx)(ee.Z,{children:"These are Users on LiteLLM that created API Keys. Automatically tracked by LiteLLM"})}),(0,r.jsx)(eM.Z,{children:(0,r.jsxs)(eD.Z,{children:[(0,r.jsx)(eU.Z,{children:(0,r.jsxs)(eg.Z,{className:"mt-5",children:[(0,r.jsx)(eZ.Z,{children:(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(e_.Z,{children:"User ID"}),(0,r.jsx)(e_.Z,{children:"User Email"}),(0,r.jsx)(e_.Z,{children:"User Models"}),(0,r.jsx)(e_.Z,{children:"User Spend ($ USD)"}),(0,r.jsx)(e_.Z,{children:"User Max Budget ($ USD)"}),(0,r.jsx)(e_.Z,{children:"User API Key Aliases"})]})}),(0,r.jsx)(ey.Z,{children:d.map(e=>{var l;return(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(ef.Z,{children:e.user_id}),(0,r.jsx)(ef.Z,{children:e.user_email}),(0,r.jsx)(ef.Z,{children:e.models&&e.models.length>0?e.models:"All Models"}),(0,r.jsx)(ef.Z,{children:e.spend?null===(l=e.spend)||void 0===l?void 0:l.toFixed(2):0}),(0,r.jsx)(ef.Z,{children:e.max_budget?e.max_budget:"Unlimited"}),(0,r.jsx)(ef.Z,{children:(0,r.jsx)(W.Z,{numItems:2,children:e&&e.key_aliases&&e.key_aliases.filter(e=>null!==e).length>0?(0,r.jsx)(ex.Z,{size:"xs",color:"indigo",children:e.key_aliases.filter(e=>null!==e).join(", ")}):(0,r.jsx)(ex.Z,{size:"xs",color:"gray",children:"No Keys"})})})]},e.user_id)})})]})}),(0,r.jsx)(eU.Z,{children:(0,r.jsxs)("div",{className:"flex items-center",children:[(0,r.jsx)("div",{className:"flex-1"}),(0,r.jsx)("div",{className:"flex-1 flex justify-between items-center"})]})})]})})]}),function(){if(!d)return null;let e=Math.ceil(d.length/25);return(0,r.jsxs)("div",{className:"flex justify-between items-center",children:[(0,r.jsxs)("div",{children:["Showing Page ",x+1," of ",e]}),(0,r.jsxs)("div",{className:"flex",children:[(0,r.jsx)("button",{className:"bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded-l focus:outline-none",disabled:0===x,onClick:()=>p(x-1),children:"β Prev"}),(0,r.jsx)("button",{className:"bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded-r 
focus:outline-none",onClick:()=>{p(x+1)},children:"Next β"})]})]})}()]})}):(0,r.jsx)("div",{children:"Loading..."})},le=e=>{let{teams:l,searchParams:t,accessToken:s,setTeams:a,userID:o,userRole:i}=e,[c]=ea.Z.useForm(),[d]=ea.Z.useForm(),{Title:m,Paragraph:h}=eT.default,[x,p]=(0,n.useState)(""),[j,g]=(0,n.useState)(!1),[y,Z]=(0,n.useState)(l?l[0]:null),[w,b]=(0,n.useState)(!1),[k,v]=(0,n.useState)(!1),[S,A]=(0,n.useState)([]),[E,I]=(0,n.useState)(!1),[C,P]=(0,n.useState)(null),[T,O]=(0,n.useState)({}),F=e=>{Z(e),g(!0)},R=async e=>{let t=e.team_id;if(console.log("handleEditSubmit:",e),null==s)return;let r=await U(s,e);l&&a(l.map(e=>e.team_id===t?r.data:e)),u.ZP.success("Team updated successfully"),g(!1),Z(null)},L=async e=>{P(e),I(!0)},D=async()=>{if(null!=C&&null!=l&&null!=s){try{await f(s,C);let e=l.filter(e=>e.team_id!==C);a(e)}catch(e){console.error("Error deleting the team:",e)}I(!1),P(null)}};(0,n.useEffect)(()=>{let e=async()=>{try{if(null===o||null===i||null===s||null===l)return;console.log("fetching team info:");let e={};for(let t=0;t<(null==l?void 0:l.length);t++){let a=l[t].team_id,r=await _(s,a);console.log("teamInfo response:",r),null!==r&&(e={...e,[a]:r})}O(e)}catch(e){console.error("Error fetching team info:",e)}};(async()=>{try{if(null===o||null===i)return;if(null!==s){let e=(await N(s,o,i)).data.map(e=>e.id);console.log("available_model_names:",e),A(e)}}catch(e){console.error("Error fetching user models:",e)}})(),e()},[s,o,i,l]);let B=async e=>{try{if(null!=s){var t;let r=null==e?void 0:e.team_alias;if((null!==(t=null==l?void 0:l.map(e=>e.team_alias))&&void 0!==t?t:[]).includes(r))throw Error("Team alias ".concat(r," already exists, please pick another alias"));u.ZP.info("Creating Team");let n=await M(s,e);null!==l?a([...l,n]):a([n]),console.log("response for team create call: ".concat(n)),u.ZP.success("Team created"),b(!1)}}catch(e){console.error("Error creating the team:",e),u.ZP.error("Error creating the team: "+e,20)}},z=async e=>{try{if(null!=s&&null!=l){u.ZP.info("Adding Member");let t={role:"user",user_email:e.user_email,user_id:e.user_id},r=await K(s,y.team_id,t);console.log("response for team create call: ".concat(r.data));let n=l.findIndex(e=>(console.log("team.team_id=".concat(e.team_id,"; response.data.team_id=").concat(r.data.team_id)),e.team_id===r.data.team_id));if(console.log("foundIndex: ".concat(n)),-1!==n){let e=[...l];e[n]=r.data,a(e),Z(r.data)}v(!1)}}catch(e){console.error("Error creating the team:",e)}};return console.log("received teams ".concat(JSON.stringify(l))),(0,r.jsx)("div",{className:"w-full mx-4",children:(0,r.jsxs)(W.Z,{numItems:1,className:"gap-2 p-8 h-[75vh] w-full mt-2",children:[(0,r.jsxs)(Y.Z,{numColSpan:1,children:[(0,r.jsx)(m,{level:4,children:"All Teams"}),(0,r.jsxs)(ep.Z,{className:"w-full mx-auto flex-auto overflow-y-auto max-h-[50vh]",children:[(0,r.jsxs)(eg.Z,{children:[(0,r.jsx)(eZ.Z,{children:(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(e_.Z,{children:"Team Name"}),(0,r.jsx)(e_.Z,{children:"Spend (USD)"}),(0,r.jsx)(e_.Z,{children:"Budget (USD)"}),(0,r.jsx)(e_.Z,{children:"Models"}),(0,r.jsx)(e_.Z,{children:"TPM / RPM 
Limits"}),(0,r.jsx)(e_.Z,{children:"Info"})]})}),(0,r.jsx)(ey.Z,{children:l&&l.length>0?l.map(e=>(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(ef.Z,{style:{maxWidth:"4px",whiteSpace:"pre-wrap",overflow:"hidden"},children:e.team_alias}),(0,r.jsx)(ef.Z,{style:{maxWidth:"4px",whiteSpace:"pre-wrap",overflow:"hidden"},children:e.spend}),(0,r.jsx)(ef.Z,{style:{maxWidth:"4px",whiteSpace:"pre-wrap",overflow:"hidden"},children:e.max_budget?e.max_budget:"No limit"}),(0,r.jsx)(ef.Z,{style:{maxWidth:"8-x",whiteSpace:"pre-wrap",overflow:"hidden"},children:Array.isArray(e.models)?(0,r.jsx)("div",{style:{display:"flex",flexDirection:"column"},children:0===e.models.length?(0,r.jsx)(ex.Z,{size:"xs",className:"mb-1",color:"red",children:(0,r.jsx)(ee.Z,{children:"All Proxy Models"})}):e.models.map((e,l)=>"all-proxy-models"===e?(0,r.jsx)(ex.Z,{size:"xs",className:"mb-1",color:"red",children:(0,r.jsx)(ee.Z,{children:"All Proxy Models"})},l):(0,r.jsx)(ex.Z,{size:"xs",className:"mb-1",color:"blue",children:(0,r.jsx)(ee.Z,{children:e.length>30?"".concat(e.slice(0,30),"..."):e})},l))}):null}),(0,r.jsx)(ef.Z,{style:{maxWidth:"4px",whiteSpace:"pre-wrap",overflow:"hidden"},children:(0,r.jsxs)(ee.Z,{children:["TPM: ",e.tpm_limit?e.tpm_limit:"Unlimited"," ",(0,r.jsx)("br",{}),"RPM:"," ",e.rpm_limit?e.rpm_limit:"Unlimited"]})}),(0,r.jsxs)(ef.Z,{children:[(0,r.jsxs)(ee.Z,{children:[T&&e.team_id&&T[e.team_id]&&T[e.team_id].keys&&T[e.team_id].keys.length," ","Keys"]}),(0,r.jsxs)(ee.Z,{children:[T&&e.team_id&&T[e.team_id]&&T[e.team_id].team_info&&T[e.team_id].team_info.members_with_roles&&T[e.team_id].team_info.members_with_roles.length," ","Members"]})]}),(0,r.jsxs)(ef.Z,{children:[(0,r.jsx)(ej.Z,{icon:eu.Z,size:"sm",onClick:()=>F(e)}),(0,r.jsx)(ej.Z,{onClick:()=>L(e.team_id),icon:eh.Z,size:"sm"})]})]},e.team_id)):null})]}),E&&(0,r.jsx)("div",{className:"fixed z-10 inset-0 overflow-y-auto",children:(0,r.jsxs)("div",{className:"flex items-end justify-center min-h-screen pt-4 px-4 pb-20 text-center sm:block sm:p-0",children:[(0,r.jsx)("div",{className:"fixed inset-0 transition-opacity","aria-hidden":"true",children:(0,r.jsx)("div",{className:"absolute inset-0 bg-gray-500 opacity-75"})}),(0,r.jsx)("span",{className:"hidden sm:inline-block sm:align-middle sm:h-screen","aria-hidden":"true",children:"β"}),(0,r.jsxs)("div",{className:"inline-block align-bottom bg-white rounded-lg text-left overflow-hidden shadow-xl transform transition-all sm:my-8 sm:align-middle sm:max-w-lg sm:w-full",children:[(0,r.jsx)("div",{className:"bg-white px-4 pt-5 pb-4 sm:p-6 sm:pb-4",children:(0,r.jsx)("div",{className:"sm:flex sm:items-start",children:(0,r.jsxs)("div",{className:"mt-3 text-center sm:mt-0 sm:ml-4 sm:text-left",children:[(0,r.jsx)("h3",{className:"text-lg leading-6 font-medium text-gray-900",children:"Delete Team"}),(0,r.jsx)("div",{className:"mt-2",children:(0,r.jsx)("p",{className:"text-sm text-gray-500",children:"Are you sure you want to delete this team ?"})})]})})}),(0,r.jsxs)("div",{className:"bg-gray-50 px-4 py-3 sm:px-6 sm:flex sm:flex-row-reverse",children:[(0,r.jsx)(J.Z,{onClick:D,color:"red",className:"ml-2",children:"Delete"}),(0,r.jsx)(J.Z,{onClick:()=>{I(!1),P(null)},children:"Cancel"})]})]})]})})]})]}),(0,r.jsxs)(Y.Z,{numColSpan:1,children:[(0,r.jsx)(J.Z,{className:"mx-auto",onClick:()=>b(!0),children:"+ Create New Team"}),(0,r.jsx)(er.Z,{title:"Create 
Team",visible:w,width:800,footer:null,onOk:()=>{b(!1),c.resetFields()},onCancel:()=>{b(!1),c.resetFields()},children:(0,r.jsxs)(ea.Z,{form:c,onFinish:B,labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(ea.Z.Item,{label:"Team Name",name:"team_alias",rules:[{required:!0,message:"Please input a team name"}],children:(0,r.jsx)(H.Z,{placeholder:""})}),(0,r.jsx)(ea.Z.Item,{label:"Models",name:"models",children:(0,r.jsxs)(es.default,{mode:"multiple",placeholder:"Select models",style:{width:"100%"},children:[(0,r.jsx)(es.default.Option,{value:"all-proxy-models",children:"All Proxy Models"},"all-proxy-models"),S.map(e=>(0,r.jsx)(es.default.Option,{value:e,children:e},e))]})}),(0,r.jsx)(ea.Z.Item,{label:"Max Budget (USD)",name:"max_budget",children:(0,r.jsx)(eo.Z,{step:.01,precision:2,width:200})}),(0,r.jsx)(ea.Z.Item,{label:"Tokens per minute Limit (TPM)",name:"tpm_limit",children:(0,r.jsx)(eo.Z,{step:1,width:400})}),(0,r.jsx)(ea.Z.Item,{label:"Requests per minute Limit (RPM)",name:"rpm_limit",children:(0,r.jsx)(eo.Z,{step:1,width:400})})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(ei.ZP,{htmlType:"submit",children:"Create Team"})})]})})]}),(0,r.jsxs)(Y.Z,{numColSpan:1,children:[(0,r.jsx)(m,{level:4,children:"Team Members"}),(0,r.jsx)(h,{children:"If you belong to multiple teams, this setting controls which teams members you see."}),l&&l.length>0?(0,r.jsx)(eb.Z,{defaultValue:"0",children:l.map((e,l)=>(0,r.jsx)(ek.Z,{value:String(l),onClick:()=>{Z(e)},children:e.team_alias},l))}):(0,r.jsxs)(h,{children:["No team created. ",(0,r.jsx)("b",{children:"Defaulting to personal account."})]})]}),(0,r.jsxs)(Y.Z,{numColSpan:1,children:[(0,r.jsx)(ep.Z,{className:"w-full mx-auto flex-auto overflow-y-auto max-h-[50vh]",children:(0,r.jsxs)(eg.Z,{children:[(0,r.jsx)(eZ.Z,{children:(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(e_.Z,{children:"Member Name"}),(0,r.jsx)(e_.Z,{children:"Role"})]})}),(0,r.jsx)(ey.Z,{children:y?y.members_with_roles.map((e,l)=>(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(ef.Z,{children:e.user_email?e.user_email:e.user_id?e.user_id:null}),(0,r.jsx)(ef.Z,{children:e.role})]},l)):null})]})}),y&&(0,r.jsx)(e=>{let{visible:l,onCancel:t,team:s,onSubmit:a}=e,[n]=ea.Z.useForm();return(0,r.jsx)(er.Z,{title:"Edit Team",visible:l,width:800,footer:null,onOk:()=>{n.validateFields().then(e=>{a({...e,team_id:s.team_id}),n.resetFields()}).catch(e=>{console.error("Validation failed:",e)})},onCancel:t,children:(0,r.jsxs)(ea.Z,{form:n,onFinish:R,initialValues:s,labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(ea.Z.Item,{label:"Team Name",name:"team_alias",rules:[{required:!0,message:"Please input a team name"}],children:(0,r.jsx)(H.Z,{})}),(0,r.jsx)(ea.Z.Item,{label:"Models",name:"models",children:(0,r.jsxs)(es.default,{mode:"multiple",placeholder:"Select models",style:{width:"100%"},children:[(0,r.jsx)(es.default.Option,{value:"all-proxy-models",children:"All Proxy Models"},"all-proxy-models"),S&&S.map(e=>(0,r.jsx)(es.default.Option,{value:e,children:e},e))]})}),(0,r.jsx)(ea.Z.Item,{label:"Max Budget (USD)",name:"max_budget",children:(0,r.jsx)(eo.Z,{step:.01,precision:2,width:200})}),(0,r.jsx)(ea.Z.Item,{label:"Tokens per minute Limit (TPM)",name:"tpm_limit",children:(0,r.jsx)(eo.Z,{step:1,width:400})}),(0,r.jsx)(ea.Z.Item,{label:"Requests per minute Limit (RPM)",name:"rpm_limit",children:(0,r.jsx)(eo.Z,{step:1,width:400})}),(0,r.jsx)(ea.Z.Item,{label:"Requests per 
minute Limit (RPM)",name:"team_id",hidden:!0})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(ei.ZP,{htmlType:"submit",children:"Edit Team"})})]})})},{visible:j,onCancel:()=>{g(!1),Z(null)},team:y,onSubmit:R})]}),(0,r.jsxs)(Y.Z,{numColSpan:1,children:[(0,r.jsx)(J.Z,{className:"mx-auto mb-5",onClick:()=>v(!0),children:"+ Add member"}),(0,r.jsx)(er.Z,{title:"Add member",visible:k,width:800,footer:null,onOk:()=>{v(!1),d.resetFields()},onCancel:()=>{v(!1),d.resetFields()},children:(0,r.jsxs)(ea.Z,{form:c,onFinish:z,labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(ea.Z.Item,{label:"Email",name:"user_email",className:"mb-4",children:(0,r.jsx)(en.Z,{name:"user_email",className:"px-3 py-2 border rounded-md w-full"})}),(0,r.jsx)("div",{className:"text-center mb-4",children:"OR"}),(0,r.jsx)(ea.Z.Item,{label:"User ID",name:"user_id",className:"mb-4",children:(0,r.jsx)(en.Z,{name:"user_id",className:"px-3 py-2 border rounded-md w-full"})})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(ei.ZP,{htmlType:"submit",children:"Add member"})})]})})]})]})})},ll=t(18190),lt=e=>{let l,{searchParams:t,accessToken:s,showSSOBanner:a}=e,[o]=ea.Z.useForm(),[i]=ea.Z.useForm(),{Title:c,Paragraph:d}=eT.default,[m,h]=(0,n.useState)(""),[x,p]=(0,n.useState)(null),[j,g]=(0,n.useState)(!1),[y,f]=(0,n.useState)(!1),[Z,_]=(0,n.useState)(!1),[w,b]=(0,n.useState)(!1),[k,v]=(0,n.useState)(!1);try{l=window.location.origin}catch(e){l=""}l+="/fallback/login";let S=()=>{v(!1)},N=["proxy_admin","proxy_admin_viewer"];(0,n.useEffect)(()=>{(async()=>{if(null!=s){let e=[],l=await R(s,"proxy_admin_viewer");l.forEach(l=>{e.push({user_role:l.user_role,user_id:l.user_id,user_email:l.user_email})}),console.log("proxy viewers: ".concat(l));let t=await R(s,"proxy_admin");t.forEach(l=>{e.push({user_role:l.user_role,user_id:l.user_id,user_email:l.user_email})}),console.log("proxy admins: ".concat(t)),console.log("combinedList: ".concat(e)),p(e)}})()},[s]);let A=()=>{_(!1),i.resetFields()},E=()=>{_(!1),i.resetFields()},I=e=>(0,r.jsxs)(ea.Z,{form:o,onFinish:e,labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(ea.Z.Item,{label:"Email",name:"user_email",className:"mb-4",children:(0,r.jsx)(en.Z,{name:"user_email",className:"px-3 py-2 border rounded-md w-full"})}),(0,r.jsx)("div",{className:"text-center mb-4",children:"OR"}),(0,r.jsx)(ea.Z.Item,{label:"User ID",name:"user_id",className:"mb-4",children:(0,r.jsx)(en.Z,{name:"user_id",className:"px-3 py-2 border rounded-md w-full"})})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(ei.ZP,{htmlType:"submit",children:"Add member"})})]}),C=(e,l,t)=>(0,r.jsxs)(ea.Z,{form:o,onFinish:e,labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(ea.Z.Item,{rules:[{required:!0,message:"Required"}],label:"User Role",name:"user_role",labelCol:{span:10},labelAlign:"left",children:(0,r.jsx)(eb.Z,{value:l,children:N.map((e,l)=>(0,r.jsx)(ek.Z,{value:e,children:e},l))})}),(0,r.jsx)(ea.Z.Item,{label:"Team ID",name:"user_id",hidden:!0,initialValue:t,valuePropName:"user_id",className:"mt-8",children:(0,r.jsx)(en.Z,{value:t,disabled:!0})})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(ei.ZP,{htmlType:"submit",children:"Update role"})})]}),P=async e=>{try{if(null!=s&&null!=x){u.ZP.info("Making API Call");let l=await 
B(s,e,null);console.log("response for team create call: ".concat(l));let t=x.findIndex(e=>(console.log("user.user_id=".concat(e.user_id,"; response.user_id=").concat(l.user_id)),e.user_id===l.user_id));console.log("foundIndex: ".concat(t)),-1==t&&(console.log("updates admin with new user"),x.push(l),p(x)),u.ZP.success("Refresh tab to see updated user role"),_(!1)}}catch(e){console.error("Error creating the key:",e)}},T=async e=>{try{if(null!=s&&null!=x){u.ZP.info("Making API Call");let l=await B(s,e,"proxy_admin_viewer");console.log("response for team create call: ".concat(l));let t=x.findIndex(e=>(console.log("user.user_id=".concat(e.user_id,"; response.user_id=").concat(l.user_id)),e.user_id===l.user_id));console.log("foundIndex: ".concat(t)),-1==t&&(console.log("updates admin with new user"),x.push(l),p(x)),g(!1)}}catch(e){console.error("Error creating the key:",e)}},O=async e=>{try{if(null!=s&&null!=x){u.ZP.info("Making API Call"),e.user_email,e.user_id;let l=await B(s,e,"proxy_admin");console.log("response for team create call: ".concat(l));let t=x.findIndex(e=>(console.log("user.user_id=".concat(e.user_id,"; response.user_id=").concat(l.user_id)),e.user_id===l.user_id));console.log("foundIndex: ".concat(t)),-1==t&&(console.log("updates admin with new user"),x.push(l),p(x)),f(!1)}}catch(e){console.error("Error creating the key:",e)}},F=async e=>{null!=s&&V(s,{environment_variables:{PROXY_BASE_URL:e.proxy_base_url,GOOGLE_CLIENT_ID:e.google_client_id,GOOGLE_CLIENT_SECRET:e.google_client_secret}})};return console.log("admins: ".concat(null==x?void 0:x.length)),(0,r.jsxs)("div",{className:"w-full m-2 mt-2 p-8",children:[(0,r.jsx)(c,{level:4,children:"Admin Access "}),(0,r.jsxs)(d,{children:[a&&(0,r.jsx)("a",{href:"https://docs.litellm.ai/docs/proxy/ui#restrict-ui-access",children:"Requires SSO Setup"}),(0,r.jsx)("br",{}),(0,r.jsx)("b",{children:"Proxy Admin: "})," Can create keys, teams, users, add models, etc. ",(0,r.jsx)("br",{}),(0,r.jsx)("b",{children:"Proxy Admin Viewer: "}),"Can just view spend. 
They cannot create keys, teams or grant users access to new models."," "]}),(0,r.jsxs)(W.Z,{numItems:1,className:"gap-2 p-2 w-full",children:[(0,r.jsx)(Y.Z,{numColSpan:1,children:(0,r.jsx)(ep.Z,{className:"w-full mx-auto flex-auto overflow-y-auto max-h-[50vh]",children:(0,r.jsxs)(eg.Z,{children:[(0,r.jsx)(eZ.Z,{children:(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(e_.Z,{children:"Member Name"}),(0,r.jsx)(e_.Z,{children:"Role"})]})}),(0,r.jsx)(ey.Z,{children:x?x.map((e,l)=>(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(ef.Z,{children:e.user_email?e.user_email:e.user_id?e.user_id:null}),(0,r.jsx)(ef.Z,{children:e.user_role}),(0,r.jsxs)(ef.Z,{children:[(0,r.jsx)(ej.Z,{icon:eu.Z,size:"sm",onClick:()=>_(!0)}),(0,r.jsx)(er.Z,{title:"Update role",visible:Z,width:800,footer:null,onOk:A,onCancel:E,children:C(P,e.user_role,e.user_id)})]})]},l)):null})]})})}),(0,r.jsx)(Y.Z,{numColSpan:1,children:(0,r.jsxs)("div",{className:"flex justify-start",children:[(0,r.jsx)(J.Z,{className:"mr-4 mb-5",onClick:()=>f(!0),children:"+ Add admin"}),(0,r.jsx)(er.Z,{title:"Add admin",visible:y,width:800,footer:null,onOk:()=>{f(!1),i.resetFields()},onCancel:()=>{f(!1),i.resetFields()},children:I(O)}),(0,r.jsx)(J.Z,{className:"mb-5",onClick:()=>g(!0),children:"+ Add viewer"}),(0,r.jsx)(er.Z,{title:"Add viewer",visible:j,width:800,footer:null,onOk:()=>{g(!1),i.resetFields()},onCancel:()=>{g(!1),i.resetFields()},children:I(T)})]})})]}),(0,r.jsxs)(W.Z,{children:[(0,r.jsx)(c,{level:4,children:"Add SSO"}),(0,r.jsxs)("div",{className:"flex justify-start mb-4",children:[(0,r.jsx)(J.Z,{onClick:()=>b(!0),children:"Add SSO"}),(0,r.jsx)(er.Z,{title:"Add SSO",visible:w,width:800,footer:null,onOk:()=>{b(!1),o.resetFields()},onCancel:()=>{b(!1),o.resetFields()},children:(0,r.jsxs)(ea.Z,{form:o,onFinish:e=>{O(e),F(e),b(!1),v(!0)},labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(ea.Z.Item,{label:"Admin Email",name:"user_email",rules:[{required:!0,message:"Please enter the email of the proxy admin"}],children:(0,r.jsx)(en.Z,{})}),(0,r.jsx)(ea.Z.Item,{label:"PROXY BASE URL",name:"proxy_base_url",rules:[{required:!0,message:"Please enter the proxy base url"}],children:(0,r.jsx)(en.Z,{})}),(0,r.jsx)(ea.Z.Item,{label:"GOOGLE CLIENT ID",name:"google_client_id",rules:[{required:!0,message:"Please enter the google client id"}],children:(0,r.jsx)(en.Z.Password,{})}),(0,r.jsx)(ea.Z.Item,{label:"GOOGLE CLIENT SECRET",name:"google_client_secret",rules:[{required:!0,message:"Please enter the google client secret"}],children:(0,r.jsx)(en.Z.Password,{})})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(ei.ZP,{htmlType:"submit",children:"Save"})})]})}),(0,r.jsxs)(er.Z,{title:"SSO Setup Instructions",visible:k,width:800,footer:null,onOk:S,onCancel:()=>{v(!1)},children:[(0,r.jsx)("p",{children:"Follow these steps to complete the SSO setup:"}),(0,r.jsx)(ee.Z,{className:"mt-2",children:"1. DO NOT Exit this TAB"}),(0,r.jsx)(ee.Z,{className:"mt-2",children:"2. Open a new tab, visit your proxy base url"}),(0,r.jsx)(ee.Z,{className:"mt-2",children:"3. Confirm your SSO is configured correctly and you can login on the new Tab"}),(0,r.jsx)(ee.Z,{className:"mt-2",children:"4. 
If Step 3 is successful, you can close this tab"}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(ei.ZP,{onClick:S,children:"Done"})})]})]}),(0,r.jsxs)(ll.Z,{title:"Login without SSO",color:"teal",children:["If you need to login without sso, you can access ",(0,r.jsxs)("a",{href:l,target:"_blank",children:[(0,r.jsx)("b",{children:l})," "]})]})]})]})},ls=t(42556);let la=[{name:"slack",variables:{LANGFUSE_HOST:null,LANGFUSE_PUBLIC_KEY:null,LANGFUSE_SECRET_KEY:null,OPENMETER_API_KEY:null,SLACK_WEBHOOK_URL:null}},{name:"langfuse",variables:{LANGFUSE_HOST:null,LANGFUSE_PUBLIC_KEY:null,LANGFUSE_SECRET_KEY:null,OPENMETER_API_KEY:null,SLACK_WEBHOOK_URL:null}},{name:"openmeter",variables:{LANGFUSE_HOST:null,LANGFUSE_PUBLIC_KEY:null,LANGFUSE_SECRET_KEY:null,OPENMETER_API_KEY:null,SLACK_WEBHOOK_URL:null}}];var lr=e=>{let{accessToken:l,userRole:t,userID:s}=e,[a,o]=(0,n.useState)(la),[i,c]=(0,n.useState)([]),[d,m]=(0,n.useState)(!1),[h]=ea.Z.useForm(),[x,p]=(0,n.useState)(null),[j,g]=(0,n.useState)([]),[y,f]=(0,n.useState)(""),[Z,_]=(0,n.useState)({}),[w,b]=(0,n.useState)([]),k=e=>{w.includes(e)?b(w.filter(l=>l!==e)):b([...w,e])},v={llm_exceptions:"LLM Exceptions",llm_too_slow:"LLM Responses Too Slow",llm_requests_hanging:"LLM Requests Hanging",budget_alerts:"Budget Alerts (API Keys, Users)",db_exceptions:"Database Exceptions (Read/Write)",daily_reports:"Weekly/Monthly Spend Reports"};(0,n.useEffect)(()=>{l&&t&&s&&q(l,s,t).then(e=>{console.log("callbacks",e);let l=la;o(l=l.map(l=>{let t=e.callbacks.find(e=>e.name===l.name);return t?{...l,variables:{...l.variables,...t.variables}}:l}));let t=e.alerts;if(console.log("alerts_data",t),t&&t.length>0){let e=t[0];console.log("_alert_info",e);let l=e.variables.SLACK_WEBHOOK_URL;console.log("catch_all_webhook",l),b(e.active_alerts),f(l),_(e.alerts_to_webhook)}c(t)})},[l,t,s]);let S=e=>w&&w.includes(e),N=e=>{if(!l)return;let t=Object.fromEntries(Object.entries(e.variables).map(e=>{var l;let[t,s]=e;return[t,(null===(l=document.querySelector('input[name="'.concat(t,'"]')))||void 0===l?void 0:l.value)||s]}));console.log("updatedVariables",t),console.log("updateAlertTypes",j);let s={environment_variables:t,litellm_settings:{success_callback:[e.name]}};try{V(l,s)}catch(e){u.ZP.error("Failed to update callback: "+e,20)}u.ZP.success("Callback updated successfully")},A=()=>{l&&h.validateFields().then(e=>{if(console.log("Form values:",e),"langfuse"===e.callback){V(l,{environment_variables:{LANGFUSE_PUBLIC_KEY:e.langfusePublicKey,LANGFUSE_SECRET_KEY:e.langfusePrivateKey},litellm_settings:{success_callback:[e.callback]}});let t={name:e.callback,variables:{SLACK_WEBHOOK_URL:null,LANGFUSE_HOST:null,LANGFUSE_PUBLIC_KEY:e.langfusePublicKey,LANGFUSE_SECRET_KEY:e.langfusePrivateKey,OPENMETER_API_KEY:null}};o(a?[...a,t]:[t])}else if("slack"===e.callback){console.log("values.slackWebhookUrl: ".concat(e.slackWebhookUrl)),V(l,{general_settings:{alerting:["slack"],alerting_threshold:300},environment_variables:{SLACK_WEBHOOK_URL:e.slackWebhookUrl}}),console.log("values.callback: ".concat(e.callback));let t={name:e.callback,variables:{SLACK_WEBHOOK_URL:e.slackWebhookUrl,LANGFUSE_HOST:null,LANGFUSE_PUBLIC_KEY:null,LANGFUSE_SECRET_KEY:null,OPENMETER_API_KEY:null}};o(a?[...a,t]:[t])}else if("openmeter"==e.callback){console.log("values.openMeterApiKey: ".concat(e.openMeterApiKey)),V(l,{environment_variables:{OPENMETER_API_KEY:e.openMeterApiKey},litellm_settings:{success_callback:[e.callback]}});let 
t={name:e.callback,variables:{SLACK_WEBHOOK_URL:null,LANGFUSE_HOST:null,LANGFUSE_PUBLIC_KEY:null,LANGFUSE_SECRET_KEY:null,OPENMETER_API_KEY:e.openMeterAPIKey}};o(a?[...a,t]:[t])}m(!1),h.resetFields(),p(null)})};return l?(console.log("callbacks: ".concat(a)),(0,r.jsxs)("div",{className:"w-full mx-4",children:[(0,r.jsxs)(W.Z,{numItems:1,className:"gap-2 p-8 w-full mt-2",children:[(0,r.jsx)(ll.Z,{title:"[UI] Presidio PII + Guardrails Coming Soon. https://docs.litellm.ai/docs/proxy/pii_masking",color:"sky"}),(0,r.jsxs)(eM.Z,{children:[(0,r.jsxs)(eL.Z,{variant:"line",defaultValue:"1",children:[(0,r.jsx)(eR.Z,{value:"1",children:"Logging Callbacks"}),(0,r.jsx)(eR.Z,{value:"2",children:"Alerting"})]}),(0,r.jsxs)(eD.Z,{children:[(0,r.jsx)(eU.Z,{children:(0,r.jsx)(ep.Z,{children:(0,r.jsxs)(eg.Z,{children:[(0,r.jsx)(eZ.Z,{children:(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(e_.Z,{children:"Callback"}),(0,r.jsx)(e_.Z,{children:"Callback Env Vars"})]})}),(0,r.jsx)(ey.Z,{children:a.filter(e=>"slack"!==e.name).map((e,t)=>{var s;return(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(ef.Z,{children:(0,r.jsx)(ex.Z,{color:"emerald",children:e.name})}),(0,r.jsxs)(ef.Z,{children:[(0,r.jsx)("ul",{children:Object.entries(null!==(s=e.variables)&&void 0!==s?s:{}).filter(l=>{let[t,s]=l;return t.toLowerCase().includes(e.name)}).map(e=>{let[l,t]=e;return(0,r.jsxs)("li",{children:[(0,r.jsx)(ee.Z,{className:"mt-2",children:l}),"LANGFUSE_HOST"===l?(0,r.jsx)("p",{children:"default value=https://cloud.langfuse.com"}):(0,r.jsx)("div",{}),(0,r.jsx)(H.Z,{name:l,defaultValue:t,type:"password"})]},l)})}),(0,r.jsx)(J.Z,{className:"mt-2",onClick:()=>N(e),children:"Save Changes"}),(0,r.jsx)(J.Z,{onClick:()=>z(l,e.name),className:"mx-2",children:"Test Callback"})]})]},t)})})]})})}),(0,r.jsx)(eU.Z,{children:(0,r.jsxs)(ep.Z,{children:[(0,r.jsxs)(ee.Z,{className:"my-2",children:["Alerts are only supported for Slack Webhook URLs. 
Get your webhook urls from ",(0,r.jsx)("a",{href:"https://api.slack.com/messaging/webhooks",target:"_blank",style:{color:"blue"},children:"here"})]}),(0,r.jsxs)(eg.Z,{children:[(0,r.jsx)(eZ.Z,{children:(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(e_.Z,{}),(0,r.jsx)(e_.Z,{}),(0,r.jsx)(e_.Z,{children:"Slack Webhook URL"})]})}),(0,r.jsx)(ey.Z,{children:Object.entries(v).map((e,l)=>{let[t,s]=e;return(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(ef.Z,{children:(0,r.jsx)(ls.Z,{id:"switch",name:"switch",checked:S(t),onChange:()=>k(t)})}),(0,r.jsx)(ef.Z,{children:(0,r.jsx)(ee.Z,{children:s})}),(0,r.jsx)(ef.Z,{children:(0,r.jsx)(H.Z,{name:t,type:"password",defaultValue:Z&&Z[t]?Z[t]:y})})]},l)})})]}),(0,r.jsx)(J.Z,{size:"xs",className:"mt-2",onClick:()=>{if(!l)return;let e={};Object.entries(v).forEach(l=>{let[t,s]=l,a=document.querySelector('input[name="'.concat(t,'"]'));console.log("key",t),console.log("webhookInput",a);let r=(null==a?void 0:a.value)||"";console.log("newWebhookValue",r),e[t]=r}),console.log("updatedAlertToWebhooks",e);let t={general_settings:{alert_to_webhook_url:e,alert_types:w}};console.log("payload",t);try{V(l,t)}catch(e){u.ZP.error("Failed to update alerts: "+e,20)}u.ZP.success("Alerts updated successfully")},children:"Save Changes"}),(0,r.jsx)(J.Z,{onClick:()=>z(l,"slack"),className:"mx-2",children:"Test Alerts"})]})})]})]})]}),(0,r.jsx)(er.Z,{title:"Add Callback",visible:d,onOk:A,width:800,onCancel:()=>{m(!1),h.resetFields(),p(null)},footer:null,children:(0,r.jsxs)(ea.Z,{form:h,layout:"vertical",onFinish:A,children:[(0,r.jsx)(ea.Z.Item,{label:"Callback",name:"callback",rules:[{required:!0,message:"Please select a callback"}],children:(0,r.jsxs)(es.default,{onChange:e=>{p(e)},children:[(0,r.jsx)(es.default.Option,{value:"langfuse",children:"langfuse"}),(0,r.jsx)(es.default.Option,{value:"openmeter",children:"openmeter"})]})}),"langfuse"===x&&(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(ea.Z.Item,{label:"LANGFUSE_PUBLIC_KEY",name:"langfusePublicKey",rules:[{required:!0,message:"Please enter the public key"}],children:(0,r.jsx)(H.Z,{type:"password"})}),(0,r.jsx)(ea.Z.Item,{label:"LANGFUSE_PRIVATE_KEY",name:"langfusePrivateKey",rules:[{required:!0,message:"Please enter the private key"}],children:(0,r.jsx)(H.Z,{type:"password"})})]}),"openmeter"==x&&(0,r.jsx)(r.Fragment,{children:(0,r.jsx)(ea.Z.Item,{label:"OPENMETER_API_KEY",name:"openMeterApiKey",rules:[{required:!0,message:"Please enter the openmeter api key"}],children:(0,r.jsx)(H.Z,{type:"password"})})}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(ei.ZP,{htmlType:"submit",children:"Save"})})]})})]})):null};let{Option:ln}=es.default;var lo=e=>{let{models:l,accessToken:t,routerSettings:s,setRouterSettings:a}=e,[o]=ea.Z.useForm(),[i,c]=(0,n.useState)(!1),[d,m]=(0,n.useState)("");return(0,r.jsxs)("div",{children:[(0,r.jsx)(J.Z,{className:"mx-auto",onClick:()=>c(!0),children:"+ Add Fallbacks"}),(0,r.jsx)(er.Z,{title:"Add Fallbacks",visible:i,width:800,footer:null,onOk:()=>{c(!1),o.resetFields()},onCancel:()=>{c(!1),o.resetFields()},children:(0,r.jsxs)(ea.Z,{form:o,onFinish:e=>{console.log(e);let{model_name:l,models:r}=e,n=[...s.fallbacks||[],{[l]:r}],i={...s,fallbacks:n};console.log(i);try{V(t,{router_settings:i}),a(i)}catch(e){u.ZP.error("Failed to update router settings: "+e,20)}u.ZP.success("router settings updated successfully"),c(!1),o.resetFields()},labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(ea.Z.Item,{label:"Public Model 
Name",name:"model_name",rules:[{required:!0,message:"Set the model to fallback for"}],help:"required",children:(0,r.jsx)(eb.Z,{defaultValue:d,children:l&&l.map((e,l)=>(0,r.jsx)(ek.Z,{value:e,onClick:()=>m(e),children:e},l))})}),(0,r.jsx)(ea.Z.Item,{label:"Fallback Models",name:"models",rules:[{required:!0,message:"Please select a model"}],help:"required",children:(0,r.jsx)(eK.Z,{value:l,children:l&&l.filter(e=>e!=d).map(e=>(0,r.jsx)(eB.Z,{value:e,children:e},e))})})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(ei.ZP,{htmlType:"submit",children:"Add Fallbacks"})})]})})]})},li=t(12968);async function lc(e,l){console.log("isLocal:",!1);let t=window.location.origin,s=new li.ZP.OpenAI({apiKey:l,baseURL:t,dangerouslyAllowBrowser:!0});try{let l=await s.chat.completions.create({model:e,messages:[{role:"user",content:"Hi, this is a test message"}],mock_testing_fallbacks:!0});u.ZP.success((0,r.jsxs)("span",{children:["Test model=",(0,r.jsx)("strong",{children:e}),", received model=",(0,r.jsx)("strong",{children:l.model}),". See ",(0,r.jsx)("a",{href:"#",onClick:()=>window.open("https://docs.litellm.ai/docs/proxy/reliability","_blank"),style:{textDecoration:"underline",color:"blue"},children:"curl"})]}))}catch(e){u.ZP.error("Error occurred while generating model response. Please try again. Error: ".concat(e),20)}}let ld={ttl:3600,lowest_latency_buffer:0},lm=e=>{let{selectedStrategy:l,strategyArgs:t,paramExplanation:s}=e;return(0,r.jsxs)($.Z,{children:[(0,r.jsx)(Q.Z,{className:"text-sm font-medium text-tremor-content-strong dark:text-dark-tremor-content-strong",children:"Routing Strategy Specific Args"}),(0,r.jsx)(X.Z,{children:"latency-based-routing"==l?(0,r.jsx)(ep.Z,{children:(0,r.jsxs)(eg.Z,{children:[(0,r.jsx)(eZ.Z,{children:(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(e_.Z,{children:"Setting"}),(0,r.jsx)(e_.Z,{children:"Value"})]})}),(0,r.jsx)(ey.Z,{children:Object.entries(t).map(e=>{let[l,t]=e;return(0,r.jsxs)(ew.Z,{children:[(0,r.jsxs)(ef.Z,{children:[(0,r.jsx)(ee.Z,{children:l}),(0,r.jsx)("p",{style:{fontSize:"0.65rem",color:"#808080",fontStyle:"italic"},className:"mt-1",children:s[l]})]}),(0,r.jsx)(ef.Z,{children:(0,r.jsx)(H.Z,{name:l,defaultValue:"object"==typeof t?JSON.stringify(t,null,2):t.toString()})})]},l)})})]})}):(0,r.jsx)(ee.Z,{children:"No specific settings"})})]})};var lu=e=>{let{accessToken:l,userRole:t,userID:s,modelData:a}=e,[o,i]=(0,n.useState)({}),[c,d]=(0,n.useState)(!1),[m]=ea.Z.useForm(),[h,x]=(0,n.useState)(null),[p,j]=(0,n.useState)(null),[g,y]=(0,n.useState)(null),f={routing_strategy_args:"(dict) Arguments to pass to the routing strategy",routing_strategy:"(string) Routing strategy to use",allowed_fails:"(int) Number of times a deployment can fail before being added to cooldown",cooldown_time:"(int) time in seconds to cooldown a deployment after failure",num_retries:"(int) Number of retries for failed requests. Defaults to 0.",timeout:"(float) Timeout for requests. Defaults to None.",retry_after:"(int) Minimum time to wait before retrying a failed request",ttl:"(int) Sliding window to look back over when calculating the average latency of a deployment. Default - 1 hour (in seconds).",lowest_latency_buffer:"(float) Shuffle between deployments within this % of the lowest latency. Default - 0 (i.e. 
always pick lowest latency)."};(0,n.useEffect)(()=>{l&&t&&s&&q(l,s,t).then(e=>{console.log("callbacks",e),i(e.router_settings)})},[l,t,s]);let Z=async e=>{if(l){console.log("received key: ".concat(e)),console.log("routerSettings['fallbacks']: ".concat(o.fallbacks)),o.fallbacks.map(l=>(e in l&&delete l[e],l));try{await V(l,{router_settings:o}),i({...o}),j(o.routing_strategy),u.ZP.success("Router settings updated successfully")}catch(e){u.ZP.error("Failed to update router settings: "+e,20)}}},_=e=>{if(!l)return;console.log("router_settings",e);let t=Object.fromEntries(Object.entries(e).map(e=>{let[l,t]=e;if("routing_strategy_args"!==l&&"routing_strategy"!==l){var s;return[l,(null===(s=document.querySelector('input[name="'.concat(l,'"]')))||void 0===s?void 0:s.value)||t]}if("routing_strategy"==l)return[l,p];if("routing_strategy_args"==l&&"latency-based-routing"==p){let e={},l=document.querySelector('input[name="lowest_latency_buffer"]'),t=document.querySelector('input[name="ttl"]');return(null==l?void 0:l.value)&&(e.lowest_latency_buffer=Number(l.value)),(null==t?void 0:t.value)&&(e.ttl=Number(t.value)),console.log("setRoutingStrategyArgs: ".concat(e)),["routing_strategy_args",e]}return null}).filter(e=>null!=e));console.log("updatedVariables",t);try{V(l,{router_settings:t})}catch(e){u.ZP.error("Failed to update router settings: "+e,20)}u.ZP.success("router settings updated successfully")};return l?(0,r.jsx)("div",{className:"w-full mx-4",children:(0,r.jsxs)(eM.Z,{className:"gap-2 p-8 h-[75vh] w-full mt-2",children:[(0,r.jsxs)(eL.Z,{variant:"line",defaultValue:"1",children:[(0,r.jsx)(eR.Z,{value:"1",children:"General Settings"}),(0,r.jsx)(eR.Z,{value:"2",children:"Fallbacks"})]}),(0,r.jsxs)(eD.Z,{children:[(0,r.jsx)(eU.Z,{children:(0,r.jsxs)(W.Z,{numItems:1,className:"gap-2 p-8 w-full mt-2",children:[(0,r.jsx)(el.Z,{children:"Router Settings"}),(0,r.jsxs)(ep.Z,{children:[(0,r.jsxs)(eg.Z,{children:[(0,r.jsx)(eZ.Z,{children:(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(e_.Z,{children:"Setting"}),(0,r.jsx)(e_.Z,{children:"Value"})]})}),(0,r.jsx)(ey.Z,{children:Object.entries(o).filter(e=>{let[l,t]=e;return"fallbacks"!=l&&"context_window_fallbacks"!=l&&"routing_strategy_args"!=l}).map(e=>{let[l,t]=e;return(0,r.jsxs)(ew.Z,{children:[(0,r.jsxs)(ef.Z,{children:[(0,r.jsx)(ee.Z,{children:l}),(0,r.jsx)("p",{style:{fontSize:"0.65rem",color:"#808080",fontStyle:"italic"},className:"mt-1",children:f[l]})]}),(0,r.jsx)(ef.Z,{children:"routing_strategy"==l?(0,r.jsxs)(eb.Z,{defaultValue:t,className:"w-full max-w-md",onValueChange:j,children:[(0,r.jsx)(ek.Z,{value:"usage-based-routing",children:"usage-based-routing"}),(0,r.jsx)(ek.Z,{value:"latency-based-routing",children:"latency-based-routing"}),(0,r.jsx)(ek.Z,{value:"simple-shuffle",children:"simple-shuffle"})]}):(0,r.jsx)(H.Z,{name:l,defaultValue:"object"==typeof t?JSON.stringify(t,null,2):t.toString()})})]},l)})})]}),(0,r.jsx)(lm,{selectedStrategy:p,strategyArgs:o&&o.routing_strategy_args&&Object.keys(o.routing_strategy_args).length>0?o.routing_strategy_args:ld,paramExplanation:f})]}),(0,r.jsx)(Y.Z,{children:(0,r.jsx)(J.Z,{className:"mt-2",onClick:()=>_(o),children:"Save Changes"})})]})}),(0,r.jsxs)(eU.Z,{children:[(0,r.jsxs)(eg.Z,{children:[(0,r.jsx)(eZ.Z,{children:(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(e_.Z,{children:"Model 
Name"}),(0,r.jsx)(e_.Z,{children:"Fallbacks"})]})}),(0,r.jsx)(ey.Z,{children:o.fallbacks&&o.fallbacks.map((e,t)=>Object.entries(e).map(e=>{let[s,a]=e;return(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(ef.Z,{children:s}),(0,r.jsx)(ef.Z,{children:Array.isArray(a)?a.join(", "):a}),(0,r.jsx)(ef.Z,{children:(0,r.jsx)(J.Z,{onClick:()=>lc(s,l),children:"Test Fallback"})}),(0,r.jsx)(ef.Z,{children:(0,r.jsx)(ej.Z,{icon:eh.Z,size:"sm",onClick:()=>Z(s)})})]},t.toString()+s)}))})]}),(0,r.jsx)(lo,{models:(null==a?void 0:a.data)?a.data.map(e=>e.model_name):[],accessToken:l,routerSettings:o,setRouterSettings:i})]})]})]})}):null},lh=t(67951),lx=e=>{let{}=e;return(0,r.jsx)(r.Fragment,{children:(0,r.jsx)(W.Z,{className:"gap-2 p-8 h-[80vh] w-full mt-2",children:(0,r.jsxs)("div",{className:"mb-5",children:[(0,r.jsx)("p",{className:"text-2xl text-tremor-content-strong dark:text-dark-tremor-content-strong font-semibold",children:"OpenAI Compatible Proxy: API Reference"}),(0,r.jsx)(ee.Z,{className:"mt-2 mb-2",children:"LiteLLM is OpenAI Compatible. This means your API Key works with the OpenAI SDK. Just replace the base_url to point to your litellm proxy. Example Below "}),(0,r.jsxs)(eM.Z,{children:[(0,r.jsxs)(eL.Z,{children:[(0,r.jsx)(eR.Z,{children:"OpenAI Python SDK"}),(0,r.jsx)(eR.Z,{children:"LlamaIndex"}),(0,r.jsx)(eR.Z,{children:"Langchain Py"})]}),(0,r.jsxs)(eD.Z,{children:[(0,r.jsx)(eU.Z,{children:(0,r.jsx)(lh.Z,{language:"python",children:'\nimport openai\nclient = openai.OpenAI(\n api_key="your_api_key",\n base_url="http://0.0.0.0:4000" # LiteLLM Proxy is OpenAI compatible, Read More: https://docs.litellm.ai/docs/proxy/user_keys\n)\n\nresponse = client.chat.completions.create(\n model="gpt-3.5-turbo", # model to send to the proxy\n messages = [\n {\n "role": "user",\n "content": "this is a test request, write a short poem"\n }\n ]\n)\n\nprint(response)\n '})}),(0,r.jsx)(eU.Z,{children:(0,r.jsx)(lh.Z,{language:"python",children:'\nimport os, dotenv\n\nfrom llama_index.llms import AzureOpenAI\nfrom llama_index.embeddings import AzureOpenAIEmbedding\nfrom llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext\n\nllm = AzureOpenAI(\n engine="azure-gpt-3.5", # model_name on litellm proxy\n temperature=0.0,\n azure_endpoint="http://0.0.0.0:4000", # litellm proxy endpoint\n api_key="sk-1234", # litellm proxy API Key\n api_version="2023-07-01-preview",\n)\n\nembed_model = AzureOpenAIEmbedding(\n deployment_name="azure-embedding-model",\n azure_endpoint="http://0.0.0.0:4000",\n api_key="sk-1234",\n api_version="2023-07-01-preview",\n)\n\n\ndocuments = SimpleDirectoryReader("llama_index_data").load_data()\nservice_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)\nindex = VectorStoreIndex.from_documents(documents, service_context=service_context)\n\nquery_engine = index.as_query_engine()\nresponse = query_engine.query("What did the author do growing up?")\nprint(response)\n\n '})}),(0,r.jsx)(eU.Z,{children:(0,r.jsx)(lh.Z,{language:"python",children:'\nfrom langchain.chat_models import ChatOpenAI\nfrom langchain.prompts.chat import (\n ChatPromptTemplate,\n HumanMessagePromptTemplate,\n SystemMessagePromptTemplate,\n)\nfrom langchain.schema import HumanMessage, SystemMessage\n\nchat = ChatOpenAI(\n openai_api_base="http://0.0.0.0:4000",\n model = "gpt-3.5-turbo",\n temperature=0.1\n)\n\nmessages = [\n SystemMessage(\n content="You are a helpful assistant that im using to make a test request to."\n ),\n HumanMessage(\n content="test from litellm. 
tell me why it\'s amazing in 1 sentence"\n ),\n]\nresponse = chat(messages)\n\nprint(response)\n\n '})})]})]})]})})})};async function lp(e,l,t,s){console.log("isLocal:",!1);let a=window.location.origin,r=new li.ZP.OpenAI({apiKey:s,baseURL:a,dangerouslyAllowBrowser:!0});try{for await(let s of(await r.chat.completions.create({model:t,stream:!0,messages:[{role:"user",content:e}]})))console.log(s),s.choices[0].delta.content&&l(s.choices[0].delta.content)}catch(e){u.ZP.error("Error occurred while generating model response. Please try again. Error: ".concat(e),20)}}var lj=e=>{let{accessToken:l,token:t,userRole:s,userID:a}=e,[o,i]=(0,n.useState)(""),[c,d]=(0,n.useState)(""),[m,u]=(0,n.useState)([]),[h,x]=(0,n.useState)(void 0),[p,j]=(0,n.useState)([]);(0,n.useEffect)(()=>{l&&t&&s&&a&&(async()=>{try{let e=await N(l,a,s);if(console.log("model_info:",e),(null==e?void 0:e.data.length)>0){let l=e.data.map(e=>({value:e.id,label:e.id}));console.log(l),j(l),x(e.data[0].id)}}catch(e){console.error("Error fetching model info:",e)}})()},[l,a,s]);let g=(e,l)=>{u(t=>{let s=t[t.length-1];return s&&s.role===e?[...t.slice(0,t.length-1),{role:e,content:s.content+l}]:[...t,{role:e,content:l}]})},y=async()=>{if(""!==c.trim()&&o&&t&&s&&a){u(e=>[...e,{role:"user",content:c}]);try{h&&await lp(c,e=>g("assistant",e),h,o)}catch(e){console.error("Error fetching model response",e),g("assistant","Error fetching model response")}d("")}};if(s&&"Admin Viewer"==s){let{Title:e,Paragraph:l}=eT.default;return(0,r.jsxs)("div",{children:[(0,r.jsx)(e,{level:1,children:"Access Denied"}),(0,r.jsx)(l,{children:"Ask your proxy admin for access to test models"})]})}return(0,r.jsx)("div",{style:{width:"100%",position:"relative"},children:(0,r.jsx)(W.Z,{className:"gap-2 p-8 h-[80vh] w-full mt-2",children:(0,r.jsx)(ep.Z,{children:(0,r.jsxs)(eM.Z,{children:[(0,r.jsx)(eL.Z,{children:(0,r.jsx)(eR.Z,{children:"Chat"})}),(0,r.jsx)(eD.Z,{children:(0,r.jsxs)(eU.Z,{children:[(0,r.jsx)("div",{className:"sm:max-w-2xl",children:(0,r.jsxs)(W.Z,{numItems:2,children:[(0,r.jsxs)(Y.Z,{children:[(0,r.jsx)(ee.Z,{children:"API Key"}),(0,r.jsx)(H.Z,{placeholder:"Type API Key here",type:"password",onValueChange:i,value:o})]}),(0,r.jsxs)(Y.Z,{className:"mx-2",children:[(0,r.jsx)(ee.Z,{children:"Select Model:"}),(0,r.jsx)(es.default,{placeholder:"Select a Model",onChange:e=>{console.log("selected ".concat(e)),x(e)},options:p,style:{width:"200px"}})]})]})}),(0,r.jsxs)(eg.Z,{className:"mt-5",style:{display:"block",maxHeight:"60vh",overflowY:"auto"},children:[(0,r.jsx)(eZ.Z,{children:(0,r.jsx)(ew.Z,{children:(0,r.jsx)(ef.Z,{})})}),(0,r.jsx)(ey.Z,{children:m.map((e,l)=>(0,r.jsx)(ew.Z,{children:(0,r.jsx)(ef.Z,{children:"".concat(e.role,": ").concat(e.content)})},l))})]}),(0,r.jsx)("div",{className:"mt-3",style:{position:"absolute",bottom:5,width:"95%"},children:(0,r.jsxs)("div",{className:"flex",children:[(0,r.jsx)(H.Z,{type:"text",value:c,onChange:e=>d(e.target.value),placeholder:"Type your message..."}),(0,r.jsx)(J.Z,{onClick:y,className:"ml-2",children:"Send"})]})})]})})]})})})})},lg=t(33509),ly=t(95781);let{Sider:lf}=lg.default;var lZ=e=>{let{setPage:l,userRole:t,defaultSelectedKey:s}=e;return"Admin Viewer"==t?(0,r.jsx)(lg.default,{style:{minHeight:"100vh",maxWidth:"120px"},children:(0,r.jsx)(lf,{width:120,children:(0,r.jsxs)(ly.Z,{mode:"inline",defaultSelectedKeys:s||["4"],style:{height:"100%",borderRight:0},children:[(0,r.jsx)(ly.Z.Item,{onClick:()=>l("api-keys"),children:"API 
Keys"},"4"),(0,r.jsx)(ly.Z.Item,{onClick:()=>l("models"),children:"Models"},"2"),(0,r.jsx)(ly.Z.Item,{onClick:()=>l("llm-playground"),children:"Chat UI"},"3"),(0,r.jsx)(ly.Z.Item,{onClick:()=>l("usage"),children:"Usage"},"1")]})})}):(0,r.jsx)(lg.default,{style:{minHeight:"100vh",maxWidth:"145px"},children:(0,r.jsx)(lf,{width:145,children:(0,r.jsxs)(ly.Z,{mode:"inline",defaultSelectedKeys:s||["1"],style:{height:"100%",borderRight:0},children:[(0,r.jsx)(ly.Z.Item,{onClick:()=>l("api-keys"),children:(0,r.jsx)(ee.Z,{children:"API Keys"})},"1"),(0,r.jsx)(ly.Z.Item,{onClick:()=>l("llm-playground"),children:(0,r.jsx)(ee.Z,{children:"Test Key"})},"3"),"Admin"==t?(0,r.jsx)(ly.Z.Item,{onClick:()=>l("models"),children:(0,r.jsx)(ee.Z,{children:"Models"})},"2"):null,"Admin"==t?(0,r.jsx)(ly.Z.Item,{onClick:()=>l("usage"),children:(0,r.jsx)(ee.Z,{children:"Usage"})},"4"):null,"Admin"==t?(0,r.jsx)(ly.Z.Item,{onClick:()=>l("teams"),children:(0,r.jsx)(ee.Z,{children:"Teams"})},"6"):null,"Admin"==t?(0,r.jsx)(ly.Z.Item,{onClick:()=>l("users"),children:(0,r.jsx)(ee.Z,{children:"Users"})},"5"):null,"Admin"==t?(0,r.jsx)(ly.Z.Item,{onClick:()=>l("settings"),children:(0,r.jsx)(ee.Z,{children:"Logging & Alerts"})},"8"):null,"Admin"==t?(0,r.jsx)(ly.Z.Item,{onClick:()=>l("general-settings"),children:(0,r.jsx)(ee.Z,{children:"Router Settings"})},"9"):null,"Admin"==t?(0,r.jsx)(ly.Z.Item,{onClick:()=>l("admin-panel"),children:(0,r.jsx)(ee.Z,{children:"Admin"})},"7"):null,(0,r.jsx)(ly.Z.Item,{onClick:()=>l("api_ref"),children:(0,r.jsx)(ee.Z,{children:"API Reference"})},"11")]})})})},l_=t(67989),lw=e=>{let{accessToken:l,token:t,userRole:s,userID:a,keys:o}=e,i=new Date,[c,d]=(0,n.useState)([]),[m,u]=(0,n.useState)([]),[h,x]=(0,n.useState)([]),[p,j]=(0,n.useState)([]),[g,y]=(0,n.useState)([]),[f,Z]=(0,n.useState)([]),[_,w]=(0,n.useState)([]),[b,k]=(0,n.useState)([]),[v,S]=(0,n.useState)(""),[N,R]=(0,n.useState)({from:new Date(Date.now()-6048e5),to:new Date}),M=new Date(i.getFullYear(),i.getMonth(),1),L=new Date(i.getFullYear(),i.getMonth()+1,0),U=z(M),D=z(L);console.log("keys in usage",o);let K=async(e,t,s)=>{if(!e||!t||!l)return;console.log("uiSelectedKey",s);let a=await T(l,s,e.toISOString(),t.toISOString());console.log("End user data updated successfully",a),j(a)},B=async(e,t)=>{e&&t&&l&&(Z((await E(l,e.toISOString(),t.toISOString())).spend_per_tag),console.log("Tag spend data updated successfully"))};function z(e){let l=e.getFullYear(),t=e.getMonth()+1,s=e.getDate();return"".concat(l,"-").concat(t<10?"0"+t:t,"-").concat(s<10?"0"+s:s)}return console.log("Start date is ".concat(U)),console.log("End date is ".concat(D)),(0,n.useEffect)(()=>{l&&t&&s&&a&&(async()=>{try{if(console.log("user role: ".concat(s)),"Admin"==s||"Admin Viewer"==s){var e,r;let t=await C(l);d(t);let s=(await P(l)).map(e=>({key:(e.key_name||e.key_alias||e.api_key).substring(0,10),spend:e.total_spend}));u(s);let a=(await O(l)).map(e=>({key:e.model,spend:e.total_spend}));x(a);let n=await A(l);console.log("teamSpend",n),y(n.daily_spend),w(n.teams);let o=n.total_spend_per_team;o=o.map(e=>(e.name=e.team_id||"",e.value=e.total_spend||0,e)),k(o);let i=await E(l,null===(e=N.from)||void 0===e?void 0:e.toISOString(),null===(r=N.to)||void 0===r?void 0:r.toISOString());Z(i.spend_per_tag);let c=await T(l,null,void 0,void 0);j(c),console.log("spend/user result",c)}else"App Owner"==s&&await I(l,t,s,a,U,D).then(async e=>{if(console.log("result from spend logs call",e),"daily_spend"in e){let l=e.daily_spend;console.log("daily spend",l),d(l);let 
t=e.top_api_keys;u(t)}else{let t=(await F(l,function(e){let l=[];e.forEach(e=>{Object.entries(e).forEach(e=>{let[t,s]=e;"spend"!==t&&"startTime"!==t&&"models"!==t&&"users"!==t&&l.push({key:t,spend:s})})}),l.sort((e,l)=>Number(l.spend)-Number(e.spend));let t=l.slice(0,5).map(e=>e.key);return console.log("topKeys: ".concat(Object.keys(t[0]))),t}(e))).info.map(e=>({key:(e.key_name||e.key_alias).substring(0,10),spend:e.spend}));u(t),d(e)}})}catch(e){console.error("There was an error fetching the data",e)}})()},[l,t,s,a,U,D]),(0,r.jsxs)("div",{style:{width:"100%"},className:"p-8",children:[(0,r.jsx)(eE,{userID:a,userRole:s,accessToken:l,userSpend:null,selectedTeam:null}),(0,r.jsxs)(eM.Z,{children:[(0,r.jsxs)(eL.Z,{className:"mt-2",children:[(0,r.jsx)(eR.Z,{children:"All Up"}),(0,r.jsx)(eR.Z,{children:"Team Based Usage"}),(0,r.jsx)(eR.Z,{children:"End User Usage"}),(0,r.jsx)(eR.Z,{children:"Tag Based Usage"})]}),(0,r.jsxs)(eD.Z,{children:[(0,r.jsx)(eU.Z,{children:(0,r.jsxs)(W.Z,{numItems:2,className:"gap-2 h-[75vh] w-full",children:[(0,r.jsx)(Y.Z,{numColSpan:2,children:(0,r.jsxs)(ep.Z,{children:[(0,r.jsx)(el.Z,{children:"Monthly Spend"}),(0,r.jsx)(eq.Z,{data:c,index:"date",categories:["spend"],colors:["blue"],valueFormatter:e=>"$ ".concat(new Intl.NumberFormat("us").format(e).toString()),yAxisWidth:100,tickGap:5})]})}),(0,r.jsx)(Y.Z,{numColSpan:1,children:(0,r.jsxs)(ep.Z,{children:[(0,r.jsx)(el.Z,{children:"Top API Keys"}),(0,r.jsx)(eq.Z,{className:"mt-4 h-40",data:m,index:"key",categories:["spend"],colors:["blue"],yAxisWidth:80,tickGap:5,layout:"vertical",showXAxis:!1,showLegend:!1})]})}),(0,r.jsx)(Y.Z,{numColSpan:1,children:(0,r.jsxs)(ep.Z,{children:[(0,r.jsx)(el.Z,{children:"Top Models"}),(0,r.jsx)(eq.Z,{className:"mt-4 h-40",data:h,index:"key",categories:["spend"],colors:["blue"],yAxisWidth:200,layout:"vertical",showXAxis:!1,showLegend:!1})]})}),(0,r.jsx)(Y.Z,{numColSpan:1})]})}),(0,r.jsx)(eU.Z,{children:(0,r.jsxs)(W.Z,{numItems:2,className:"gap-2 h-[75vh] w-full",children:[(0,r.jsxs)(Y.Z,{numColSpan:2,children:[(0,r.jsxs)(ep.Z,{className:"mb-2",children:[(0,r.jsx)(el.Z,{children:"Total Spend Per Team"}),(0,r.jsx)(l_.Z,{data:b})]}),(0,r.jsxs)(ep.Z,{children:[(0,r.jsx)(el.Z,{children:"Daily Spend Per Team"}),(0,r.jsx)(eq.Z,{className:"h-72",data:g,showLegend:!0,index:"date",categories:_,yAxisWidth:80,colors:["blue","green","yellow","red","purple"],stack:!0})]})]}),(0,r.jsx)(Y.Z,{numColSpan:2})]})}),(0,r.jsxs)(eU.Z,{children:[(0,r.jsxs)("p",{className:"mb-2 text-gray-500 italic text-[12px]",children:["End-Users of your LLM API calls. 
Tracked when a `user` param is passed in your LLM calls ",(0,r.jsx)("a",{className:"text-blue-500",href:"https://docs.litellm.ai/docs/proxy/users",target:"_blank",children:"docs here"})]}),(0,r.jsxs)(W.Z,{numItems:2,children:[(0,r.jsxs)(Y.Z,{children:[(0,r.jsx)(ee.Z,{children:"Select Time Range"}),(0,r.jsx)(eF.Z,{enableSelect:!0,value:N,onValueChange:e=>{R(e),K(e.from,e.to,null)}})]}),(0,r.jsxs)(Y.Z,{children:[(0,r.jsx)(ee.Z,{children:"Select Key"}),(0,r.jsxs)(eb.Z,{defaultValue:"all-keys",children:[(0,r.jsx)(ek.Z,{value:"all-keys",onClick:()=>{K(N.from,N.to,null)},children:"All Keys"},"all-keys"),null==o?void 0:o.map((e,l)=>e&&null!==e.key_alias&&e.key_alias.length>0?(0,r.jsx)(ek.Z,{value:String(l),onClick:()=>{K(N.from,N.to,e.token)},children:e.key_alias},l):null)]})]})]}),(0,r.jsx)(ep.Z,{className:"mt-4",children:(0,r.jsxs)(eg.Z,{className:"max-h-[70vh] min-h-[500px]",children:[(0,r.jsx)(eZ.Z,{children:(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(e_.Z,{children:"End User"}),(0,r.jsx)(e_.Z,{children:"Spend"}),(0,r.jsx)(e_.Z,{children:"Total Events"})]})}),(0,r.jsx)(ey.Z,{children:null==p?void 0:p.map((e,l)=>{var t;return(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(ef.Z,{children:e.end_user}),(0,r.jsx)(ef.Z,{children:null===(t=e.total_spend)||void 0===t?void 0:t.toFixed(4)}),(0,r.jsx)(ef.Z,{children:e.total_count})]},l)})})]})})]}),(0,r.jsx)(eU.Z,{children:(0,r.jsxs)(W.Z,{numItems:2,className:"gap-2 h-[75vh] w-full mb-4",children:[(0,r.jsxs)(Y.Z,{numColSpan:2,children:[(0,r.jsx)(eF.Z,{className:"mb-4",enableSelect:!0,value:N,onValueChange:e=>{R(e),B(e.from,e.to)}}),(0,r.jsxs)(ep.Z,{children:[(0,r.jsx)(el.Z,{children:"Spend Per Tag"}),(0,r.jsxs)(ee.Z,{children:["Get Started Tracking cost per tag ",(0,r.jsx)("a",{className:"text-blue-500",href:"https://docs.litellm.ai/docs/proxy/enterprise#tracking-spend-for-custom-tags",target:"_blank",children:"here"})]}),(0,r.jsx)(eq.Z,{className:"h-72",data:f,index:"name",categories:["spend"],colors:["blue"]})]})]}),(0,r.jsx)(Y.Z,{numColSpan:2})]})})]})]})]})},lb=()=>{let{Title:e,Paragraph:l}=eT.default,[t,s]=(0,n.useState)(""),[a,i]=(0,n.useState)(null),[c,d]=(0,n.useState)(null),[u,h]=(0,n.useState)(null),[x,p]=(0,n.useState)(!0),j=(0,o.useSearchParams)(),[g,y]=(0,n.useState)({data:[]}),f=j.get("userID"),Z=j.get("token"),[_,w]=(0,n.useState)("api-keys"),[b,k]=(0,n.useState)(null);return(0,n.useEffect)(()=>{if(Z){let e=(0,eP.o)(Z);if(e){if(console.log("Decoded token:",e),console.log("Decoded key:",e.key),k(e.key),e.user_role){let l=function(e){if(!e)return"Undefined Role";switch(console.log("Received user role: ".concat(e.toLowerCase())),console.log("Received user role length: ".concat(e.toLowerCase().length)),e.toLowerCase()){case"app_owner":case"demo_app_owner":return"App Owner";case"app_admin":case"proxy_admin":return"Admin";case"proxy_admin_viewer":return"Admin Viewer";case"app_user":return"App User";default:return"Unknown Role"}}(e.user_role);console.log("Decoded user_role:",l),s(l),"Admin Viewer"==l&&w("usage")}else console.log("User role not defined");e.user_email?i(e.user_email):console.log("User Email is not set ".concat(e)),e.login_method?p("username_password"==e.login_method):console.log("User Email is not set ".concat(e))}}},[Z]),(0,r.jsx)(n.Suspense,{fallback:(0,r.jsx)("div",{children:"Loading..."}),children:(0,r.jsxs)("div",{className:"flex flex-col min-h-screen",children:[(0,r.jsx)(m,{userID:f,userRole:t,userEmail:a,showSSOBanner:x}),(0,r.jsxs)("div",{className:"flex flex-1 
overflow-auto",children:[(0,r.jsx)("div",{className:"mt-8",children:(0,r.jsx)(lZ,{setPage:w,userRole:t,defaultSelectedKey:null})}),"api-keys"==_?(0,r.jsx)(eO,{userID:f,userRole:t,teams:c,keys:u,setUserRole:s,userEmail:a,setUserEmail:i,setTeams:d,setKeys:h}):"models"==_?(0,r.jsx)(e3,{userID:f,userRole:t,token:Z,accessToken:b,modelData:g,setModelData:y}):"llm-playground"==_?(0,r.jsx)(lj,{userID:f,userRole:t,token:Z,accessToken:b}):"users"==_?(0,r.jsx)(e9,{userID:f,userRole:t,token:Z,keys:u,teams:c,accessToken:b,setKeys:h}):"teams"==_?(0,r.jsx)(le,{teams:c,setTeams:d,searchParams:j,accessToken:b,userID:f,userRole:t}):"admin-panel"==_?(0,r.jsx)(lt,{setTeams:d,searchParams:j,accessToken:b,showSSOBanner:x}):"api_ref"==_?(0,r.jsx)(lx,{}):"settings"==_?(0,r.jsx)(lr,{userID:f,userRole:t,accessToken:b}):"general-settings"==_?(0,r.jsx)(lu,{userID:f,userRole:t,accessToken:b,modelData:g}):(0,r.jsx)(lw,{userID:f,userRole:t,token:Z,accessToken:b,keys:u})]})]})})}}},function(e){e.O(0,[936,884,971,69,744],function(){return e(e.s=20661)}),_N_E=e.O()}]); \ No newline at end of file diff --git a/ui/litellm-dashboard/out/_next/static/chunks/app/page-f20fdea77aed85ba.js b/ui/litellm-dashboard/out/_next/static/chunks/app/page-f20fdea77aed85ba.js new file mode 100644 index 0000000000..3db3281fa9 --- /dev/null +++ b/ui/litellm-dashboard/out/_next/static/chunks/app/page-f20fdea77aed85ba.js @@ -0,0 +1 @@ +(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[931],{20661:function(e,l,t){Promise.resolve().then(t.bind(t,4858))},4858:function(e,l,t){"use strict";t.r(l),t.d(l,{default:function(){return lS}});var s,a,r=t(3827),n=t(64090),o=t(47907),i=t(8792),c=t(40491),d=t(65270),m=e=>{let{userID:l,userRole:t,userEmail:s,showSSOBanner:a}=e;console.log("User ID:",l),console.log("userEmail:",s),console.log("showSSOBanner:",a);let n=[{key:"1",label:(0,r.jsxs)(r.Fragment,{children:[(0,r.jsxs)("p",{children:["Role: ",t]}),(0,r.jsxs)("p",{children:["ID: ",l]})]})}];return(0,r.jsxs)("nav",{className:"left-0 right-0 top-0 flex justify-between items-center h-12 mb-4",children:[(0,r.jsx)("div",{className:"text-left my-2 absolute top-0 left-0",children:(0,r.jsx)("div",{className:"flex flex-col items-center",children:(0,r.jsx)(i.default,{href:"/",children:(0,r.jsx)("button",{className:"text-gray-800 rounded text-center",children:(0,r.jsx)("img",{src:"/get_image",width:160,height:160,alt:"LiteLLM Brand",className:"mr-2"})})})})}),(0,r.jsxs)("div",{className:"text-right mx-4 my-2 absolute top-0 right-0 flex items-center justify-end space-x-2",children:[a?(0,r.jsx)("div",{style:{padding:"6px",borderRadius:"8px"},children:(0,r.jsx)("a",{href:"https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat",target:"_blank",style:{fontSize:"14px",textDecoration:"underline"},children:"Request hosted proxy"})}):null,(0,r.jsx)("div",{style:{border:"1px solid #391085",padding:"6px",borderRadius:"8px"},children:(0,r.jsx)(c.Z,{menu:{items:n},children:(0,r.jsx)(d.Z,{children:s})})})]})]})},u=t(80588);let h=async()=>{try{let e=await fetch("https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"),l=await e.json();return console.log("received data: ".concat(l)),l}catch(e){throw console.error("Failed to get model cost map:",e),e}},x=async(e,l)=>{try{let t=await fetch("/model/new",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...l})});if(!t.ok){let e=await t.text();throw u.ZP.error("Failed to create key: "+e,10),console.error("Error response from 
the server:",e),Error("Network response was not ok")}let s=await t.json();return console.log("API Response:",s),u.ZP.success("Model created successfully. Wait 60s and refresh on 'All Models' page"),s}catch(e){throw console.error("Failed to create key:",e),e}},p=async(e,l)=>{console.log("model_id in model delete call: ".concat(l));try{let t=await fetch("/model/delete",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({id:l})});if(!t.ok){let e=await t.text();throw u.ZP.error("Failed to create key: "+e,10),console.error("Error response from the server:",e),Error("Network response was not ok")}let s=await t.json();return console.log("API Response:",s),u.ZP.success("Model deleted successfully. Restart server to see this."),s}catch(e){throw console.error("Failed to create key:",e),e}},j=async(e,l,t)=>{try{if(console.log("Form Values in keyCreateCall:",t),t.description&&(t.metadata||(t.metadata={}),t.metadata.description=t.description,delete t.description,t.metadata=JSON.stringify(t.metadata)),t.metadata){console.log("formValues.metadata:",t.metadata);try{t.metadata=JSON.parse(t.metadata)}catch(e){throw u.ZP.error("Failed to parse metadata: "+e,10),Error("Failed to parse metadata: "+e)}}console.log("Form Values after check:",t);let s=await fetch("/key/generate",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({user_id:l,...t})});if(!s.ok){let e=await s.text();throw u.ZP.error("Failed to create key: "+e,10),console.error("Error response from the server:",e),Error("Network response was not ok")}let a=await s.json();return console.log("API Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},g=async(e,l,t)=>{try{if(console.log("Form Values in keyCreateCall:",t),t.description&&(t.metadata||(t.metadata={}),t.metadata.description=t.description,delete t.description,t.metadata=JSON.stringify(t.metadata)),t.metadata){console.log("formValues.metadata:",t.metadata);try{t.metadata=JSON.parse(t.metadata)}catch(e){throw u.ZP.error("Failed to parse metadata: "+e,10),Error("Failed to parse metadata: "+e)}}console.log("Form Values after check:",t);let s=await fetch("/user/new",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({user_id:l,...t})});if(!s.ok){let e=await s.text();throw u.ZP.error("Failed to create key: "+e,10),console.error("Error response from the server:",e),Error("Network response was not ok")}let a=await s.json();return console.log("API Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},y=async(e,l)=>{try{console.log("in keyDeleteCall:",l);let t=await fetch("/key/delete",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({keys:[l]})});if(!t.ok){let e=await t.text();throw u.ZP.error("Failed to delete key: "+e,10),Error("Network response was not ok")}let s=await t.json();return console.log(s),s}catch(e){throw console.error("Failed to create key:",e),e}},f=async(e,l)=>{try{console.log("in teamDeleteCall:",l);let t=await fetch("/team/delete",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({team_ids:[l]})});if(!t.ok){let e=await t.text();throw u.ZP.error("Failed to delete team: "+e,10),Error("Network response was not ok")}let s=await t.json();return console.log(s),s}catch(e){throw console.error("Failed to delete key:",e),e}},Z=async 
function(e,l,t){let s=arguments.length>3&&void 0!==arguments[3]&&arguments[3],a=arguments.length>4?arguments[4]:void 0,r=arguments.length>5?arguments[5]:void 0;try{let n="/user/info";"App Owner"==t&&l&&(n="".concat(n,"?user_id=").concat(l)),"App User"==t&&l&&(n="".concat(n,"?user_id=").concat(l)),console.log("in userInfoCall viewAll=",s),s&&r&&null!=a&&void 0!=a&&(n="".concat(n,"?view_all=true&page=").concat(a,"&page_size=").concat(r));let o=await fetch(n,{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let i=await o.json();return console.log("API Response:",i),i}catch(e){throw console.error("Failed to create key:",e),e}},_=async(e,l)=>{try{let t="/team/info";l&&(t="".concat(t,"?team_id=").concat(l)),console.log("in teamInfoCall");let s=await fetch(t,{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!s.ok){let e=await s.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let a=await s.json();return console.log("API Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},w=async e=>{try{let l=await fetch("/global/spend",{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!l.ok){let e=await l.text();throw u.ZP.error(e,10),Error("Network response was not ok")}return await l.json()}catch(e){throw console.error("Failed to create key:",e),e}},b=async(e,l,t)=>{try{let l=await fetch("/v2/model/info",{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!l.ok){let e=await l.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let t=await l.json();return console.log("modelInfoCall:",t),t}catch(e){throw console.error("Failed to create key:",e),e}},k=async(e,l,t,s,a,r)=>{try{let l="/model/metrics";s&&(l="".concat(l,"?_selected_model_group=").concat(s,"&startTime=").concat(a,"&endTime=").concat(r));let t=await fetch(l,{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!t.ok){let e=await t.text();throw u.ZP.error(e,10),Error("Network response was not ok")}return await t.json()}catch(e){throw console.error("Failed to create key:",e),e}},v=async(e,l,t,s,a,r)=>{try{let l="/model/metrics/slow_responses";s&&(l="".concat(l,"?_selected_model_group=").concat(s,"&startTime=").concat(a,"&endTime=").concat(r));let t=await fetch(l,{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!t.ok){let e=await t.text();throw u.ZP.error(e,10),Error("Network response was not ok")}return await t.json()}catch(e){throw console.error("Failed to create key:",e),e}},S=async(e,l,t,s,a,r)=>{try{let l="/model/metrics/exceptions";s&&(l="".concat(l,"?_selected_model_group=").concat(s,"&startTime=").concat(a,"&endTime=").concat(r));let t=await fetch(l,{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!t.ok){let e=await t.text();throw u.ZP.error(e,10),Error("Network response was not ok")}return await t.json()}catch(e){throw console.error("Failed to create key:",e),e}},N=async(e,l,t)=>{try{let l=await fetch("/models",{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!l.ok){let e=await l.text();throw u.ZP.error(e,10),Error("Network response was not ok")}return await l.json()}catch(e){throw console.error("Failed to create key:",e),e}},A=async 
e=>{try{let l="/global/spend/teams";console.log("in teamSpendLogsCall:",l);let t=await fetch("".concat(l),{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!t.ok){let e=await t.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let s=await t.json();return console.log(s),s}catch(e){throw console.error("Failed to create key:",e),e}},E=async(e,l,t)=>{try{let s="/global/spend/tags";l&&t&&(s="".concat(s,"?start_date=").concat(l,"&end_date=").concat(t)),console.log("in tagsSpendLogsCall:",s);let a=await fetch("".concat(s),{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!a.ok)throw await a.text(),Error("Network response was not ok");let r=await a.json();return console.log(r),r}catch(e){throw console.error("Failed to create key:",e),e}},I=async(e,l,t,s,a,r)=>{try{console.log("user role in spend logs call: ".concat(t));let l="/spend/logs";l="App Owner"==t?"".concat(l,"?user_id=").concat(s,"&start_date=").concat(a,"&end_date=").concat(r):"".concat(l,"?start_date=").concat(a,"&end_date=").concat(r);let n=await fetch(l,{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!n.ok){let e=await n.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let o=await n.json();return console.log(o),o}catch(e){throw console.error("Failed to create key:",e),e}},C=async e=>{try{let l=await fetch("/global/spend/logs",{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!l.ok){let e=await l.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let t=await l.json();return console.log(t),t}catch(e){throw console.error("Failed to create key:",e),e}},T=async e=>{try{let l=await fetch("/global/spend/keys?limit=5",{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!l.ok){let e=await l.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let t=await l.json();return console.log(t),t}catch(e){throw console.error("Failed to create key:",e),e}},P=async(e,l,t,s)=>{try{let a="";a=l?JSON.stringify({api_key:l,startTime:t,endTime:s}):JSON.stringify({startTime:t,endTime:s});let r={method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}};r.body=a;let n=await fetch("/global/spend/end_users",r);if(!n.ok){let e=await n.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let o=await n.json();return console.log(o),o}catch(e){throw console.error("Failed to create key:",e),e}},O=async e=>{try{let l=await fetch("/global/spend/models?limit=5",{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!l.ok){let e=await l.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let t=await l.json();return console.log(t),t}catch(e){throw console.error("Failed to create key:",e),e}},F=async(e,l)=>{try{let t=await fetch("/v2/key/info",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({keys:l})});if(!t.ok){let e=await t.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let s=await t.json();return console.log(s),s}catch(e){throw console.error("Failed to create key:",e),e}},R=async(e,l)=>{try{let t="/user/get_users?role=".concat(l);console.log("in userGetAllUsersCall:",t);let s=await fetch(t,{method:"GET",headers:{Authorization:"Bearer 
".concat(e),"Content-Type":"application/json"}});if(!s.ok){let e=await s.text();throw u.ZP.error("Failed to delete key: "+e,10),Error("Network response was not ok")}let a=await s.json();return console.log(a),a}catch(e){throw console.error("Failed to get requested models:",e),e}},M=async(e,l)=>{try{console.log("Form Values in teamCreateCall:",l);let t=await fetch("/team/new",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...l})});if(!t.ok){let e=await t.text();throw u.ZP.error("Failed to create key: "+e,10),console.error("Error response from the server:",e),Error("Network response was not ok")}let s=await t.json();return console.log("API Response:",s),s}catch(e){throw console.error("Failed to create key:",e),e}},L=async(e,l)=>{try{console.log("Form Values in keyUpdateCall:",l);let t=await fetch("/key/update",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...l})});if(!t.ok){let e=await t.text();throw u.ZP.error("Failed to update key: "+e,10),console.error("Error response from the server:",e),Error("Network response was not ok")}let s=await t.json();return console.log("Update key Response:",s),s}catch(e){throw console.error("Failed to create key:",e),e}},U=async(e,l)=>{try{console.log("Form Values in teamUpateCall:",l);let t=await fetch("/team/update",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...l})});if(!t.ok){let e=await t.text();throw u.ZP.error("Failed to update team: "+e,10),console.error("Error response from the server:",e),Error("Network response was not ok")}let s=await t.json();return console.log("Update Team Response:",s),s}catch(e){throw console.error("Failed to create key:",e),e}},B=async(e,l)=>{try{console.log("Form Values in modelUpateCall:",l);let t=await fetch("/model/update",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...l})});if(!t.ok){let e=await t.text();throw u.ZP.error("Failed to update model: "+e,10),console.error("Error update from the server:",e),Error("Network response was not ok")}let s=await t.json();return console.log("Update model Response:",s),s}catch(e){throw console.error("Failed to update model:",e),e}},D=async(e,l,t)=>{try{console.log("Form Values in teamMemberAddCall:",t);let s=await fetch("/team/member_add",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({team_id:l,member:t})});if(!s.ok){let e=await s.text();throw u.ZP.error("Failed to create key: "+e,10),console.error("Error response from the server:",e),Error("Network response was not ok")}let a=await s.json();return console.log("API Response:",a),a}catch(e){throw console.error("Failed to create key:",e),e}},K=async(e,l,t)=>{try{console.log("Form Values in userUpdateUserCall:",l);let s={...l};null!==t&&(s.user_role=t),s=JSON.stringify(s);let a=await fetch("/user/update",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:s});if(!a.ok){let e=await a.text();throw u.ZP.error("Failed to create key: "+e,10),console.error("Error response from the server:",e),Error("Network response was not ok")}let r=await a.json();return console.log("API Response:",r),r}catch(e){throw console.error("Failed to create key:",e),e}},z=async(e,l)=>{try{let t="/health/services?service=".concat(l);console.log("Checking Slack Budget Alerts service 
health");let s=await fetch(t,{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!s.ok){let e=await s.text();throw u.ZP.error("Failed ".concat(l," service health check ")+e),Error(e)}let a=await s.json();return u.ZP.success("Test request to ".concat(l," made - check logs/alerts on ").concat(l," to verify")),a}catch(e){throw console.error("Failed to perform health check:",e),e}},V=async(e,l,t)=>{try{let l=await fetch("/get/config/callbacks",{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!l.ok){let e=await l.text();throw u.ZP.error(e,10),Error("Network response was not ok")}return await l.json()}catch(e){throw console.error("Failed to get callbacks:",e),e}},q=async e=>{try{let l=await fetch("/config/list?config_type=general_settings",{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!l.ok){let e=await l.text();throw u.ZP.error(e,10),Error("Network response was not ok")}return await l.json()}catch(e){throw console.error("Failed to get callbacks:",e),e}},G=async(e,l,t)=>{try{let s=await fetch("/config/field/update",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({field_name:l,field_value:t,config_type:"general_settings"})});if(!s.ok){let e=await s.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let a=await s.json();return u.ZP.success("Successfully updated value!"),a}catch(e){throw console.error("Failed to set callbacks:",e),e}},W=async(e,l)=>{try{let t=await fetch("/config/field/delete",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({field_name:l,config_type:"general_settings"})});if(!t.ok){let e=await t.text();throw u.ZP.error(e,10),Error("Network response was not ok")}let s=await t.json();return u.ZP.success("Field reset on proxy"),s}catch(e){throw console.error("Failed to get callbacks:",e),e}},Y=async(e,l)=>{try{let t=await fetch("/config/update",{method:"POST",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...l})});if(!t.ok){let e=await t.text();throw u.ZP.error(e,10),Error("Network response was not ok")}return await t.json()}catch(e){throw console.error("Failed to set callbacks:",e),e}},J=async e=>{try{let l=await fetch("/health",{method:"GET",headers:{Authorization:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!l.ok){let e=await l.text();throw u.ZP.error(e),Error("Network response was not ok")}return await l.json()}catch(e){throw console.error("Failed to call /health:",e),e}};var H=t(10384),$=t(46453),X=t(16450),Q=t(52273),ee=t(26780),el=t(15595),et=t(6698),es=t(71801),ea=t(42440),er=t(42308),en=t(50670),eo=t(81583),ei=t(99129),ec=t(44839),ed=t(88707),em=t(1861);let{Option:eu}=en.default;var eh=e=>{let{userID:l,team:t,userRole:s,accessToken:a,data:o,setData:i}=e,[c]=eo.Z.useForm(),[d,m]=(0,n.useState)(!1),[h,x]=(0,n.useState)(null),[p,g]=(0,n.useState)(null),[y,f]=(0,n.useState)([]),[Z,_]=(0,n.useState)([]),w=()=>{m(!1),c.resetFields()},b=()=>{m(!1),x(null),c.resetFields()};(0,n.useEffect)(()=>{(async()=>{try{if(null===l||null===s)return;if(null!==a){let e=(await N(a,l,s)).data.map(e=>e.id);console.log("available_model_names:",e),f(e)}}catch(e){console.error("Error fetching user models:",e)}})()},[a,l,s]);let k=async e=>{try{var t,s,r;let n=null!==(t=null==e?void 0:e.key_alias)&&void 0!==t?t:"",d=null!==(s=null==e?void 
0:e.team_id)&&void 0!==s?s:null;if((null!==(r=null==o?void 0:o.filter(e=>e.team_id===d).map(e=>e.key_alias))&&void 0!==r?r:[]).includes(n))throw Error("Key alias ".concat(n," already exists for team with ID ").concat(d,", please provide another key alias"));u.ZP.info("Making API Call"),m(!0);let h=await j(a,l,e);console.log("key create Response:",h),i(e=>e?[...e,h]:[h]),x(h.key),g(h.soft_budget),u.ZP.success("API Key Created"),c.resetFields(),localStorage.removeItem("userData"+l)}catch(e){console.error("Error creating the key:",e),u.ZP.error("Error creating the key: ".concat(e),20)}};return(0,n.useEffect)(()=>{_(t&&t.models.length>0?t.models.includes("all-proxy-models")?y:t.models:y)},[t,y]),(0,r.jsxs)("div",{children:[(0,r.jsx)(X.Z,{className:"mx-auto",onClick:()=>m(!0),children:"+ Create New Key"}),(0,r.jsx)(ei.Z,{title:"Create Key",visible:d,width:800,footer:null,onOk:w,onCancel:b,children:(0,r.jsxs)(eo.Z,{form:c,onFinish:k,labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(eo.Z.Item,{label:"Key Name",name:"key_alias",rules:[{required:!0,message:"Please input a key name"}],help:"required",children:(0,r.jsx)(Q.Z,{placeholder:""})}),(0,r.jsx)(eo.Z.Item,{label:"Team ID",name:"team_id",hidden:!0,initialValue:t?t.team_id:null,valuePropName:"team_id",className:"mt-8",children:(0,r.jsx)(ec.Z,{value:t?t.team_alias:"",disabled:!0})}),(0,r.jsx)(eo.Z.Item,{label:"Models",name:"models",rules:[{required:!0,message:"Please select a model"}],help:"required",children:(0,r.jsxs)(en.default,{mode:"multiple",placeholder:"Select models",style:{width:"100%"},onChange:e=>{e.includes("all-team-models")&&c.setFieldsValue({models:["all-team-models"]})},children:[(0,r.jsx)(eu,{value:"all-team-models",children:"All Team Models"},"all-team-models"),Z.map(e=>(0,r.jsx)(eu,{value:e,children:e},e))]})}),(0,r.jsxs)(ee.Z,{className:"mt-20 mb-8",children:[(0,r.jsx)(et.Z,{children:(0,r.jsx)("b",{children:"Optional Settings"})}),(0,r.jsxs)(el.Z,{children:[(0,r.jsx)(eo.Z.Item,{className:"mt-8",label:"Max Budget (USD)",name:"max_budget",help:"Budget cannot exceed team max budget: $".concat((null==t?void 0:t.max_budget)!==null&&(null==t?void 0:t.max_budget)!==void 0?null==t?void 0:t.max_budget:"unlimited"),rules:[{validator:async(e,l)=>{if(l&&t&&null!==t.max_budget&&l>t.max_budget)throw Error("Budget cannot exceed team max budget: $".concat(t.max_budget))}}],children:(0,r.jsx)(ed.Z,{step:.01,precision:2,width:200})}),(0,r.jsx)(eo.Z.Item,{className:"mt-8",label:"Reset Budget",name:"budget_duration",help:"Team Reset Budget: ".concat((null==t?void 0:t.budget_duration)!==null&&(null==t?void 0:t.budget_duration)!==void 0?null==t?void 0:t.budget_duration:"None"),children:(0,r.jsxs)(en.default,{defaultValue:null,placeholder:"n/a",children:[(0,r.jsx)(en.default.Option,{value:"24h",children:"daily"}),(0,r.jsx)(en.default.Option,{value:"30d",children:"monthly"})]})}),(0,r.jsx)(eo.Z.Item,{className:"mt-8",label:"Tokens per minute Limit (TPM)",name:"tpm_limit",help:"TPM cannot exceed team TPM limit: ".concat((null==t?void 0:t.tpm_limit)!==null&&(null==t?void 0:t.tpm_limit)!==void 0?null==t?void 0:t.tpm_limit:"unlimited"),rules:[{validator:async(e,l)=>{if(l&&t&&null!==t.tpm_limit&&l>t.tpm_limit)throw Error("TPM limit cannot exceed team TPM limit: ".concat(t.tpm_limit))}}],children:(0,r.jsx)(ed.Z,{step:1,width:400})}),(0,r.jsx)(eo.Z.Item,{className:"mt-8",label:"Requests per minute Limit (RPM)",name:"rpm_limit",help:"RPM cannot exceed team RPM limit: ".concat((null==t?void 
0:t.rpm_limit)!==null&&(null==t?void 0:t.rpm_limit)!==void 0?null==t?void 0:t.rpm_limit:"unlimited"),rules:[{validator:async(e,l)=>{if(l&&t&&null!==t.rpm_limit&&l>t.rpm_limit)throw Error("RPM limit cannot exceed team RPM limit: ".concat(t.rpm_limit))}}],children:(0,r.jsx)(ed.Z,{step:1,width:400})}),(0,r.jsx)(eo.Z.Item,{label:"Expire Key (eg: 30s, 30h, 30d)",name:"duration",className:"mt-8",children:(0,r.jsx)(Q.Z,{placeholder:""})}),(0,r.jsx)(eo.Z.Item,{label:"Metadata",name:"metadata",children:(0,r.jsx)(ec.Z.TextArea,{rows:4,placeholder:"Enter metadata as JSON"})})]})]})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(em.ZP,{htmlType:"submit",children:"Create Key"})})]})}),h&&(0,r.jsx)(ei.Z,{visible:d,onOk:w,onCancel:b,footer:null,children:(0,r.jsxs)($.Z,{numItems:1,className:"gap-2 w-full",children:[(0,r.jsx)(ea.Z,{children:"Save your Key"}),(0,r.jsx)(H.Z,{numColSpan:1,children:(0,r.jsxs)("p",{children:["Please save this secret key somewhere safe and accessible. For security reasons, ",(0,r.jsx)("b",{children:"you will not be able to view it again"})," ","through your LiteLLM account. If you lose this secret key, you will need to generate a new one."]})}),(0,r.jsx)(H.Z,{numColSpan:1,children:null!=h?(0,r.jsxs)("div",{children:[(0,r.jsx)(es.Z,{className:"mt-3",children:"API Key:"}),(0,r.jsx)("div",{style:{background:"#f8f8f8",padding:"10px",borderRadius:"5px",marginBottom:"10px"},children:(0,r.jsx)("pre",{style:{wordWrap:"break-word",whiteSpace:"normal"},children:h})}),(0,r.jsx)(er.CopyToClipboard,{text:h,onCopy:()=>{u.ZP.success("API Key copied to clipboard")},children:(0,r.jsx)(X.Z,{className:"mt-3",children:"Copy API Key"})})]}):(0,r.jsx)(es.Z,{children:"Key being created, this might take 30s"})})]})})]})},ex=t(9454),ep=t(98941),ej=t(33393),eg=t(5),ey=t(13810),ef=t(61244),eZ=t(10827),e_=t(3851),ew=t(2044),eb=t(64167),ek=t(74480),ev=t(7178),eS=t(95093),eN=t(27166);let{Option:eA}=en.default;var eE=e=>{let{userID:l,userRole:t,accessToken:s,selectedTeam:a,data:o,setData:i,teams:c}=e,[d,m]=(0,n.useState)(!1),[h,x]=(0,n.useState)(!1),[p,j]=(0,n.useState)(null),[g,f]=(0,n.useState)(null),[Z,_]=(0,n.useState)(null),[w,b]=(0,n.useState)(""),[k,v]=(0,n.useState)(!1),[S,A]=(0,n.useState)(!1),[E,I]=(0,n.useState)(null),[C,T]=(0,n.useState)([]),P=new Set,[O,F]=(0,n.useState)(P);(0,n.useEffect)(()=>{(async()=>{try{if(null===l)return;if(null!==s&&null!==t){let e=(await N(s,l,t)).data.map(e=>e.id);console.log("available_model_names:",e),T(e)}}catch(e){console.error("Error fetching user models:",e)}})()},[s,l,t]),(0,n.useEffect)(()=>{if(c){let e=new Set;c.forEach((l,t)=>{let s=l.team_id;e.add(s)}),F(e)}},[c]);let R=e=>{console.log("handleEditClick:",e),null==e.token&&null!==e.token_id&&(e.token=e.token_id),I(e),v(!0)},M=async e=>{if(null==s)return;let l=e.token;e.key=l,console.log("handleEditSubmit:",e);let t=await L(s,e);console.log("handleEditSubmit: newKeyValues",t),o&&i(o.map(e=>e.token===l?t:e)),u.ZP.success("Key updated successfully"),v(!1),I(null)},U=async e=>{console.log("handleDelete:",e),null==e.token&&null!==e.token_id&&(e.token=e.token_id),null!=o&&(j(e.token),localStorage.removeItem("userData"+l),x(!0))},B=async()=>{if(null!=p&&null!=o){try{await y(s,p);let e=o.filter(e=>e.token!==p);i(e)}catch(e){console.error("Error deleting the key:",e)}x(!1),j(null)}};if(null!=o)return console.log("RERENDER TRIGGERED"),(0,r.jsxs)("div",{children:[(0,r.jsxs)(ey.Z,{className:"w-full mx-auto flex-auto overflow-y-auto max-h-[50vh] mb-4 
mt-2",children:[(0,r.jsxs)(eZ.Z,{className:"mt-5 max-h-[300px] min-h-[300px]",children:[(0,r.jsx)(eb.Z,{children:(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ek.Z,{children:"Key Alias"}),(0,r.jsx)(ek.Z,{children:"Secret Key"}),(0,r.jsx)(ek.Z,{children:"Spend (USD)"}),(0,r.jsx)(ek.Z,{children:"Budget (USD)"}),(0,r.jsx)(ek.Z,{children:"Models"}),(0,r.jsx)(ek.Z,{children:"TPM / RPM Limits"})]})}),(0,r.jsx)(e_.Z,{children:o.map(e=>{if(console.log(e),"litellm-dashboard"===e.team_id)return null;if(a){if(console.log("item team id: ".concat(e.team_id,", knownTeamIDs.has(item.team_id): ").concat(O.has(e.team_id),", selectedTeam id: ").concat(a.team_id)),(null!=a.team_id||null===e.team_id||O.has(e.team_id))&&e.team_id!=a.team_id)return null;console.log("item team id: ".concat(e.team_id,", is returned"))}return(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ew.Z,{style:{maxWidth:"2px",whiteSpace:"pre-wrap",overflow:"hidden"},children:null!=e.key_alias?(0,r.jsx)(es.Z,{children:e.key_alias}):(0,r.jsx)(es.Z,{children:"Not Set"})}),(0,r.jsx)(ew.Z,{children:(0,r.jsx)(es.Z,{children:e.key_name})}),(0,r.jsx)(ew.Z,{children:(0,r.jsx)(es.Z,{children:(()=>{try{return parseFloat(e.spend).toFixed(4)}catch(l){return e.spend}})()})}),(0,r.jsx)(ew.Z,{children:null!=e.max_budget?(0,r.jsx)(es.Z,{children:e.max_budget}):(0,r.jsx)(es.Z,{children:"Unlimited"})}),(0,r.jsx)(ew.Z,{children:Array.isArray(e.models)?(0,r.jsx)("div",{style:{display:"flex",flexDirection:"column"},children:0===e.models.length?(0,r.jsx)(r.Fragment,{children:a&&a.models&&a.models.length>0?a.models.map((e,l)=>"all-proxy-models"===e?(0,r.jsx)(eg.Z,{size:"xs",className:"mb-1",color:"red",children:(0,r.jsx)(es.Z,{children:"All Proxy Models"})},l):"all-team-models"===e?(0,r.jsx)(eg.Z,{size:"xs",className:"mb-1",color:"red",children:(0,r.jsx)(es.Z,{children:"All Team Models"})},l):(0,r.jsx)(eg.Z,{size:"xs",className:"mb-1",color:"blue",children:(0,r.jsx)(es.Z,{children:e.length>30?"".concat(e.slice(0,30),"..."):e})},l)):(0,r.jsx)(eg.Z,{size:"xs",className:"mb-1",color:"blue",children:(0,r.jsx)(es.Z,{children:"all-proxy-models"})})}):e.models.map((e,l)=>"all-proxy-models"===e?(0,r.jsx)(eg.Z,{size:"xs",className:"mb-1",color:"red",children:(0,r.jsx)(es.Z,{children:"All Proxy Models"})},l):"all-team-models"===e?(0,r.jsx)(eg.Z,{size:"xs",className:"mb-1",color:"red",children:(0,r.jsx)(es.Z,{children:"All Team Models"})},l):(0,r.jsx)(eg.Z,{size:"xs",className:"mb-1",color:"blue",children:(0,r.jsx)(es.Z,{children:e.length>30?"".concat(e.slice(0,30),"..."):e})},l))}):null}),(0,r.jsx)(ew.Z,{children:(0,r.jsxs)(es.Z,{children:["TPM: ",e.tpm_limit?e.tpm_limit:"Unlimited"," ",(0,r.jsx)("br",{})," RPM:"," ",e.rpm_limit?e.rpm_limit:"Unlimited"]})}),(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(ef.Z,{onClick:()=>{I(e),A(!0)},icon:ex.Z,size:"sm"}),(0,r.jsx)(ei.Z,{open:S,onCancel:()=>{A(!1),I(null)},footer:null,width:800,children:E&&(0,r.jsxs)(r.Fragment,{children:[(0,r.jsxs)("div",{className:"grid grid-cols-1 gap-6 sm:grid-cols-2 lg:grid-cols-3 mt-8",children:[(0,r.jsxs)(ey.Z,{children:[(0,r.jsx)("p",{className:"text-tremor-default font-medium text-tremor-content dark:text-dark-tremor-content",children:"Spend"}),(0,r.jsx)("div",{className:"mt-2 flex items-baseline space-x-2.5",children:(0,r.jsx)("p",{className:"text-tremor font-semibold text-tremor-content-strong dark:text-dark-tremor-content-strong",children:(()=>{try{return parseFloat(E.spend).toFixed(4)}catch(e){return E.spend}})()})})]}),(0,r.jsxs)(ey.Z,{children:[(0,r.jsx)("p",{className:"text-tremor-default font-medium text-tremor-content 
dark:text-dark-tremor-content",children:"Budget"}),(0,r.jsx)("div",{className:"mt-2 flex items-baseline space-x-2.5",children:(0,r.jsx)("p",{className:"text-tremor font-semibold text-tremor-content-strong dark:text-dark-tremor-content-strong",children:null!=E.max_budget?(0,r.jsx)(r.Fragment,{children:E.max_budget}):(0,r.jsx)(r.Fragment,{children:"Unlimited"})})})]},e.name),(0,r.jsxs)(ey.Z,{children:[(0,r.jsx)("p",{className:"text-tremor-default font-medium text-tremor-content dark:text-dark-tremor-content",children:"Expires"}),(0,r.jsx)("div",{className:"mt-2 flex items-baseline space-x-2.5",children:(0,r.jsx)("p",{className:"text-tremor-default font-small text-tremor-content-strong dark:text-dark-tremor-content-strong",children:null!=E.expires?(0,r.jsx)(r.Fragment,{children:new Date(E.expires).toLocaleString(void 0,{day:"numeric",month:"long",year:"numeric",hour:"numeric",minute:"numeric",second:"numeric"})}):(0,r.jsx)(r.Fragment,{children:"Never"})})})]},e.name)]}),(0,r.jsxs)(ey.Z,{className:"my-4",children:[(0,r.jsx)(ea.Z,{children:"Token Name"}),(0,r.jsx)(es.Z,{className:"my-1",children:E.key_alias?E.key_alias:E.key_name}),(0,r.jsx)(ea.Z,{children:"Token ID"}),(0,r.jsx)(es.Z,{className:"my-1 text-[12px]",children:E.token}),(0,r.jsx)(ea.Z,{children:"Metadata"}),(0,r.jsx)(es.Z,{className:"my-1",children:(0,r.jsxs)("pre",{children:[JSON.stringify(E.metadata)," "]})})]}),(0,r.jsx)(X.Z,{className:"mx-auto flex items-center",onClick:()=>{A(!1),I(null)},children:"Close"})]})}),(0,r.jsx)(ef.Z,{icon:ep.Z,size:"sm",onClick:()=>R(e)}),(0,r.jsx)(ef.Z,{onClick:()=>U(e),icon:ej.Z,size:"sm"})]})]},e.token)})})]}),h&&(0,r.jsx)("div",{className:"fixed z-10 inset-0 overflow-y-auto",children:(0,r.jsxs)("div",{className:"flex items-end justify-center min-h-screen pt-4 px-4 pb-20 text-center sm:block sm:p-0",children:[(0,r.jsx)("div",{className:"fixed inset-0 transition-opacity","aria-hidden":"true",children:(0,r.jsx)("div",{className:"absolute inset-0 bg-gray-500 opacity-75"})}),(0,r.jsx)("span",{className:"hidden sm:inline-block sm:align-middle sm:h-screen","aria-hidden":"true",children:"β"}),(0,r.jsxs)("div",{className:"inline-block align-bottom bg-white rounded-lg text-left overflow-hidden shadow-xl transform transition-all sm:my-8 sm:align-middle sm:max-w-lg sm:w-full",children:[(0,r.jsx)("div",{className:"bg-white px-4 pt-5 pb-4 sm:p-6 sm:pb-4",children:(0,r.jsx)("div",{className:"sm:flex sm:items-start",children:(0,r.jsxs)("div",{className:"mt-3 text-center sm:mt-0 sm:ml-4 sm:text-left",children:[(0,r.jsx)("h3",{className:"text-lg leading-6 font-medium text-gray-900",children:"Delete Key"}),(0,r.jsx)("div",{className:"mt-2",children:(0,r.jsx)("p",{className:"text-sm text-gray-500",children:"Are you sure you want to delete this key ?"})})]})})}),(0,r.jsxs)("div",{className:"bg-gray-50 px-4 py-3 sm:px-6 sm:flex sm:flex-row-reverse",children:[(0,r.jsx)(X.Z,{onClick:B,color:"red",className:"ml-2",children:"Delete"}),(0,r.jsx)(X.Z,{onClick:()=>{x(!1),j(null)},children:"Cancel"})]})]})]})})]}),E&&(0,r.jsx)(e=>{let{visible:l,onCancel:t,token:s,onSubmit:o}=e,[i]=eo.Z.useForm(),[d,m]=(0,n.useState)(a),[u,h]=(0,n.useState)([]),[x,p]=(0,n.useState)(!1);return(0,r.jsx)(ei.Z,{title:"Edit Key",visible:l,width:800,footer:null,onOk:()=>{i.validateFields().then(e=>{i.resetFields()}).catch(e=>{console.error("Validation 
failed:",e)})},onCancel:t,children:(0,r.jsxs)(eo.Z,{form:i,onFinish:M,initialValues:s,labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(eo.Z.Item,{label:"Key Name",name:"key_alias",rules:[{required:!0,message:"Please input a key name"}],help:"required",children:(0,r.jsx)(ec.Z,{})}),(0,r.jsx)(eo.Z.Item,{label:"Models",name:"models",rules:[{validator:(e,l)=>{let t=l.filter(e=>!d.models.includes(e)&&"all-team-models"!==e&&"all-proxy-models"!==e&&!d.models.includes("all-proxy-models"));return(console.log("errorModels: ".concat(t)),t.length>0)?Promise.reject("Some models are not part of the new team's models - ".concat(t,"Team models: ").concat(d.models)):Promise.resolve()}}],children:(0,r.jsxs)(en.default,{mode:"multiple",placeholder:"Select models",style:{width:"100%"},children:[(0,r.jsx)(eA,{value:"all-team-models",children:"All Team Models"},"all-team-models"),d&&d.models?d.models.includes("all-proxy-models")?C.filter(e=>"all-proxy-models"!==e).map(e=>(0,r.jsx)(eA,{value:e,children:e},e)):d.models.map(e=>(0,r.jsx)(eA,{value:e,children:e},e)):C.map(e=>(0,r.jsx)(eA,{value:e,children:e},e))]})}),(0,r.jsx)(eo.Z.Item,{className:"mt-8",label:"Max Budget (USD)",name:"max_budget",help:"Budget cannot exceed team max budget: ".concat((null==d?void 0:d.max_budget)!==null&&(null==d?void 0:d.max_budget)!==void 0?null==d?void 0:d.max_budget:"unlimited"),rules:[{validator:async(e,l)=>{if(l&&d&&null!==d.max_budget&&l>d.max_budget)throw console.log("keyTeam.max_budget: ".concat(d.max_budget)),Error("Budget cannot exceed team max budget: $".concat(d.max_budget))}}],children:(0,r.jsx)(ed.Z,{step:.01,precision:2,width:200})}),(0,r.jsx)(eo.Z.Item,{label:"token",name:"token",hidden:!0}),(0,r.jsx)(eo.Z.Item,{label:"Team",name:"team_id",help:"the team this key belongs to",children:(0,r.jsx)(eS.Z,{value:s.team_alias,children:null==c?void 0:c.map((e,l)=>(0,r.jsx)(eN.Z,{value:e.team_id,onClick:()=>m(e),children:e.team_alias},l))})})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(em.ZP,{htmlType:"submit",children:"Edit Key"})})]})})},{visible:k,onCancel:()=>{v(!1),I(null)},token:E,onSubmit:M})]})},eI=t(76032),eC=t(35152),eT=e=>{let{userID:l,userRole:t,accessToken:s,userSpend:a,selectedTeam:o}=e;console.log("userSpend: ".concat(a));let[i,c]=(0,n.useState)(null!==a?a:0),[d,m]=(0,n.useState)(0),[u,h]=(0,n.useState)([]);(0,n.useEffect)(()=>{let e=async()=>{if(s&&l&&t&&"Admin"===t&&null==a)try{let e=await w(s);e&&(e.spend?c(e.spend):c(0),e.max_budget?m(e.max_budget):m(0))}catch(e){console.error("Error fetching global spend data:",e)}};(async()=>{try{if(null===l||null===t)return;if(null!==s){let e=(await N(s,l,t)).data.map(e=>e.id);console.log("available_model_names:",e),h(e)}}catch(e){console.error("Error fetching user models:",e)}})(),e()},[t,s,l]),(0,n.useEffect)(()=>{null!==a&&c(a)},[a]);let x=[];o&&o.models&&(x=o.models),x&&x.includes("all-proxy-models")?(console.log("user models:",u),x=u):x&&x.includes("all-team-models")?x=o.models:x&&0===x.length&&(x=u);let p=void 0!==i?i.toFixed(4):null;return console.log("spend in view user spend: ".concat(i)),(0,r.jsxs)("div",{className:"flex items-center",children:[(0,r.jsxs)("div",{children:[(0,r.jsxs)("p",{className:"text-tremor-default text-tremor-content dark:text-dark-tremor-content",children:["Total Spend"," "]}),(0,r.jsxs)("p",{className:"text-2xl text-tremor-content-strong dark:text-dark-tremor-content-strong 
font-semibold",children:["$",p]})]}),(0,r.jsx)("div",{className:"ml-auto",children:(0,r.jsxs)(ee.Z,{children:[(0,r.jsx)(et.Z,{children:(0,r.jsx)(es.Z,{children:"Team Models"})}),(0,r.jsx)(el.Z,{className:"absolute right-0 z-10 bg-white p-2 shadow-lg max-w-xs",children:(0,r.jsx)(eI.Z,{children:x.map(e=>(0,r.jsx)(eC.Z,{children:(0,r.jsx)(es.Z,{children:e})},e))})})]})})]})},eP=e=>{let{userID:l,userRole:t,selectedTeam:s,accessToken:a}=e,[o,i]=(0,n.useState)([]);(0,n.useEffect)(()=>{(async()=>{try{if(null===l||null===t)return;if(null!==a){let e=(await N(a,l,t)).data.map(e=>e.id);console.log("available_model_names:",e),i(e)}}catch(e){console.error("Error fetching user models:",e)}})()},[a,l,t]);let c=[];return s&&s.models&&(c=s.models),c&&c.includes("all-proxy-models")&&(console.log("user models:",o),c=o),(0,r.jsx)(r.Fragment,{children:(0,r.jsx)("div",{className:"mb-5",children:(0,r.jsx)("p",{className:"text-3xl text-tremor-content-strong dark:text-dark-tremor-content-strong font-semibold",children:null==s?void 0:s.team_alias})})})},eO=e=>{let l,{teams:t,setSelectedTeam:s,userRole:a}=e,o={models:[],team_id:null,team_alias:"Default Team"},[i,c]=(0,n.useState)(o);return(l="App User"===a?t:t?[...t,o]:[o],"App User"===a)?null:(0,r.jsxs)("div",{className:"mt-5 mb-5",children:[(0,r.jsx)(ea.Z,{children:"Select Team"}),(0,r.jsx)(es.Z,{children:"If you belong to multiple teams, this setting controls which team is used by default when creating new API Keys."}),(0,r.jsxs)(es.Z,{className:"mt-3 mb-3",children:[(0,r.jsx)("b",{children:"Default Team:"})," If no team_id is set for a key, it will be grouped under here."]}),l&&l.length>0?(0,r.jsx)(eS.Z,{defaultValue:"0",children:l.map((e,l)=>(0,r.jsx)(eN.Z,{value:String(l),onClick:()=>s(e),children:e.team_alias},l))}):(0,r.jsxs)(es.Z,{children:["No team created. 
",(0,r.jsx)("b",{children:"Defaulting to personal account."})]})]})},eF=t(37963),eR=t(36083);console.log("isLocal:",!1);var eM=e=>{let{userID:l,userRole:t,teams:s,keys:a,setUserRole:i,userEmail:c,setUserEmail:d,setTeams:m,setKeys:u}=e,[h,x]=(0,n.useState)(null),p=(0,o.useSearchParams)();p.get("viewSpend"),(0,o.useRouter)();let j=p.get("token"),[g,y]=(0,n.useState)(null),[f,_]=(0,n.useState)(null),[b,k]=(0,n.useState)([]),v={models:[],team_alias:"Default Team",team_id:null},[S,A]=(0,n.useState)(s?s[0]:v);if(window.addEventListener("beforeunload",function(){sessionStorage.clear()}),(0,n.useEffect)(()=>{if(j){let e=(0,eF.o)(j);if(e){if(console.log("Decoded token:",e),console.log("Decoded key:",e.key),y(e.key),e.user_role){let l=function(e){if(!e)return"Undefined Role";switch(console.log("Received user role: ".concat(e)),e.toLowerCase()){case"app_owner":case"demo_app_owner":return"App Owner";case"app_admin":case"proxy_admin":return"Admin";case"proxy_admin_viewer":return"Admin Viewer";case"app_user":return"App User";default:return"Unknown Role"}}(e.user_role);console.log("Decoded user_role:",l),i(l)}else console.log("User role not defined");e.user_email?d(e.user_email):console.log("User Email is not set ".concat(e))}}if(l&&g&&t&&!a&&!h){let e=sessionStorage.getItem("userModels"+l);e?k(JSON.parse(e)):(async()=>{try{let e=await Z(g,l,t,!1,null,null);if(console.log("received teams in user dashboard: ".concat(Object.keys(e),"; team values: ").concat(Object.entries(e.teams))),"Admin"==t){let e=await w(g);x(e),console.log("globalSpend:",e)}else x(e.user_info);u(e.keys),m(e.teams);let s=[...e.teams];s.length>0?(console.log("response['teams']: ".concat(s)),A(s[0])):A(v),sessionStorage.setItem("userData"+l,JSON.stringify(e.keys)),sessionStorage.setItem("userSpendData"+l,JSON.stringify(e.user_info));let a=(await N(g,l,t)).data.map(e=>e.id);console.log("available_model_names:",a),k(a),console.log("userModels:",b),sessionStorage.setItem("userModels"+l,JSON.stringify(a))}catch(e){console.error("There was an error fetching the data",e)}})()}},[l,j,g,a,t]),(0,n.useEffect)(()=>{if(null!==a&&null!=S){let e=0;for(let l of a)S.hasOwnProperty("team_id")&&null!==l.team_id&&l.team_id===S.team_id&&(e+=l.spend);_(e)}else if(null!==a){let e=0;for(let l of a)e+=l.spend;_(e)}},[S]),null==l||null==j){let e="/sso/key/generate";return console.log("Full URL:",e),window.location.href=e,null}if(null==g)return null;if(null==t&&i("App Owner"),t&&"Admin Viewer"==t){let{Title:e,Paragraph:l}=eR.default;return(0,r.jsxs)("div",{children:[(0,r.jsx)(e,{level:1,children:"Access Denied"}),(0,r.jsx)(l,{children:"Ask your proxy admin for access to create keys"})]})}return console.log("inside user dashboard, selected team",S),console.log("teamSpend: ".concat(f)),(0,r.jsx)("div",{className:"w-full mx-4",children:(0,r.jsx)($.Z,{numItems:1,className:"gap-2 p-8 h-[75vh] w-full 
mt-2",children:(0,r.jsxs)(H.Z,{numColSpan:1,children:[(0,r.jsx)(eP,{userID:l,userRole:t,selectedTeam:S||null,accessToken:g}),(0,r.jsx)(eT,{userID:l,userRole:t,accessToken:g,userSpend:f,selectedTeam:S||null}),(0,r.jsx)(eE,{userID:l,userRole:t,accessToken:g,selectedTeam:S||null,data:a,setData:u,teams:s}),(0,r.jsx)(eh,{userID:l,team:S||null,userRole:t,accessToken:g,data:a,setData:u},S?S.team_id:null),(0,r.jsx)(eO,{teams:s,setSelectedTeam:A,userRole:t})]})})})},eL=t(35087),eU=t(92836),eB=t(26734),eD=t(41608),eK=t(32126),ez=t(23682),eV=t(47047),eq=t(76628),eG=t(57750),eW=t(44041),eY=t(38302),eJ=t(28683),eH=t(1460),e$=t(78578),eX=t(63954),eQ=t(90252),e0=t(7905),e1=e=>{let{modelID:l,accessToken:t}=e,[s,a]=(0,n.useState)(!1),o=async()=>{try{u.ZP.info("Making API Call"),a(!0);let e=await p(t,l);console.log("model delete Response:",e),u.ZP.success("Model ".concat(l," deleted successfully")),a(!1)}catch(e){console.error("Error deleting the model:",e)}};return(0,r.jsxs)("div",{children:[(0,r.jsx)(ef.Z,{onClick:()=>a(!0),icon:ej.Z,size:"sm"}),(0,r.jsx)(ei.Z,{open:s,onOk:o,okType:"danger",onCancel:()=>a(!1),children:(0,r.jsxs)($.Z,{numItems:1,className:"gap-2 w-full",children:[(0,r.jsx)(ea.Z,{children:"Delete Model"}),(0,r.jsx)(H.Z,{numColSpan:1,children:(0,r.jsx)("p",{children:"Are you sure you want to delete this model? This action is irreversible."})}),(0,r.jsx)(H.Z,{numColSpan:1,children:(0,r.jsxs)("p",{children:["Model ID: ",(0,r.jsx)("b",{children:l})]})})]})})]})},e2=t(97766),e4=t(46495);let{Title:e5,Link:e8}=eR.default;(s=a||(a={})).OpenAI="OpenAI",s.Azure="Azure",s.Anthropic="Anthropic",s.Google_AI_Studio="Gemini (Google AI Studio)",s.Bedrock="Amazon Bedrock",s.OpenAI_Compatible="OpenAI-Compatible Endpoints (Groq, Together AI, Mistral AI, etc.)",s.Vertex_AI="Vertex AI (Anthropic, Gemini, etc.)";let e3={OpenAI:"openai",Azure:"azure",Anthropic:"anthropic",Google_AI_Studio:"gemini",Bedrock:"bedrock",OpenAI_Compatible:"openai",Vertex_AI:"vertex_ai"},e6={"BadRequestError (400)":"BadRequestErrorRetries","AuthenticationError (401)":"AuthenticationErrorRetries","TimeoutError (408)":"TimeoutErrorRetries","RateLimitError (429)":"RateLimitErrorRetries","ContentPolicyViolationError (400)":"ContentPolicyViolationErrorRetries","InternalServerError (500)":"InternalServerErrorRetries"},e7=async(e,l,t)=>{try{let s=Array.isArray(e.model)?e.model:[e.model];console.log("received deployments: ".concat(s)),console.log("received type of deployments: ".concat(typeof s)),s.forEach(async t=>{console.log("litellm_model: ".concat(t));let s={},a={};s.model=t;let r="";for(let[l,t]of(console.log("formValues add deployment:",e),Object.entries(e)))if(""!==t){if("model_name"==l)r+=t;else if("custom_llm_provider"==l)continue;else if("model"==l)continue;else if("base_model"===l)a[l]=t;else if("litellm_extra_params"==l){console.log("litellm_extra_params:",t);let e={};if(t&&void 0!=t){try{e=JSON.parse(t)}catch(e){throw u.ZP.error("Failed to parse LiteLLM Extra Params: "+e,10),Error("Failed to parse litellm_extra_params: "+e)}for(let[l,t]of Object.entries(e))s[l]=t}}else s[l]=t}let n={model_name:r,litellm_params:s,model_info:a},o=await x(l,n);console.log("response for model create call: ".concat(o.data))}),t.resetFields()}catch(e){u.ZP.error("Failed to create model: "+e,10)}};var e9=e=>{var 
l,t,s;let{accessToken:o,token:i,userRole:c,userID:d,modelData:m={data:[]},setModelData:x}=e,[p,j]=(0,n.useState)([]),[g]=eo.Z.useForm(),[y,f]=(0,n.useState)(null),[Z,_]=(0,n.useState)(""),[w,N]=(0,n.useState)([]),A=Object.values(a).filter(e=>isNaN(Number(e))),[E,I]=(0,n.useState)("OpenAI"),[C,T]=(0,n.useState)(""),[P,O]=(0,n.useState)(!1),[F,R]=(0,n.useState)(null),[M,L]=(0,n.useState)([]),[U,D]=(0,n.useState)(null),[K,z]=(0,n.useState)([]),[q,G]=(0,n.useState)([]),[W,H]=(0,n.useState)([]),[er,en]=(0,n.useState)([]),[ec,eu]=(0,n.useState)([]),[eh,ex]=(0,n.useState)([]),[ej,eA]=(0,n.useState)([]),[eE,eI]=(0,n.useState)({from:new Date(Date.now()-6048e5),to:new Date}),[eC,eT]=(0,n.useState)(null),[eP,eO]=(0,n.useState)(0),eF=e=>{R(e),O(!0)},eM=async e=>{if(console.log("handleEditSubmit:",e),null==o)return;let l={},t=null;for(let[s,a]of Object.entries(e))"model_id"!==s?l[s]=a:t=a;let s={litellm_params:l,model_info:{id:t}};console.log("handleEditSubmit payload:",s);try{await B(o,s),u.ZP.success("Model updated successfully, restart server to see updates"),O(!1),R(null)}catch(e){console.log("Error occurred")}},e9=()=>{_(new Date().toLocaleString())},le=async()=>{if(!o){console.error("Access token is missing");return}console.log("new modelGroupRetryPolicy:",eC);try{await Y(o,{router_settings:{model_group_retry_policy:eC}}),u.ZP.success("Retry settings saved successfully")}catch(e){console.error("Failed to save retry settings:",e),u.ZP.error("Failed to save retry settings")}};if((0,n.useEffect)(()=>{if(!o||!i||!c||!d)return;let e=async()=>{try{var e,l,t,s,a,r;let n=await b(o,d,c);console.log("Model data response:",n.data),x(n);let i=new Set;for(let e=0;e0&&(u=m[m.length-1],console.log("_initial_model_group:",u),D(u)),console.log("selectedModelGroup:",U);let h=await k(o,d,c,u,null===(e=eE.from)||void 0===e?void 0:e.toISOString(),null===(l=eE.to)||void 0===l?void 0:l.toISOString());console.log("Model metrics response:",h),G(h.data),H(h.all_api_bases);let p=await S(o,d,c,u,null===(t=eE.from)||void 0===t?void 0:t.toISOString(),null===(s=eE.to)||void 0===s?void 0:s.toISOString());console.log("Model exceptions response:",p),en(p.data),eu(p.exception_types);let j=await v(o,d,c,u,null===(a=eE.from)||void 0===a?void 0:a.toISOString(),null===(r=eE.to)||void 0===r?void 0:r.toISOString());console.log("slowResponses:",j),eA(j);let g=(await V(o,d,c)).router_settings;console.log("routerSettingsInfo:",g);let y=g.model_group_retry_policy,f=g.num_retries;console.log("model_group_retry_policy:",y),console.log("default_retries:",f),eT(y),eO(f)}catch(e){console.error("There was an error fetching the model data",e)}};o&&i&&c&&d&&e();let l=async()=>{let e=await h();console.log("received model cost map data: ".concat(Object.keys(e))),f(e)};null==y&&l(),e9()},[o,i,c,d,y,Z]),!m||!o||!i||!c||!d)return(0,r.jsx)("div",{children:"Loading..."});let ll=[];for(let e=0;e(console.log("GET PROVIDER CALLED! 
- ".concat(y)),null!=y&&"object"==typeof y&&e in y)?y[e].litellm_provider:"openai";if(a){let e=a.split("/"),l=e[0];n=1===e.length?h(a):l}else n="openai";r&&(o=null==r?void 0:r.input_cost_per_token,i=null==r?void 0:r.output_cost_per_token,c=null==r?void 0:r.max_tokens,d=null==r?void 0:r.max_input_tokens),(null==s?void 0:s.litellm_params)&&(u=Object.fromEntries(Object.entries(null==s?void 0:s.litellm_params).filter(e=>{let[l]=e;return"model"!==l&&"api_base"!==l}))),m.data[e].provider=n,m.data[e].input_cost=o,m.data[e].output_cost=i,m.data[e].input_cost&&(m.data[e].input_cost=(1e6*Number(m.data[e].input_cost)).toFixed(2)),m.data[e].output_cost&&(m.data[e].output_cost=(1e6*Number(m.data[e].output_cost)).toFixed(2)),m.data[e].max_tokens=c,m.data[e].max_input_tokens=d,m.data[e].api_base=null==s?void 0:null===(t=s.litellm_params)||void 0===t?void 0:t.api_base,m.data[e].cleanedLitellmParams=u,ll.push(s.model_name),console.log(m.data[e])}if(c&&"Admin Viewer"==c){let{Title:e,Paragraph:l}=eR.default;return(0,r.jsxs)("div",{children:[(0,r.jsx)(e,{level:1,children:"Access Denied"}),(0,r.jsx)(l,{children:"Ask your proxy admin for access to view all models"})]})}let lt=e=>{console.log("received provider string: ".concat(e));let l=Object.keys(a).find(l=>a[l]===e);if(l){let e=e3[l];console.log("mappingResult: ".concat(e));let t=[];"object"==typeof y&&Object.entries(y).forEach(l=>{let[s,a]=l;null!==a&&"object"==typeof a&&"litellm_provider"in a&&(a.litellm_provider===e||a.litellm_provider.includes(e))&&t.push(s)}),N(t),console.log("providerModels: ".concat(w))}},ls=async()=>{try{u.ZP.info("Running health check..."),T("");let e=await J(o);T(e)}catch(e){console.error("Error running health check:",e),T("Error running health check")}},la=async(e,l,t)=>{if(console.log("Updating model metrics for group:",e),o&&d&&c&&l&&t){console.log("inside updateModelMetrics - startTime:",l,"endTime:",t),D(e);try{let s=await k(o,d,c,e,l.toISOString(),t.toISOString());console.log("Model metrics response:",s),G(s.data),H(s.all_api_bases);let a=await S(o,d,c,e,l.toISOString(),t.toISOString());console.log("Model exceptions response:",a),en(a.data),eu(a.exception_types);let r=await v(o,d,c,e,l.toISOString(),t.toISOString());console.log("slowResponses:",r),eA(r)}catch(e){console.error("Failed to fetch model metrics",e)}}};return console.log("selectedProvider: ".concat(E)),console.log("providerModels.length: ".concat(w.length)),(0,r.jsx)("div",{style:{width:"100%",height:"100%"},children:(0,r.jsxs)(eB.Z,{className:"gap-2 p-8 h-[75vh] w-full mt-2",children:[(0,r.jsxs)(eD.Z,{className:"flex justify-between mt-2 w-full items-center",children:[(0,r.jsxs)("div",{className:"flex",children:[(0,r.jsx)(eU.Z,{children:"All Models"}),(0,r.jsx)(eU.Z,{children:"Add Model"}),(0,r.jsx)(eU.Z,{children:(0,r.jsx)("pre",{children:"/health Models"})}),(0,r.jsx)(eU.Z,{children:"Model Analytics"}),(0,r.jsx)(eU.Z,{children:"Model Retry Settings"})]}),(0,r.jsxs)("div",{className:"flex items-center space-x-2",children:[Z&&(0,r.jsxs)(es.Z,{children:["Last Refreshed: ",Z]}),(0,r.jsx)(ef.Z,{icon:eX.Z,variant:"shadow",size:"xs",className:"self-center",onClick:e9})]})]}),(0,r.jsxs)(ez.Z,{children:[(0,r.jsxs)(eK.Z,{children:[(0,r.jsxs)($.Z,{children:[(0,r.jsxs)("div",{className:"flex items-center",children:[(0,r.jsx)(es.Z,{children:"Filter by Public Model Name"}),(0,r.jsxs)(eS.Z,{className:"mb-4 mt-2 ml-2 w-50",defaultValue:U||M[0],onValueChange:e=>D("all"===e?"all":e),value:U||M[0],children:[(0,r.jsx)(eN.Z,{value:"all",children:"All 
Models"}),M.map((e,l)=>(0,r.jsx)(eN.Z,{value:e,onClick:()=>D(e),children:e},l))]})]}),(0,r.jsx)(ey.Z,{children:(0,r.jsxs)(eZ.Z,{className:"mt-5",style:{maxWidth:"1500px",width:"100%"},children:[(0,r.jsx)(eb.Z,{children:(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ek.Z,{style:{maxWidth:"150px",whiteSpace:"normal",wordBreak:"break-word"},children:"Public Model Name"}),(0,r.jsx)(ek.Z,{style:{maxWidth:"100px",whiteSpace:"normal",wordBreak:"break-word"},children:"Provider"}),"Admin"===c&&(0,r.jsx)(ek.Z,{style:{maxWidth:"150px",whiteSpace:"normal",wordBreak:"break-word"},children:"API Base"}),(0,r.jsx)(ek.Z,{style:{maxWidth:"200px",whiteSpace:"normal",wordBreak:"break-word"},children:"Extra litellm Params"}),(0,r.jsxs)(ek.Z,{style:{maxWidth:"85px",whiteSpace:"normal",wordBreak:"break-word"},children:["Input Price ",(0,r.jsx)("p",{style:{fontSize:"10px",color:"gray"},children:"/1M Tokens ($)"})]}),(0,r.jsxs)(ek.Z,{style:{maxWidth:"85px",whiteSpace:"normal",wordBreak:"break-word"},children:["Output Price ",(0,r.jsx)("p",{style:{fontSize:"10px",color:"gray"},children:"/1M Tokens ($)"})]}),(0,r.jsx)(ek.Z,{style:{maxWidth:"120px",whiteSpace:"normal",wordBreak:"break-word"},children:"Max Tokens"}),(0,r.jsx)(ek.Z,{style:{maxWidth:"50px",whiteSpace:"normal",wordBreak:"break-word"},children:"Status"})]})}),(0,r.jsx)(e_.Z,{children:m.data.filter(e=>"all"===U||e.model_name===U||null==U||""===U).map((e,l)=>(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ew.Z,{style:{maxWidth:"150px",whiteSpace:"normal",wordBreak:"break-word"},children:(0,r.jsx)(es.Z,{children:e.model_name})}),(0,r.jsx)(ew.Z,{style:{maxWidth:"100px",whiteSpace:"normal",wordBreak:"break-word"},children:e.provider}),"Admin"===c&&(0,r.jsx)(ew.Z,{style:{maxWidth:"150px",whiteSpace:"normal",wordBreak:"break-word"},children:e.api_base}),(0,r.jsx)(ew.Z,{style:{maxWidth:"200px",whiteSpace:"normal",wordBreak:"break-word"},children:(0,r.jsxs)(ee.Z,{children:[(0,r.jsx)(et.Z,{children:(0,r.jsx)(es.Z,{children:"Litellm params"})}),(0,r.jsx)(el.Z,{children:(0,r.jsx)("pre",{children:JSON.stringify(e.cleanedLitellmParams,null,2)})})]})}),(0,r.jsx)(ew.Z,{style:{maxWidth:"80px",whiteSpace:"normal",wordBreak:"break-word"},children:e.input_cost||e.litellm_params.input_cost_per_token||null}),(0,r.jsx)(ew.Z,{style:{maxWidth:"80px",whiteSpace:"normal",wordBreak:"break-word"},children:e.output_cost||e.litellm_params.output_cost_per_token||null}),(0,r.jsx)(ew.Z,{style:{maxWidth:"120px",whiteSpace:"normal",wordBreak:"break-word"},children:(0,r.jsxs)("p",{style:{fontSize:"10px"},children:["Max Tokens: ",e.max_tokens," ",(0,r.jsx)("br",{}),"Max Input Tokens: ",e.max_input_tokens]})}),(0,r.jsx)(ew.Z,{style:{maxWidth:"100px",whiteSpace:"normal",wordBreak:"break-word"},children:e.model_info.db_model?(0,r.jsx)(eg.Z,{icon:eQ.Z,size:"xs",className:"text-white",children:(0,r.jsx)("p",{style:{fontSize:"10px"},children:"DB Model"})}):(0,r.jsx)(eg.Z,{icon:e0.Z,size:"xs",className:"text-black",children:(0,r.jsx)("p",{style:{fontSize:"10px"},children:"Config Model"})})}),(0,r.jsxs)(ew.Z,{style:{maxWidth:"100px",whiteSpace:"normal",wordBreak:"break-word"},children:[(0,r.jsx)(ef.Z,{icon:ep.Z,size:"sm",onClick:()=>eF(e)}),(0,r.jsx)(e1,{modelID:e.model_info.id,accessToken:o})]})]},l))})]})})]}),(0,r.jsx)(e=>{let{visible:l,onCancel:t,model:s,onSubmit:a}=e,[n]=eo.Z.useForm(),o={},i="",c="";if(s){o=s.litellm_params,i=s.model_name;let e=s.model_info;e&&(c=e.id,console.log("model_id: ".concat(c)),o.model_id=c)}return(0,r.jsx)(ei.Z,{title:"Edit Model 
"+i,visible:l,width:800,footer:null,onOk:()=>{n.validateFields().then(e=>{a(e),n.resetFields()}).catch(e=>{console.error("Validation failed:",e)})},onCancel:t,children:(0,r.jsxs)(eo.Z,{form:n,onFinish:eM,initialValues:o,labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(eo.Z.Item,{className:"mt-8",label:"api_base",name:"api_base",children:(0,r.jsx)(Q.Z,{})}),(0,r.jsx)(eo.Z.Item,{label:"tpm",name:"tpm",tooltip:"int (optional) - Tokens limit for this deployment: in tokens per minute (tpm). Find this information on your model/providers website",children:(0,r.jsx)(ed.Z,{min:0,step:1})}),(0,r.jsx)(eo.Z.Item,{label:"rpm",name:"rpm",tooltip:"int (optional) - Rate limit for this deployment: in requests per minute (rpm). Find this information on your model/providers website",children:(0,r.jsx)(ed.Z,{min:0,step:1})}),(0,r.jsx)(eo.Z.Item,{label:"max_retries",name:"max_retries",children:(0,r.jsx)(ed.Z,{min:0,step:1})}),(0,r.jsx)(eo.Z.Item,{label:"timeout",name:"timeout",tooltip:"int (optional) - Timeout in seconds for LLM requests (Defaults to 600 seconds)",children:(0,r.jsx)(ed.Z,{min:0,step:1})}),(0,r.jsx)(eo.Z.Item,{label:"stream_timeout",name:"stream_timeout",tooltip:"int (optional) - Timeout for stream requests (seconds)",children:(0,r.jsx)(ed.Z,{min:0,step:1})}),(0,r.jsx)(eo.Z.Item,{label:"input_cost_per_token",name:"input_cost_per_token",tooltip:"float (optional) - Input cost per token",children:(0,r.jsx)(ed.Z,{min:0,step:1e-4})}),(0,r.jsx)(eo.Z.Item,{label:"output_cost_per_token",name:"output_cost_per_token",tooltip:"float (optional) - Output cost per token",children:(0,r.jsx)(ed.Z,{min:0,step:1e-4})}),(0,r.jsx)(eo.Z.Item,{label:"model_id",name:"model_id",hidden:!0})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(em.ZP,{htmlType:"submit",children:"Save"})})]})})},{visible:P,onCancel:()=>{O(!1),R(null)},model:F,onSubmit:eM})]}),(0,r.jsxs)(eK.Z,{className:"h-full",children:[(0,r.jsx)(e5,{level:2,children:"Add new model"}),(0,r.jsx)(ey.Z,{children:(0,r.jsxs)(eo.Z,{form:g,onFinish:()=>{g.validateFields().then(e=>{e7(e,o,g)}).catch(e=>{console.error("Validation failed:",e)})},labelCol:{span:10},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(eo.Z.Item,{rules:[{required:!0,message:"Required"}],label:"Provider:",name:"custom_llm_provider",tooltip:"E.g. OpenAI, Azure OpenAI, Anthropic, Bedrock, etc.",labelCol:{span:10},labelAlign:"left",children:(0,r.jsx)(eS.Z,{value:E.toString(),children:A.map((e,l)=>(0,r.jsx)(eN.Z,{value:e,onClick:()=>{lt(e),I(e)},children:e},l))})}),(0,r.jsx)(eo.Z.Item,{rules:[{required:!0,message:"Required"}],label:"Public Model Name",name:"model_name",tooltip:"Model name your users will pass in. 
Also used for load-balancing, LiteLLM will load balance between all models with this public name.",className:"mb-0",children:(0,r.jsx)(Q.Z,{placeholder:"Vertex AI (Anthropic, Gemini, etc.)"===(s=E.toString())?"gemini-pro":"Anthropic"==s?"claude-3-opus":"Amazon Bedrock"==s?"claude-3-opus":"Gemini (Google AI Studio)"==s?"gemini-pro":"gpt-3.5-turbo"})}),(0,r.jsxs)(eY.Z,{children:[(0,r.jsx)(eJ.Z,{span:10}),(0,r.jsx)(eJ.Z,{span:10,children:(0,r.jsx)(es.Z,{className:"mb-3 mt-1",children:"Model name your users will pass in."})})]}),(0,r.jsx)(eo.Z.Item,{rules:[{required:!0,message:"Required"}],label:"LiteLLM Model Name(s)",name:"model",tooltip:"Actual model name used for making litellm.completion() call.",className:"mb-0",children:"Azure"===E?(0,r.jsx)(Q.Z,{placeholder:"Enter model name"}):w.length>0?(0,r.jsx)(eV.Z,{value:w,children:w.map((e,l)=>(0,r.jsx)(eq.Z,{value:e,children:e},l))}):(0,r.jsx)(Q.Z,{placeholder:"gpt-3.5-turbo-0125"})}),(0,r.jsxs)(eY.Z,{children:[(0,r.jsx)(eJ.Z,{span:10}),(0,r.jsx)(eJ.Z,{span:10,children:(0,r.jsxs)(es.Z,{className:"mb-3 mt-1",children:["Actual model name used for making ",(0,r.jsx)(e8,{href:"https://docs.litellm.ai/docs/providers",target:"_blank",children:"litellm.completion() call"}),". We'll ",(0,r.jsx)(e8,{href:"https://docs.litellm.ai/docs/proxy/reliability#step-1---set-deployments-on-config",target:"_blank",children:"loadbalance"})," models with the same 'public name'"]})})]}),"Amazon Bedrock"!=E&&"Vertex AI (Anthropic, Gemini, etc.)"!=E&&(0,r.jsx)(eo.Z.Item,{rules:[{required:!0,message:"Required"}],label:"API Key",name:"api_key",children:(0,r.jsx)(Q.Z,{placeholder:"sk-",type:"password"})}),"OpenAI"==E&&(0,r.jsx)(eo.Z.Item,{label:"Organization ID",name:"organization_id",children:(0,r.jsx)(Q.Z,{placeholder:"[OPTIONAL] my-unique-org"})}),"Vertex AI (Anthropic, Gemini, etc.)"==E&&(0,r.jsx)(eo.Z.Item,{rules:[{required:!0,message:"Required"}],label:"Vertex Project",name:"vertex_project",children:(0,r.jsx)(Q.Z,{placeholder:"adroit-cadet-1234.."})}),"Vertex AI (Anthropic, Gemini, etc.)"==E&&(0,r.jsx)(eo.Z.Item,{rules:[{required:!0,message:"Required"}],label:"Vertex Location",name:"vertex_location",children:(0,r.jsx)(Q.Z,{placeholder:"us-east-1"})}),"Vertex AI (Anthropic, Gemini, etc.)"==E&&(0,r.jsx)(eo.Z.Item,{rules:[{required:!0,message:"Required"}],label:"Vertex Credentials",name:"vertex_credentials",className:"mb-0",children:(0,r.jsx)(e4.Z,{name:"file",accept:".json",beforeUpload:e=>{if("application/json"===e.type){let l=new FileReader;l.onload=e=>{if(e.target){let l=e.target.result;g.setFieldsValue({vertex_credentials:l})}},l.readAsText(e)}return!1},onChange(e){"uploading"!==e.file.status&&console.log(e.file,e.fileList),"done"===e.file.status?u.ZP.success("".concat(e.file.name," file uploaded successfully")):"error"===e.file.status&&u.ZP.error("".concat(e.file.name," file upload failed."))},children:(0,r.jsx)(em.ZP,{icon:(0,r.jsx)(e2.Z,{}),children:"Click to Upload"})})}),"Vertex AI (Anthropic, Gemini, etc.)"==E&&(0,r.jsxs)(eY.Z,{children:[(0,r.jsx)(eJ.Z,{span:10}),(0,r.jsx)(eJ.Z,{span:10,children:(0,r.jsx)(es.Z,{className:"mb-3 mt-1",children:"Give litellm a gcp service account(.json file), so it can make the relevant calls"})})]}),("Azure"==E||"OpenAI-Compatible Endpoints (Groq, Together AI, Mistral AI, etc.)"==E)&&(0,r.jsx)(eo.Z.Item,{rules:[{required:!0,message:"Required"}],label:"API Base",name:"api_base",children:(0,r.jsx)(Q.Z,{placeholder:"https://..."})}),"Azure"==E&&(0,r.jsx)(eo.Z.Item,{rules:[{required:!0,message:"Required"}],label:"API 
Version",name:"api_version",children:(0,r.jsx)(Q.Z,{placeholder:"2023-07-01-preview"})}),"Azure"==E&&(0,r.jsxs)("div",{children:[(0,r.jsx)(eo.Z.Item,{label:"Base Model",name:"base_model",className:"mb-0",children:(0,r.jsx)(Q.Z,{placeholder:"azure/gpt-3.5-turbo"})}),(0,r.jsxs)(eY.Z,{children:[(0,r.jsx)(eJ.Z,{span:10}),(0,r.jsx)(eJ.Z,{span:10,children:(0,r.jsxs)(es.Z,{className:"mb-2",children:["The actual model your azure deployment uses. Used for accurate cost tracking. Select name from ",(0,r.jsx)(e8,{href:"https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json",target:"_blank",children:"here"})]})})]})]}),"Amazon Bedrock"==E&&(0,r.jsx)(eo.Z.Item,{rules:[{required:!0,message:"Required"}],label:"AWS Access Key ID",name:"aws_access_key_id",tooltip:"You can provide the raw key or the environment variable (e.g. `os.environ/MY_SECRET_KEY`).",children:(0,r.jsx)(Q.Z,{placeholder:""})}),"Amazon Bedrock"==E&&(0,r.jsx)(eo.Z.Item,{rules:[{required:!0,message:"Required"}],label:"AWS Secret Access Key",name:"aws_secret_access_key",tooltip:"You can provide the raw key or the environment variable (e.g. `os.environ/MY_SECRET_KEY`).",children:(0,r.jsx)(Q.Z,{placeholder:""})}),"Amazon Bedrock"==E&&(0,r.jsx)(eo.Z.Item,{rules:[{required:!0,message:"Required"}],label:"AWS Region Name",name:"aws_region_name",tooltip:"You can provide the raw key or the environment variable (e.g. `os.environ/MY_SECRET_KEY`).",children:(0,r.jsx)(Q.Z,{placeholder:"us-east-1"})}),(0,r.jsx)(eo.Z.Item,{label:"LiteLLM Params",name:"litellm_extra_params",tooltip:"Optional litellm params used for making a litellm.completion() call.",className:"mb-0",children:(0,r.jsx)(e$.Z,{rows:4,placeholder:'{ "rpm": 100, "timeout": 0, "stream_timeout": 0 }'})}),(0,r.jsxs)(eY.Z,{children:[(0,r.jsx)(eJ.Z,{span:10}),(0,r.jsx)(eJ.Z,{span:10,children:(0,r.jsxs)(es.Z,{className:"mb-3 mt-1",children:["Pass JSON of litellm supported params ",(0,r.jsx)(e8,{href:"https://docs.litellm.ai/docs/completion/input",target:"_blank",children:"litellm.completion() call"})]})})]})]}),(0,r.jsx)("div",{style:{textAlign:"center",marginTop:"10px"},children:(0,r.jsx)(em.ZP,{htmlType:"submit",children:"Add Model"})}),(0,r.jsx)(eH.Z,{title:"Get help on our github",children:(0,r.jsx)(eR.default.Link,{href:"https://github.com/BerriAI/litellm/issues",children:"Need Help?"})})]})})]}),(0,r.jsx)(eK.Z,{children:(0,r.jsxs)(ey.Z,{children:[(0,r.jsx)(es.Z,{children:"`/health` will run a very small request through your models configured on litellm"}),(0,r.jsx)(X.Z,{onClick:ls,children:"Run `/health`"}),C&&(0,r.jsx)("pre",{children:JSON.stringify(C,null,2)})]})}),(0,r.jsxs)(eK.Z,{children:[(0,r.jsxs)($.Z,{numItems:2,className:"mt-2",children:[(0,r.jsxs)(eJ.Z,{children:[(0,r.jsx)(es.Z,{children:"Select Time Range"}),(0,r.jsx)(eL.Z,{enableSelect:!0,value:eE,onValueChange:e=>{eI(e),la(U,e.from,e.to)}})]}),(0,r.jsxs)(eJ.Z,{children:[(0,r.jsx)(es.Z,{children:"Select Model Group"}),(0,r.jsx)(eS.Z,{className:"mb-4 mt-2",defaultValue:U||M[0],value:U||M[0],children:M.map((e,l)=>(0,r.jsx)(eN.Z,{value:e,onClick:()=>la(e,eE.from,eE.to),children:e},l))})]})]}),(0,r.jsxs)($.Z,{numItems:2,children:[(0,r.jsx)(eJ.Z,{children:(0,r.jsxs)(ey.Z,{className:"mr-2 max-h-[400px] min-h-[400px]",children:[(0,r.jsx)(ea.Z,{children:"Avg Latency per Token"}),(0,r.jsx)("p",{className:"text-gray-500 italic",children:" (seconds/token)"}),(0,r.jsx)(es.Z,{className:"text-gray-500 italic mt-1 mb-1",children:"average Latency for successfull requests divided by the total 
tokens"}),q&&W&&(0,r.jsx)(eG.Z,{title:"Model Latency",className:"h-72",data:q,showLegend:!1,index:"date",categories:W,connectNulls:!0,customTooltip:e=>{var l,t;let{payload:s,active:a}=e;if(!a||!s)return null;let n=null===(t=s[0])||void 0===t?void 0:null===(l=t.payload)||void 0===l?void 0:l.date,o=s.sort((e,l)=>l.value-e.value);if(o.length>5){let e=o.length-5;(o=o.slice(0,5)).push({dataKey:"".concat(e," other deployments"),value:s.slice(5).reduce((e,l)=>e+l.value,0),color:"gray"})}return(0,r.jsxs)("div",{className:"w-150 rounded-tremor-default border border-tremor-border bg-tremor-background p-2 text-tremor-default shadow-tremor-dropdown",children:[n&&(0,r.jsxs)("p",{className:"text-tremor-content-emphasis mb-2",children:["Date: ",n]}),o.map((e,l)=>{let t=parseFloat(e.value.toFixed(5)),s=0===t&&e.value>0?"<0.00001":t.toFixed(5);return(0,r.jsxs)("div",{className:"flex justify-between",children:[(0,r.jsxs)("div",{className:"flex items-center space-x-2",children:[(0,r.jsx)("div",{className:"w-2 h-2 mt-1 rounded-full bg-".concat(e.color,"-500")}),(0,r.jsx)("p",{className:"text-tremor-content",children:e.dataKey})]}),(0,r.jsx)("p",{className:"font-medium text-tremor-content-emphasis text-righ ml-2",children:s})]},l)})]})}})]})}),(0,r.jsx)(eJ.Z,{children:(0,r.jsx)(ey.Z,{className:"ml-2 max-h-[400px] min-h-[400px] overflow-y-auto",children:(0,r.jsxs)(eZ.Z,{children:[(0,r.jsx)(eb.Z,{children:(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ek.Z,{children:"Deployment"}),(0,r.jsx)(ek.Z,{children:"Success Responses"}),(0,r.jsxs)(ek.Z,{children:["Slow Responses ",(0,r.jsx)("p",{children:"Success Responses taking 600+s"})]})]})}),(0,r.jsx)(e_.Z,{children:ej.map((e,l)=>(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ew.Z,{children:e.api_base}),(0,r.jsx)(ew.Z,{children:e.total_count}),(0,r.jsx)(ew.Z,{children:e.slow_count})]},l))})]})})})]}),(0,r.jsxs)(ey.Z,{className:"mt-4",children:[(0,r.jsx)(ea.Z,{children:"Exceptions per Model"}),(0,r.jsx)(eW.Z,{className:"h-72",data:er,index:"model",categories:ec,stack:!0,colors:["indigo-300","rose-200","#ffcc33"],yAxisWidth:30})]})]}),(0,r.jsxs)(eK.Z,{children:[(0,r.jsxs)("div",{className:"flex items-center",children:[(0,r.jsx)(es.Z,{children:"Filter by Public Model Name"}),(0,r.jsx)(eS.Z,{className:"mb-4 mt-2 ml-2 w-50",defaultValue:U||M[0],value:U||M[0],onValueChange:e=>D(e),children:M.map((e,l)=>(0,r.jsx)(eN.Z,{value:e,onClick:()=>D(e),children:e},l))})]}),(0,r.jsxs)(ea.Z,{children:["Retry Policy for ",U]}),(0,r.jsx)(es.Z,{className:"mb-6",children:"How many retries should be attempted based on the Exception"}),e6&&(0,r.jsx)("table",{children:(0,r.jsx)("tbody",{children:Object.entries(e6).map((e,l)=>{var t;let[s,a]=e,n=null==eC?void 0:null===(t=eC[U])||void 0===t?void 0:t[a];return null==n&&(n=eP),(0,r.jsxs)("tr",{className:"flex justify-between items-center mt-2",children:[(0,r.jsx)("td",{children:(0,r.jsx)(es.Z,{children:s})}),(0,r.jsx)("td",{children:(0,r.jsx)(ed.Z,{className:"ml-5",value:n,min:0,step:1,onChange:e=>{eT(l=>{var t;let s=null!==(t=null==l?void 0:l[U])&&void 0!==t?t:{};return{...null!=l?l:{},[U]:{...s,[a]:e}}})}})})]},l)})})}),(0,r.jsx)(X.Z,{className:"mt-6 mr-8",onClick:le,children:"Save"})]})]})]})})};let{Option:le}=en.default;var ll=e=>{let{userID:l,accessToken:t,teams:s}=e,[a]=eo.Z.useForm(),[o,i]=(0,n.useState)(!1),[c,d]=(0,n.useState)(null),[m,h]=(0,n.useState)([]);(0,n.useEffect)(()=>{(async()=>{try{let e=await N(t,l,"any"),s=[];for(let l=0;l{i(!1),a.resetFields()},p=()=>{i(!1),d(null),a.resetFields()},j=async e=>{try{u.ZP.info("Making API 
Call"),i(!0),console.log("formValues in create user:",e);let s=await g(t,null,e);console.log("user create Response:",s),d(s.key),u.ZP.success("API user Created"),a.resetFields(),localStorage.removeItem("userData"+l)}catch(e){console.error("Error creating the user:",e)}};return(0,r.jsxs)("div",{children:[(0,r.jsx)(X.Z,{className:"mx-auto",onClick:()=>i(!0),children:"+ Invite User"}),(0,r.jsxs)(ei.Z,{title:"Invite User",visible:o,width:800,footer:null,onOk:x,onCancel:p,children:[(0,r.jsx)(es.Z,{className:"mb-1",children:"Invite a user to login to the Admin UI and create Keys"}),(0,r.jsx)(es.Z,{className:"mb-6",children:(0,r.jsx)("b",{children:"Note: SSO Setup Required for this"})}),(0,r.jsxs)(eo.Z,{form:a,onFinish:j,labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsx)(eo.Z.Item,{label:"User Email",name:"user_email",children:(0,r.jsx)(Q.Z,{placeholder:""})}),(0,r.jsx)(eo.Z.Item,{label:"Team ID",name:"team_id",children:(0,r.jsx)(en.default,{placeholder:"Select Team ID",style:{width:"100%"},children:s?s.map(e=>(0,r.jsx)(le,{value:e.team_id,children:e.team_alias},e.team_id)):(0,r.jsx)(le,{value:null,children:"Default Team"},"default")})}),(0,r.jsx)(eo.Z.Item,{label:"Metadata",name:"metadata",children:(0,r.jsx)(ec.Z.TextArea,{rows:4,placeholder:"Enter metadata as JSON"})}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(em.ZP,{htmlType:"submit",children:"Create User"})})]})]}),c&&(0,r.jsxs)(ei.Z,{title:"User Created Successfully",visible:o,onOk:x,onCancel:p,footer:null,children:[(0,r.jsx)("p",{children:"User has been created to access your proxy. Please Ask them to Log In."}),(0,r.jsx)("br",{}),(0,r.jsx)("p",{children:(0,r.jsx)("b",{children:"Note: This Feature is only supported through SSO on the Admin UI"})})]})]})},lt=e=>{let{accessToken:l,token:t,keys:s,userRole:a,userID:o,teams:i,setKeys:c}=e,[d,m]=(0,n.useState)(null),[u,h]=(0,n.useState)(null),[x,p]=(0,n.useState)(0),[j,g]=n.useState(null),[y,f]=(0,n.useState)(null);return((0,n.useEffect)(()=>{if(!l||!t||!a||!o)return;let e=async()=>{try{let e=await Z(l,null,a,!0,x,25);console.log("user data response:",e),m(e)}catch(e){console.error("There was an error fetching the model data",e)}};l&&t&&a&&o&&e()},[l,t,a,o,x]),d&&l&&t&&a&&o)?(0,r.jsx)("div",{style:{width:"100%"},children:(0,r.jsxs)($.Z,{className:"gap-2 p-2 h-[80vh] w-full mt-8",children:[(0,r.jsx)(ll,{userID:o,accessToken:l,teams:i}),(0,r.jsxs)(ey.Z,{className:"w-full mx-auto flex-auto overflow-y-auto max-h-[80vh] mb-4",children:[(0,r.jsx)("div",{className:"mb-4 mt-1",children:(0,r.jsx)(es.Z,{children:"These are Users on LiteLLM that created API Keys. 
Automatically tracked by LiteLLM"})}),(0,r.jsx)(eB.Z,{children:(0,r.jsxs)(ez.Z,{children:[(0,r.jsx)(eK.Z,{children:(0,r.jsxs)(eZ.Z,{className:"mt-5",children:[(0,r.jsx)(eb.Z,{children:(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ek.Z,{children:"User ID"}),(0,r.jsx)(ek.Z,{children:"User Email"}),(0,r.jsx)(ek.Z,{children:"User Models"}),(0,r.jsx)(ek.Z,{children:"User Spend ($ USD)"}),(0,r.jsx)(ek.Z,{children:"User Max Budget ($ USD)"}),(0,r.jsx)(ek.Z,{children:"User API Key Aliases"})]})}),(0,r.jsx)(e_.Z,{children:d.map(e=>{var l;return(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ew.Z,{children:e.user_id}),(0,r.jsx)(ew.Z,{children:e.user_email}),(0,r.jsx)(ew.Z,{children:e.models&&e.models.length>0?e.models:"All Models"}),(0,r.jsx)(ew.Z,{children:e.spend?null===(l=e.spend)||void 0===l?void 0:l.toFixed(2):0}),(0,r.jsx)(ew.Z,{children:e.max_budget?e.max_budget:"Unlimited"}),(0,r.jsx)(ew.Z,{children:(0,r.jsx)($.Z,{numItems:2,children:e&&e.key_aliases&&e.key_aliases.filter(e=>null!==e).length>0?(0,r.jsx)(eg.Z,{size:"xs",color:"indigo",children:e.key_aliases.filter(e=>null!==e).join(", ")}):(0,r.jsx)(eg.Z,{size:"xs",color:"gray",children:"No Keys"})})})]},e.user_id)})})]})}),(0,r.jsx)(eK.Z,{children:(0,r.jsxs)("div",{className:"flex items-center",children:[(0,r.jsx)("div",{className:"flex-1"}),(0,r.jsx)("div",{className:"flex-1 flex justify-between items-center"})]})})]})})]}),function(){if(!d)return null;let e=Math.ceil(d.length/25);return(0,r.jsxs)("div",{className:"flex justify-between items-center",children:[(0,r.jsxs)("div",{children:["Showing Page ",x+1," of ",e]}),(0,r.jsxs)("div",{className:"flex",children:[(0,r.jsx)("button",{className:"bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded-l focus:outline-none",disabled:0===x,onClick:()=>p(x-1),children:"β Prev"}),(0,r.jsx)("button",{className:"bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded-r focus:outline-none",onClick:()=>{p(x+1)},children:"Next β"})]})]})}()]})}):(0,r.jsx)("div",{children:"Loading..."})},ls=e=>{let{teams:l,searchParams:t,accessToken:s,setTeams:a,userID:o,userRole:i}=e,[c]=eo.Z.useForm(),[d]=eo.Z.useForm(),{Title:m,Paragraph:h}=eR.default,[x,p]=(0,n.useState)(""),[j,g]=(0,n.useState)(!1),[y,Z]=(0,n.useState)(l?l[0]:null),[w,b]=(0,n.useState)(!1),[k,v]=(0,n.useState)(!1),[S,A]=(0,n.useState)([]),[E,I]=(0,n.useState)(!1),[C,T]=(0,n.useState)(null),[P,O]=(0,n.useState)({}),F=e=>{Z(e),g(!0)},R=async e=>{let t=e.team_id;if(console.log("handleEditSubmit:",e),null==s)return;let r=await U(s,e);l&&a(l.map(e=>e.team_id===t?r.data:e)),u.ZP.success("Team updated successfully"),g(!1),Z(null)},L=async e=>{T(e),I(!0)},B=async()=>{if(null!=C&&null!=l&&null!=s){try{await f(s,C);let e=l.filter(e=>e.team_id!==C);a(e)}catch(e){console.error("Error deleting the team:",e)}I(!1),T(null)}};(0,n.useEffect)(()=>{let e=async()=>{try{if(null===o||null===i||null===s||null===l)return;console.log("fetching team info:");let e={};for(let t=0;t<(null==l?void 0:l.length);t++){let a=l[t].team_id,r=await _(s,a);console.log("teamInfo response:",r),null!==r&&(e={...e,[a]:r})}O(e)}catch(e){console.error("Error fetching team info:",e)}};(async()=>{try{if(null===o||null===i)return;if(null!==s){let e=(await N(s,o,i)).data.map(e=>e.id);console.log("available_model_names:",e),A(e)}}catch(e){console.error("Error fetching user models:",e)}})(),e()},[s,o,i,l]);let K=async e=>{try{if(null!=s){var t;let r=null==e?void 0:e.team_alias;if((null!==(t=null==l?void 0:l.map(e=>e.team_alias))&&void 0!==t?t:[]).includes(r))throw Error("Team alias 
".concat(r," already exists, please pick another alias"));u.ZP.info("Creating Team");let n=await M(s,e);null!==l?a([...l,n]):a([n]),console.log("response for team create call: ".concat(n)),u.ZP.success("Team created"),b(!1)}}catch(e){console.error("Error creating the team:",e),u.ZP.error("Error creating the team: "+e,20)}},z=async e=>{try{if(null!=s&&null!=l){u.ZP.info("Adding Member");let t={role:"user",user_email:e.user_email,user_id:e.user_id},r=await D(s,y.team_id,t);console.log("response for team create call: ".concat(r.data));let n=l.findIndex(e=>(console.log("team.team_id=".concat(e.team_id,"; response.data.team_id=").concat(r.data.team_id)),e.team_id===r.data.team_id));if(console.log("foundIndex: ".concat(n)),-1!==n){let e=[...l];e[n]=r.data,a(e),Z(r.data)}v(!1)}}catch(e){console.error("Error creating the team:",e)}};return console.log("received teams ".concat(JSON.stringify(l))),(0,r.jsx)("div",{className:"w-full mx-4",children:(0,r.jsxs)($.Z,{numItems:1,className:"gap-2 p-8 h-[75vh] w-full mt-2",children:[(0,r.jsxs)(H.Z,{numColSpan:1,children:[(0,r.jsx)(m,{level:4,children:"All Teams"}),(0,r.jsxs)(ey.Z,{className:"w-full mx-auto flex-auto overflow-y-auto max-h-[50vh]",children:[(0,r.jsxs)(eZ.Z,{children:[(0,r.jsx)(eb.Z,{children:(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ek.Z,{children:"Team Name"}),(0,r.jsx)(ek.Z,{children:"Spend (USD)"}),(0,r.jsx)(ek.Z,{children:"Budget (USD)"}),(0,r.jsx)(ek.Z,{children:"Models"}),(0,r.jsx)(ek.Z,{children:"TPM / RPM Limits"}),(0,r.jsx)(ek.Z,{children:"Info"})]})}),(0,r.jsx)(e_.Z,{children:l&&l.length>0?l.map(e=>(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ew.Z,{style:{maxWidth:"4px",whiteSpace:"pre-wrap",overflow:"hidden"},children:e.team_alias}),(0,r.jsx)(ew.Z,{style:{maxWidth:"4px",whiteSpace:"pre-wrap",overflow:"hidden"},children:e.spend}),(0,r.jsx)(ew.Z,{style:{maxWidth:"4px",whiteSpace:"pre-wrap",overflow:"hidden"},children:e.max_budget?e.max_budget:"No limit"}),(0,r.jsx)(ew.Z,{style:{maxWidth:"8-x",whiteSpace:"pre-wrap",overflow:"hidden"},children:Array.isArray(e.models)?(0,r.jsx)("div",{style:{display:"flex",flexDirection:"column"},children:0===e.models.length?(0,r.jsx)(eg.Z,{size:"xs",className:"mb-1",color:"red",children:(0,r.jsx)(es.Z,{children:"All Proxy Models"})}):e.models.map((e,l)=>"all-proxy-models"===e?(0,r.jsx)(eg.Z,{size:"xs",className:"mb-1",color:"red",children:(0,r.jsx)(es.Z,{children:"All Proxy Models"})},l):(0,r.jsx)(eg.Z,{size:"xs",className:"mb-1",color:"blue",children:(0,r.jsx)(es.Z,{children:e.length>30?"".concat(e.slice(0,30),"..."):e})},l))}):null}),(0,r.jsx)(ew.Z,{style:{maxWidth:"4px",whiteSpace:"pre-wrap",overflow:"hidden"},children:(0,r.jsxs)(es.Z,{children:["TPM: ",e.tpm_limit?e.tpm_limit:"Unlimited"," ",(0,r.jsx)("br",{}),"RPM:"," ",e.rpm_limit?e.rpm_limit:"Unlimited"]})}),(0,r.jsxs)(ew.Z,{children:[(0,r.jsxs)(es.Z,{children:[P&&e.team_id&&P[e.team_id]&&P[e.team_id].keys&&P[e.team_id].keys.length," ","Keys"]}),(0,r.jsxs)(es.Z,{children:[P&&e.team_id&&P[e.team_id]&&P[e.team_id].team_info&&P[e.team_id].team_info.members_with_roles&&P[e.team_id].team_info.members_with_roles.length," ","Members"]})]}),(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(ef.Z,{icon:ep.Z,size:"sm",onClick:()=>F(e)}),(0,r.jsx)(ef.Z,{onClick:()=>L(e.team_id),icon:ej.Z,size:"sm"})]})]},e.team_id)):null})]}),E&&(0,r.jsx)("div",{className:"fixed z-10 inset-0 overflow-y-auto",children:(0,r.jsxs)("div",{className:"flex items-end justify-center min-h-screen pt-4 px-4 pb-20 text-center sm:block sm:p-0",children:[(0,r.jsx)("div",{className:"fixed inset-0 
transition-opacity","aria-hidden":"true",children:(0,r.jsx)("div",{className:"absolute inset-0 bg-gray-500 opacity-75"})}),(0,r.jsx)("span",{className:"hidden sm:inline-block sm:align-middle sm:h-screen","aria-hidden":"true",children:"β"}),(0,r.jsxs)("div",{className:"inline-block align-bottom bg-white rounded-lg text-left overflow-hidden shadow-xl transform transition-all sm:my-8 sm:align-middle sm:max-w-lg sm:w-full",children:[(0,r.jsx)("div",{className:"bg-white px-4 pt-5 pb-4 sm:p-6 sm:pb-4",children:(0,r.jsx)("div",{className:"sm:flex sm:items-start",children:(0,r.jsxs)("div",{className:"mt-3 text-center sm:mt-0 sm:ml-4 sm:text-left",children:[(0,r.jsx)("h3",{className:"text-lg leading-6 font-medium text-gray-900",children:"Delete Team"}),(0,r.jsx)("div",{className:"mt-2",children:(0,r.jsx)("p",{className:"text-sm text-gray-500",children:"Are you sure you want to delete this team ?"})})]})})}),(0,r.jsxs)("div",{className:"bg-gray-50 px-4 py-3 sm:px-6 sm:flex sm:flex-row-reverse",children:[(0,r.jsx)(X.Z,{onClick:B,color:"red",className:"ml-2",children:"Delete"}),(0,r.jsx)(X.Z,{onClick:()=>{I(!1),T(null)},children:"Cancel"})]})]})]})})]})]}),(0,r.jsxs)(H.Z,{numColSpan:1,children:[(0,r.jsx)(X.Z,{className:"mx-auto",onClick:()=>b(!0),children:"+ Create New Team"}),(0,r.jsx)(ei.Z,{title:"Create Team",visible:w,width:800,footer:null,onOk:()=>{b(!1),c.resetFields()},onCancel:()=>{b(!1),c.resetFields()},children:(0,r.jsxs)(eo.Z,{form:c,onFinish:K,labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(eo.Z.Item,{label:"Team Name",name:"team_alias",rules:[{required:!0,message:"Please input a team name"}],children:(0,r.jsx)(Q.Z,{placeholder:""})}),(0,r.jsx)(eo.Z.Item,{label:"Models",name:"models",children:(0,r.jsxs)(en.default,{mode:"multiple",placeholder:"Select models",style:{width:"100%"},children:[(0,r.jsx)(en.default.Option,{value:"all-proxy-models",children:"All Proxy Models"},"all-proxy-models"),S.map(e=>(0,r.jsx)(en.default.Option,{value:e,children:e},e))]})}),(0,r.jsx)(eo.Z.Item,{label:"Max Budget (USD)",name:"max_budget",children:(0,r.jsx)(ed.Z,{step:.01,precision:2,width:200})}),(0,r.jsx)(eo.Z.Item,{label:"Tokens per minute Limit (TPM)",name:"tpm_limit",children:(0,r.jsx)(ed.Z,{step:1,width:400})}),(0,r.jsx)(eo.Z.Item,{label:"Requests per minute Limit (RPM)",name:"rpm_limit",children:(0,r.jsx)(ed.Z,{step:1,width:400})})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(em.ZP,{htmlType:"submit",children:"Create Team"})})]})})]}),(0,r.jsxs)(H.Z,{numColSpan:1,children:[(0,r.jsx)(m,{level:4,children:"Team Members"}),(0,r.jsx)(h,{children:"If you belong to multiple teams, this setting controls which teams members you see."}),l&&l.length>0?(0,r.jsx)(eS.Z,{defaultValue:"0",children:l.map((e,l)=>(0,r.jsx)(eN.Z,{value:String(l),onClick:()=>{Z(e)},children:e.team_alias},l))}):(0,r.jsxs)(h,{children:["No team created. 
",(0,r.jsx)("b",{children:"Defaulting to personal account."})]})]}),(0,r.jsxs)(H.Z,{numColSpan:1,children:[(0,r.jsx)(ey.Z,{className:"w-full mx-auto flex-auto overflow-y-auto max-h-[50vh]",children:(0,r.jsxs)(eZ.Z,{children:[(0,r.jsx)(eb.Z,{children:(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ek.Z,{children:"Member Name"}),(0,r.jsx)(ek.Z,{children:"Role"})]})}),(0,r.jsx)(e_.Z,{children:y?y.members_with_roles.map((e,l)=>(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ew.Z,{children:e.user_email?e.user_email:e.user_id?e.user_id:null}),(0,r.jsx)(ew.Z,{children:e.role})]},l)):null})]})}),y&&(0,r.jsx)(e=>{let{visible:l,onCancel:t,team:s,onSubmit:a}=e,[n]=eo.Z.useForm();return(0,r.jsx)(ei.Z,{title:"Edit Team",visible:l,width:800,footer:null,onOk:()=>{n.validateFields().then(e=>{a({...e,team_id:s.team_id}),n.resetFields()}).catch(e=>{console.error("Validation failed:",e)})},onCancel:t,children:(0,r.jsxs)(eo.Z,{form:n,onFinish:R,initialValues:s,labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(eo.Z.Item,{label:"Team Name",name:"team_alias",rules:[{required:!0,message:"Please input a team name"}],children:(0,r.jsx)(Q.Z,{})}),(0,r.jsx)(eo.Z.Item,{label:"Models",name:"models",children:(0,r.jsxs)(en.default,{mode:"multiple",placeholder:"Select models",style:{width:"100%"},children:[(0,r.jsx)(en.default.Option,{value:"all-proxy-models",children:"All Proxy Models"},"all-proxy-models"),S&&S.map(e=>(0,r.jsx)(en.default.Option,{value:e,children:e},e))]})}),(0,r.jsx)(eo.Z.Item,{label:"Max Budget (USD)",name:"max_budget",children:(0,r.jsx)(ed.Z,{step:.01,precision:2,width:200})}),(0,r.jsx)(eo.Z.Item,{label:"Tokens per minute Limit (TPM)",name:"tpm_limit",children:(0,r.jsx)(ed.Z,{step:1,width:400})}),(0,r.jsx)(eo.Z.Item,{label:"Requests per minute Limit (RPM)",name:"rpm_limit",children:(0,r.jsx)(ed.Z,{step:1,width:400})}),(0,r.jsx)(eo.Z.Item,{label:"Requests per minute Limit (RPM)",name:"team_id",hidden:!0})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(em.ZP,{htmlType:"submit",children:"Edit Team"})})]})})},{visible:j,onCancel:()=>{g(!1),Z(null)},team:y,onSubmit:R})]}),(0,r.jsxs)(H.Z,{numColSpan:1,children:[(0,r.jsx)(X.Z,{className:"mx-auto mb-5",onClick:()=>v(!0),children:"+ Add member"}),(0,r.jsx)(ei.Z,{title:"Add member",visible:k,width:800,footer:null,onOk:()=>{v(!1),d.resetFields()},onCancel:()=>{v(!1),d.resetFields()},children:(0,r.jsxs)(eo.Z,{form:c,onFinish:z,labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(eo.Z.Item,{label:"Email",name:"user_email",className:"mb-4",children:(0,r.jsx)(ec.Z,{name:"user_email",className:"px-3 py-2 border rounded-md w-full"})}),(0,r.jsx)("div",{className:"text-center mb-4",children:"OR"}),(0,r.jsx)(eo.Z.Item,{label:"User ID",name:"user_id",className:"mb-4",children:(0,r.jsx)(ec.Z,{name:"user_id",className:"px-3 py-2 border rounded-md w-full"})})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(em.ZP,{htmlType:"submit",children:"Add member"})})]})})]})]})})},la=t(18190),lr=e=>{let l,{searchParams:t,accessToken:s,showSSOBanner:a}=e,[o]=eo.Z.useForm(),[i]=eo.Z.useForm(),{Title:c,Paragraph:d}=eR.default,[m,h]=(0,n.useState)(""),[x,p]=(0,n.useState)(null),[j,g]=(0,n.useState)(!1),[y,f]=(0,n.useState)(!1),[Z,_]=(0,n.useState)(!1),[w,b]=(0,n.useState)(!1),[k,v]=(0,n.useState)(!1);try{l=window.location.origin}catch(e){l=""}l+="/fallback/login";let 
S=()=>{v(!1)},N=["proxy_admin","proxy_admin_viewer"];(0,n.useEffect)(()=>{(async()=>{if(null!=s){let e=[],l=await R(s,"proxy_admin_viewer");l.forEach(l=>{e.push({user_role:l.user_role,user_id:l.user_id,user_email:l.user_email})}),console.log("proxy viewers: ".concat(l));let t=await R(s,"proxy_admin");t.forEach(l=>{e.push({user_role:l.user_role,user_id:l.user_id,user_email:l.user_email})}),console.log("proxy admins: ".concat(t)),console.log("combinedList: ".concat(e)),p(e)}})()},[s]);let A=()=>{_(!1),i.resetFields()},E=()=>{_(!1),i.resetFields()},I=e=>(0,r.jsxs)(eo.Z,{form:o,onFinish:e,labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(eo.Z.Item,{label:"Email",name:"user_email",className:"mb-4",children:(0,r.jsx)(ec.Z,{name:"user_email",className:"px-3 py-2 border rounded-md w-full"})}),(0,r.jsx)("div",{className:"text-center mb-4",children:"OR"}),(0,r.jsx)(eo.Z.Item,{label:"User ID",name:"user_id",className:"mb-4",children:(0,r.jsx)(ec.Z,{name:"user_id",className:"px-3 py-2 border rounded-md w-full"})})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(em.ZP,{htmlType:"submit",children:"Add member"})})]}),C=(e,l,t)=>(0,r.jsxs)(eo.Z,{form:o,onFinish:e,labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(eo.Z.Item,{rules:[{required:!0,message:"Required"}],label:"User Role",name:"user_role",labelCol:{span:10},labelAlign:"left",children:(0,r.jsx)(eS.Z,{value:l,children:N.map((e,l)=>(0,r.jsx)(eN.Z,{value:e,children:e},l))})}),(0,r.jsx)(eo.Z.Item,{label:"Team ID",name:"user_id",hidden:!0,initialValue:t,valuePropName:"user_id",className:"mt-8",children:(0,r.jsx)(ec.Z,{value:t,disabled:!0})})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(em.ZP,{htmlType:"submit",children:"Update role"})})]}),T=async e=>{try{if(null!=s&&null!=x){u.ZP.info("Making API Call");let l=await K(s,e,null);console.log("response for team create call: ".concat(l));let t=x.findIndex(e=>(console.log("user.user_id=".concat(e.user_id,"; response.user_id=").concat(l.user_id)),e.user_id===l.user_id));console.log("foundIndex: ".concat(t)),-1==t&&(console.log("updates admin with new user"),x.push(l),p(x)),u.ZP.success("Refresh tab to see updated user role"),_(!1)}}catch(e){console.error("Error creating the key:",e)}},P=async e=>{try{if(null!=s&&null!=x){u.ZP.info("Making API Call");let l=await K(s,e,"proxy_admin_viewer");console.log("response for team create call: ".concat(l));let t=x.findIndex(e=>(console.log("user.user_id=".concat(e.user_id,"; response.user_id=").concat(l.user_id)),e.user_id===l.user_id));console.log("foundIndex: ".concat(t)),-1==t&&(console.log("updates admin with new user"),x.push(l),p(x)),g(!1)}}catch(e){console.error("Error creating the key:",e)}},O=async e=>{try{if(null!=s&&null!=x){u.ZP.info("Making API Call"),e.user_email,e.user_id;let l=await K(s,e,"proxy_admin");console.log("response for team create call: ".concat(l));let t=x.findIndex(e=>(console.log("user.user_id=".concat(e.user_id,"; response.user_id=").concat(l.user_id)),e.user_id===l.user_id));console.log("foundIndex: ".concat(t)),-1==t&&(console.log("updates admin with new user"),x.push(l),p(x)),f(!1)}}catch(e){console.error("Error creating the key:",e)}},F=async e=>{null!=s&&Y(s,{environment_variables:{PROXY_BASE_URL:e.proxy_base_url,GOOGLE_CLIENT_ID:e.google_client_id,GOOGLE_CLIENT_SECRET:e.google_client_secret}})};return console.log("admins: ".concat(null==x?void 
0:x.length)),(0,r.jsxs)("div",{className:"w-full m-2 mt-2 p-8",children:[(0,r.jsx)(c,{level:4,children:"Admin Access "}),(0,r.jsxs)(d,{children:[a&&(0,r.jsx)("a",{href:"https://docs.litellm.ai/docs/proxy/ui#restrict-ui-access",children:"Requires SSO Setup"}),(0,r.jsx)("br",{}),(0,r.jsx)("b",{children:"Proxy Admin: "})," Can create keys, teams, users, add models, etc. ",(0,r.jsx)("br",{}),(0,r.jsx)("b",{children:"Proxy Admin Viewer: "}),"Can just view spend. They cannot create keys, teams or grant users access to new models."," "]}),(0,r.jsxs)($.Z,{numItems:1,className:"gap-2 p-2 w-full",children:[(0,r.jsx)(H.Z,{numColSpan:1,children:(0,r.jsx)(ey.Z,{className:"w-full mx-auto flex-auto overflow-y-auto max-h-[50vh]",children:(0,r.jsxs)(eZ.Z,{children:[(0,r.jsx)(eb.Z,{children:(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ek.Z,{children:"Member Name"}),(0,r.jsx)(ek.Z,{children:"Role"})]})}),(0,r.jsx)(e_.Z,{children:x?x.map((e,l)=>(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ew.Z,{children:e.user_email?e.user_email:e.user_id?e.user_id:null}),(0,r.jsx)(ew.Z,{children:e.user_role}),(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(ef.Z,{icon:ep.Z,size:"sm",onClick:()=>_(!0)}),(0,r.jsx)(ei.Z,{title:"Update role",visible:Z,width:800,footer:null,onOk:A,onCancel:E,children:C(T,e.user_role,e.user_id)})]})]},l)):null})]})})}),(0,r.jsx)(H.Z,{numColSpan:1,children:(0,r.jsxs)("div",{className:"flex justify-start",children:[(0,r.jsx)(X.Z,{className:"mr-4 mb-5",onClick:()=>f(!0),children:"+ Add admin"}),(0,r.jsx)(ei.Z,{title:"Add admin",visible:y,width:800,footer:null,onOk:()=>{f(!1),i.resetFields()},onCancel:()=>{f(!1),i.resetFields()},children:I(O)}),(0,r.jsx)(X.Z,{className:"mb-5",onClick:()=>g(!0),children:"+ Add viewer"}),(0,r.jsx)(ei.Z,{title:"Add viewer",visible:j,width:800,footer:null,onOk:()=>{g(!1),i.resetFields()},onCancel:()=>{g(!1),i.resetFields()},children:I(P)})]})})]}),(0,r.jsxs)($.Z,{children:[(0,r.jsx)(c,{level:4,children:"Add SSO"}),(0,r.jsxs)("div",{className:"flex justify-start mb-4",children:[(0,r.jsx)(X.Z,{onClick:()=>b(!0),children:"Add SSO"}),(0,r.jsx)(ei.Z,{title:"Add SSO",visible:w,width:800,footer:null,onOk:()=>{b(!1),o.resetFields()},onCancel:()=>{b(!1),o.resetFields()},children:(0,r.jsxs)(eo.Z,{form:o,onFinish:e=>{O(e),F(e),b(!1),v(!0)},labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(eo.Z.Item,{label:"Admin Email",name:"user_email",rules:[{required:!0,message:"Please enter the email of the proxy admin"}],children:(0,r.jsx)(ec.Z,{})}),(0,r.jsx)(eo.Z.Item,{label:"PROXY BASE URL",name:"proxy_base_url",rules:[{required:!0,message:"Please enter the proxy base url"}],children:(0,r.jsx)(ec.Z,{})}),(0,r.jsx)(eo.Z.Item,{label:"GOOGLE CLIENT ID",name:"google_client_id",rules:[{required:!0,message:"Please enter the google client id"}],children:(0,r.jsx)(ec.Z.Password,{})}),(0,r.jsx)(eo.Z.Item,{label:"GOOGLE CLIENT SECRET",name:"google_client_secret",rules:[{required:!0,message:"Please enter the google client secret"}],children:(0,r.jsx)(ec.Z.Password,{})})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(em.ZP,{htmlType:"submit",children:"Save"})})]})}),(0,r.jsxs)(ei.Z,{title:"SSO Setup Instructions",visible:k,width:800,footer:null,onOk:S,onCancel:()=>{v(!1)},children:[(0,r.jsx)("p",{children:"Follow these steps to complete the SSO setup:"}),(0,r.jsx)(es.Z,{className:"mt-2",children:"1. DO NOT Exit this TAB"}),(0,r.jsx)(es.Z,{className:"mt-2",children:"2. 
Open a new tab, visit your proxy base url"}),(0,r.jsx)(es.Z,{className:"mt-2",children:"3. Confirm your SSO is configured correctly and you can login on the new Tab"}),(0,r.jsx)(es.Z,{className:"mt-2",children:"4. If Step 3 is successful, you can close this tab"}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(em.ZP,{onClick:S,children:"Done"})})]})]}),(0,r.jsxs)(la.Z,{title:"Login without SSO",color:"teal",children:["If you need to login without sso, you can access ",(0,r.jsxs)("a",{href:l,target:"_blank",children:[(0,r.jsx)("b",{children:l})," "]})]})]})]})},ln=t(42556);let lo=[{name:"slack",variables:{LANGFUSE_HOST:null,LANGFUSE_PUBLIC_KEY:null,LANGFUSE_SECRET_KEY:null,OPENMETER_API_KEY:null,SLACK_WEBHOOK_URL:null}},{name:"langfuse",variables:{LANGFUSE_HOST:null,LANGFUSE_PUBLIC_KEY:null,LANGFUSE_SECRET_KEY:null,OPENMETER_API_KEY:null,SLACK_WEBHOOK_URL:null}},{name:"openmeter",variables:{LANGFUSE_HOST:null,LANGFUSE_PUBLIC_KEY:null,LANGFUSE_SECRET_KEY:null,OPENMETER_API_KEY:null,SLACK_WEBHOOK_URL:null}}];var li=e=>{let{accessToken:l,userRole:t,userID:s}=e,[a,o]=(0,n.useState)(lo),[i,c]=(0,n.useState)([]),[d,m]=(0,n.useState)(!1),[h]=eo.Z.useForm(),[x,p]=(0,n.useState)(null),[j,g]=(0,n.useState)([]),[y,f]=(0,n.useState)(""),[Z,_]=(0,n.useState)({}),[w,b]=(0,n.useState)([]),k=e=>{w.includes(e)?b(w.filter(l=>l!==e)):b([...w,e])},v={llm_exceptions:"LLM Exceptions",llm_too_slow:"LLM Responses Too Slow",llm_requests_hanging:"LLM Requests Hanging",budget_alerts:"Budget Alerts (API Keys, Users)",db_exceptions:"Database Exceptions (Read/Write)",daily_reports:"Weekly/Monthly Spend Reports"};(0,n.useEffect)(()=>{l&&t&&s&&V(l,s,t).then(e=>{console.log("callbacks",e);let l=lo;o(l=l.map(l=>{let t=e.callbacks.find(e=>e.name===l.name);return t?{...l,variables:{...l.variables,...t.variables}}:l}));let t=e.alerts;if(console.log("alerts_data",t),t&&t.length>0){let e=t[0];console.log("_alert_info",e);let l=e.variables.SLACK_WEBHOOK_URL;console.log("catch_all_webhook",l),b(e.active_alerts),f(l),_(e.alerts_to_webhook)}c(t)})},[l,t,s]);let S=e=>w&&w.includes(e),N=e=>{if(!l)return;let t=Object.fromEntries(Object.entries(e.variables).map(e=>{var l;let[t,s]=e;return[t,(null===(l=document.querySelector('input[name="'.concat(t,'"]')))||void 0===l?void 0:l.value)||s]}));console.log("updatedVariables",t),console.log("updateAlertTypes",j);let s={environment_variables:t,litellm_settings:{success_callback:[e.name]}};try{Y(l,s)}catch(e){u.ZP.error("Failed to update callback: "+e,20)}u.ZP.success("Callback updated successfully")},A=()=>{l&&h.validateFields().then(e=>{if(console.log("Form values:",e),"langfuse"===e.callback){Y(l,{environment_variables:{LANGFUSE_PUBLIC_KEY:e.langfusePublicKey,LANGFUSE_SECRET_KEY:e.langfusePrivateKey},litellm_settings:{success_callback:[e.callback]}});let t={name:e.callback,variables:{SLACK_WEBHOOK_URL:null,LANGFUSE_HOST:null,LANGFUSE_PUBLIC_KEY:e.langfusePublicKey,LANGFUSE_SECRET_KEY:e.langfusePrivateKey,OPENMETER_API_KEY:null}};o(a?[...a,t]:[t])}else if("slack"===e.callback){console.log("values.slackWebhookUrl: ".concat(e.slackWebhookUrl)),Y(l,{general_settings:{alerting:["slack"],alerting_threshold:300},environment_variables:{SLACK_WEBHOOK_URL:e.slackWebhookUrl}}),console.log("values.callback: ".concat(e.callback));let t={name:e.callback,variables:{SLACK_WEBHOOK_URL:e.slackWebhookUrl,LANGFUSE_HOST:null,LANGFUSE_PUBLIC_KEY:null,LANGFUSE_SECRET_KEY:null,OPENMETER_API_KEY:null}};o(a?[...a,t]:[t])}else 
if("openmeter"==e.callback){console.log("values.openMeterApiKey: ".concat(e.openMeterApiKey)),Y(l,{environment_variables:{OPENMETER_API_KEY:e.openMeterApiKey},litellm_settings:{success_callback:[e.callback]}});let t={name:e.callback,variables:{SLACK_WEBHOOK_URL:null,LANGFUSE_HOST:null,LANGFUSE_PUBLIC_KEY:null,LANGFUSE_SECRET_KEY:null,OPENMETER_API_KEY:e.openMeterAPIKey}};o(a?[...a,t]:[t])}m(!1),h.resetFields(),p(null)})};return l?(console.log("callbacks: ".concat(a)),(0,r.jsxs)("div",{className:"w-full mx-4",children:[(0,r.jsxs)($.Z,{numItems:1,className:"gap-2 p-8 w-full mt-2",children:[(0,r.jsx)(la.Z,{title:"[UI] Presidio PII + Guardrails Coming Soon. https://docs.litellm.ai/docs/proxy/pii_masking",color:"sky"}),(0,r.jsxs)(eB.Z,{children:[(0,r.jsxs)(eD.Z,{variant:"line",defaultValue:"1",children:[(0,r.jsx)(eU.Z,{value:"1",children:"Logging Callbacks"}),(0,r.jsx)(eU.Z,{value:"2",children:"Alerting"})]}),(0,r.jsxs)(ez.Z,{children:[(0,r.jsx)(eK.Z,{children:(0,r.jsx)(ey.Z,{children:(0,r.jsxs)(eZ.Z,{children:[(0,r.jsx)(eb.Z,{children:(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ek.Z,{children:"Callback"}),(0,r.jsx)(ek.Z,{children:"Callback Env Vars"})]})}),(0,r.jsx)(e_.Z,{children:a.filter(e=>"slack"!==e.name).map((e,t)=>{var s;return(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ew.Z,{children:(0,r.jsx)(eg.Z,{color:"emerald",children:e.name})}),(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)("ul",{children:Object.entries(null!==(s=e.variables)&&void 0!==s?s:{}).filter(l=>{let[t,s]=l;return t.toLowerCase().includes(e.name)}).map(e=>{let[l,t]=e;return(0,r.jsxs)("li",{children:[(0,r.jsx)(es.Z,{className:"mt-2",children:l}),"LANGFUSE_HOST"===l?(0,r.jsx)("p",{children:"default value=https://cloud.langfuse.com"}):(0,r.jsx)("div",{}),(0,r.jsx)(Q.Z,{name:l,defaultValue:t,type:"password"})]},l)})}),(0,r.jsx)(X.Z,{className:"mt-2",onClick:()=>N(e),children:"Save Changes"}),(0,r.jsx)(X.Z,{onClick:()=>z(l,e.name),className:"mx-2",children:"Test Callback"})]})]},t)})})]})})}),(0,r.jsx)(eK.Z,{children:(0,r.jsxs)(ey.Z,{children:[(0,r.jsxs)(es.Z,{className:"my-2",children:["Alerts are only supported for Slack Webhook URLs. 
Get your webhook urls from ",(0,r.jsx)("a",{href:"https://api.slack.com/messaging/webhooks",target:"_blank",style:{color:"blue"},children:"here"})]}),(0,r.jsxs)(eZ.Z,{children:[(0,r.jsx)(eb.Z,{children:(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ek.Z,{}),(0,r.jsx)(ek.Z,{}),(0,r.jsx)(ek.Z,{children:"Slack Webhook URL"})]})}),(0,r.jsx)(e_.Z,{children:Object.entries(v).map((e,l)=>{let[t,s]=e;return(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ew.Z,{children:(0,r.jsx)(ln.Z,{id:"switch",name:"switch",checked:S(t),onChange:()=>k(t)})}),(0,r.jsx)(ew.Z,{children:(0,r.jsx)(es.Z,{children:s})}),(0,r.jsx)(ew.Z,{children:(0,r.jsx)(Q.Z,{name:t,type:"password",defaultValue:Z&&Z[t]?Z[t]:y})})]},l)})})]}),(0,r.jsx)(X.Z,{size:"xs",className:"mt-2",onClick:()=>{if(!l)return;let e={};Object.entries(v).forEach(l=>{let[t,s]=l,a=document.querySelector('input[name="'.concat(t,'"]'));console.log("key",t),console.log("webhookInput",a);let r=(null==a?void 0:a.value)||"";console.log("newWebhookValue",r),e[t]=r}),console.log("updatedAlertToWebhooks",e);let t={general_settings:{alert_to_webhook_url:e,alert_types:w}};console.log("payload",t);try{Y(l,t)}catch(e){u.ZP.error("Failed to update alerts: "+e,20)}u.ZP.success("Alerts updated successfully")},children:"Save Changes"}),(0,r.jsx)(X.Z,{onClick:()=>z(l,"slack"),className:"mx-2",children:"Test Alerts"})]})})]})]})]}),(0,r.jsx)(ei.Z,{title:"Add Callback",visible:d,onOk:A,width:800,onCancel:()=>{m(!1),h.resetFields(),p(null)},footer:null,children:(0,r.jsxs)(eo.Z,{form:h,layout:"vertical",onFinish:A,children:[(0,r.jsx)(eo.Z.Item,{label:"Callback",name:"callback",rules:[{required:!0,message:"Please select a callback"}],children:(0,r.jsxs)(en.default,{onChange:e=>{p(e)},children:[(0,r.jsx)(en.default.Option,{value:"langfuse",children:"langfuse"}),(0,r.jsx)(en.default.Option,{value:"openmeter",children:"openmeter"})]})}),"langfuse"===x&&(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(eo.Z.Item,{label:"LANGFUSE_PUBLIC_KEY",name:"langfusePublicKey",rules:[{required:!0,message:"Please enter the public key"}],children:(0,r.jsx)(Q.Z,{type:"password"})}),(0,r.jsx)(eo.Z.Item,{label:"LANGFUSE_PRIVATE_KEY",name:"langfusePrivateKey",rules:[{required:!0,message:"Please enter the private key"}],children:(0,r.jsx)(Q.Z,{type:"password"})})]}),"openmeter"==x&&(0,r.jsx)(r.Fragment,{children:(0,r.jsx)(eo.Z.Item,{label:"OPENMETER_API_KEY",name:"openMeterApiKey",rules:[{required:!0,message:"Please enter the openmeter api key"}],children:(0,r.jsx)(Q.Z,{type:"password"})})}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(em.ZP,{htmlType:"submit",children:"Save"})})]})})]})):null};let{Option:lc}=en.default;var ld=e=>{let{models:l,accessToken:t,routerSettings:s,setRouterSettings:a}=e,[o]=eo.Z.useForm(),[i,c]=(0,n.useState)(!1),[d,m]=(0,n.useState)("");return(0,r.jsxs)("div",{children:[(0,r.jsx)(X.Z,{className:"mx-auto",onClick:()=>c(!0),children:"+ Add Fallbacks"}),(0,r.jsx)(ei.Z,{title:"Add Fallbacks",visible:i,width:800,footer:null,onOk:()=>{c(!1),o.resetFields()},onCancel:()=>{c(!1),o.resetFields()},children:(0,r.jsxs)(eo.Z,{form:o,onFinish:e=>{console.log(e);let{model_name:l,models:r}=e,n=[...s.fallbacks||[],{[l]:r}],i={...s,fallbacks:n};console.log(i);try{Y(t,{router_settings:i}),a(i)}catch(e){u.ZP.error("Failed to update router settings: "+e,20)}u.ZP.success("router settings updated successfully"),c(!1),o.resetFields()},labelCol:{span:8},wrapperCol:{span:16},labelAlign:"left",children:[(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(eo.Z.Item,{label:"Public Model 
Name",name:"model_name",rules:[{required:!0,message:"Set the model to fallback for"}],help:"required",children:(0,r.jsx)(eS.Z,{defaultValue:d,children:l&&l.map((e,l)=>(0,r.jsx)(eN.Z,{value:e,onClick:()=>m(e),children:e},l))})}),(0,r.jsx)(eo.Z.Item,{label:"Fallback Models",name:"models",rules:[{required:!0,message:"Please select a model"}],help:"required",children:(0,r.jsx)(eV.Z,{value:l,children:l&&l.filter(e=>e!=d).map(e=>(0,r.jsx)(eq.Z,{value:e,children:e},e))})})]}),(0,r.jsx)("div",{style:{textAlign:"right",marginTop:"10px"},children:(0,r.jsx)(em.ZP,{htmlType:"submit",children:"Add Fallbacks"})})]})})]})},lm=t(12968);async function lu(e,l){console.log("isLocal:",!1);let t=window.location.origin,s=new lm.ZP.OpenAI({apiKey:l,baseURL:t,dangerouslyAllowBrowser:!0});try{let l=await s.chat.completions.create({model:e,messages:[{role:"user",content:"Hi, this is a test message"}],mock_testing_fallbacks:!0});u.ZP.success((0,r.jsxs)("span",{children:["Test model=",(0,r.jsx)("strong",{children:e}),", received model=",(0,r.jsx)("strong",{children:l.model}),". See"," ",(0,r.jsx)("a",{href:"#",onClick:()=>window.open("https://docs.litellm.ai/docs/proxy/reliability","_blank"),style:{textDecoration:"underline",color:"blue"},children:"curl"})]}))}catch(e){u.ZP.error("Error occurred while generating model response. Please try again. Error: ".concat(e),20)}}let lh={ttl:3600,lowest_latency_buffer:0},lx=e=>{let{selectedStrategy:l,strategyArgs:t,paramExplanation:s}=e;return(0,r.jsxs)(ee.Z,{children:[(0,r.jsx)(et.Z,{className:"text-sm font-medium text-tremor-content-strong dark:text-dark-tremor-content-strong",children:"Routing Strategy Specific Args"}),(0,r.jsx)(el.Z,{children:"latency-based-routing"==l?(0,r.jsx)(ey.Z,{children:(0,r.jsxs)(eZ.Z,{children:[(0,r.jsx)(eb.Z,{children:(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ek.Z,{children:"Setting"}),(0,r.jsx)(ek.Z,{children:"Value"})]})}),(0,r.jsx)(e_.Z,{children:Object.entries(t).map(e=>{let[l,t]=e;return(0,r.jsxs)(ev.Z,{children:[(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(es.Z,{children:l}),(0,r.jsx)("p",{style:{fontSize:"0.65rem",color:"#808080",fontStyle:"italic"},className:"mt-1",children:s[l]})]}),(0,r.jsx)(ew.Z,{children:(0,r.jsx)(Q.Z,{name:l,defaultValue:"object"==typeof t?JSON.stringify(t,null,2):t.toString()})})]},l)})})]})}):(0,r.jsx)(es.Z,{children:"No specific settings"})})]})};var lp=e=>{let{accessToken:l,userRole:t,userID:s,modelData:a}=e,[o,i]=(0,n.useState)({}),[c,d]=(0,n.useState)({}),[m,h]=(0,n.useState)([]),[x,p]=(0,n.useState)(!1),[j]=eo.Z.useForm(),[g,y]=(0,n.useState)(null),[f,Z]=(0,n.useState)(null),[_,w]=(0,n.useState)(null),b={routing_strategy_args:"(dict) Arguments to pass to the routing strategy",routing_strategy:"(string) Routing strategy to use",allowed_fails:"(int) Number of times a deployment can fail before being added to cooldown",cooldown_time:"(int) time in seconds to cooldown a deployment after failure",num_retries:"(int) Number of retries for failed requests. Defaults to 0.",timeout:"(float) Timeout for requests. Defaults to None.",retry_after:"(int) Minimum time to wait before retrying a failed request",ttl:"(int) Sliding window to look back over when calculating the average latency of a deployment. Default - 1 hour (in seconds).",lowest_latency_buffer:"(float) Shuffle between deployments within this % of the lowest latency. Default - 0 (i.e. 
always pick lowest latency)."};(0,n.useEffect)(()=>{l&&t&&s&&(V(l,s,t).then(e=>{console.log("callbacks",e),i(e.router_settings)}),q(l).then(e=>{h(e)}))},[l,t,s]);let k=async e=>{if(l){console.log("received key: ".concat(e)),console.log("routerSettings['fallbacks']: ".concat(o.fallbacks)),o.fallbacks.map(l=>(e in l&&delete l[e],l));try{await Y(l,{router_settings:o}),i({...o}),Z(o.routing_strategy),u.ZP.success("Router settings updated successfully")}catch(e){u.ZP.error("Failed to update router settings: "+e,20)}}},v=(e,l)=>{h(m.map(t=>t.field_name===e?{...t,field_value:l}:t))},S=(e,t)=>{if(!l)return;let s=m[t].field_value;if(null!=s&&void 0!=s)try{G(l,e,s);let t=m.map(l=>l.field_name===e?{...l,stored_in_db:!0}:l);h(t)}catch(e){}},N=(e,t)=>{if(l)try{W(l,e);let t=m.map(l=>l.field_name===e?{...l,stored_in_db:null,field_value:null}:l);h(t)}catch(e){}},A=e=>{if(!l)return;console.log("router_settings",e);let t=Object.fromEntries(Object.entries(e).map(e=>{let[l,t]=e;if("routing_strategy_args"!==l&&"routing_strategy"!==l){var s;return[l,(null===(s=document.querySelector('input[name="'.concat(l,'"]')))||void 0===s?void 0:s.value)||t]}if("routing_strategy"==l)return[l,f];if("routing_strategy_args"==l&&"latency-based-routing"==f){let e={},l=document.querySelector('input[name="lowest_latency_buffer"]'),t=document.querySelector('input[name="ttl"]');return(null==l?void 0:l.value)&&(e.lowest_latency_buffer=Number(l.value)),(null==t?void 0:t.value)&&(e.ttl=Number(t.value)),console.log("setRoutingStrategyArgs: ".concat(e)),["routing_strategy_args",e]}return null}).filter(e=>null!=e));console.log("updatedVariables",t);try{Y(l,{router_settings:t})}catch(e){u.ZP.error("Failed to update router settings: "+e,20)}u.ZP.success("router settings updated successfully")};return l?(0,r.jsx)("div",{className:"w-full mx-4",children:(0,r.jsxs)(eB.Z,{className:"gap-2 p-8 h-[75vh] w-full mt-2",children:[(0,r.jsxs)(eD.Z,{variant:"line",defaultValue:"1",children:[(0,r.jsx)(eU.Z,{value:"1",children:"Loadbalancing"}),(0,r.jsx)(eU.Z,{value:"2",children:"Fallbacks"}),(0,r.jsx)(eU.Z,{value:"3",children:"General"})]}),(0,r.jsxs)(ez.Z,{children:[(0,r.jsx)(eK.Z,{children:(0,r.jsxs)($.Z,{numItems:1,className:"gap-2 p-8 w-full mt-2",children:[(0,r.jsx)(ea.Z,{children:"Router Settings"}),(0,r.jsxs)(ey.Z,{children:[(0,r.jsxs)(eZ.Z,{children:[(0,r.jsx)(eb.Z,{children:(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ek.Z,{children:"Setting"}),(0,r.jsx)(ek.Z,{children:"Value"})]})}),(0,r.jsx)(e_.Z,{children:Object.entries(o).filter(e=>{let[l,t]=e;return"fallbacks"!=l&&"context_window_fallbacks"!=l&&"routing_strategy_args"!=l}).map(e=>{let[l,t]=e;return(0,r.jsxs)(ev.Z,{children:[(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(es.Z,{children:l}),(0,r.jsx)("p",{style:{fontSize:"0.65rem",color:"#808080",fontStyle:"italic"},className:"mt-1",children:b[l]})]}),(0,r.jsx)(ew.Z,{children:"routing_strategy"==l?(0,r.jsxs)(eS.Z,{defaultValue:t,className:"w-full max-w-md",onValueChange:Z,children:[(0,r.jsx)(eN.Z,{value:"usage-based-routing",children:"usage-based-routing"}),(0,r.jsx)(eN.Z,{value:"latency-based-routing",children:"latency-based-routing"}),(0,r.jsx)(eN.Z,{value:"simple-shuffle",children:"simple-shuffle"})]}):(0,r.jsx)(Q.Z,{name:l,defaultValue:"object"==typeof 
t?JSON.stringify(t,null,2):t.toString()})})]},l)})})]}),(0,r.jsx)(lx,{selectedStrategy:f,strategyArgs:o&&o.routing_strategy_args&&Object.keys(o.routing_strategy_args).length>0?o.routing_strategy_args:lh,paramExplanation:b})]}),(0,r.jsx)(H.Z,{children:(0,r.jsx)(X.Z,{className:"mt-2",onClick:()=>A(o),children:"Save Changes"})})]})}),(0,r.jsxs)(eK.Z,{children:[(0,r.jsxs)(eZ.Z,{children:[(0,r.jsx)(eb.Z,{children:(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ek.Z,{children:"Model Name"}),(0,r.jsx)(ek.Z,{children:"Fallbacks"})]})}),(0,r.jsx)(e_.Z,{children:o.fallbacks&&o.fallbacks.map((e,t)=>Object.entries(e).map(e=>{let[s,a]=e;return(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ew.Z,{children:s}),(0,r.jsx)(ew.Z,{children:Array.isArray(a)?a.join(", "):a}),(0,r.jsx)(ew.Z,{children:(0,r.jsx)(X.Z,{onClick:()=>lu(s,l),children:"Test Fallback"})}),(0,r.jsx)(ew.Z,{children:(0,r.jsx)(ef.Z,{icon:ej.Z,size:"sm",onClick:()=>k(s)})})]},t.toString()+s)}))})]}),(0,r.jsx)(ld,{models:(null==a?void 0:a.data)?a.data.map(e=>e.model_name):[],accessToken:l,routerSettings:o,setRouterSettings:i})]}),(0,r.jsx)(eK.Z,{children:(0,r.jsx)(ey.Z,{children:(0,r.jsxs)(eZ.Z,{children:[(0,r.jsx)(eb.Z,{children:(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ek.Z,{children:"Setting"}),(0,r.jsx)(ek.Z,{children:"Value"}),(0,r.jsx)(ek.Z,{children:"Status"}),(0,r.jsx)(ek.Z,{children:"Action"})]})}),(0,r.jsx)(e_.Z,{children:m.map((e,l)=>(0,r.jsxs)(ev.Z,{children:[(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(es.Z,{children:e.field_name}),(0,r.jsx)("p",{style:{fontSize:"0.65rem",color:"#808080",fontStyle:"italic"},className:"mt-1",children:e.field_description})]}),(0,r.jsx)(ew.Z,{children:"Integer"==e.field_type?(0,r.jsx)(ed.Z,{step:1,value:e.field_value,onChange:l=>v(e.field_name,l)}):null}),(0,r.jsx)(ew.Z,{children:!0==e.stored_in_db?(0,r.jsx)(eg.Z,{icon:eQ.Z,className:"text-white",children:"In DB"}):!1==e.stored_in_db?(0,r.jsx)(eg.Z,{className:"text-gray bg-white outline",children:"In Config"}):(0,r.jsx)(eg.Z,{className:"text-gray bg-white outline",children:"Not Set"})}),(0,r.jsxs)(ew.Z,{children:[(0,r.jsx)(X.Z,{onClick:()=>S(e.field_name,l),children:"Update"}),(0,r.jsx)(ef.Z,{icon:ej.Z,color:"red",onClick:()=>N(e.field_name,l),children:"Reset"})]})]},l))})]})})})]})]})}):null},lj=t(67951),lg=e=>{let{}=e;return(0,r.jsx)(r.Fragment,{children:(0,r.jsx)($.Z,{className:"gap-2 p-8 h-[80vh] w-full mt-2",children:(0,r.jsxs)("div",{className:"mb-5",children:[(0,r.jsx)("p",{className:"text-2xl text-tremor-content-strong dark:text-dark-tremor-content-strong font-semibold",children:"OpenAI Compatible Proxy: API Reference"}),(0,r.jsx)(es.Z,{className:"mt-2 mb-2",children:"LiteLLM is OpenAI Compatible. This means your API Key works with the OpenAI SDK. Just replace the base_url to point to your litellm proxy. 
Example Below "}),(0,r.jsxs)(eB.Z,{children:[(0,r.jsxs)(eD.Z,{children:[(0,r.jsx)(eU.Z,{children:"OpenAI Python SDK"}),(0,r.jsx)(eU.Z,{children:"LlamaIndex"}),(0,r.jsx)(eU.Z,{children:"Langchain Py"})]}),(0,r.jsxs)(ez.Z,{children:[(0,r.jsx)(eK.Z,{children:(0,r.jsx)(lj.Z,{language:"python",children:'\nimport openai\nclient = openai.OpenAI(\n api_key="your_api_key",\n base_url="http://0.0.0.0:4000" # LiteLLM Proxy is OpenAI compatible, Read More: https://docs.litellm.ai/docs/proxy/user_keys\n)\n\nresponse = client.chat.completions.create(\n model="gpt-3.5-turbo", # model to send to the proxy\n messages = [\n {\n "role": "user",\n "content": "this is a test request, write a short poem"\n }\n ]\n)\n\nprint(response)\n '})}),(0,r.jsx)(eK.Z,{children:(0,r.jsx)(lj.Z,{language:"python",children:'\nimport os, dotenv\n\nfrom llama_index.llms import AzureOpenAI\nfrom llama_index.embeddings import AzureOpenAIEmbedding\nfrom llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext\n\nllm = AzureOpenAI(\n engine="azure-gpt-3.5", # model_name on litellm proxy\n temperature=0.0,\n azure_endpoint="http://0.0.0.0:4000", # litellm proxy endpoint\n api_key="sk-1234", # litellm proxy API Key\n api_version="2023-07-01-preview",\n)\n\nembed_model = AzureOpenAIEmbedding(\n deployment_name="azure-embedding-model",\n azure_endpoint="http://0.0.0.0:4000",\n api_key="sk-1234",\n api_version="2023-07-01-preview",\n)\n\n\ndocuments = SimpleDirectoryReader("llama_index_data").load_data()\nservice_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)\nindex = VectorStoreIndex.from_documents(documents, service_context=service_context)\n\nquery_engine = index.as_query_engine()\nresponse = query_engine.query("What did the author do growing up?")\nprint(response)\n\n '})}),(0,r.jsx)(eK.Z,{children:(0,r.jsx)(lj.Z,{language:"python",children:'\nfrom langchain.chat_models import ChatOpenAI\nfrom langchain.prompts.chat import (\n ChatPromptTemplate,\n HumanMessagePromptTemplate,\n SystemMessagePromptTemplate,\n)\nfrom langchain.schema import HumanMessage, SystemMessage\n\nchat = ChatOpenAI(\n openai_api_base="http://0.0.0.0:4000",\n model = "gpt-3.5-turbo",\n temperature=0.1\n)\n\nmessages = [\n SystemMessage(\n content="You are a helpful assistant that im using to make a test request to."\n ),\n HumanMessage(\n content="test from litellm. tell me why it\'s amazing in 1 sentence"\n ),\n]\nresponse = chat(messages)\n\nprint(response)\n\n '})})]})]})]})})})};async function ly(e,l,t,s){console.log("isLocal:",!1);let a=window.location.origin,r=new lm.ZP.OpenAI({apiKey:s,baseURL:a,dangerouslyAllowBrowser:!0});try{for await(let s of(await r.chat.completions.create({model:t,stream:!0,messages:[{role:"user",content:e}]})))console.log(s),s.choices[0].delta.content&&l(s.choices[0].delta.content)}catch(e){u.ZP.error("Error occurred while generating model response. Please try again. 
Error: ".concat(e),20)}}var lf=e=>{let{accessToken:l,token:t,userRole:s,userID:a}=e,[o,i]=(0,n.useState)(""),[c,d]=(0,n.useState)(""),[m,u]=(0,n.useState)([]),[h,x]=(0,n.useState)(void 0),[p,j]=(0,n.useState)([]);(0,n.useEffect)(()=>{l&&t&&s&&a&&(async()=>{try{let e=await N(l,a,s);if(console.log("model_info:",e),(null==e?void 0:e.data.length)>0){let l=e.data.map(e=>({value:e.id,label:e.id}));console.log(l),j(l),x(e.data[0].id)}}catch(e){console.error("Error fetching model info:",e)}})()},[l,a,s]);let g=(e,l)=>{u(t=>{let s=t[t.length-1];return s&&s.role===e?[...t.slice(0,t.length-1),{role:e,content:s.content+l}]:[...t,{role:e,content:l}]})},y=async()=>{if(""!==c.trim()&&o&&t&&s&&a){u(e=>[...e,{role:"user",content:c}]);try{h&&await ly(c,e=>g("assistant",e),h,o)}catch(e){console.error("Error fetching model response",e),g("assistant","Error fetching model response")}d("")}};if(s&&"Admin Viewer"==s){let{Title:e,Paragraph:l}=eR.default;return(0,r.jsxs)("div",{children:[(0,r.jsx)(e,{level:1,children:"Access Denied"}),(0,r.jsx)(l,{children:"Ask your proxy admin for access to test models"})]})}return(0,r.jsx)("div",{style:{width:"100%",position:"relative"},children:(0,r.jsx)($.Z,{className:"gap-2 p-8 h-[80vh] w-full mt-2",children:(0,r.jsx)(ey.Z,{children:(0,r.jsxs)(eB.Z,{children:[(0,r.jsx)(eD.Z,{children:(0,r.jsx)(eU.Z,{children:"Chat"})}),(0,r.jsx)(ez.Z,{children:(0,r.jsxs)(eK.Z,{children:[(0,r.jsx)("div",{className:"sm:max-w-2xl",children:(0,r.jsxs)($.Z,{numItems:2,children:[(0,r.jsxs)(H.Z,{children:[(0,r.jsx)(es.Z,{children:"API Key"}),(0,r.jsx)(Q.Z,{placeholder:"Type API Key here",type:"password",onValueChange:i,value:o})]}),(0,r.jsxs)(H.Z,{className:"mx-2",children:[(0,r.jsx)(es.Z,{children:"Select Model:"}),(0,r.jsx)(en.default,{placeholder:"Select a Model",onChange:e=>{console.log("selected ".concat(e)),x(e)},options:p,style:{width:"200px"}})]})]})}),(0,r.jsxs)(eZ.Z,{className:"mt-5",style:{display:"block",maxHeight:"60vh",overflowY:"auto"},children:[(0,r.jsx)(eb.Z,{children:(0,r.jsx)(ev.Z,{children:(0,r.jsx)(ew.Z,{})})}),(0,r.jsx)(e_.Z,{children:m.map((e,l)=>(0,r.jsx)(ev.Z,{children:(0,r.jsx)(ew.Z,{children:"".concat(e.role,": ").concat(e.content)})},l))})]}),(0,r.jsx)("div",{className:"mt-3",style:{position:"absolute",bottom:5,width:"95%"},children:(0,r.jsxs)("div",{className:"flex",children:[(0,r.jsx)(Q.Z,{type:"text",value:c,onChange:e=>d(e.target.value),placeholder:"Type your message..."}),(0,r.jsx)(X.Z,{onClick:y,className:"ml-2",children:"Send"})]})})]})})]})})})})},lZ=t(33509),l_=t(95781);let{Sider:lw}=lZ.default;var lb=e=>{let{setPage:l,userRole:t,defaultSelectedKey:s}=e;return"Admin Viewer"==t?(0,r.jsx)(lZ.default,{style:{minHeight:"100vh",maxWidth:"120px"},children:(0,r.jsx)(lw,{width:120,children:(0,r.jsxs)(l_.Z,{mode:"inline",defaultSelectedKeys:s||["4"],style:{height:"100%",borderRight:0},children:[(0,r.jsx)(l_.Z.Item,{onClick:()=>l("api-keys"),children:"API Keys"},"4"),(0,r.jsx)(l_.Z.Item,{onClick:()=>l("models"),children:"Models"},"2"),(0,r.jsx)(l_.Z.Item,{onClick:()=>l("llm-playground"),children:"Chat UI"},"3"),(0,r.jsx)(l_.Z.Item,{onClick:()=>l("usage"),children:"Usage"},"1")]})})}):(0,r.jsx)(lZ.default,{style:{minHeight:"100vh",maxWidth:"145px"},children:(0,r.jsx)(lw,{width:145,children:(0,r.jsxs)(l_.Z,{mode:"inline",defaultSelectedKeys:s||["1"],style:{height:"100%",borderRight:0},children:[(0,r.jsx)(l_.Z.Item,{onClick:()=>l("api-keys"),children:(0,r.jsx)(es.Z,{children:"API 
Keys"})},"1"),(0,r.jsx)(l_.Z.Item,{onClick:()=>l("llm-playground"),children:(0,r.jsx)(es.Z,{children:"Test Key"})},"3"),"Admin"==t?(0,r.jsx)(l_.Z.Item,{onClick:()=>l("models"),children:(0,r.jsx)(es.Z,{children:"Models"})},"2"):null,"Admin"==t?(0,r.jsx)(l_.Z.Item,{onClick:()=>l("usage"),children:(0,r.jsx)(es.Z,{children:"Usage"})},"4"):null,"Admin"==t?(0,r.jsx)(l_.Z.Item,{onClick:()=>l("teams"),children:(0,r.jsx)(es.Z,{children:"Teams"})},"6"):null,"Admin"==t?(0,r.jsx)(l_.Z.Item,{onClick:()=>l("users"),children:(0,r.jsx)(es.Z,{children:"Users"})},"5"):null,"Admin"==t?(0,r.jsx)(l_.Z.Item,{onClick:()=>l("settings"),children:(0,r.jsx)(es.Z,{children:"Logging & Alerts"})},"8"):null,"Admin"==t?(0,r.jsx)(l_.Z.Item,{onClick:()=>l("general-settings"),children:(0,r.jsx)(es.Z,{children:"Router Settings"})},"9"):null,"Admin"==t?(0,r.jsx)(l_.Z.Item,{onClick:()=>l("admin-panel"),children:(0,r.jsx)(es.Z,{children:"Admin"})},"7"):null,(0,r.jsx)(l_.Z.Item,{onClick:()=>l("api_ref"),children:(0,r.jsx)(es.Z,{children:"API Reference"})},"11")]})})})},lk=t(67989),lv=e=>{let{accessToken:l,token:t,userRole:s,userID:a,keys:o}=e,i=new Date,[c,d]=(0,n.useState)([]),[m,u]=(0,n.useState)([]),[h,x]=(0,n.useState)([]),[p,j]=(0,n.useState)([]),[g,y]=(0,n.useState)([]),[f,Z]=(0,n.useState)([]),[_,w]=(0,n.useState)([]),[b,k]=(0,n.useState)([]),[v,S]=(0,n.useState)(""),[N,R]=(0,n.useState)({from:new Date(Date.now()-6048e5),to:new Date}),M=new Date(i.getFullYear(),i.getMonth(),1),L=new Date(i.getFullYear(),i.getMonth()+1,0),U=z(M),B=z(L);console.log("keys in usage",o);let D=async(e,t,s)=>{if(!e||!t||!l)return;t.setHours(23,59,59,999),e.setHours(0,0,0,0),console.log("uiSelectedKey",s);let a=await P(l,s,e.toISOString(),t.toISOString());console.log("End user data updated successfully",a),j(a)},K=async(e,t)=>{e&&t&&l&&(t.setHours(23,59,59,999),e.setHours(0,0,0,0),Z((await E(l,e.toISOString(),t.toISOString())).spend_per_tag),console.log("Tag spend data updated successfully"))};function z(e){let l=e.getFullYear(),t=e.getMonth()+1,s=e.getDate();return"".concat(l,"-").concat(t<10?"0"+t:t,"-").concat(s<10?"0"+s:s)}return console.log("Start date is ".concat(U)),console.log("End date is ".concat(B)),(0,n.useEffect)(()=>{l&&t&&s&&a&&(async()=>{try{if(console.log("user role: ".concat(s)),"Admin"==s||"Admin Viewer"==s){var e,r;let t=await C(l);d(t);let s=(await T(l)).map(e=>({key:(e.key_alias||e.key_name||e.api_key).substring(0,10),spend:e.total_spend}));u(s);let a=(await O(l)).map(e=>({key:e.model,spend:e.total_spend}));x(a);let n=await A(l);console.log("teamSpend",n),y(n.daily_spend),w(n.teams);let o=n.total_spend_per_team;o=o.map(e=>(e.name=e.team_id||"",e.value=e.total_spend||0,e.value=e.value.toFixed(2),e)),k(o);let i=await E(l,null===(e=N.from)||void 0===e?void 0:e.toISOString(),null===(r=N.to)||void 0===r?void 0:r.toISOString());Z(i.spend_per_tag);let c=await P(l,null,void 0,void 0);j(c),console.log("spend/user result",c)}else"App Owner"==s&&await I(l,t,s,a,U,B).then(async e=>{if(console.log("result from spend logs call",e),"daily_spend"in e){let l=e.daily_spend;console.log("daily spend",l),d(l);let t=e.top_api_keys;u(t)}else{let t=(await F(l,function(e){let l=[];e.forEach(e=>{Object.entries(e).forEach(e=>{let[t,s]=e;"spend"!==t&&"startTime"!==t&&"models"!==t&&"users"!==t&&l.push({key:t,spend:s})})}),l.sort((e,l)=>Number(l.spend)-Number(e.spend));let t=l.slice(0,5).map(e=>e.key);return console.log("topKeys: 
".concat(Object.keys(t[0]))),t}(e))).info.map(e=>({key:(e.key_name||e.key_alias).substring(0,10),spend:e.spend}));u(t),d(e)}})}catch(e){console.error("There was an error fetching the data",e)}})()},[l,t,s,a,U,B]),(0,r.jsxs)("div",{style:{width:"100%"},className:"p-8",children:[(0,r.jsx)(eT,{userID:a,userRole:s,accessToken:l,userSpend:null,selectedTeam:null}),(0,r.jsxs)(eB.Z,{children:[(0,r.jsxs)(eD.Z,{className:"mt-2",children:[(0,r.jsx)(eU.Z,{children:"All Up"}),(0,r.jsx)(eU.Z,{children:"Team Based Usage"}),(0,r.jsx)(eU.Z,{children:"End User Usage"}),(0,r.jsx)(eU.Z,{children:"Tag Based Usage"})]}),(0,r.jsxs)(ez.Z,{children:[(0,r.jsx)(eK.Z,{children:(0,r.jsxs)($.Z,{numItems:2,className:"gap-2 h-[75vh] w-full",children:[(0,r.jsx)(H.Z,{numColSpan:2,children:(0,r.jsxs)(ey.Z,{children:[(0,r.jsx)(ea.Z,{children:"Monthly Spend"}),(0,r.jsx)(eW.Z,{data:c,index:"date",categories:["spend"],colors:["blue"],valueFormatter:e=>"$ ".concat(new Intl.NumberFormat("us").format(e).toString()),yAxisWidth:100,tickGap:5})]})}),(0,r.jsx)(H.Z,{numColSpan:1,children:(0,r.jsxs)(ey.Z,{children:[(0,r.jsx)(ea.Z,{children:"Top API Keys"}),(0,r.jsx)(eW.Z,{className:"mt-4 h-40",data:m,index:"key",categories:["spend"],colors:["blue"],yAxisWidth:80,tickGap:5,layout:"vertical",showXAxis:!1,showLegend:!1})]})}),(0,r.jsx)(H.Z,{numColSpan:1,children:(0,r.jsxs)(ey.Z,{children:[(0,r.jsx)(ea.Z,{children:"Top Models"}),(0,r.jsx)(eW.Z,{className:"mt-4 h-40",data:h,index:"key",categories:["spend"],colors:["blue"],yAxisWidth:200,layout:"vertical",showXAxis:!1,showLegend:!1})]})}),(0,r.jsx)(H.Z,{numColSpan:1})]})}),(0,r.jsx)(eK.Z,{children:(0,r.jsxs)($.Z,{numItems:2,className:"gap-2 h-[75vh] w-full",children:[(0,r.jsxs)(H.Z,{numColSpan:2,children:[(0,r.jsxs)(ey.Z,{className:"mb-2",children:[(0,r.jsx)(ea.Z,{children:"Total Spend Per Team"}),(0,r.jsx)(lk.Z,{data:b})]}),(0,r.jsxs)(ey.Z,{children:[(0,r.jsx)(ea.Z,{children:"Daily Spend Per Team"}),(0,r.jsx)(eW.Z,{className:"h-72",data:g,showLegend:!0,index:"date",categories:_,yAxisWidth:80,colors:["blue","green","yellow","red","purple"],stack:!0})]})]}),(0,r.jsx)(H.Z,{numColSpan:2})]})}),(0,r.jsxs)(eK.Z,{children:[(0,r.jsxs)("p",{className:"mb-2 text-gray-500 italic text-[12px]",children:["End-Users of your LLM API calls. 
Tracked when a `user` param is passed in your LLM calls ",(0,r.jsx)("a",{className:"text-blue-500",href:"https://docs.litellm.ai/docs/proxy/users",target:"_blank",children:"docs here"})]}),(0,r.jsxs)($.Z,{numItems:2,children:[(0,r.jsxs)(H.Z,{children:[(0,r.jsx)(es.Z,{children:"Select Time Range"}),(0,r.jsx)(eL.Z,{enableSelect:!0,value:N,onValueChange:e=>{R(e),D(e.from,e.to,null)}})]}),(0,r.jsxs)(H.Z,{children:[(0,r.jsx)(es.Z,{children:"Select Key"}),(0,r.jsxs)(eS.Z,{defaultValue:"all-keys",children:[(0,r.jsx)(eN.Z,{value:"all-keys",onClick:()=>{D(N.from,N.to,null)},children:"All Keys"},"all-keys"),null==o?void 0:o.map((e,l)=>e&&null!==e.key_alias&&e.key_alias.length>0?(0,r.jsx)(eN.Z,{value:String(l),onClick:()=>{D(N.from,N.to,e.token)},children:e.key_alias},l):null)]})]})]}),(0,r.jsx)(ey.Z,{className:"mt-4",children:(0,r.jsxs)(eZ.Z,{className:"max-h-[70vh] min-h-[500px]",children:[(0,r.jsx)(eb.Z,{children:(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ek.Z,{children:"End User"}),(0,r.jsx)(ek.Z,{children:"Spend"}),(0,r.jsx)(ek.Z,{children:"Total Events"})]})}),(0,r.jsx)(e_.Z,{children:null==p?void 0:p.map((e,l)=>{var t;return(0,r.jsxs)(ev.Z,{children:[(0,r.jsx)(ew.Z,{children:e.end_user}),(0,r.jsx)(ew.Z,{children:null===(t=e.total_spend)||void 0===t?void 0:t.toFixed(4)}),(0,r.jsx)(ew.Z,{children:e.total_count})]},l)})})]})})]}),(0,r.jsx)(eK.Z,{children:(0,r.jsxs)($.Z,{numItems:2,className:"gap-2 h-[75vh] w-full mb-4",children:[(0,r.jsxs)(H.Z,{numColSpan:2,children:[(0,r.jsx)(eL.Z,{className:"mb-4",enableSelect:!0,value:N,onValueChange:e=>{R(e),K(e.from,e.to)}}),(0,r.jsxs)(ey.Z,{children:[(0,r.jsx)(ea.Z,{children:"Spend Per Tag"}),(0,r.jsxs)(es.Z,{children:["Get Started Tracking cost per tag ",(0,r.jsx)("a",{className:"text-blue-500",href:"https://docs.litellm.ai/docs/proxy/enterprise#tracking-spend-for-custom-tags",target:"_blank",children:"here"})]}),(0,r.jsx)(eW.Z,{className:"h-72",data:f,index:"name",categories:["spend"],colors:["blue"]})]})]}),(0,r.jsx)(H.Z,{numColSpan:2})]})})]})]})]})},lS=()=>{let{Title:e,Paragraph:l}=eR.default,[t,s]=(0,n.useState)(""),[a,i]=(0,n.useState)(null),[c,d]=(0,n.useState)(null),[u,h]=(0,n.useState)(null),[x,p]=(0,n.useState)(!0),j=(0,o.useSearchParams)(),[g,y]=(0,n.useState)({data:[]}),f=j.get("userID"),Z=j.get("token"),[_,w]=(0,n.useState)("api-keys"),[b,k]=(0,n.useState)(null);return(0,n.useEffect)(()=>{if(Z){let e=(0,eF.o)(Z);if(e){if(console.log("Decoded token:",e),console.log("Decoded key:",e.key),k(e.key),e.user_role){let l=function(e){if(!e)return"Undefined Role";switch(console.log("Received user role: ".concat(e.toLowerCase())),console.log("Received user role length: ".concat(e.toLowerCase().length)),e.toLowerCase()){case"app_owner":case"demo_app_owner":return"App Owner";case"app_admin":case"proxy_admin":return"Admin";case"proxy_admin_viewer":return"Admin Viewer";case"app_user":return"App User";default:return"Unknown Role"}}(e.user_role);console.log("Decoded user_role:",l),s(l),"Admin Viewer"==l&&w("usage")}else console.log("User role not defined");e.user_email?i(e.user_email):console.log("User Email is not set ".concat(e)),e.login_method?p("username_password"==e.login_method):console.log("User Email is not set ".concat(e))}}},[Z]),(0,r.jsx)(n.Suspense,{fallback:(0,r.jsx)("div",{children:"Loading..."}),children:(0,r.jsxs)("div",{className:"flex flex-col min-h-screen",children:[(0,r.jsx)(m,{userID:f,userRole:t,userEmail:a,showSSOBanner:x}),(0,r.jsxs)("div",{className:"flex flex-1 
overflow-auto",children:[(0,r.jsx)("div",{className:"mt-8",children:(0,r.jsx)(lb,{setPage:w,userRole:t,defaultSelectedKey:null})}),"api-keys"==_?(0,r.jsx)(eM,{userID:f,userRole:t,teams:c,keys:u,setUserRole:s,userEmail:a,setUserEmail:i,setTeams:d,setKeys:h}):"models"==_?(0,r.jsx)(e9,{userID:f,userRole:t,token:Z,accessToken:b,modelData:g,setModelData:y}):"llm-playground"==_?(0,r.jsx)(lf,{userID:f,userRole:t,token:Z,accessToken:b}):"users"==_?(0,r.jsx)(lt,{userID:f,userRole:t,token:Z,keys:u,teams:c,accessToken:b,setKeys:h}):"teams"==_?(0,r.jsx)(ls,{teams:c,setTeams:d,searchParams:j,accessToken:b,userID:f,userRole:t}):"admin-panel"==_?(0,r.jsx)(lr,{setTeams:d,searchParams:j,accessToken:b,showSSOBanner:x}):"api_ref"==_?(0,r.jsx)(lg,{}):"settings"==_?(0,r.jsx)(li,{userID:f,userRole:t,accessToken:b}):"general-settings"==_?(0,r.jsx)(lp,{userID:f,userRole:t,accessToken:b,modelData:g}):(0,r.jsx)(lv,{userID:f,userRole:t,token:Z,accessToken:b,keys:u})]})]})})}}},function(e){e.O(0,[936,884,971,69,744],function(){return e(e.s=20661)}),_N_E=e.O()}]); \ No newline at end of file diff --git a/ui/litellm-dashboard/out/_next/static/obp5wqVSVDMiDTC414cR8/_buildManifest.js b/ui/litellm-dashboard/out/_next/static/l-0LDfSCdaUCAbcLIx_QC/_buildManifest.js similarity index 100% rename from ui/litellm-dashboard/out/_next/static/obp5wqVSVDMiDTC414cR8/_buildManifest.js rename to ui/litellm-dashboard/out/_next/static/l-0LDfSCdaUCAbcLIx_QC/_buildManifest.js diff --git a/ui/litellm-dashboard/out/_next/static/obp5wqVSVDMiDTC414cR8/_ssgManifest.js b/ui/litellm-dashboard/out/_next/static/l-0LDfSCdaUCAbcLIx_QC/_ssgManifest.js similarity index 100% rename from ui/litellm-dashboard/out/_next/static/obp5wqVSVDMiDTC414cR8/_ssgManifest.js rename to ui/litellm-dashboard/out/_next/static/l-0LDfSCdaUCAbcLIx_QC/_ssgManifest.js diff --git a/ui/litellm-dashboard/out/index.html b/ui/litellm-dashboard/out/index.html index 930018e005..66765eacb2 100644 --- a/ui/litellm-dashboard/out/index.html +++ b/ui/litellm-dashboard/out/index.html @@ -1 +1 @@ -LiteLLM Dashboard \ No newline at end of file +LiteLLM Dashboard \ No newline at end of file diff --git a/ui/litellm-dashboard/out/index.txt b/ui/litellm-dashboard/out/index.txt index d67a480b37..cecddd99e8 100644 --- a/ui/litellm-dashboard/out/index.txt +++ b/ui/litellm-dashboard/out/index.txt @@ -1,7 +1,7 @@ 2:I[77831,[],""] -3:I[7926,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","884","static/chunks/884-7576ee407a2ecbe6.js","931","static/chunks/app/page-6a39771cacf75ea6.js"],""] +3:I[4858,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","884","static/chunks/884-7576ee407a2ecbe6.js","931","static/chunks/app/page-f20fdea77aed85ba.js"],""] 4:I[5613,[],""] 5:I[31778,[],""] -0:["obp5wqVSVDMiDTC414cR8",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe 
UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f04e46b02318b660.css","precedence":"next","crossOrigin":""}]],"$L6"]]]] +0:["l-0LDfSCdaUCAbcLIx_QC",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f04e46b02318b660.css","precedence":"next","crossOrigin":""}]],"$L6"]]]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 1:null diff --git 
a/ui/litellm-dashboard/src/components/general_settings.tsx b/ui/litellm-dashboard/src/components/general_settings.tsx index c2013b1578..d16b434b89 100644 --- a/ui/litellm-dashboard/src/components/general_settings.tsx +++ b/ui/litellm-dashboard/src/components/general_settings.tsx @@ -23,12 +23,44 @@ import { AccordionHeader, AccordionList, } from "@tremor/react"; -import { TabPanel, TabPanels, TabGroup, TabList, Tab, Icon } from "@tremor/react"; -import { getCallbacksCall, setCallbacksCall, serviceHealthCheck } from "./networking"; -import { Modal, Form, Input, Select, Button as Button2, message } from "antd"; -import { InformationCircleIcon, PencilAltIcon, PencilIcon, StatusOnlineIcon, TrashIcon, RefreshIcon } from "@heroicons/react/outline"; +import { + TabPanel, + TabPanels, + TabGroup, + TabList, + Tab, + Icon, +} from "@tremor/react"; +import { + getCallbacksCall, + setCallbacksCall, + getGeneralSettingsCall, + serviceHealthCheck, + updateConfigFieldSetting, + deleteConfigFieldSetting, +} from "./networking"; +import { + Modal, + Form, + Input, + Select, + Button as Button2, + message, + InputNumber, +} from "antd"; +import { + InformationCircleIcon, + PencilAltIcon, + PencilIcon, + StatusOnlineIcon, + TrashIcon, + RefreshIcon, + CheckCircleIcon, + XCircleIcon, + QuestionMarkCircleIcon, +} from "@heroicons/react/outline"; import StaticGenerationSearchParamsBailoutProvider from "next/dist/client/components/static-generation-searchparams-bailout-provider"; -import AddFallbacks from "./add_fallbacks" +import AddFallbacks from "./add_fallbacks"; import openai from "openai"; import Paragraph from "antd/es/skeleton/Paragraph"; @@ -36,7 +68,7 @@ interface GeneralSettingsPageProps { accessToken: string | null; userRole: string | null; userID: string | null; - modelData: any + modelData: any; } async function testFallbackModelResponse( @@ -65,43 +97,71 @@ async function testFallbackModelResponse( }, ], // @ts-ignore - mock_testing_fallbacks: true + mock_testing_fallbacks: true, }); message.success( - Test model={selectedModel}, received model={response.model}. - See window.open('https://docs.litellm.ai/docs/proxy/reliability', '_blank')} style={{ textDecoration: 'underline', color: 'blue' }}>curl + Test model={selectedModel}, received model= + {response.model}. See{" "} + + window.open( + "https://docs.litellm.ai/docs/proxy/reliability", + "_blank" + ) + } + style={{ textDecoration: "underline", color: "blue" }} + > + curl + ); } catch (error) { - message.error(`Error occurred while generating model response. Please try again. Error: ${error}`, 20); + message.error( + `Error occurred while generating model response. Please try again. 
Error: ${error}`, + 20 + ); } } interface AccordionHeroProps { selectedStrategy: string | null; strategyArgs: routingStrategyArgs; - paramExplanation: { [key: string]: string } + paramExplanation: { [key: string]: string }; } interface routingStrategyArgs { - ttl?: number; - lowest_latency_buffer?: number; + ttl?: number; + lowest_latency_buffer?: number; +} + +interface generalSettingsItem { + field_name: string; + field_type: string; + field_value: any; + field_description: string; + stored_in_db: boolean | null; } const defaultLowestLatencyArgs: routingStrategyArgs = { - "ttl": 3600, - "lowest_latency_buffer": 0 -} + ttl: 3600, + lowest_latency_buffer: 0, +}; -export const AccordionHero: React.FC = ({ selectedStrategy, strategyArgs, paramExplanation }) => ( +export const AccordionHero: React.FC = ({ + selectedStrategy, + strategyArgs, + paramExplanation, +}) => ( - Routing Strategy Specific Args - - { - selectedStrategy == "latency-based-routing" ? - + + Routing Strategy Specific Args + + + {selectedStrategy == "latency-based-routing" ? ( + @@ -114,51 +174,75 @@ export const AccordionHero: React.FC = ({ selectedStrategy, {param} - {paramExplanation[param]} + + {paramExplanation[param]} + + name={param} + defaultValue={ + typeof value === "object" + ? JSON.stringify(value, null, 2) + : value.toString() + } + /> ))} - - : No specific settings - } - - + + ) : ( + No specific settings + )} + + ); const GeneralSettings: React.FC = ({ accessToken, userRole, userID, - modelData + modelData, }) => { - const [routerSettings, setRouterSettings] = useState<{ [key: string]: any }>({}); + const [routerSettings, setRouterSettings] = useState<{ [key: string]: any }>( + {} + ); + const [generalSettingsDict, setGeneralSettingsDict] = useState<{ + [key: string]: any; + }>({}); + const [generalSettings, setGeneralSettings] = useState( + [] + ); const [isModalVisible, setIsModalVisible] = useState(false); const [form] = Form.useForm(); const [selectedCallback, setSelectedCallback] = useState(null); - const [selectedStrategy, setSelectedStrategy] = useState(null) - const [strategySettings, setStrategySettings] = useState(null); + const [selectedStrategy, setSelectedStrategy] = useState(null); + const [strategySettings, setStrategySettings] = + useState(null); let paramExplanation: { [key: string]: string } = { - "routing_strategy_args": "(dict) Arguments to pass to the routing strategy", - "routing_strategy": "(string) Routing strategy to use", - "allowed_fails": "(int) Number of times a deployment can fail before being added to cooldown", - "cooldown_time": "(int) time in seconds to cooldown a deployment after failure", - "num_retries": "(int) Number of retries for failed requests. Defaults to 0.", - "timeout": "(float) Timeout for requests. Defaults to None.", - "retry_after": "(int) Minimum time to wait before retrying a failed request", - "ttl": "(int) Sliding window to look back over when calculating the average latency of a deployment. Default - 1 hour (in seconds).", - "lowest_latency_buffer": "(float) Shuffle between deployments within this % of the lowest latency. Default - 0 (i.e. always pick lowest latency)." - } + routing_strategy_args: "(dict) Arguments to pass to the routing strategy", + routing_strategy: "(string) Routing strategy to use", + allowed_fails: + "(int) Number of times a deployment can fail before being added to cooldown", + cooldown_time: + "(int) time in seconds to cooldown a deployment after failure", + num_retries: "(int) Number of retries for failed requests. 
Defaults to 0.", + timeout: "(float) Timeout for requests. Defaults to None.", + retry_after: "(int) Minimum time to wait before retrying a failed request", + ttl: "(int) Sliding window to look back over when calculating the average latency of a deployment. Default - 1 hour (in seconds).", + lowest_latency_buffer: + "(float) Shuffle between deployments within this % of the lowest latency. Default - 0 (i.e. always pick lowest latency).", + }; useEffect(() => { if (!accessToken || !userRole || !userID) { @@ -169,6 +253,10 @@ const GeneralSettings: React.FC = ({ let router_settings = data.router_settings; setRouterSettings(router_settings); }); + getGeneralSettingsCall(accessToken).then((data) => { + let general_settings = data; + setGeneralSettings(general_settings); + }); }, [accessToken, userRole, userID]); const handleAddCallback = () => { @@ -190,8 +278,8 @@ const GeneralSettings: React.FC = ({ return; } - console.log(`received key: ${key}`) - console.log(`routerSettings['fallbacks']: ${routerSettings['fallbacks']}`) + console.log(`received key: ${key}`); + console.log(`routerSettings['fallbacks']: ${routerSettings["fallbacks"]}`); routerSettings["fallbacks"].map((dict: { [key: string]: any }) => { // Check if the dictionary has the specified key and delete it if present @@ -202,18 +290,73 @@ const GeneralSettings: React.FC = ({ }); const payload = { - router_settings: routerSettings + router_settings: routerSettings, }; try { await setCallbacksCall(accessToken, payload); setRouterSettings({ ...routerSettings }); - setSelectedStrategy(routerSettings["routing_strategy"]) + setSelectedStrategy(routerSettings["routing_strategy"]); message.success("Router settings updated successfully"); } catch (error) { message.error("Failed to update router settings: " + error, 20); } - } + }; + + const handleInputChange = (fieldName: string, newValue: any) => { + // Update the value in the state + const updatedSettings = generalSettings.map((setting) => + setting.field_name === fieldName + ? { ...setting, field_value: newValue } + : setting + ); + setGeneralSettings(updatedSettings); + }; + + const handleUpdateField = (fieldName: string, idx: number) => { + if (!accessToken) { + return; + } + + let fieldValue = generalSettings[idx].field_value; + + if (fieldValue == null || fieldValue == undefined) { + return; + } + try { + updateConfigFieldSetting(accessToken, fieldName, fieldValue); + // update value in state + + const updatedSettings = generalSettings.map((setting) => + setting.field_name === fieldName + ? { ...setting, stored_in_db: true } + : setting + ); + setGeneralSettings(updatedSettings); + } catch (error) { + // do something + } + }; + + const handleResetField = (fieldName: string, idx: number) => { + if (!accessToken) { + return; + } + + try { + deleteConfigFieldSetting(accessToken, fieldName); + // update value in state + + const updatedSettings = generalSettings.map((setting) => + setting.field_name === fieldName + ? 
{ ...setting, stored_in_db: null, field_value: null } + : setting + ); + setGeneralSettings(updatedSettings); + } catch (error) { + // do something + } + }; const handleSaveChanges = (router_settings: any) => { if (!accessToken) { @@ -223,39 +366,55 @@ const GeneralSettings: React.FC = ({ console.log("router_settings", router_settings); const updatedVariables = Object.fromEntries( - Object.entries(router_settings).map(([key, value]) => { - if (key !== 'routing_strategy_args' && key !== "routing_strategy") { - return [key, (document.querySelector(`input[name="${key}"]`) as HTMLInputElement)?.value || value]; - } - else if (key == "routing_strategy") { - return [key, selectedStrategy] - } - else if (key == "routing_strategy_args" && selectedStrategy == "latency-based-routing") { - let setRoutingStrategyArgs: routingStrategyArgs = {} + Object.entries(router_settings) + .map(([key, value]) => { + if (key !== "routing_strategy_args" && key !== "routing_strategy") { + return [ + key, + ( + document.querySelector( + `input[name="${key}"]` + ) as HTMLInputElement + )?.value || value, + ]; + } else if (key == "routing_strategy") { + return [key, selectedStrategy]; + } else if ( + key == "routing_strategy_args" && + selectedStrategy == "latency-based-routing" + ) { + let setRoutingStrategyArgs: routingStrategyArgs = {}; - const lowestLatencyBufferElement = document.querySelector(`input[name="lowest_latency_buffer"]`) as HTMLInputElement; - const ttlElement = document.querySelector(`input[name="ttl"]`) as HTMLInputElement; + const lowestLatencyBufferElement = document.querySelector( + `input[name="lowest_latency_buffer"]` + ) as HTMLInputElement; + const ttlElement = document.querySelector( + `input[name="ttl"]` + ) as HTMLInputElement; - if (lowestLatencyBufferElement?.value) { - setRoutingStrategyArgs["lowest_latency_buffer"] = Number(lowestLatencyBufferElement.value) + if (lowestLatencyBufferElement?.value) { + setRoutingStrategyArgs["lowest_latency_buffer"] = Number( + lowestLatencyBufferElement.value + ); + } + + if (ttlElement?.value) { + setRoutingStrategyArgs["ttl"] = Number(ttlElement.value); + } + + console.log(`setRoutingStrategyArgs: ${setRoutingStrategyArgs}`); + return ["routing_strategy_args", setRoutingStrategyArgs]; } - - if (ttlElement?.value) { - setRoutingStrategyArgs["ttl"] = Number(ttlElement.value) - } - - console.log(`setRoutingStrategyArgs: ${setRoutingStrategyArgs}`) - return [ - "routing_strategy_args", setRoutingStrategyArgs - ] - } - return null; - }).filter(entry => entry !== null && entry !== undefined) as Iterable<[string, unknown]> + return null; + }) + .filter((entry) => entry !== null && entry !== undefined) as Iterable< + [string, unknown] + > ); console.log("updatedVariables", updatedVariables); const payload = { - router_settings: updatedVariables + router_settings: updatedVariables, }; try { @@ -267,117 +426,240 @@ const GeneralSettings: React.FC = ({ message.success("router settings updated successfully"); }; - - if (!accessToken) { return null; } - return ( - General Settings + Loadbalancing Fallbacks + General - - Router Settings - - - - - Setting - Value - - - - {Object.entries(routerSettings).filter(([param, value]) => param != "fallbacks" && param != "context_window_fallbacks" && param != "routing_strategy_args").map(([param, value]) => ( - - - {param} - {paramExplanation[param]} - - - { - param == "routing_strategy" ? 
- - usage-based-routing - latency-based-routing - simple-shuffle - : - - } - + + Router Settings + + + + + Setting + Value + + + + {Object.entries(routerSettings) + .filter( + ([param, value]) => + param != "fallbacks" && + param != "context_window_fallbacks" && + param != "routing_strategy_args" + ) + .map(([param, value]) => ( + + + {param} + + {paramExplanation[param]} + + + + {param == "routing_strategy" ? ( + + + usage-based-routing + + + latency-based-routing + + + simple-shuffle + + + ) : ( + + )} + + + ))} + + + 0 + ? routerSettings["routing_strategy_args"] + : defaultLowestLatencyArgs // default value when keys length is 0 + } + paramExplanation={paramExplanation} + /> + + + handleSaveChanges(routerSettings)} + > + Save Changes + + + + + + + + + Model Name + Fallbacks - ))} - - - 0 - ? routerSettings['routing_strategy_args'] - : defaultLowestLatencyArgs // default value when keys length is 0 - } - paramExplanation={paramExplanation} - /> - - - handleSaveChanges(routerSettings)}> - Save Changes - - - - - - - - - Model Name - Fallbacks - - + - - { - routerSettings["fallbacks"] && - routerSettings["fallbacks"].map((item: Object, index: number) => - Object.entries(item).map(([key, value]) => ( - - {key} - {Array.isArray(value) ? value.join(', ') : value} - - testFallbackModelResponse(key, accessToken)}> - Test Fallback - - - - deleteFallbacks(key)} - /> - - - )) - ) - } - - - data.model_name) : []} accessToken={accessToken} routerSettings={routerSettings} setRouterSettings={setRouterSettings}/> - - - + + {routerSettings["fallbacks"] && + routerSettings["fallbacks"].map( + (item: Object, index: number) => + Object.entries(item).map(([key, value]) => ( + + {key} + + {Array.isArray(value) ? value.join(", ") : value} + + + + testFallbackModelResponse(key, accessToken) + } + > + Test Fallback + + + + deleteFallbacks(key)} + /> + + + )) + )} + + + data.model_name) + : [] + } + accessToken={accessToken} + routerSettings={routerSettings} + setRouterSettings={setRouterSettings} + /> + + + + + + + Setting + Value + Status + Action + + + + {generalSettings.map((value, index) => ( + + + {value.field_name} + + {value.field_description} + + + + {value.field_type == "Integer" ? ( + + handleInputChange(value.field_name, newValue) + } // Handle value change + /> + ) : null} + + + {value.stored_in_db == true ? ( + + In DB + + ) : value.stored_in_db == false ? 
( + + In Config + + ) : ( + + Not Set + + )} + + + + handleUpdateField(value.field_name, index) + } + > + Update + + + handleResetField(value.field_name, index) + } + > + Reset + + + + ))} + + + + + + ); }; -export default GeneralSettings; \ No newline at end of file +export default GeneralSettings; diff --git a/ui/litellm-dashboard/src/components/model_dashboard.tsx b/ui/litellm-dashboard/src/components/model_dashboard.tsx index e11619165f..6fc6df07bb 100644 --- a/ui/litellm-dashboard/src/components/model_dashboard.tsx +++ b/ui/litellm-dashboard/src/components/model_dashboard.tsx @@ -121,6 +121,7 @@ const handleSubmit = async (formValues: Record, accessToken: string // Iterate through the key-value pairs in formValues litellmParamsObj["model"] = litellm_model let modelName: string = ""; + console.log("formValues add deployment:", formValues); for (const [key, value] of Object.entries(formValues)) { if (value === '') { continue; @@ -628,6 +629,7 @@ const handleEditSubmit = async (formValues: Record) => { let input_cost = "Undefined"; let output_cost = "Undefined"; let max_tokens = "Undefined"; + let max_input_tokens = "Undefined"; let cleanedLitellmParams = {}; const getProviderFromModel = (model: string) => { @@ -664,6 +666,7 @@ const handleEditSubmit = async (formValues: Record) => { input_cost = model_info?.input_cost_per_token; output_cost = model_info?.output_cost_per_token; max_tokens = model_info?.max_tokens; + max_input_tokens = model_info?.max_input_tokens; } if (curr_model?.litellm_params) { @@ -677,7 +680,19 @@ const handleEditSubmit = async (formValues: Record) => { modelData.data[i].provider = provider; modelData.data[i].input_cost = input_cost; modelData.data[i].output_cost = output_cost; + + + // Convert Cost in terms of Cost per 1M tokens + if (modelData.data[i].input_cost) { + modelData.data[i].input_cost = (Number(modelData.data[i].input_cost) * 1000000).toFixed(2); + } + + if (modelData.data[i].output_cost) { + modelData.data[i].output_cost = (Number(modelData.data[i].output_cost) * 1000000).toFixed(2); + } + modelData.data[i].max_tokens = max_tokens; + modelData.data[i].max_input_tokens = max_input_tokens; modelData.data[i].api_base = curr_model?.litellm_params?.api_base; modelData.data[i].cleanedLitellmParams = cleanedLitellmParams; @@ -893,8 +908,9 @@ const handleEditSubmit = async (formValues: Record) => { Filter by Public Model Name setSelectedModelGroup(value === "all" ? "all" : value)} + value={selectedModelGroup ? selectedModelGroup : availableModelGroups[0]} > ) => { - - - - - Public Model Name - - - Provider - - { - userRole === "Admin" && ( - - API Base - - ) - } - - Extra litellm Params - - Input Price per token ($) - Output Price per token ($) - Max Tokens - Status - - - - { modelData.data - .filter((model: any) => - selectedModelGroup === "all" || model.model_name === selectedModelGroup || selectedModelGroup === null || selectedModelGroup === undefined || selectedModelGroup === "" - ) - .map((model: any, index: number) => ( - - - - {model.model_name} - - {model.provider} - { - userRole === "Admin" && ( - {model.api_base} - ) - } - - - - - - Litellm params - - - - {JSON.stringify(model.cleanedLitellmParams, null, 2)} - - - - - - {model.input_cost || model.litellm_params.input_cost_per_token || null} - {model.output_cost || model.litellm_params.output_cost_per_token || null} - {model.max_tokens} - - { - model.model_info.db_model ? 
DB Model : Config Model - } - - - - handleEditClick(model)} - /> - - - - ))} - - - + + + + Public Model Name + Provider + {userRole === "Admin" && ( + API Base + )} + Extra litellm Params + Input Price /1M Tokens ($) + Output Price /1M Tokens ($) + Max Tokens + Status + + + + {modelData.data + .filter((model: any) => + selectedModelGroup === "all" || + model.model_name === selectedModelGroup || + selectedModelGroup === null || + selectedModelGroup === undefined || + selectedModelGroup === "" + ) + .map((model: any, index: number) => ( + + + {model.model_name} + + {model.provider} + {userRole === "Admin" && ( + {model.api_base} + )} + + + + Litellm params + + + {JSON.stringify(model.cleanedLitellmParams, null, 2)} + + + + {model.input_cost || model.litellm_params.input_cost_per_token || null} + {model.output_cost || model.litellm_params.output_cost_per_token || null} + + + Max Tokens: {model.max_tokens} + Max Input Tokens: {model.max_input_tokens} + + + + {model.model_info.db_model ? ( + + DB Model + + ) : ( + + Config Model + + )} + + + handleEditClick(model)} /> + + + + ))} + + @@ -1116,13 +1123,22 @@ const handleEditSubmit = async (formValues: Record) => { } { - selectedProvider == Providers.Azure && - - The actual model your azure deployment uses. Used for accurate cost tracking. Select name from here - + selectedProvider == Providers.Azure && + + + + + + + + The actual model your azure deployment uses. Used for accurate cost tracking. Select name from here + + + } { selectedProvider == Providers.Bedrock && { try { - const response = await fetch('https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json'); + const response = await fetch( + "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json" + ); const jsonData = await response.json(); - console.log(`received data: ${jsonData}`) - return jsonData + console.log(`received data: ${jsonData}`); + return jsonData; } catch (error) { console.error("Failed to get model cost map:", error); throw error; } -} +}; export const modelCreateCall = async ( accessToken: string, @@ -50,19 +52,21 @@ export const modelCreateCall = async ( const data = await response.json(); console.log("API Response:", data); - message.success("Model created successfully. Wait 60s and refresh on 'All Models' page"); + message.success( + "Model created successfully. Wait 60s and refresh on 'All Models' page" + ); return data; } catch (error) { console.error("Failed to create key:", error); throw error; } -} +}; -export const modelDeleteCall = async ( +export const modelDeleteCall = async ( accessToken: string, - model_id: string, + model_id: string ) => { - console.log(`model_id in model delete call: ${model_id}`) + console.log(`model_id in model delete call: ${model_id}`); try { const url = proxyBaseUrl ? 
`${proxyBaseUrl}/model/delete` : `/model/delete`; const response = await fetch(url, { @@ -72,7 +76,7 @@ export const modelDeleteCall = async ( "Content-Type": "application/json", }, body: JSON.stringify({ - "id": model_id, + id: model_id, }), }); @@ -91,7 +95,7 @@ export const modelDeleteCall = async ( console.error("Failed to create key:", error); throw error; } -} +}; export const keyCreateCall = async ( accessToken: string, @@ -280,15 +284,14 @@ export const teamDeleteCall = async (accessToken: String, teamID: String) => { console.error("Failed to delete key:", error); throw error; } - -} +}; export const userInfoCall = async ( accessToken: String, userID: String | null, userRole: String, viewAll: Boolean = false, - page: number | null, + page: number | null, page_size: number | null ) => { try { @@ -300,7 +303,7 @@ export const userInfoCall = async ( url = `${url}?user_id=${userID}`; } console.log("in userInfoCall viewAll=", viewAll); - if (viewAll && page_size && (page != null) && (page != undefined)) { + if (viewAll && page_size && page != null && page != undefined) { url = `${url}?view_all=true&page=${page}&page_size=${page_size}`; } //message.info("Requesting user data"); @@ -329,10 +332,9 @@ export const userInfoCall = async ( } }; - export const teamInfoCall = async ( accessToken: String, - teamID: String | null, + teamID: String | null ) => { try { let url = proxyBaseUrl ? `${proxyBaseUrl}/team/info` : `/team/info`; @@ -364,10 +366,7 @@ export const teamInfoCall = async ( } }; - -export const getTotalSpendCall = async ( - accessToken: String, -) => { +export const getTotalSpendCall = async (accessToken: String) => { /** * Get all models on proxy */ @@ -435,11 +434,10 @@ export const modelInfoCall = async ( } }; - export const modelMetricsCall = async ( accessToken: String, userID: String, - userRole: String, + userRole: String, modelGroup: String | null, startTime: String | undefined, endTime: String | undefined @@ -450,7 +448,7 @@ export const modelMetricsCall = async ( try { let url = proxyBaseUrl ? `${proxyBaseUrl}/model/metrics` : `/model/metrics`; if (modelGroup) { - url = `${url}?_selected_model_group=${modelGroup}&startTime=${startTime}&endTime=${endTime}` + url = `${url}?_selected_model_group=${modelGroup}&startTime=${startTime}&endTime=${endTime}`; } // message.info("Requesting model data"); const response = await fetch(url, { @@ -476,12 +474,10 @@ export const modelMetricsCall = async ( } }; - - export const modelMetricsSlowResponsesCall = async ( accessToken: String, userID: String, - userRole: String, + userRole: String, modelGroup: String | null, startTime: String | undefined, endTime: String | undefined @@ -490,11 +486,13 @@ export const modelMetricsSlowResponsesCall = async ( * Get all models on proxy */ try { - let url = proxyBaseUrl ? `${proxyBaseUrl}/model/metrics/slow_responses` : `/model/metrics/slow_responses`; + let url = proxyBaseUrl + ? 
`${proxyBaseUrl}/model/metrics/slow_responses` + : `/model/metrics/slow_responses`; if (modelGroup) { - url = `${url}?_selected_model_group=${modelGroup}&startTime=${startTime}&endTime=${endTime}` + url = `${url}?_selected_model_group=${modelGroup}&startTime=${startTime}&endTime=${endTime}`; } - + // message.info("Requesting model data"); const response = await fetch(url, { method: "GET", @@ -519,11 +517,10 @@ export const modelMetricsSlowResponsesCall = async ( } }; - export const modelExceptionsCall = async ( accessToken: String, userID: String, - userRole: String, + userRole: String, modelGroup: String | null, startTime: String | undefined, endTime: String | undefined @@ -532,10 +529,12 @@ export const modelExceptionsCall = async ( * Get all models on proxy */ try { - let url = proxyBaseUrl ? `${proxyBaseUrl}/model/metrics/exceptions` : `/model/metrics/exceptions`; + let url = proxyBaseUrl + ? `${proxyBaseUrl}/model/metrics/exceptions` + : `/model/metrics/exceptions`; if (modelGroup) { - url = `${url}?_selected_model_group=${modelGroup}&startTime=${startTime}&endTime=${endTime}` + url = `${url}?_selected_model_group=${modelGroup}&startTime=${startTime}&endTime=${endTime}`; } const response = await fetch(url, { method: "GET", @@ -560,7 +559,6 @@ export const modelExceptionsCall = async ( } }; - export const modelAvailableCall = async ( accessToken: String, userID: String, @@ -625,7 +623,6 @@ export const keySpendLogsCall = async (accessToken: String, token: String) => { } }; - export const teamSpendLogsCall = async (accessToken: String) => { try { const url = proxyBaseUrl @@ -654,19 +651,18 @@ export const teamSpendLogsCall = async (accessToken: String) => { } }; - export const tagsSpendLogsCall = async ( - accessToken: String, - startTime: String | undefined, - endTime: String | undefined - ) => { + accessToken: String, + startTime: String | undefined, + endTime: String | undefined +) => { try { let url = proxyBaseUrl ? `${proxyBaseUrl}/global/spend/tags` : `/global/spend/tags`; - + if (startTime && endTime) { - url = `${url}?start_date=${startTime}&end_date=${endTime}` + url = `${url}?start_date=${startTime}&end_date=${endTime}`; } console.log("in tagsSpendLogsCall:", url); @@ -679,7 +675,6 @@ export const tagsSpendLogsCall = async ( }); if (!response.ok) { const errorData = await response.text(); - message.error(errorData, 10); throw new Error("Network response was not ok"); } @@ -692,7 +687,6 @@ export const tagsSpendLogsCall = async ( } }; - export const userSpendLogsCall = async ( accessToken: String, token: String, @@ -806,11 +800,15 @@ export const adminTopEndUsersCall = async ( let body = ""; if (keyToken) { - body = JSON.stringify({ api_key: keyToken, startTime: startTime, endTime: endTime }); + body = JSON.stringify({ + api_key: keyToken, + startTime: startTime, + endTime: endTime, + }); } else { body = JSON.stringify({ startTime: startTime, endTime: endTime }); } - + //message.info("Making top end users request"); // Define requestOptions with body as an optional property @@ -1079,7 +1077,6 @@ export const teamCreateCall = async ( } }; - export const keyUpdateCall = async ( accessToken: string, formValues: Record // Assuming formValues is an object @@ -1240,7 +1237,7 @@ export const userUpdateUserCall = async ( console.log("Form Values in userUpdateUserCall:", formValues); // Log the form values before making the API call const url = proxyBaseUrl ? 
`${proxyBaseUrl}/user/update` : `/user/update`; - let response_body = {...formValues}; + let response_body = { ...formValues }; if (userRole !== null) { response_body["user_role"] = userRole; } @@ -1333,7 +1330,7 @@ export const slackBudgetAlertsHealthCheck = async (accessToken: String) => { // throw error with message throw new Error(errorData); } - + const data = await response.json(); message.success("Test Slack Alert worked - check your Slack!"); console.log("Service Health Response:", data); @@ -1347,9 +1344,10 @@ export const slackBudgetAlertsHealthCheck = async (accessToken: String) => { } }; - - -export const serviceHealthCheck= async (accessToken: String, service: String) => { +export const serviceHealthCheck = async ( + accessToken: String, + service: String +) => { try { let url = proxyBaseUrl ? `${proxyBaseUrl}/health/services?service=${service}` @@ -1371,9 +1369,11 @@ export const serviceHealthCheck= async (accessToken: String, service: String) => // throw error with message throw new Error(errorData); } - + const data = await response.json(); - message.success(`Test request to ${service} made - check logs/alerts on ${service} to verify`); + message.success( + `Test request to ${service} made - check logs/alerts on ${service} to verify` + ); // You can add additional logic here based on the response if needed return data; } catch (error) { @@ -1382,9 +1382,6 @@ export const serviceHealthCheck= async (accessToken: String, service: String) => } }; - - - export const getCallbacksCall = async ( accessToken: String, userID: String, @@ -1394,7 +1391,9 @@ export const getCallbacksCall = async ( * Get all the models user has access to */ try { - let url = proxyBaseUrl ? `${proxyBaseUrl}/get/config/callbacks` : `/get/config/callbacks`; + let url = proxyBaseUrl + ? `${proxyBaseUrl}/get/config/callbacks` + : `/get/config/callbacks`; //message.info("Requesting model data"); const response = await fetch(url, { @@ -1421,11 +1420,117 @@ export const getCallbacksCall = async ( } }; +export const getGeneralSettingsCall = async (accessToken: String) => { + try { + let url = proxyBaseUrl + ? `${proxyBaseUrl}/config/list?config_type=general_settings` + : `/config/list?config_type=general_settings`; + //message.info("Requesting model data"); + const response = await fetch(url, { + method: "GET", + headers: { + Authorization: `Bearer ${accessToken}`, + "Content-Type": "application/json", + }, + }); + if (!response.ok) { + const errorData = await response.text(); + message.error(errorData, 10); + throw new Error("Network response was not ok"); + } + const data = await response.json(); + //message.info("Received model data"); + return data; + // Handle success - you might want to update some state or UI based on the created key + } catch (error) { + console.error("Failed to get callbacks:", error); + throw error; + } +}; +export const updateConfigFieldSetting = async ( + accessToken: String, + fieldName: string, + fieldValue: any +) => { + try { + let url = proxyBaseUrl + ? 
`${proxyBaseUrl}/config/field/update` + : `/config/field/update`; + let formData = { + field_name: fieldName, + field_value: fieldValue, + config_type: "general_settings", + }; + //message.info("Requesting model data"); + const response = await fetch(url, { + method: "POST", + headers: { + Authorization: `Bearer ${accessToken}`, + "Content-Type": "application/json", + }, + body: JSON.stringify(formData), + }); + + if (!response.ok) { + const errorData = await response.text(); + message.error(errorData, 10); + throw new Error("Network response was not ok"); + } + + const data = await response.json(); + //message.info("Received model data"); + message.success("Successfully updated value!"); + return data; + // Handle success - you might want to update some state or UI based on the created key + } catch (error) { + console.error("Failed to set callbacks:", error); + throw error; + } +}; + +export const deleteConfigFieldSetting = async ( + accessToken: String, + fieldName: String +) => { + try { + let url = proxyBaseUrl + ? `${proxyBaseUrl}/config/field/delete` + : `/config/field/delete`; + + let formData = { + field_name: fieldName, + config_type: "general_settings", + }; + //message.info("Requesting model data"); + const response = await fetch(url, { + method: "POST", + headers: { + Authorization: `Bearer ${accessToken}`, + "Content-Type": "application/json", + }, + body: JSON.stringify(formData), + }); + + if (!response.ok) { + const errorData = await response.text(); + message.error(errorData, 10); + throw new Error("Network response was not ok"); + } + + const data = await response.json(); + message.success("Field reset on proxy"); + return data; + // Handle success - you might want to update some state or UI based on the created key + } catch (error) { + console.error("Failed to get callbacks:", error); + throw error; + } +}; export const setCallbacksCall = async ( accessToken: String, formValues: Record @@ -1464,9 +1569,7 @@ export const setCallbacksCall = async ( } }; -export const healthCheckCall = async ( - accessToken: String, -) => { +export const healthCheckCall = async (accessToken: String) => { /** * Get all the models user has access to */ @@ -1497,6 +1600,3 @@ export const healthCheckCall = async ( throw error; } }; - - - diff --git a/ui/litellm-dashboard/src/components/usage.tsx b/ui/litellm-dashboard/src/components/usage.tsx index 6f03c29603..682c664c4f 100644 --- a/ui/litellm-dashboard/src/components/usage.tsx +++ b/ui/litellm-dashboard/src/components/usage.tsx @@ -142,6 +142,12 @@ const UsagePage: React.FC = ({ return; } + // the endTime put it to the last hour of the selected date + endTime.setHours(23, 59, 59, 999); + + // startTime put it to the first hour of the selected date + startTime.setHours(0, 0, 0, 0); + console.log("uiSelectedKey", uiSelectedKey); let newTopUserData = await adminTopEndUsersCall( @@ -160,6 +166,12 @@ const UsagePage: React.FC = ({ return; } + // the endTime put it to the last hour of the selected date + endTime.setHours(23, 59, 59, 999); + + // startTime put it to the first hour of the selected date + startTime.setHours(0, 0, 0, 0); + let top_tags = await tagsSpendLogsCall(accessToken, startTime.toISOString(), endTime.toISOString()); setTopTagsData(top_tags.spend_per_tag); console.log("Tag spend data updated successfully"); @@ -200,7 +212,7 @@ const UsagePage: React.FC = ({ setKeySpendData(overall_spend); const top_keys = await adminTopKeysCall(accessToken); const filtered_keys = top_keys.map((k: any) => ({ - key: (k["key_name"] || 
k["key_alias"] || k["api_key"]).substring( + key: (k["key_alias"] || k["key_name"] || k["api_key"]).substring( 0, 10 ), @@ -225,6 +237,11 @@ const UsagePage: React.FC = ({ total_spend_per_team = total_spend_per_team.map((tspt: any) => { tspt["name"] = tspt["team_id"] || ""; tspt["value"] = tspt["total_spend"] || 0; + // round the value to 2 decimal places + + tspt["value"] = tspt["value"].toFixed(2); + + return tspt; })
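
`model_dashboard.tsx` now displays input and output prices per 1M tokens rather than per raw token, converting the `*_cost_per_token` values with a multiply-and-round before rendering. A minimal version of that arithmetic is sketched below; the helper name is illustrative and not part of the component.

```typescript
// Sketch of the per-token -> per-1M-token display conversion in the Models table.
function toUSDPerMillion(costPerToken: number | string | undefined): string | null {
  if (costPerToken === undefined || costPerToken === null || costPerToken === "") {
    return null; // table renders nothing when no cost is configured
  }
  // e.g. $0.0000005 per token -> "0.50" per 1M tokens
  return (Number(costPerToken) * 1_000_000).toFixed(2);
}

console.log(toUSDPerMillion(0.0000005)); // "0.50"
console.log(toUSDPerMillion(undefined)); // null
```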