forked from phoenix/litellm-mirror

commit d54d4b6734
Merge branch 'BerriAI:main' into main

81 changed files with 2022 additions and 1231 deletions
10 .github/workflows/ghcr_deploy.yml vendored
@@ -289,7 +289,8 @@ jobs:
             repo: context.repo.repo,
             release_id: process.env.RELEASE_ID,
           });
-          return response.data.body;
+          const formattedBody = JSON.stringify(response.data.body).slice(1, -1);
+          return formattedBody;
         } catch (error) {
           core.setFailed(error.message);
         }
@@ -302,14 +303,15 @@ jobs:
           RELEASE_NOTES: ${{ steps.release-notes.outputs.result }}
         run: |
           curl -H "Content-Type: application/json" -X POST -d '{
-          "content": "New LiteLLM release ${{ env.RELEASE_TAG }}",
+          "content": "New LiteLLM release '"${RELEASE_TAG}"'",
           "username": "Release Changelog",
           "avatar_url": "https://cdn.discordapp.com/avatars/487431320314576937/bd64361e4ba6313d561d54e78c9e7171.png",
           "embeds": [
             {
-              "title": "Changelog for LiteLLM ${{ env.RELEASE_TAG }}",
-              "description": "${{ env.RELEASE_NOTES }}",
+              "title": "Changelog for LiteLLM '"${RELEASE_TAG}"'",
+              "description": "'"${RELEASE_NOTES}"'",
               "color": 2105893
             }
           ]
          }' $WEBHOOK_URL
+
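The `JSON.stringify(response.data.body).slice(1, -1)` change pairs with the webhook change above: the release notes are spliced into a hand-built JSON payload, and raw markdown bodies contain newlines and quotes that would break it. A minimal Python sketch of the same escaping trick (variable names are illustrative):

```python
import json

release_notes = 'Fixes:\n* "quoted" change\n* another line'

# json.dumps escapes \n and " and wraps the result in quotes; slicing off
# the first and last character leaves a fragment that is safe to splice
# into a larger hand-built JSON document, like the webhook body above.
escaped = json.dumps(release_notes)[1:-1]

body = '{"description": "%s"}' % escaped
json.loads(body)  # parses cleanly; splicing the raw string would fail here
print(body)
```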
@@ -25,6 +25,10 @@ repos:
         exclude: ^litellm/tests/|^litellm/proxy/tests/
         additional_dependencies: [flake8-print]
         files: litellm/.*\.py
+-   repo: https://github.com/python-poetry/poetry
+    rev: 1.8.0
+    hooks:
+    -   id: poetry-check
 -   repo: local
     hooks:
     -   id: check-files-match
@@ -151,12 +151,9 @@ Navigate to the Usage Tab on the LiteLLM UI (found on https://your-proxy-endpoin
 </Tabs>
 
 ## ✨ (Enterprise) API Endpoints to get Spend
-#### Getting Spend Reports - To Charge Other Teams, Customers
+#### Getting Spend Reports - To Charge Other Teams, Customers, Users
 
-Use the `/global/spend/report` endpoint to get daily spend report per
-- Team
-- Customer [this is `user` passed to `/chat/completions` request](#how-to-track-spend-with-litellm)
-- [LiteLLM API key](virtual_keys.md)
+Use the `/global/spend/report` endpoint to get spend reports
 
 <Tabs>
@@ -285,6 +282,16 @@ Output from script
 
+<TabItem value="per customer" label="Spend Per Customer">
+
+:::info
+
+Customer: This is the value of `user_id` passed when calling [`/key/generate`](https://litellm-api.up.railway.app/#/key%20management/generate_key_fn_key_generate_post)
+
+[this is `user` passed to `/chat/completions` request](#how-to-track-spend-with-litellm)
+- [LiteLLM API key](virtual_keys.md)
+
+:::
+
+##### Example Request
+
+👉 Key Change: Specify `group_by=customer`
+
@@ -341,14 +348,14 @@ curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end
 
 </TabItem>
 
-<TabItem value="per key" label="Spend Per API Key">
+<TabItem value="per key" label="Spend for Specific API Key">
 
 
-👉 Key Change: Specify `group_by=api_key`
+👉 Key Change: Specify `api_key=sk-1234`
 
 
 ```shell
-curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end_date=2024-06-30&group_by=api_key' \
+curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end_date=2024-06-30&api_key=sk-1234' \
 -H 'Authorization: Bearer sk-1234'
 ```
@@ -357,32 +364,18 @@ curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end
 
 ```shell
 [
-    {
-        "api_key": "ad64768847d05d978d62f623d872bff0f9616cc14b9c1e651c84d14fe3b9f539",
-        "total_cost": 0.0002157,
-        "total_input_tokens": 45.0,
-        "total_output_tokens": 1375.0,
-        "model_details": [
-            {
-                "model": "gpt-3.5-turbo",
-                "total_cost": 0.0001095,
-                "total_input_tokens": 9,
-                "total_output_tokens": 70
-            },
-            {
-                "model": "llama3-8b-8192",
-                "total_cost": 0.0001062,
-                "total_input_tokens": 36,
-                "total_output_tokens": 1305
-            }
-        ]
-    },
     {
         "api_key": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",
-        "total_cost": 0.00012924,
+        "total_cost": 0.3201286305151999,
        "total_input_tokens": 36.0,
        "total_output_tokens": 1593.0,
        "model_details": [
+            {
+                "model": "dall-e-3",
+                "total_cost": 0.31999939051519993,
+                "total_input_tokens": 0,
+                "total_output_tokens": 0
+            },
            {
                "model": "llama3-8b-8192",
                "total_cost": 0.00012924,
@@ -396,6 +389,87 @@ curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end
 
 </TabItem>
 
+<TabItem value="per user" label="Spend for Internal User (Key Owner)">
+
+:::info
+
+Internal User (Key Owner): This is the value of `user_id` passed when calling [`/key/generate`](https://litellm-api.up.railway.app/#/key%20management/generate_key_fn_key_generate_post)
+
+:::
+
+
+👉 Key Change: Specify `internal_user_id=ishaan`
+
+
+```shell
+curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end_date=2024-12-30&internal_user_id=ishaan' \
+-H 'Authorization: Bearer sk-1234'
+```
+
+##### Example Response
+
+
+```shell
+[
+    {
+        "api_key": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",
+        "total_cost": 0.00013132,
+        "total_input_tokens": 105.0,
+        "total_output_tokens": 872.0,
+        "model_details": [
+            {
+                "model": "gpt-3.5-turbo-instruct",
+                "total_cost": 5.85e-05,
+                "total_input_tokens": 15,
+                "total_output_tokens": 18
+            },
+            {
+                "model": "llama3-8b-8192",
+                "total_cost": 7.282000000000001e-05,
+                "total_input_tokens": 90,
+                "total_output_tokens": 854
+            }
+        ]
+    },
+    {
+        "api_key": "151e85e46ab8c9c7fad090793e3fe87940213f6ae665b543ca633b0b85ba6dc6",
+        "total_cost": 5.2699999999999993e-05,
+        "total_input_tokens": 26.0,
+        "total_output_tokens": 27.0,
+        "model_details": [
+            {
+                "model": "gpt-3.5-turbo",
+                "total_cost": 5.2499999999999995e-05,
+                "total_input_tokens": 24,
+                "total_output_tokens": 27
+            },
+            {
+                "model": "text-embedding-ada-002",
+                "total_cost": 2e-07,
+                "total_input_tokens": 2,
+                "total_output_tokens": 0
+            }
+        ]
+    },
+    {
+        "api_key": "60cb83a2dcbf13531bd27a25f83546ecdb25a1a6deebe62d007999dc00e1e32a",
+        "total_cost": 9.42e-06,
+        "total_input_tokens": 30.0,
+        "total_output_tokens": 99.0,
+        "model_details": [
+            {
+                "model": "llama3-8b-8192",
+                "total_cost": 9.42e-06,
+                "total_input_tokens": 30,
+                "total_output_tokens": 99
+            }
+        ]
+    }
+]
+```
+
+</TabItem>
+
 </Tabs>
 
 #### Allowing Non-Proxy Admins to access `/spend` endpoints
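The three tabs above differ only in one query parameter. A minimal Python sketch of the same calls (assuming a proxy at localhost:4000 and the master key `sk-1234` used throughout these docs):

```python
import requests

BASE_URL = "http://localhost:4000"
HEADERS = {"Authorization": "Bearer sk-1234"}

# Pick exactly one of: group_by=customer, api_key=<key>,
# internal_user_id=<user_id from /key/generate>
params = {
    "start_date": "2024-04-01",
    "end_date": "2024-06-30",
    "group_by": "customer",
}

response = requests.get(
    f"{BASE_URL}/global/spend/report", headers=HEADERS, params=params
)
response.raise_for_status()
for row in response.json():
    print(row)
```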
@@ -1120,12 +1120,14 @@ This is a beta feature, and subject to changes.
 USE_AWS_KMS="True"
 ```
 
-**Step 2.** Add `aws_kms/` to encrypted keys in env
+**Step 2.** Add `LITELLM_SECRET_AWS_KMS_` to encrypted keys in env
 
 ```env
-DATABASE_URL="aws_kms/AQICAH.."
+LITELLM_SECRET_AWS_KMS_DATABASE_URL="AQICAH.."
 ```
 
+LiteLLM will find this and use the decrypted `DATABASE_URL="postgres://.."` value in runtime.
+
 **Step 3.** Start proxy
 
 ```
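A rough sketch of the renamed convention (this illustrates the naming scheme only, not LiteLLM's actual decryption code; `kms_decrypt` is a hypothetical callable):

```python
import os

PREFIX = "LITELLM_SECRET_AWS_KMS_"

def resolve_kms_secrets(kms_decrypt) -> None:
    # For every LITELLM_SECRET_AWS_KMS_<NAME> env var, decrypt the value
    # and re-export it under the plain <NAME>, e.g.
    # LITELLM_SECRET_AWS_KMS_DATABASE_URL="AQICAH.." -> DATABASE_URL="postgres://.."
    for key, value in list(os.environ.items()):
        if key.startswith(PREFIX):
            os.environ[key[len(PREFIX):]] = kms_decrypt(value)
```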
@@ -1,7 +1,7 @@
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 
-# Use with Langchain, OpenAI SDK, LlamaIndex, Curl
+# Use with Langchain, OpenAI SDK, LlamaIndex, Instructor, Curl
 
 :::info
 
@@ -173,6 +173,37 @@ console.log(message);
 
 ```
 
 </TabItem>
+<TabItem value="instructor" label="Instructor">
+
+```python
+from openai import OpenAI
+import instructor
+from pydantic import BaseModel
+
+my_proxy_api_key = "" # e.g. sk-1234
+my_proxy_base_url = "" # e.g. http://0.0.0.0:4000
+
+# This enables response_model keyword
+# from client.chat.completions.create
+client = instructor.from_openai(OpenAI(api_key=my_proxy_api_key, base_url=my_proxy_base_url))
+
+class UserDetail(BaseModel):
+    name: str
+    age: int
+
+user = client.chat.completions.create(
+    model="gemini-pro-flash",
+    response_model=UserDetail,
+    messages=[
+        {"role": "user", "content": "Extract Jason is 25 years old"},
+    ]
+)
+
+assert isinstance(user, UserDetail)
+assert user.name == "Jason"
+assert user.age == 25
+```
+</TabItem>
 </Tabs>
 
@@ -205,6 +236,97 @@ console.log(message);
 
 ```
 
+### Function Calling
+
+Here are some examples of doing function calling with the proxy.
+
+You can use the proxy for function calling with **any** openai-compatible project.
+
+<Tabs>
+<TabItem value="curl" label="curl">
+
+```bash
+curl http://0.0.0.0:4000/v1/chat/completions \
+-H "Content-Type: application/json" \
+-H "Authorization: Bearer $OPTIONAL_YOUR_PROXY_KEY" \
+-d '{
+  "model": "gpt-4-turbo",
+  "messages": [
+    {
+      "role": "user",
+      "content": "What'\''s the weather like in Boston today?"
+    }
+  ],
+  "tools": [
+    {
+      "type": "function",
+      "function": {
+        "name": "get_current_weather",
+        "description": "Get the current weather in a given location",
+        "parameters": {
+          "type": "object",
+          "properties": {
+            "location": {
+              "type": "string",
+              "description": "The city and state, e.g. San Francisco, CA"
+            },
+            "unit": {
+              "type": "string",
+              "enum": ["celsius", "fahrenheit"]
+            }
+          },
+          "required": ["location"]
+        }
+      }
+    }
+  ],
+  "tool_choice": "auto"
+}'
+```
+</TabItem>
+<TabItem value="sdk" label="SDK">
+
+```python
+from openai import OpenAI
+client = OpenAI(
+    api_key="sk-1234",  # [OPTIONAL] set if you set one on proxy, else set ""
+    base_url="http://0.0.0.0:4000",
+)
+
+tools = [
+  {
+    "type": "function",
+    "function": {
+      "name": "get_current_weather",
+      "description": "Get the current weather in a given location",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "location": {
+            "type": "string",
+            "description": "The city and state, e.g. San Francisco, CA",
+          },
+          "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
+        },
+        "required": ["location"],
+      },
+    }
+  }
+]
+messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]
+completion = client.chat.completions.create(
+    model="gpt-4o",  # use 'model_name' from config.yaml
+    messages=messages,
+    tools=tools,
+    tool_choice="auto"
+)
+
+print(completion)
+
+```
+</TabItem>
+</Tabs>
+
 ## `/embeddings`
 
 ### Request Format
@@ -248,8 +248,14 @@ class RedisCache(BaseCache):
             # asyncio.get_running_loop().create_task(self.ping())
             result = asyncio.get_running_loop().create_task(self.ping())
         except Exception as e:
-            verbose_logger.error(
-                "Error connecting to Async Redis client", extra={"error": str(e)}
-            )
+            if "no running event loop" in str(e):
+                verbose_logger.debug(
+                    "Ignoring async redis ping. No running event loop."
+                )
+            else:
+                verbose_logger.error(
+                    "Error connecting to Async Redis client - {}".format(str(e)),
+                    extra={"error": str(e)},
+                )
 
         ### SYNC HEALTH PING ###
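The new branch exists because `asyncio.get_running_loop()` raises a `RuntimeError` containing "no running event loop" whenever the cache is constructed from synchronous code, which is routine rather than an error. A minimal reproduction:

```python
import asyncio

try:
    # Outside any running event loop this raises before create_task runs.
    asyncio.get_running_loop().create_task(asyncio.sleep(0))
except RuntimeError as e:
    assert "no running event loop" in str(e)
    print(f"expected when called synchronously: {e}")
```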
@@ -4,6 +4,8 @@ import time
 import traceback
 from typing import List, Literal, Optional, Tuple, Union
 
+from pydantic import BaseModel
+
 import litellm
 import litellm._logging
 from litellm import verbose_logger
@@ -13,6 +15,10 @@ from litellm.litellm_core_utils.llm_cost_calc.google import (
 from litellm.litellm_core_utils.llm_cost_calc.google import (
     cost_per_token as google_cost_per_token,
 )
+from litellm.litellm_core_utils.llm_cost_calc.utils import _generic_cost_per_character
+from litellm.types.llms.openai import HttpxBinaryResponseContent
+from litellm.types.router import SPECIAL_MODEL_INFO_PARAMS
+
 from litellm.utils import (
     CallTypes,
     CostPerToken,
@@ -62,6 +68,23 @@ def cost_per_token(
     ### CUSTOM PRICING ###
     custom_cost_per_token: Optional[CostPerToken] = None,
     custom_cost_per_second: Optional[float] = None,
+    ### CALL TYPE ###
+    call_type: Literal[
+        "embedding",
+        "aembedding",
+        "completion",
+        "acompletion",
+        "atext_completion",
+        "text_completion",
+        "image_generation",
+        "aimage_generation",
+        "moderation",
+        "amoderation",
+        "atranscription",
+        "transcription",
+        "aspeech",
+        "speech",
+    ] = "completion",
 ) -> Tuple[float, float]:
     """
     Calculates the cost per token for a given model, prompt tokens, and completion tokens.
@@ -76,6 +99,7 @@ def cost_per_token(
     custom_llm_provider (str): The llm provider to whom the call was made (see init.py for full list)
     custom_cost_per_token: Optional[CostPerToken]: the cost per input + output token for the llm api call.
     custom_cost_per_second: Optional[float]: the cost per second for the llm api call.
+    call_type: Optional[str]: the call type
 
     Returns:
         tuple: A tuple containing the cost in USD dollars for prompt tokens and completion tokens, respectively.
@@ -159,6 +183,27 @@ def cost_per_token(
             prompt_tokens=prompt_tokens,
             completion_tokens=completion_tokens,
         )
+    elif call_type == "speech" or call_type == "aspeech":
+        prompt_cost, completion_cost = _generic_cost_per_character(
+            model=model_without_prefix,
+            custom_llm_provider=custom_llm_provider,
+            prompt_characters=prompt_characters,
+            completion_characters=completion_characters,
+            custom_prompt_cost=None,
+            custom_completion_cost=0,
+        )
+        if prompt_cost is None or completion_cost is None:
+            raise ValueError(
+                "cost for tts call is None. prompt_cost={}, completion_cost={}, model={}, custom_llm_provider={}, prompt_characters={}, completion_characters={}".format(
+                    prompt_cost,
+                    completion_cost,
+                    model_without_prefix,
+                    custom_llm_provider,
+                    prompt_characters,
+                    completion_characters,
+                )
+            )
+        return prompt_cost, completion_cost
     elif model in model_cost_ref:
         print_verbose(f"Success: model={model} in model_cost_map")
         print_verbose(
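A hedged sketch of calling the new branch directly (keyword names come from the diff above; it assumes the target model carries `input_cost_per_character` pricing in litellm's cost map, as OpenAI's `tts-1` does):

```python
import litellm

prompt_cost, completion_cost = litellm.cost_per_token(
    model="tts-1",
    custom_llm_provider="openai",
    prompt_tokens=0,
    completion_tokens=0,
    prompt_characters=1000.0,   # characters sent to the TTS endpoint
    completion_characters=0.0,  # audio out, no text tokens
    call_type="speech",         # routes into _generic_cost_per_character
)
print(prompt_cost, completion_cost)  # e.g. (0.015, 0.0) at $15 / 1M characters
```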
@@ -289,7 +334,7 @@ def cost_per_token(
         return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
     else:
         # if model is not in model_prices_and_context_window.json. Raise an exception-let users know
-        error_str = f"Model not in model_prices_and_context_window.json. You passed model={model}. Register pricing for model - https://docs.litellm.ai/docs/proxy/custom_pricing\n"
+        error_str = f"Model not in model_prices_and_context_window.json. You passed model={model}, custom_llm_provider={custom_llm_provider}. Register pricing for model - https://docs.litellm.ai/docs/proxy/custom_pricing\n"
         raise litellm.exceptions.NotFoundError(  # type: ignore
             message=error_str,
             model=model,
@@ -429,7 +474,10 @@ def completion_cost(
     prompt_characters = 0
     completion_tokens = 0
     completion_characters = 0
-    if completion_response is not None:
+    if completion_response is not None and (
+        isinstance(completion_response, BaseModel)
+        or isinstance(completion_response, dict)
+    ):  # tts returns a custom class
         # get input/output tokens from completion_response
         prompt_tokens = completion_response.get("usage", {}).get("prompt_tokens", 0)
         completion_tokens = completion_response.get("usage", {}).get(
@@ -535,6 +583,11 @@ def completion_cost(
             raise Exception(
                 f"Model={image_gen_model_name} not found in completion cost model map"
             )
+        elif (
+            call_type == CallTypes.speech.value or call_type == CallTypes.aspeech.value
+        ):
+            prompt_characters = litellm.utils._count_characters(text=prompt)
+
         # Calculate cost based on prompt_tokens, completion_tokens
         if (
             "togethercomputer" in model
@@ -591,6 +644,7 @@ def completion_cost(
             custom_cost_per_token=custom_cost_per_token,
             prompt_characters=prompt_characters,
             completion_characters=completion_characters,
+            call_type=call_type,
         )
         _final_cost = prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
         print_verbose(
@@ -608,6 +662,7 @@ def response_cost_calculator(
         ImageResponse,
         TranscriptionResponse,
         TextCompletionResponse,
+        HttpxBinaryResponseContent,
     ],
     model: str,
     custom_llm_provider: Optional[str],
@@ -641,6 +696,7 @@ def response_cost_calculator(
     if cache_hit is not None and cache_hit is True:
         response_cost = 0.0
     else:
+        if isinstance(response_object, BaseModel):
+            response_object._hidden_params["optional_params"] = optional_params
         if isinstance(response_object, ImageResponse):
             response_cost = completion_cost(
@@ -651,12 +707,11 @@ def response_cost_calculator(
             )
         else:
             if (
-                model in litellm.model_cost
-                and custom_pricing is not None
-                and custom_llm_provider is True
+                model in litellm.model_cost or custom_pricing is True
             ):  # override defaults if custom pricing is set
                 base_model = model
             # base_model defaults to None if not set on model_info
 
             response_cost = completion_cost(
                 completion_response=response_object,
                 call_type=call_type,
@@ -32,6 +32,12 @@ class LangFuseLogger:
         self.langfuse_host = langfuse_host or os.getenv(
             "LANGFUSE_HOST", "https://cloud.langfuse.com"
         )
+        if not (
+            self.langfuse_host.startswith("http://")
+            or self.langfuse_host.startswith("https://")
+        ):
+            # add http:// if unset, assume communicating over private network - e.g. render
+            self.langfuse_host = "http://" + self.langfuse_host
         self.langfuse_release = os.getenv("LANGFUSE_RELEASE")
         self.langfuse_debug = os.getenv("LANGFUSE_DEBUG")
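The effect of the new guard, in isolation (host value is illustrative):

```python
host = "langfuse.internal:3000"  # e.g. a service name on a private network
if not (host.startswith("http://") or host.startswith("https://")):
    # mirror of the new logic: assume plain http inside the network
    host = "http://" + host
assert host == "http://langfuse.internal:3000"
```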
@@ -29,6 +29,7 @@ else:
 LITELLM_TRACER_NAME = os.getenv("OTEL_TRACER_NAME", "litellm")
 LITELLM_RESOURCE = {
     "service.name": os.getenv("OTEL_SERVICE_NAME", "litellm"),
+    "deployment.environment": os.getenv("OTEL_ENVIRONMENT_NAME", "production"),
 }
 RAW_REQUEST_SPAN_NAME = "raw_gen_ai_request"
 LITELLM_REQUEST_SPAN_NAME = "litellm_request"
@@ -24,6 +24,8 @@ from litellm.integrations.custom_logger import CustomLogger
 from litellm.litellm_core_utils.redact_messages import (
     redact_message_input_output_from_logging,
 )
+from litellm.types.llms.openai import HttpxBinaryResponseContent
+from litellm.types.router import SPECIAL_MODEL_INFO_PARAMS
 from litellm.types.utils import (
     CallTypes,
     EmbeddingResponse,
@@ -517,18 +519,20 @@ class Logging:
             self.model_call_details["cache_hit"] = cache_hit
             ## if model in model cost map - log the response cost
             ## else set cost to None
             verbose_logger.debug(f"Model={self.model};")
             if (
-                result is not None
-                and (
+                result is not None and self.stream is not True
+            ):  # handle streaming separately
+                if (
                     isinstance(result, ModelResponse)
                     or isinstance(result, EmbeddingResponse)
                     or isinstance(result, ImageResponse)
                     or isinstance(result, TranscriptionResponse)
                     or isinstance(result, TextCompletionResponse)
+                    or isinstance(result, HttpxBinaryResponseContent)  # tts
                 ):
+                    custom_pricing = use_custom_pricing_for_model(
+                        litellm_params=self.litellm_params
+                    )
-                and self.stream != True
-            ):  # handle streaming separately
                 self.model_call_details["response_cost"] = (
                     litellm.response_cost_calculator(
                         response_object=result,
@@ -542,6 +546,7 @@ class Logging:
                         ),
                         call_type=self.call_type,
                         optional_params=self.optional_params,
+                        custom_pricing=custom_pricing,
                     )
                 )
             else:  # streaming chunks + image gen.
@@ -600,8 +605,7 @@ class Logging:
                 verbose_logger.error(
                     "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while building complete streaming response in success logging {}\n{}".format(
                         str(e), traceback.format_exc()
                     ),
-                    log_level="ERROR",
                 )
             complete_streaming_response = None
         else:
@@ -626,7 +630,11 @@ class Logging:
                     model_call_details=self.model_call_details
                 ),
                 call_type=self.call_type,
-                optional_params=self.optional_params,
+                optional_params=(
+                    self.optional_params
+                    if hasattr(self, "optional_params")
+                    else {}
+                ),
             )
         )
         if self.dynamic_success_callbacks is not None and isinstance(
@@ -1795,7 +1803,6 @@ def set_callbacks(callback_list, function_id=None):
 
     try:
         for callback in callback_list:
-            print_verbose(f"init callback list: {callback}")
             if callback == "sentry":
                 try:
                     import sentry_sdk
@@ -2013,3 +2020,17 @@ def get_custom_logger_compatible_class(
         if isinstance(callback, _PROXY_DynamicRateLimitHandler):
             return callback  # type: ignore
     return None
+
+
+def use_custom_pricing_for_model(litellm_params: Optional[dict]) -> bool:
+    if litellm_params is None:
+        return False
+    metadata: Optional[dict] = litellm_params.get("metadata", {})
+    if metadata is None:
+        return False
+    model_info: Optional[dict] = metadata.get("model_info", {})
+    if model_info is not None:
+        for k, v in model_info.items():
+            if k in SPECIAL_MODEL_INFO_PARAMS:
+                return True
+    return False
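For reference, a sketch of the `litellm_params` shape that flips the new helper to `True` (`SPECIAL_MODEL_INFO_PARAMS` holds the custom-pricing keys, e.g. `input_cost_per_token`; the deployment id below is illustrative):

```python
litellm_params = {
    "metadata": {
        "model_info": {
            "id": "my-azure-gpt-4o",            # illustrative deployment id
            "input_cost_per_token": 0.0000025,  # custom pricing key -> True
            "output_cost_per_token": 0.00001,
        }
    }
}
# use_custom_pricing_for_model(litellm_params)     -> True
# use_custom_pricing_for_model({"metadata": None}) -> False
```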
85 litellm/litellm_core_utils/llm_cost_calc/utils.py Normal file
@@ -0,0 +1,85 @@
+# What is this?
+## Helper utilities for cost_per_token()
+
+import traceback
+from typing import List, Literal, Optional, Tuple
+
+import litellm
+from litellm import verbose_logger
+
+
+def _generic_cost_per_character(
+    model: str,
+    custom_llm_provider: str,
+    prompt_characters: float,
+    completion_characters: float,
+    custom_prompt_cost: Optional[float],
+    custom_completion_cost: Optional[float],
+) -> Tuple[Optional[float], Optional[float]]:
+    """
+    Generic function to help calculate cost per character.
+    """
+    """
+    Calculates the cost per character for a given model, input messages, and response object.
+
+    Input:
+        - model: str, the model name without provider prefix
+        - custom_llm_provider: str, "vertex_ai-*"
+        - prompt_characters: float, the number of input characters
+        - completion_characters: float, the number of output characters
+
+    Returns:
+        Tuple[Optional[float], Optional[float]] - prompt_cost_in_usd, completion_cost_in_usd.
+        - returns None if not able to calculate cost.
+
+    Raises:
+        Exception if 'input_cost_per_character' or 'output_cost_per_character' is missing from model_info
+    """
+    args = locals()
+    ## GET MODEL INFO
+    model_info = litellm.get_model_info(
+        model=model, custom_llm_provider=custom_llm_provider
+    )
+
+    ## CALCULATE INPUT COST
+    try:
+        if custom_prompt_cost is None:
+            assert (
+                "input_cost_per_character" in model_info
+                and model_info["input_cost_per_character"] is not None
+            ), "model info for model={} does not have 'input_cost_per_character'-pricing\nmodel_info={}".format(
+                model, model_info
+            )
+            custom_prompt_cost = model_info["input_cost_per_character"]
+
+        prompt_cost = prompt_characters * custom_prompt_cost
+    except Exception as e:
+        verbose_logger.error(
+            "litellm.litellm_core_utils.llm_cost_calc.utils.py::cost_per_character(): Exception occured - {}\n{}\nDefaulting to None".format(
+                str(e), traceback.format_exc()
+            )
+        )
+
+        prompt_cost = None
+
+    ## CALCULATE OUTPUT COST
+    try:
+        if custom_completion_cost is None:
+            assert (
+                "output_cost_per_character" in model_info
+                and model_info["output_cost_per_character"] is not None
+            ), "model info for model={} does not have 'output_cost_per_character'-pricing\nmodel_info={}".format(
+                model, model_info
+            )
+            custom_completion_cost = model_info["output_cost_per_character"]
+        completion_cost = completion_characters * custom_completion_cost
+    except Exception as e:
+        verbose_logger.error(
+            "litellm.litellm_core_utils.llm_cost_calc.utils.py::cost_per_character(): Exception occured - {}\n{}\nDefaulting to None".format(
+                str(e), traceback.format_exc()
+            )
+        )
+
+        completion_cost = None
+
+    return prompt_cost, completion_cost
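A usage sketch for the new helper (the model and provider values are illustrative and must carry `input_cost_per_character` / `output_cost_per_character` in litellm's cost map; either returned value is `None` when its pricing key is missing):

```python
from litellm.litellm_core_utils.llm_cost_calc.utils import (
    _generic_cost_per_character,
)

prompt_cost, completion_cost = _generic_cost_per_character(
    model="gemini-1.5-pro",  # illustrative
    custom_llm_provider="vertex_ai-language-models",
    prompt_characters=400.0,
    completion_characters=150.0,
    custom_prompt_cost=None,      # None -> fall back to model_info pricing
    custom_completion_cost=None,
)
if prompt_cost is not None and completion_cost is not None:
    print(prompt_cost + completion_cost)
```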
@@ -55,7 +55,6 @@ from ..types.llms.openai import (
     Thread,
 )
 from .base import BaseLLM
-from .custom_httpx.azure_dall_e_2 import AsyncCustomHTTPTransport, CustomHTTPTransport
 
 azure_ad_cache = DualCache()
 
@@ -1718,9 +1717,7 @@ class AzureChatCompletion(BaseLLM):
         input: Optional[list] = None,
         prompt: Optional[str] = None,
     ) -> dict:
-        client_session = litellm.client_session or httpx.Client(
-            transport=CustomHTTPTransport(),  # handle dall-e-2 calls
-        )
+        client_session = litellm.client_session or httpx.Client()
         if "gateway.ai.cloudflare.com" in api_base:
             ## build base url - assume api base includes resource name
             if not api_base.endswith("/"):
@@ -1793,9 +1790,10 @@ class AzureChatCompletion(BaseLLM):
         input: Optional[list] = None,
         prompt: Optional[str] = None,
     ) -> dict:
-        client_session = litellm.aclient_session or httpx.AsyncClient(
-            transport=AsyncCustomHTTPTransport(),  # handle dall-e-2 calls
-        )
+        client_session = (
+            litellm.aclient_session or httpx.AsyncClient()
+        )  # handle dall-e-2 calls
+
         if "gateway.ai.cloudflare.com" in api_base:
             ## build base url - assume api base includes resource name
             if not api_base.endswith("/"):
@@ -1,24 +1,27 @@
-from typing import Optional, Union, Any
-import types, requests  # type: ignore
-from .base import BaseLLM
-from litellm.utils import (
-    ModelResponse,
-    Choices,
-    Message,
-    CustomStreamWrapper,
-    convert_to_model_response_object,
-    TranscriptionResponse,
-    TextCompletionResponse,
-)
-from typing import Callable, Optional, BinaryIO
-from litellm import OpenAIConfig
-import litellm, json
-import httpx
-from .custom_httpx.azure_dall_e_2 import CustomHTTPTransport, AsyncCustomHTTPTransport
-from openai import AzureOpenAI, AsyncAzureOpenAI
-from ..llms.openai import OpenAITextCompletion, OpenAITextCompletionConfig
-import json
-import types  # type: ignore
-import uuid
-from .prompt_templates.factory import prompt_factory, custom_prompt
+from typing import Any, BinaryIO, Callable, Optional, Union
+
+import httpx
+import requests
+from openai import AsyncAzureOpenAI, AzureOpenAI
+
+import litellm
+from litellm import OpenAIConfig
+from litellm.utils import (
+    Choices,
+    CustomStreamWrapper,
+    Message,
+    ModelResponse,
+    TextCompletionResponse,
+    TranscriptionResponse,
+    convert_to_model_response_object,
+)
+
+from ..llms.openai import OpenAITextCompletion, OpenAITextCompletionConfig
+from .base import BaseLLM
+from .prompt_templates.factory import custom_prompt, prompt_factory
 
 openai_text_completion_config = OpenAITextCompletionConfig()
 
@@ -1,143 +0,0 @@
-import asyncio
-import json
-import time
-
-import httpx
-
-
-class AsyncCustomHTTPTransport(httpx.AsyncHTTPTransport):
-    """
-    Async implementation of custom http transport
-    """
-
-    async def handle_async_request(self, request: httpx.Request) -> httpx.Response:
-        _api_version = request.url.params.get("api-version", "")
-        if (
-            "images/generations" in request.url.path
-            and _api_version
-            in [  # dall-e-3 starts from `2023-12-01-preview` so we should be able to avoid conflict
-                "2023-06-01-preview",
-                "2023-07-01-preview",
-                "2023-08-01-preview",
-                "2023-09-01-preview",
-                "2023-10-01-preview",
-            ]
-        ):
-            request.url = request.url.copy_with(
-                path="/openai/images/generations:submit"
-            )
-            response = await super().handle_async_request(request)
-            operation_location_url = response.headers["operation-location"]
-            request.url = httpx.URL(operation_location_url)
-            request.method = "GET"
-            response = await super().handle_async_request(request)
-            await response.aread()
-
-            timeout_secs: int = 120
-            start_time = time.time()
-            while response.json()["status"] not in ["succeeded", "failed"]:
-                if time.time() - start_time > timeout_secs:
-                    timeout = {
-                        "error": {
-                            "code": "Timeout",
-                            "message": "Operation polling timed out.",
-                        }
-                    }
-                    return httpx.Response(
-                        status_code=400,
-                        headers=response.headers,
-                        content=json.dumps(timeout).encode("utf-8"),
-                        request=request,
-                    )
-
-                await asyncio.sleep(int(response.headers.get("retry-after") or 10))
-                response = await super().handle_async_request(request)
-                await response.aread()
-
-            if response.json()["status"] == "failed":
-                error_data = response.json()
-                return httpx.Response(
-                    status_code=400,
-                    headers=response.headers,
-                    content=json.dumps(error_data).encode("utf-8"),
-                    request=request,
-                )
-
-            result = response.json()["result"]
-            return httpx.Response(
-                status_code=200,
-                headers=response.headers,
-                content=json.dumps(result).encode("utf-8"),
-                request=request,
-            )
-        return await super().handle_async_request(request)
-
-
-class CustomHTTPTransport(httpx.HTTPTransport):
-    """
-    This class was written as a workaround to support dall-e-2 on openai > v1.x
-
-    Refer to this issue for more: https://github.com/openai/openai-python/issues/692
-    """
-
-    def handle_request(
-        self,
-        request: httpx.Request,
-    ) -> httpx.Response:
-        _api_version = request.url.params.get("api-version", "")
-        if (
-            "images/generations" in request.url.path
-            and _api_version
-            in [  # dall-e-3 starts from `2023-12-01-preview` so we should be able to avoid conflict
-                "2023-06-01-preview",
-                "2023-07-01-preview",
-                "2023-08-01-preview",
-                "2023-09-01-preview",
-                "2023-10-01-preview",
-            ]
-        ):
-            request.url = request.url.copy_with(
-                path="/openai/images/generations:submit"
-            )
-            response = super().handle_request(request)
-            operation_location_url = response.headers["operation-location"]
-            request.url = httpx.URL(operation_location_url)
-            request.method = "GET"
-            response = super().handle_request(request)
-            response.read()
-            timeout_secs: int = 120
-            start_time = time.time()
-            while response.json()["status"] not in ["succeeded", "failed"]:
-                if time.time() - start_time > timeout_secs:
-                    timeout = {
-                        "error": {
-                            "code": "Timeout",
-                            "message": "Operation polling timed out.",
-                        }
-                    }
-                    return httpx.Response(
-                        status_code=400,
-                        headers=response.headers,
-                        content=json.dumps(timeout).encode("utf-8"),
-                        request=request,
-                    )
-                time.sleep(int(response.headers.get("retry-after", None) or 10))
-                response = super().handle_request(request)
-                response.read()
-            if response.json()["status"] == "failed":
-                error_data = response.json()
-                return httpx.Response(
-                    status_code=400,
-                    headers=response.headers,
-                    content=json.dumps(error_data).encode("utf-8"),
-                    request=request,
-                )
-
-            result = response.json()["result"]
-            return httpx.Response(
-                status_code=200,
-                headers=response.headers,
-                content=json.dumps(result).encode("utf-8"),
-                request=request,
-            )
-        return super().handle_request(request)
@@ -26,30 +26,12 @@ class AsyncHTTPHandler:
         self, timeout: Optional[Union[float, httpx.Timeout]], concurrent_limit: int
     ) -> httpx.AsyncClient:
 
-        async_proxy_mounts = None
-        # Check if the HTTP_PROXY and HTTPS_PROXY environment variables are set and use them accordingly.
-        http_proxy = os.getenv("HTTP_PROXY", None)
-        https_proxy = os.getenv("HTTPS_PROXY", None)
-        no_proxy = os.getenv("NO_PROXY", None)
         ssl_verify = bool(os.getenv("SSL_VERIFY", litellm.ssl_verify))
         cert = os.getenv(
             "SSL_CERTIFICATE", litellm.ssl_certificate
         )  # /path/to/client.pem
 
-        if http_proxy is not None and https_proxy is not None:
-            async_proxy_mounts = {
-                "http://": httpx.AsyncHTTPTransport(proxy=httpx.Proxy(url=http_proxy)),
-                "https://": httpx.AsyncHTTPTransport(
-                    proxy=httpx.Proxy(url=https_proxy)
-                ),
-            }
-            # assume no_proxy is a list of comma separated urls
-            if no_proxy is not None and isinstance(no_proxy, str):
-                no_proxy_urls = no_proxy.split(",")
-
-                for url in no_proxy_urls:  # set no-proxy support for specific urls
-                    async_proxy_mounts[url] = None  # type: ignore
-
         if timeout is None:
             timeout = _DEFAULT_TIMEOUT
         # Create a client with a connection pool
@@ -61,7 +43,6 @@ class AsyncHTTPHandler:
                 max_keepalive_connections=concurrent_limit,
             ),
             verify=ssl_verify,
-            mounts=async_proxy_mounts,
             cert=cert,
         )
 
@@ -163,27 +144,11 @@ class HTTPHandler:
             timeout = _DEFAULT_TIMEOUT
 
         # Check if the HTTP_PROXY and HTTPS_PROXY environment variables are set and use them accordingly.
-        http_proxy = os.getenv("HTTP_PROXY", None)
-        https_proxy = os.getenv("HTTPS_PROXY", None)
-        no_proxy = os.getenv("NO_PROXY", None)
         ssl_verify = bool(os.getenv("SSL_VERIFY", litellm.ssl_verify))
         cert = os.getenv(
             "SSL_CERTIFICATE", litellm.ssl_certificate
         )  # /path/to/client.pem
 
-        sync_proxy_mounts = None
-        if http_proxy is not None and https_proxy is not None:
-            sync_proxy_mounts = {
-                "http://": httpx.HTTPTransport(proxy=httpx.Proxy(url=http_proxy)),
-                "https://": httpx.HTTPTransport(proxy=httpx.Proxy(url=https_proxy)),
-            }
-            # assume no_proxy is a list of comma separated urls
-            if no_proxy is not None and isinstance(no_proxy, str):
-                no_proxy_urls = no_proxy.split(",")
-
-                for url in no_proxy_urls:  # set no-proxy support for specific urls
-                    sync_proxy_mounts[url] = None  # type: ignore
-
         if client is None:
             # Create a client with a connection pool
             self.client = httpx.Client(
@@ -193,7 +158,6 @@ class HTTPHandler:
                 max_keepalive_connections=concurrent_limit,
             ),
             verify=ssl_verify,
-            mounts=sync_proxy_mounts,
             cert=cert,
         )
         else:
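Context for why the hand-rolled mounts could be dropped (an inference from httpx's documented defaults, not stated in the diff): httpx already honors `HTTP_PROXY` / `HTTPS_PROXY` / `NO_PROXY` on its own when `trust_env` is left enabled.

```python
import httpx

# trust_env=True is the default; httpx reads the proxy environment
# variables itself, so no explicit transport mounts are required.
client = httpx.Client(trust_env=True)
```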
@@ -1330,17 +1330,30 @@ class ModelResponseIterator:
 
             gemini_chunk = processed_chunk["candidates"][0]
 
-            if (
-                "content" in gemini_chunk
-                and "text" in gemini_chunk["content"]["parts"][0]
-            ):
-                text = gemini_chunk["content"]["parts"][0]["text"]
+            if "content" in gemini_chunk:
+                if "text" in gemini_chunk["content"]["parts"][0]:
+                    text = gemini_chunk["content"]["parts"][0]["text"]
+                elif "functionCall" in gemini_chunk["content"]["parts"][0]:
+                    function_call = ChatCompletionToolCallFunctionChunk(
+                        name=gemini_chunk["content"]["parts"][0]["functionCall"][
+                            "name"
+                        ],
+                        arguments=json.dumps(
+                            gemini_chunk["content"]["parts"][0]["functionCall"]["args"]
+                        ),
+                    )
+                    tool_use = ChatCompletionToolCallChunk(
+                        id=str(uuid.uuid4()),
+                        type="function",
+                        function=function_call,
+                        index=0,
+                    )
 
             if "finishReason" in gemini_chunk:
                 finish_reason = map_finish_reason(
                     finish_reason=gemini_chunk["finishReason"]
                 )
                 ## DO NOT SET 'finish_reason' = True
                 ## DO NOT SET 'is_finished' = True
                 ## GEMINI SETS FINISHREASON ON EVERY CHUNK!
 
             if "usageMetadata" in processed_chunk:
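The shape of the streamed chunk the new `elif` handles, reconstructed from the field accesses above (abridged; not a captured payload):

```python
import json

gemini_chunk = {
    "content": {
        "parts": [
            {
                "functionCall": {
                    "name": "get_current_weather",
                    "args": {"location": "Boston, MA"},
                }
            }
        ]
    }
}

part = gemini_chunk["content"]["parts"][0]
if "functionCall" in part:
    # args arrive as a dict and are serialized into the OpenAI-style
    # `arguments` JSON string, as the new code does with json.dumps.
    print(part["functionCall"]["name"], json.dumps(part["functionCall"]["args"]))
```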
@@ -896,7 +896,7 @@ def completion(
             if (
                 supports_system_message is not None
                 and isinstance(supports_system_message, bool)
-                and supports_system_message == False
+                and supports_system_message is False
             ):
                 messages = map_system_message_pt(messages=messages)
             model_api_key = get_api_key(
@@ -5028,10 +5028,9 @@ def stream_chunk_builder(
     for chunk in chunks:
         if "usage" in chunk:
             if "prompt_tokens" in chunk["usage"]:
-                prompt_tokens += chunk["usage"].get("prompt_tokens", 0) or 0
+                prompt_tokens = chunk["usage"].get("prompt_tokens", 0) or 0
             if "completion_tokens" in chunk["usage"]:
-                completion_tokens += chunk["usage"].get("completion_tokens", 0) or 0
-
+                completion_tokens = chunk["usage"].get("completion_tokens", 0) or 0
     try:
         response["usage"]["prompt_tokens"] = prompt_tokens or token_counter(
             model=model, messages=messages
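Why `=` replaced `+=`: when an OpenAI-style stream reports usage, the usage object is a running total for the whole request (typically sent on the final chunk), not a per-chunk delta, so accumulating it can double-count. A toy illustration:

```python
chunks = [
    {"id": "chunk-1"},  # content chunks usually carry no usage
    {"id": "chunk-2", "usage": {"prompt_tokens": 9, "completion_tokens": 70}},
]

prompt_tokens = 0
for chunk in chunks:
    if "usage" in chunk and "prompt_tokens" in chunk["usage"]:
        # assignment, not accumulation: the reported value is already cumulative
        prompt_tokens = chunk["usage"].get("prompt_tokens", 0) or 0

assert prompt_tokens == 9
```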
@@ -2022,10 +2022,10 @@
     "max_tokens": 8192,
     "max_input_tokens": 2097152,
     "max_output_tokens": 8192,
-    "input_cost_per_token": 0.00000035,
-    "input_cost_per_token_above_128k_tokens": 0.0000007,
-    "output_cost_per_token": 0.00000105,
-    "output_cost_per_token_above_128k_tokens": 0.0000021,
+    "input_cost_per_token": 0.0000035,
+    "input_cost_per_token_above_128k_tokens": 0.000007,
+    "output_cost_per_token": 0.0000105,
+    "output_cost_per_token_above_128k_tokens": 0.000021,
     "litellm_provider": "gemini",
     "mode": "chat",
     "supports_system_messages": true,
@@ -2033,16 +2033,16 @@
     "supports_vision": true,
     "supports_tool_choice": true,
     "supports_response_schema": true,
-    "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    "source": "https://ai.google.dev/pricing"
   },
   "gemini/gemini-1.5-pro-latest": {
     "max_tokens": 8192,
     "max_input_tokens": 1048576,
     "max_output_tokens": 8192,
-    "input_cost_per_token": 0.00000035,
-    "input_cost_per_token_above_128k_tokens": 0.0000007,
+    "input_cost_per_token": 0.0000035,
+    "input_cost_per_token_above_128k_tokens": 0.000007,
     "output_cost_per_token": 0.00000105,
-    "output_cost_per_token_above_128k_tokens": 0.0000021,
+    "output_cost_per_token_above_128k_tokens": 0.000021,
     "litellm_provider": "gemini",
     "mode": "chat",
     "supports_system_messages": true,
@@ -2050,7 +2050,7 @@
     "supports_vision": true,
     "supports_tool_choice": true,
     "supports_response_schema": true,
-    "source": "https://ai.google.dev/models/gemini"
+    "source": "https://ai.google.dev/pricing"
   },
   "gemini/gemini-pro-vision": {
     "max_tokens": 2048,
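A quick sanity check on the corrected decimal places:

```python
# New gemini-1.5-pro rate per input token, per the updated JSON above.
input_cost_per_token = 0.0000035

# 1M input tokens now price at $3.50; the old value (0.00000035) was a
# factor of 10 too low, pricing the same traffic at $0.35.
print(f"${input_cost_per_token * 1_000_000:.2f}")  # -> $3.50
```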
1 litellm/proxy/_experimental/out/404.html Normal file
File diff suppressed because one or more lines are too long
@@ -1 +1 @@
-<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-a8fd417ac0c6c8a5.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-a8fd417ac0c6c8a5.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/0f6908625573deae.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[48951,[\"665\",\"static/chunks/3014691f-589a5f4865c3822f.js\",\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"294\",\"static/chunks/294-0e35509d5ca95267.js\",\"131\",\"static/chunks/131-6a03368053f9d26d.js\",\"684\",\"static/chunks/684-bb2d2f93d92acb0b.js\",\"759\",\"static/chunks/759-83a8bdddfe32b5d9.js\",\"777\",\"static/chunks/777-f76791513e294b30.js\",\"931\",\"static/chunks/app/page-da7d95729f2529b5.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/0f6908625573deae.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"0gt3_bF2KkdKeE61mic4M\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
+<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-a8fd417ac0c6c8a5.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-a8fd417ac0c6c8a5.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/0f6908625573deae.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[48951,[\"665\",\"static/chunks/3014691f-589a5f4865c3822f.js\",\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"294\",\"static/chunks/294-0e35509d5ca95267.js\",\"131\",\"static/chunks/131-19b05e5ce40fa85d.js\",\"684\",\"static/chunks/684-bb2d2f93d92acb0b.js\",\"759\",\"static/chunks/759-d7572f2a46f911d5.js\",\"777\",\"static/chunks/777-906d7dd6a5bf7be4.js\",\"931\",\"static/chunks/app/page-567f85145e7f0f35.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/0f6908625573deae.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"RDLpeUaSstfmeQiKITNBo\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
@ -1,7 +1,7 @@
|
|||
2:I[77831,[],""]
|
||||
3:I[48951,["665","static/chunks/3014691f-589a5f4865c3822f.js","936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-83a8bdddfe32b5d9.js","777","static/chunks/777-f76791513e294b30.js","931","static/chunks/app/page-da7d95729f2529b5.js"],""]
|
||||
3:I[48951,["665","static/chunks/3014691f-589a5f4865c3822f.js","936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-19b05e5ce40fa85d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-d7572f2a46f911d5.js","777","static/chunks/777-906d7dd6a5bf7be4.js","931","static/chunks/app/page-567f85145e7f0f35.js"],""]
|
||||
4:I[5613,[],""]
|
||||
5:I[31778,[],""]
|
||||
0:["0gt3_bF2KkdKeE61mic4M",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
0:["RDLpeUaSstfmeQiKITNBo",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null
1
litellm/proxy/_experimental/out/model_hub.html
Normal file
File diff suppressed because one or more lines are too long
@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[87494,["294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","777","static/chunks/777-f76791513e294b30.js","418","static/chunks/app/model_hub/page-ba7819b59161aa64.js"],""]
3:I[87494,["294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-19b05e5ce40fa85d.js","777","static/chunks/777-906d7dd6a5bf7be4.js","418","static/chunks/app/model_hub/page-ba7819b59161aa64.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["0gt3_bF2KkdKeE61mic4M",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["RDLpeUaSstfmeQiKITNBo",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null
1
litellm/proxy/_experimental/out/onboarding.html
Normal file
File diff suppressed because one or more lines are too long
@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-f76791513e294b30.js","461","static/chunks/app/onboarding/page-1ed08595d570934e.js"],""]
3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-906d7dd6a5bf7be4.js","461","static/chunks/app/onboarding/page-1ed08595d570934e.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["0gt3_bF2KkdKeE61mic4M",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["RDLpeUaSstfmeQiKITNBo",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null
@ -1,12 +1,10 @@
model_list:
  - model_name: "*"
  - model_name: tts
    litellm_params:
      model: "openai/*"
      mock_response: "Hello world!"

litellm_settings:
  success_callback: ["langfuse"]
  failure_callback: ["langfuse"]
  - model_name: gemini-1.5-flash
    litellm_params:
      model: gemini/gemini-1.5-flash

general_settings:
  alerting: ["slack"]
167
litellm/proxy/common_utils/admin_ui_utils.py
Normal file
@ -0,0 +1,167 @@
import os


def show_missing_vars_in_env():
    from fastapi.responses import HTMLResponse

    from litellm.proxy.proxy_server import master_key, prisma_client

    if prisma_client is None and master_key is None:
        return HTMLResponse(
            content=missing_keys_form(
                missing_key_names="DATABASE_URL, LITELLM_MASTER_KEY"
            ),
            status_code=200,
        )
    if prisma_client is None:
        return HTMLResponse(
            content=missing_keys_form(missing_key_names="DATABASE_URL"), status_code=200
        )

    if master_key is None:
        return HTMLResponse(
            content=missing_keys_form(missing_key_names="LITELLM_MASTER_KEY"),
            status_code=200,
        )
    return None


# LiteLLM Admin UI - Non SSO Login
url_to_redirect_to = os.getenv("PROXY_BASE_URL", "")
url_to_redirect_to += "/login"
html_form = f"""
<!DOCTYPE html>
<html>
<head>
    <title>LiteLLM Login</title>
    <style>
        body {{
            font-family: Arial, sans-serif;
            background-color: #f4f4f4;
            margin: 0;
            padding: 0;
            display: flex;
            justify-content: center;
            align-items: center;
            height: 100vh;
        }}

        form {{
            background-color: #fff;
            padding: 20px;
            border-radius: 8px;
            box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
        }}

        label {{
            display: block;
            margin-bottom: 8px;
        }}

        input {{
            width: 100%;
            padding: 8px;
            margin-bottom: 16px;
            box-sizing: border-box;
            border: 1px solid #ccc;
            border-radius: 4px;
        }}

        input[type="submit"] {{
            background-color: #4caf50;
            color: #fff;
            cursor: pointer;
        }}

        input[type="submit"]:hover {{
            background-color: #45a049;
        }}
    </style>
</head>
<body>
    <form action="{url_to_redirect_to}" method="post">
        <h2>LiteLLM Login</h2>

        <p>By default, the Username is "admin" and the Password is your LiteLLM Proxy `MASTER_KEY`</p>
        <p>If you need to set UI credentials / SSO, see the docs here: <a href="https://docs.litellm.ai/docs/proxy/ui" target="_blank">https://docs.litellm.ai/docs/proxy/ui</a></p>
        <br>
        <label for="username">Username:</label>
        <input type="text" id="username" name="username" required>
        <label for="password">Password:</label>
        <input type="password" id="password" name="password" required>
        <input type="submit" value="Submit">
    </form>
"""


def missing_keys_form(missing_key_names: str):
    missing_keys_html_form = """
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <style>
            body {{
                font-family: Arial, sans-serif;
                background-color: #f4f4f9;
                color: #333;
                margin: 20px;
                line-height: 1.6;
            }}
            .container {{
                max-width: 800px;
                margin: auto;
                padding: 20px;
                background: #fff;
                border: 1px solid #ddd;
                border-radius: 5px;
                box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
            }}
            h1 {{
                font-size: 24px;
                margin-bottom: 20px;
            }}
            pre {{
                background: #f8f8f8;
                padding: 1px;
                border: 1px solid #ccc;
                border-radius: 4px;
                overflow-x: auto;
                font-size: 14px;
            }}
            .env-var {{
                font-weight: normal;
            }}
            .comment {{
                font-weight: normal;
                color: #777;
            }}
        </style>
        <title>Environment Setup Instructions</title>
    </head>
    <body>
        <div class="container">
            <h1>Environment Setup Instructions</h1>
            <p>Please add the following variables to your environment variables:</p>
            <pre>
    <span class="env-var">LITELLM_MASTER_KEY="sk-1234"</span> <span class="comment"># Your master key for the proxy server. Can use this to send /chat/completion requests etc</span>
    <span class="env-var">LITELLM_SALT_KEY="sk-XXXXXXXX"</span> <span class="comment"># Can NOT CHANGE THIS ONCE SET - It is used to encrypt/decrypt credentials stored in DB. If value of 'LITELLM_SALT_KEY' changes your models cannot be retrieved from DB</span>
    <span class="env-var">DATABASE_URL="postgres://..."</span> <span class="comment"># Need a postgres database? (Check out Supabase, Neon, etc)</span>
    <span class="comment">## OPTIONAL ##</span>
    <span class="env-var">PORT=4000</span> <span class="comment"># DO THIS FOR RENDER/RAILWAY</span>
    <span class="env-var">STORE_MODEL_IN_DB="True"</span> <span class="comment"># Allow storing models in db</span>
            </pre>
            <h1>Missing Environment Variables</h1>
            <p>{missing_keys}</p>
        </div>

        <div class="container">
            <h1>Need Help? Support</h1>
            <p>Discord: <a href="https://discord.com/invite/wuPM9dRgDw" target="_blank">https://discord.com/invite/wuPM9dRgDw</a></p>
            <p>Docs: <a href="https://docs.litellm.ai/docs/" target="_blank">https://docs.litellm.ai/docs/</a></p>
        </div>
    </body>
    </html>
    """
    return missing_keys_html_form.format(missing_keys=missing_key_names)
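A minimal sketch of how `show_missing_vars_in_env` can be wired into a route (the FastAPI app object and path here are illustrative, not part of this commit):

```python
# Illustrative only: returning the missing-keys page from a FastAPI route.
from fastapi import FastAPI

from litellm.proxy.common_utils.admin_ui_utils import show_missing_vars_in_env

app = FastAPI()  # hypothetical app object for this sketch


@app.get("/setup-check")
async def setup_check():
    missing = show_missing_vars_in_env()  # HTMLResponse when env vars are missing
    if missing is not None:
        return missing
    return {"status": "DATABASE_URL and LITELLM_MASTER_KEY are set"}
```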
89
litellm/proxy/common_utils/encrypt_decrypt_utils.py
Normal file
@ -0,0 +1,89 @@
import base64
import os

from litellm._logging import verbose_proxy_logger

LITELLM_SALT_KEY = os.getenv("LITELLM_SALT_KEY", None)
if LITELLM_SALT_KEY is None:
    verbose_proxy_logger.debug(
        "LITELLM_SALT_KEY is None, using master_key to encrypt/decrypt secrets stored in DB"
    )


def encrypt_value_helper(value: str):
    from litellm.proxy.proxy_server import master_key

    signing_key = LITELLM_SALT_KEY
    if LITELLM_SALT_KEY is None:
        signing_key = master_key

    try:
        if isinstance(value, str):
            encrypted_value = encrypt_value(value=value, signing_key=signing_key)  # type: ignore
            encrypted_value = base64.b64encode(encrypted_value).decode("utf-8")

            return encrypted_value

        raise ValueError(
            f"Invalid value type passed to encrypt_value: {type(value)} for Value: {value}\n Value must be a string"
        )
    except Exception as e:
        raise e


def decrypt_value_helper(value: str):
    from litellm.proxy.proxy_server import master_key

    signing_key = LITELLM_SALT_KEY
    if LITELLM_SALT_KEY is None:
        signing_key = master_key

    try:
        if isinstance(value, str):
            decoded_b64 = base64.b64decode(value)
            value = decrypt_value(value=decoded_b64, signing_key=signing_key)  # type: ignore
        return value
    except Exception as e:
        verbose_proxy_logger.error(f"Error decrypting value: {value}\nError: {str(e)}")
        # [Non-Blocking Exception - this should not block decrypting other values]
        pass


def encrypt_value(value: str, signing_key: str):
    import hashlib

    import nacl.secret
    import nacl.utils

    # get 32 byte key for SecretBox by hashing the signing key #
    hash_object = hashlib.sha256(signing_key.encode())
    hash_bytes = hash_object.digest()

    # initialize secret box #
    box = nacl.secret.SecretBox(hash_bytes)

    # encode message #
    value_bytes = value.encode("utf-8")

    encrypted = box.encrypt(value_bytes)

    return encrypted


def decrypt_value(value: bytes, signing_key: str) -> str:
    import hashlib

    import nacl.secret
    import nacl.utils

    # get 32 byte key for SecretBox by hashing the signing key #
    hash_object = hashlib.sha256(signing_key.encode())
    hash_bytes = hash_object.digest()

    # initialize secret box #
    box = nacl.secret.SecretBox(hash_bytes)

    # decrypt, then convert the bytes object back to a string
    plaintext = box.decrypt(value)

    plaintext = plaintext.decode("utf-8")  # type: ignore
    return plaintext  # type: ignore
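A minimal round-trip sketch of the SecretBox scheme used above, assuming `pynacl` is installed (the key and secret values are placeholders):

```python
# Round-trip sketch for the NaCl SecretBox helpers above.
import base64
import hashlib

import nacl.secret

signing_key = "sk-1234"  # hypothetical LITELLM_SALT_KEY / master_key for illustration
box = nacl.secret.SecretBox(hashlib.sha256(signing_key.encode()).digest())

ciphertext = box.encrypt("my-azure-api-key".encode("utf-8"))
stored = base64.b64encode(ciphertext).decode("utf-8")  # what gets written to the DB

recovered = box.decrypt(base64.b64decode(stored)).decode("utf-8")
assert recovered == "my-azure-api-key"
```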
@ -31,10 +31,12 @@ def initialize_callbacks_on_proxy(
            imported_list.append(callback)
        elif isinstance(callback, str) and callback == "otel":
            from litellm.integrations.opentelemetry import OpenTelemetry
            from litellm.proxy import proxy_server

            open_telemetry_logger = OpenTelemetry()

            imported_list.append(open_telemetry_logger)
            setattr(proxy_server, "open_telemetry_logger", open_telemetry_logger)
        elif isinstance(callback, str) and callback == "presidio":
            from litellm.proxy.hooks.presidio_pii_masking import (
                _OPTIONAL_PresidioPIIMasking,
@ -8,21 +8,26 @@
# Tell us how we can improve! - Krrish & Ishaan


import asyncio
import json
import traceback
import uuid
from typing import Optional, Union
import litellm, traceback, uuid, json  # noqa: E401
from litellm.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger

import aiohttp
from fastapi import HTTPException

import litellm  # noqa: E401
from litellm._logging import verbose_proxy_logger
from litellm.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import UserAPIKeyAuth
from litellm.utils import (
    ModelResponse,
    EmbeddingResponse,
    ImageResponse,
    ModelResponse,
    StreamingChoices,
)
import aiohttp
import asyncio


class _OPTIONAL_PresidioPIIMasking(CustomLogger):
@ -57,22 +62,41 @@ class _OPTIONAL_PresidioPIIMasking(CustomLogger):
                f"An error occurred: {str(e)}, file_path={ad_hoc_recognizers}"
            )

        self.presidio_analyzer_api_base = litellm.get_secret(
        self.validate_environment()

    def validate_environment(self):
        self.presidio_analyzer_api_base: Optional[str] = litellm.get_secret(
            "PRESIDIO_ANALYZER_API_BASE", None
        )
        self.presidio_anonymizer_api_base = litellm.get_secret(
        )  # type: ignore
        self.presidio_anonymizer_api_base: Optional[str] = litellm.get_secret(
            "PRESIDIO_ANONYMIZER_API_BASE", None
        )
        )  # type: ignore

        if self.presidio_analyzer_api_base is None:
            raise Exception("Missing `PRESIDIO_ANALYZER_API_BASE` from environment")
        elif not self.presidio_analyzer_api_base.endswith("/"):
        if not self.presidio_analyzer_api_base.endswith("/"):
            self.presidio_analyzer_api_base += "/"
        if not (
            self.presidio_analyzer_api_base.startswith("http://")
            or self.presidio_analyzer_api_base.startswith("https://")
        ):
            # add http:// if unset, assume communicating over private network - e.g. render
            self.presidio_analyzer_api_base = (
                "http://" + self.presidio_analyzer_api_base
            )

        if self.presidio_anonymizer_api_base is None:
            raise Exception("Missing `PRESIDIO_ANONYMIZER_API_BASE` from environment")
        elif not self.presidio_anonymizer_api_base.endswith("/"):
        if not self.presidio_anonymizer_api_base.endswith("/"):
            self.presidio_anonymizer_api_base += "/"
        if not (
            self.presidio_anonymizer_api_base.startswith("http://")
            or self.presidio_anonymizer_api_base.startswith("https://")
        ):
            # add http:// if unset, assume communicating over private network - e.g. render
            self.presidio_anonymizer_api_base = (
                "http://" + self.presidio_anonymizer_api_base
            )

    def print_verbose(self, print_statement):
        try:
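The API-base checks above amount to a small normalization step; a hedged sketch (the function name and example host are illustrative, not from this commit):

```python
def normalize_api_base(api_base: str) -> str:
    """Ensure a trailing slash and an http:// scheme, mirroring the checks above."""
    if not api_base.endswith("/"):
        api_base += "/"
    if not (api_base.startswith("http://") or api_base.startswith("https://")):
        # assume a private-network host, e.g. render
        api_base = "http://" + api_base
    return api_base


assert normalize_api_base("presidio-analyzer:3000") == "http://presidio-analyzer:3000/"
```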
@ -176,6 +176,7 @@ async def add_litellm_data_to_request(

def _add_otel_traceparent_to_data(data: dict, request: Request):
    from litellm.proxy.proxy_server import open_telemetry_logger

    if data is None:
        return
    if open_telemetry_logger is None:
@ -35,6 +35,7 @@ general_settings:
  LANGFUSE_SECRET_KEY: "os.environ/LANGFUSE_DEV_SK_KEY"

litellm_settings:
  callbacks: ["otel"]
  guardrails:
    - prompt_injection:
        callbacks: [lakera_prompt_injection, hide_secrets]
@ -140,7 +140,15 @@ from litellm.proxy.auth.user_api_key_auth import user_api_key_auth

## Import All Misc routes here ##
from litellm.proxy.caching_routes import router as caching_router
from litellm.proxy.common_utils.admin_ui_utils import (
    html_form,
    show_missing_vars_in_env,
)
from litellm.proxy.common_utils.debug_utils import router as debugging_endpoints_router
from litellm.proxy.common_utils.encrypt_decrypt_utils import (
    decrypt_value_helper,
    encrypt_value_helper,
)
from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
from litellm.proxy.common_utils.init_callbacks import initialize_callbacks_on_proxy
from litellm.proxy.common_utils.openai_endpoint_utils import (
@ -186,13 +194,9 @@ from litellm.proxy.utils import (
    _get_projected_spend_over_limit,
    _is_projected_spend_over_limit,
    _is_valid_team_configs,
    decrypt_value,
    encrypt_value,
    get_error_message_str,
    get_instance_fn,
    hash_token,
    html_form,
    missing_keys_html_form,
    reset_budget,
    send_email,
    update_spend,
@ -207,6 +211,7 @@ from litellm.router import ModelInfo as RouterModelInfo
from litellm.router import updateDeployment
from litellm.scheduler import DefaultPriorities, FlowItem, Scheduler
from litellm.types.llms.openai import HttpxBinaryResponseContent
from litellm.types.router import RouterGeneralSettings

try:
    from litellm._version import version
@ -1242,6 +1247,7 @@ class ProxyConfig:
        ## DB
        if prisma_client is not None and (
            general_settings.get("store_model_in_db", False) == True
            or store_model_in_db is True
        ):
            _tasks = []
            keys = [
@ -1765,7 +1771,11 @@ class ProxyConfig:
            if k in available_args:
                router_params[k] = v
        router = litellm.Router(
            **router_params, assistants_config=assistants_config
            **router_params,
            assistants_config=assistants_config,
            router_general_settings=RouterGeneralSettings(
                async_only_mode=True  # only init async clients
            ),
        )  # type:ignore
        return router, router.get_model_list(), general_settings

@ -1880,16 +1890,8 @@ class ProxyConfig:
            # decrypt values
            for k, v in _litellm_params.items():
                if isinstance(v, str):
                    # decode base64
                    try:
                        decoded_b64 = base64.b64decode(v)
                    except Exception as e:
                        verbose_proxy_logger.error(
                            "Error decoding value - {}".format(v)
                        )
                        continue
                    # decrypt value
                    _value = decrypt_value(value=decoded_b64, master_key=master_key)
                    _value = decrypt_value_helper(value=v)
                    # sanity check if string > size 0
                    if len(_value) > 0:
                        _litellm_params[k] = _value
@ -1933,13 +1935,8 @@ class ProxyConfig:
                if isinstance(_litellm_params, dict):
                    # decrypt values
                    for k, v in _litellm_params.items():
                        if isinstance(v, str):
                            # decode base64
                            decoded_b64 = base64.b64decode(v)
                            # decrypt value
                            _litellm_params[k] = decrypt_value(
                                value=decoded_b64, master_key=master_key  # type: ignore
                            )
                            decrypted_value = decrypt_value_helper(value=v)
                            _litellm_params[k] = decrypted_value
                    _litellm_params = LiteLLM_Params(**_litellm_params)
                else:
                    verbose_proxy_logger.error(
@ -1957,7 +1954,12 @@ class ProxyConfig:
            )
            if len(_model_list) > 0:
                verbose_proxy_logger.debug(f"_model_list: {_model_list}")
                llm_router = litellm.Router(model_list=_model_list)
                llm_router = litellm.Router(
                    model_list=_model_list,
                    router_general_settings=RouterGeneralSettings(
                        async_only_mode=True  # only init async clients
                    ),
                )
                verbose_proxy_logger.debug(f"updated llm_router: {llm_router}")
            else:
                verbose_proxy_logger.debug(f"len new_models: {len(new_models)}")
@ -1995,10 +1997,8 @@ class ProxyConfig:
        environment_variables = config_data.get("environment_variables", {})
        for k, v in environment_variables.items():
            try:
                if v is not None:
                    decoded_b64 = base64.b64decode(v)
                    value = decrypt_value(value=decoded_b64, master_key=master_key)  # type: ignore
                    os.environ[k] = value
                    decrypted_value = decrypt_value_helper(value=v)
                    os.environ[k] = decrypted_value
            except Exception as e:
                verbose_proxy_logger.error(
                    "Error setting env variable: %s - %s", k, str(e)
@ -2720,6 +2720,10 @@ async def chat_completion(
    except:
        data = json.loads(body_str)

    verbose_proxy_logger.debug(
        "Request received by LiteLLM:\n{}".format(json.dumps(data, indent=4)),
    )

    data = await add_litellm_data_to_request(
        data=data,
        request=request,
@ -3372,8 +3376,9 @@ async def embeddings(
        )
        verbose_proxy_logger.debug(traceback.format_exc())
        if isinstance(e, HTTPException):
            message = get_error_message_str(e)
            raise ProxyException(
                message=getattr(e, "message", str(e)),
                message=message,
                type=getattr(e, "type", "None"),
                param=getattr(e, "param", "None"),
                code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
@ -5930,11 +5935,8 @@ async def add_new_model(
        _litellm_params_dict = model_params.litellm_params.dict(exclude_none=True)
        _orignal_litellm_model_name = model_params.litellm_params.model
        for k, v in _litellm_params_dict.items():
            if isinstance(v, str):
                encrypted_value = encrypt_value(value=v, master_key=master_key)  # type: ignore
                model_params.litellm_params[k] = base64.b64encode(
                    encrypted_value
                ).decode("utf-8")
                encrypted_value = encrypt_value_helper(value=v)
                model_params.litellm_params[k] = encrypted_value
        _data: dict = {
            "model_id": model_params.model_info.id,
            "model_name": model_params.model_name,
@ -6065,11 +6067,8 @@ async def update_model(

        ### ENCRYPT PARAMS ###
        for k, v in _new_litellm_params_dict.items():
            if isinstance(v, str):
                encrypted_value = encrypt_value(value=v, master_key=master_key)  # type: ignore
                model_params.litellm_params[k] = base64.b64encode(
                    encrypted_value
                ).decode("utf-8")
                encrypted_value = encrypt_value_helper(value=v)
                model_params.litellm_params[k] = encrypted_value

        ### MERGE WITH EXISTING DATA ###
        merged_dictionary = {}
@ -7187,10 +7186,9 @@ async def google_login(request: Request):
    )

    ####### Detect DB + MASTER KEY in .env #######
    if prisma_client is None or master_key is None:
        from fastapi.responses import HTMLResponse

        return HTMLResponse(content=missing_keys_html_form, status_code=200)
    missing_env_vars = show_missing_vars_in_env()
    if missing_env_vars is not None:
        return missing_env_vars

    # get url from request
    redirect_url = os.getenv("PROXY_BASE_URL", str(request.base_url))
@ -8393,11 +8391,8 @@ async def update_config(config_info: ConfigYAML):

            # encrypt updated_environment_variables #
            for k, v in _updated_environment_variables.items():
                if isinstance(v, str):
                    encrypted_value = encrypt_value(value=v, master_key=master_key)  # type: ignore
                    _updated_environment_variables[k] = base64.b64encode(
                        encrypted_value
                    ).decode("utf-8")
                    encrypted_value = encrypt_value_helper(value=v)
                    _updated_environment_variables[k] = encrypted_value

            _existing_env_variables = config["environment_variables"]

@ -8814,11 +8809,8 @@ async def get_config():
                        env_vars_dict[_var] = None
                    else:
                        # decode + decrypt the value
                        decoded_b64 = base64.b64decode(env_variable)
                        _decrypted_value = decrypt_value(
                            value=decoded_b64, master_key=master_key
                        )
                        env_vars_dict[_var] = _decrypted_value
                        decrypted_value = decrypt_value_helper(value=env_variable)
                        env_vars_dict[_var] = decrypted_value

                _data_to_return.append({"name": _callback, "variables": env_vars_dict})
            elif _callback == "langfuse":
@ -8834,11 +8826,8 @@ async def get_config():
                        _langfuse_env_vars[_var] = None
                    else:
                        # decode + decrypt the value
                        decoded_b64 = base64.b64decode(env_variable)
                        _decrypted_value = decrypt_value(
                            value=decoded_b64, master_key=master_key
                        )
                        _langfuse_env_vars[_var] = _decrypted_value
                        decrypted_value = decrypt_value_helper(value=env_variable)
                        _langfuse_env_vars[_var] = decrypted_value

                _data_to_return.append(
                    {"name": _callback, "variables": _langfuse_env_vars}
@ -8859,10 +8848,7 @@ async def get_config():
                        _slack_env_vars[_var] = _value
                    else:
                        # decode + decrypt the value
                        decoded_b64 = base64.b64decode(env_variable)
                        _decrypted_value = decrypt_value(
                            value=decoded_b64, master_key=master_key
                        )
                        _decrypted_value = decrypt_value_helper(value=env_variable)
                        _slack_env_vars[_var] = _decrypted_value

                _alerting_types = proxy_logging_obj.slack_alerting_instance.alert_types
@ -8898,10 +8884,7 @@ async def get_config():
                        _email_env_vars[_var] = None
                    else:
                        # decode + decrypt the value
                        decoded_b64 = base64.b64decode(env_variable)
                        _decrypted_value = decrypt_value(
                            value=decoded_b64, master_key=master_key
                        )
                        _decrypted_value = decrypt_value_helper(value=env_variable)
                        _email_env_vars[_var] = _decrypted_value

                alerting_data.append(
@ -821,6 +821,14 @@ async def get_global_spend_report(
        default="team",
        description="Group spend by internal team or customer or api_key",
    ),
    api_key: Optional[str] = fastapi.Query(
        default=None,
        description="View spend for a specific api_key. Example api_key='sk-1234'",
    ),
    internal_user_id: Optional[str] = fastapi.Query(
        default=None,
        description="View spend for a specific internal_user_id. Example internal_user_id='1234'",
    ),
):
    """
    Get Daily Spend per Team, based on specific startTime and endTime. Per team, view usage by each key, model
@ -873,6 +881,96 @@ async def get_global_spend_report(
    raise ValueError(
        "/spend/report endpoint " + CommonProxyErrors.not_premium_user.value
    )
    if api_key is not None:
        verbose_proxy_logger.debug("Getting /spend for api_key: %s", api_key)
        if api_key.startswith("sk-"):
            api_key = hash_token(token=api_key)
        sql_query = """
            WITH SpendByModelApiKey AS (
                SELECT
                    sl.api_key,
                    sl.model,
                    SUM(sl.spend) AS model_cost,
                    SUM(sl.prompt_tokens) AS model_input_tokens,
                    SUM(sl.completion_tokens) AS model_output_tokens
                FROM
                    "LiteLLM_SpendLogs" sl
                WHERE
                    sl."startTime" BETWEEN $1::date AND $2::date AND sl.api_key = $3
                GROUP BY
                    sl.api_key,
                    sl.model
            )
            SELECT
                api_key,
                SUM(model_cost) AS total_cost,
                SUM(model_input_tokens) AS total_input_tokens,
                SUM(model_output_tokens) AS total_output_tokens,
                jsonb_agg(jsonb_build_object(
                    'model', model,
                    'total_cost', model_cost,
                    'total_input_tokens', model_input_tokens,
                    'total_output_tokens', model_output_tokens
                )) AS model_details
            FROM
                SpendByModelApiKey
            GROUP BY
                api_key
            ORDER BY
                total_cost DESC;
        """
        db_response = await prisma_client.db.query_raw(
            sql_query, start_date_obj, end_date_obj, api_key
        )
        if db_response is None:
            return []

        return db_response
    elif internal_user_id is not None:
        verbose_proxy_logger.debug(
            "Getting /spend for internal_user_id: %s", internal_user_id
        )
        sql_query = """
            WITH SpendByModelApiKey AS (
                SELECT
                    sl.api_key,
                    sl.model,
                    SUM(sl.spend) AS model_cost,
                    SUM(sl.prompt_tokens) AS model_input_tokens,
                    SUM(sl.completion_tokens) AS model_output_tokens
                FROM
                    "LiteLLM_SpendLogs" sl
                WHERE
                    sl."startTime" BETWEEN $1::date AND $2::date AND sl.user = $3
                GROUP BY
                    sl.api_key,
                    sl.model
            )
            SELECT
                api_key,
                SUM(model_cost) AS total_cost,
                SUM(model_input_tokens) AS total_input_tokens,
                SUM(model_output_tokens) AS total_output_tokens,
                jsonb_agg(jsonb_build_object(
                    'model', model,
                    'total_cost', model_cost,
                    'total_input_tokens', model_input_tokens,
                    'total_output_tokens', model_output_tokens
                )) AS model_details
            FROM
                SpendByModelApiKey
            GROUP BY
                api_key
            ORDER BY
                total_cost DESC;
        """
        db_response = await prisma_client.db.query_raw(
            sql_query, start_date_obj, end_date_obj, internal_user_id
        )
        if db_response is None:
            return []

        return db_response

    if group_by == "team":
        # first get data from spend logs -> SpendByModelApiKey
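A hedged sketch of exercising the new `internal_user_id` filter on `/global/spend/report` (the base URL, admin key, and id are placeholders):

```python
# Illustrative call to the new internal_user_id filter on /global/spend/report.
import requests

resp = requests.get(
    "http://localhost:4000/global/spend/report",
    params={
        "start_date": "2024-04-01",
        "end_date": "2024-06-30",
        "internal_user_id": "1234",  # hypothetical internal user id
    },
    headers={"Authorization": "Bearer sk-1234"},  # placeholder admin key
)
print(resp.json())  # per-api_key spend + per-model breakdown for that user
```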
@ -353,7 +353,7 @@ class ProxyLogging:
                raise HTTPException(
                    status_code=400, detail={"error": response}
                )
            print_verbose(f"final data being sent to {call_type} call: {data}")

            return data
        except Exception as e:
            raise e
@ -2705,178 +2705,6 @@ def _is_valid_team_configs(team_id=None, team_config=None, request_data=None):
        return


def encrypt_value(value: str, master_key: str):
    import hashlib

    import nacl.secret
    import nacl.utils

    # get 32 byte master key #
    hash_object = hashlib.sha256(master_key.encode())
    hash_bytes = hash_object.digest()

    # initialize secret box #
    box = nacl.secret.SecretBox(hash_bytes)

    # encode message #
    value_bytes = value.encode("utf-8")

    encrypted = box.encrypt(value_bytes)

    return encrypted


def decrypt_value(value: bytes, master_key: str) -> str:
    import hashlib

    import nacl.secret
    import nacl.utils

    # get 32 byte master key #
    hash_object = hashlib.sha256(master_key.encode())
    hash_bytes = hash_object.digest()

    # initialize secret box #
    box = nacl.secret.SecretBox(hash_bytes)

    # Convert the bytes object to a string
    plaintext = box.decrypt(value)

    plaintext = plaintext.decode("utf-8")  # type: ignore
    return plaintext  # type: ignore


# LiteLLM Admin UI - Non SSO Login
url_to_redirect_to = os.getenv("PROXY_BASE_URL", "")
url_to_redirect_to += "/login"
html_form = f"""
<!DOCTYPE html>
<html>
<head>
    <title>LiteLLM Login</title>
    <style>
        body {{
            font-family: Arial, sans-serif;
            background-color: #f4f4f4;
            margin: 0;
            padding: 0;
            display: flex;
            justify-content: center;
            align-items: center;
            height: 100vh;
        }}

        form {{
            background-color: #fff;
            padding: 20px;
            border-radius: 8px;
            box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
        }}

        label {{
            display: block;
            margin-bottom: 8px;
        }}

        input {{
            width: 100%;
            padding: 8px;
            margin-bottom: 16px;
            box-sizing: border-box;
            border: 1px solid #ccc;
            border-radius: 4px;
        }}

        input[type="submit"] {{
            background-color: #4caf50;
            color: #fff;
            cursor: pointer;
        }}

        input[type="submit"]:hover {{
            background-color: #45a049;
        }}
    </style>
</head>
<body>
    <form action="{url_to_redirect_to}" method="post">
        <h2>LiteLLM Login</h2>

        <p>By default Username is "admin" and Password is your set LiteLLM Proxy `MASTER_KEY`</p>
        <p>If you need to set UI credentials / SSO docs here: <a href="https://docs.litellm.ai/docs/proxy/ui" target="_blank">https://docs.litellm.ai/docs/proxy/ui</a></p>
        <br>
        <label for="username">Username:</label>
        <input type="text" id="username" name="username" required>
        <label for="password">Password:</label>
        <input type="password" id="password" name="password" required>
        <input type="submit" value="Submit">
    </form>
"""


missing_keys_html_form = """
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <style>
            body {
                font-family: Arial, sans-serif;
                background-color: #f4f4f9;
                color: #333;
                margin: 20px;
                line-height: 1.6;
            }
            .container {
                max-width: 600px;
                margin: auto;
                padding: 20px;
                background: #fff;
                border: 1px solid #ddd;
                border-radius: 5px;
                box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
            }
            h1 {
                font-size: 24px;
                margin-bottom: 20px;
            }
            pre {
                background: #f8f8f8;
                padding: 10px;
                border: 1px solid #ccc;
                border-radius: 4px;
                overflow-x: auto;
                font-size: 14px;
            }
            .env-var {
                font-weight: normal;
            }
            .comment {
                font-weight: normal;
                color: #777;
            }
        </style>
        <title>Environment Setup Instructions</title>
    </head>
    <body>
        <div class="container">
            <h1>Environment Setup Instructions</h1>
            <p>Please add the following configurations to your environment variables:</p>
            <pre>
    <span class="env-var">LITELLM_MASTER_KEY="sk-1234"</span> <span class="comment"># make this unique. must start with `sk-`.</span>
    <span class="env-var">DATABASE_URL="postgres://..."</span> <span class="comment"># Need a postgres database? (Check out Supabase, Neon, etc)</span>

    <span class="comment">## OPTIONAL ##</span>
    <span class="env-var">PORT=4000</span> <span class="comment"># DO THIS FOR RENDER/RAILWAY</span>
    <span class="env-var">STORE_MODEL_IN_DB="True"</span> <span class="comment"># Allow storing models in db</span>
            </pre>
        </div>
    </body>
    </html>
"""


def _to_ns(dt):
    return int(dt.timestamp() * 1e9)

@ -2888,6 +2716,11 @@ def get_error_message_str(e: Exception) -> str:
        error_message = e.detail
    elif isinstance(e.detail, dict):
        error_message = json.dumps(e.detail)
    elif hasattr(e, "message"):
        if isinstance(e.message, str):
            error_message = e.message
        elif isinstance(e.message, dict):
            error_message = json.dumps(e.message)
        else:
            error_message = str(e)
    else:
@ -46,15 +46,15 @@ from litellm._logging import verbose_router_logger
from litellm.caching import DualCache, InMemoryCache, RedisCache
from litellm.integrations.custom_logger import CustomLogger
from litellm.llms.azure import get_azure_ad_token_from_oidc
from litellm.llms.custom_httpx.azure_dall_e_2 import (
    AsyncCustomHTTPTransport,
    CustomHTTPTransport,
)
from litellm.router_strategy.least_busy import LeastBusyLoggingHandler
from litellm.router_strategy.lowest_cost import LowestCostLoggingHandler
from litellm.router_strategy.lowest_latency import LowestLatencyLoggingHandler
from litellm.router_strategy.lowest_tpm_rpm import LowestTPMLoggingHandler
from litellm.router_strategy.lowest_tpm_rpm_v2 import LowestTPMLoggingHandler_v2
from litellm.router_utils.client_initalization_utils import (
    set_client,
    should_initialize_sync_client,
)
from litellm.router_utils.handle_error import send_llm_exception_alert
from litellm.scheduler import FlowItem, Scheduler
from litellm.types.llms.openai import (
@ -79,6 +79,7 @@ from litellm.types.router import (
    ModelInfo,
    RetryPolicy,
    RouterErrors,
    RouterGeneralSettings,
    updateDeployment,
    updateLiteLLMParams,
)
@ -88,6 +89,7 @@ from litellm.utils import (
    ModelResponse,
    _is_region_eu,
    calculate_max_parallel_requests,
    create_proxy_transport_and_mounts,
    get_utc_datetime,
)

@ -169,6 +171,7 @@ class Router:
        routing_strategy_args: dict = {},  # just for latency-based routing
        semaphore: Optional[asyncio.Semaphore] = None,
        alerting_config: Optional[AlertingConfig] = None,
        router_general_settings: Optional[RouterGeneralSettings] = None,
    ) -> None:
        """
        Initialize the Router class with the given parameters for caching, reliability, and routing strategy.
@ -246,6 +249,9 @@ class Router:
            verbose_router_logger.setLevel(logging.INFO)
        elif debug_level == "DEBUG":
            verbose_router_logger.setLevel(logging.DEBUG)
        self.router_general_settings: Optional[RouterGeneralSettings] = (
            router_general_settings
        )

        self.assistants_config = assistants_config
        self.deployment_names: List = (
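A minimal sketch of constructing a `Router` with the new `router_general_settings` parameter, mirroring how the proxy wires it up above (the deployment entry is illustrative):

```python
# Illustrative Router construction with the new RouterGeneralSettings parameter.
import litellm
from litellm.types.router import RouterGeneralSettings

router = litellm.Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",  # hypothetical deployment for this sketch
            "litellm_params": {"model": "openai/gpt-3.5-turbo"},
        }
    ],
    router_general_settings=RouterGeneralSettings(
        async_only_mode=True  # only init async clients, as the proxy does above
    ),
)
```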
@ -3247,520 +3253,6 @@ class Router:
        except Exception as e:
            raise e

    def set_client(self, model: dict):
        """
        - Initializes Azure/OpenAI clients. Stores them in cache, b/c of this - https://github.com/BerriAI/litellm/issues/1278
        - Initializes Semaphore for client w/ rpm. Stores them in cache. b/c of this - https://github.com/BerriAI/litellm/issues/2994
        """
        client_ttl = self.client_ttl
        litellm_params = model.get("litellm_params", {})
        model_name = litellm_params.get("model")
        model_id = model["model_info"]["id"]
        # ### IF RPM SET - initialize a semaphore ###
        rpm = litellm_params.get("rpm", None)
        tpm = litellm_params.get("tpm", None)
        max_parallel_requests = litellm_params.get("max_parallel_requests", None)
        calculated_max_parallel_requests = calculate_max_parallel_requests(
            rpm=rpm,
            max_parallel_requests=max_parallel_requests,
            tpm=tpm,
            default_max_parallel_requests=self.default_max_parallel_requests,
        )
        if calculated_max_parallel_requests:
            semaphore = asyncio.Semaphore(calculated_max_parallel_requests)
            cache_key = f"{model_id}_max_parallel_requests_client"
            self.cache.set_cache(
                key=cache_key,
                value=semaphore,
                local_only=True,
            )

        #### for OpenAI / Azure we need to initialize the Client for High Traffic ########
        custom_llm_provider = litellm_params.get("custom_llm_provider")
        custom_llm_provider = custom_llm_provider or model_name.split("/", 1)[0] or ""
        default_api_base = None
        default_api_key = None
        if custom_llm_provider in litellm.openai_compatible_providers:
            _, custom_llm_provider, api_key, api_base = litellm.get_llm_provider(
                model=model_name
            )
            default_api_base = api_base
            default_api_key = api_key

        if (
            model_name in litellm.open_ai_chat_completion_models
            or custom_llm_provider in litellm.openai_compatible_providers
            or custom_llm_provider == "azure"
            or custom_llm_provider == "azure_text"
            or custom_llm_provider == "custom_openai"
            or custom_llm_provider == "openai"
            or custom_llm_provider == "text-completion-openai"
            or "ft:gpt-3.5-turbo" in model_name
            or model_name in litellm.open_ai_embedding_models
        ):
            is_azure_ai_studio_model: bool = False
            if custom_llm_provider == "azure":
                if litellm.utils._is_non_openai_azure_model(model_name):
                    is_azure_ai_studio_model = True
                    custom_llm_provider = "openai"
                    # remove azure prefix from model_name
                    model_name = model_name.replace("azure/", "")
            # glorified / complicated reading of configs
            # user can pass vars directly or they can pass os.environ/AZURE_API_KEY, in which case we will read the env
            # we do this here because we init clients for Azure, OpenAI and we need to set the right key
            api_key = litellm_params.get("api_key") or default_api_key
            if (
                api_key
                and isinstance(api_key, str)
                and api_key.startswith("os.environ/")
            ):
                api_key_env_name = api_key.replace("os.environ/", "")
                api_key = litellm.get_secret(api_key_env_name)
                litellm_params["api_key"] = api_key

            api_base = litellm_params.get("api_base")
            base_url = litellm_params.get("base_url")
            api_base = (
                api_base or base_url or default_api_base
            )  # allow users to pass in `api_base` or `base_url` for azure
            if api_base and api_base.startswith("os.environ/"):
                api_base_env_name = api_base.replace("os.environ/", "")
                api_base = litellm.get_secret(api_base_env_name)
                litellm_params["api_base"] = api_base

            ## AZURE AI STUDIO MISTRAL CHECK ##
            """
            Make sure api base ends in /v1/

            if not, add it - https://github.com/BerriAI/litellm/issues/2279
            """
            if (
                is_azure_ai_studio_model is True
                and api_base is not None
                and isinstance(api_base, str)
                and not api_base.endswith("/v1/")
            ):
                # check if it ends with a trailing slash
                if api_base.endswith("/"):
                    api_base += "v1/"
                elif api_base.endswith("/v1"):
                    api_base += "/"
                else:
                    api_base += "/v1/"

            api_version = litellm_params.get("api_version")
            if api_version and api_version.startswith("os.environ/"):
                api_version_env_name = api_version.replace("os.environ/", "")
                api_version = litellm.get_secret(api_version_env_name)
                litellm_params["api_version"] = api_version

            timeout = litellm_params.pop("timeout", None) or litellm.request_timeout
            if isinstance(timeout, str) and timeout.startswith("os.environ/"):
                timeout_env_name = timeout.replace("os.environ/", "")
                timeout = litellm.get_secret(timeout_env_name)
                litellm_params["timeout"] = timeout

            stream_timeout = litellm_params.pop(
                "stream_timeout", timeout
            )  # if no stream_timeout is set, default to timeout
            if isinstance(stream_timeout, str) and stream_timeout.startswith(
                "os.environ/"
            ):
                stream_timeout_env_name = stream_timeout.replace("os.environ/", "")
                stream_timeout = litellm.get_secret(stream_timeout_env_name)
                litellm_params["stream_timeout"] = stream_timeout

            max_retries = litellm_params.pop(
                "max_retries", 0
            )  # router handles retry logic
            if isinstance(max_retries, str) and max_retries.startswith("os.environ/"):
                max_retries_env_name = max_retries.replace("os.environ/", "")
                max_retries = litellm.get_secret(max_retries_env_name)
                litellm_params["max_retries"] = max_retries

            # proxy support
            import os

            import httpx

            # Check if the HTTP_PROXY and HTTPS_PROXY environment variables are set and use them accordingly.
            http_proxy = os.getenv("HTTP_PROXY", None)
            https_proxy = os.getenv("HTTPS_PROXY", None)
            no_proxy = os.getenv("NO_PROXY", None)

            # Create the proxies dictionary only if the environment variables are set.
            sync_proxy_mounts = None
            async_proxy_mounts = None
            if http_proxy is not None and https_proxy is not None:
                sync_proxy_mounts = {
                    "http://": httpx.HTTPTransport(proxy=httpx.Proxy(url=http_proxy)),
                    "https://": httpx.HTTPTransport(proxy=httpx.Proxy(url=https_proxy)),
                }
                async_proxy_mounts = {
                    "http://": httpx.AsyncHTTPTransport(
                        proxy=httpx.Proxy(url=http_proxy)
                    ),
                    "https://": httpx.AsyncHTTPTransport(
                        proxy=httpx.Proxy(url=https_proxy)
                    ),
                }

                # assume no_proxy is a list of comma separated urls
                if no_proxy is not None and isinstance(no_proxy, str):
                    no_proxy_urls = no_proxy.split(",")

                    for url in no_proxy_urls:  # set no-proxy support for specific urls
                        sync_proxy_mounts[url] = None  # type: ignore
                        async_proxy_mounts[url] = None  # type: ignore

            organization = litellm_params.get("organization", None)
            if isinstance(organization, str) and organization.startswith("os.environ/"):
                organization_env_name = organization.replace("os.environ/", "")
                organization = litellm.get_secret(organization_env_name)
                litellm_params["organization"] = organization

            if custom_llm_provider == "azure" or custom_llm_provider == "azure_text":
                if api_base is None or not isinstance(api_base, str):
                    filtered_litellm_params = {
                        k: v
                        for k, v in model["litellm_params"].items()
                        if k != "api_key"
                    }
                    _filtered_model = {
                        "model_name": model["model_name"],
                        "litellm_params": filtered_litellm_params,
                    }
                    raise ValueError(
                        f"api_base is required for Azure OpenAI. Set it on your config. Model - {_filtered_model}"
                    )
                azure_ad_token = litellm_params.get("azure_ad_token")
                if azure_ad_token is not None:
                    if azure_ad_token.startswith("oidc/"):
                        azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
                if api_version is None:
                    api_version = litellm.AZURE_DEFAULT_API_VERSION

                if "gateway.ai.cloudflare.com" in api_base:
                    if not api_base.endswith("/"):
                        api_base += "/"
                    azure_model = model_name.replace("azure/", "")
                    api_base += f"{azure_model}"
                    cache_key = f"{model_id}_async_client"
                    _client = openai.AsyncAzureOpenAI(
                        api_key=api_key,
                        azure_ad_token=azure_ad_token,
                        base_url=api_base,
                        api_version=api_version,
                        timeout=timeout,
                        max_retries=max_retries,
                        http_client=httpx.AsyncClient(
                            transport=AsyncCustomHTTPTransport(
                                limits=httpx.Limits(
                                    max_connections=1000, max_keepalive_connections=100
                                ),
                                verify=litellm.ssl_verify,
                            ),
                            mounts=async_proxy_mounts,
                        ),  # type: ignore
                    )
                    self.cache.set_cache(
                        key=cache_key,
                        value=_client,
                        ttl=client_ttl,
                        local_only=True,
                    )  # cache for 1 hr

                    cache_key = f"{model_id}_client"
                    _client = openai.AzureOpenAI(  # type: ignore
                        api_key=api_key,
                        azure_ad_token=azure_ad_token,
                        base_url=api_base,
                        api_version=api_version,
                        timeout=timeout,
                        max_retries=max_retries,
                        http_client=httpx.Client(
                            transport=CustomHTTPTransport(
                                limits=httpx.Limits(
                                    max_connections=1000, max_keepalive_connections=100
                                ),
                                verify=litellm.ssl_verify,
                            ),
                            mounts=sync_proxy_mounts,
                        ),  # type: ignore
                    )
                    self.cache.set_cache(
                        key=cache_key,
                        value=_client,
                        ttl=client_ttl,
                        local_only=True,
                    )  # cache for 1 hr
                    # streaming clients can have diff timeouts
                    cache_key = f"{model_id}_stream_async_client"
                    _client = openai.AsyncAzureOpenAI(  # type: ignore
                        api_key=api_key,
                        azure_ad_token=azure_ad_token,
                        base_url=api_base,
                        api_version=api_version,
                        timeout=stream_timeout,
                        max_retries=max_retries,
                        http_client=httpx.AsyncClient(
                            transport=AsyncCustomHTTPTransport(
                                limits=httpx.Limits(
                                    max_connections=1000, max_keepalive_connections=100
                                ),
                                verify=litellm.ssl_verify,
                            ),
                            mounts=async_proxy_mounts,
                        ),  # type: ignore
                    )
                    self.cache.set_cache(
                        key=cache_key,
                        value=_client,
                        ttl=client_ttl,
                        local_only=True,
                    )  # cache for 1 hr

                    cache_key = f"{model_id}_stream_client"
                    _client = openai.AzureOpenAI(  # type: ignore
                        api_key=api_key,
                        azure_ad_token=azure_ad_token,
                        base_url=api_base,
                        api_version=api_version,
                        timeout=stream_timeout,
                        max_retries=max_retries,
                        http_client=httpx.Client(
                            transport=CustomHTTPTransport(
                                limits=httpx.Limits(
                                    max_connections=1000, max_keepalive_connections=100
                                ),
                                verify=litellm.ssl_verify,
                            ),
                            mounts=sync_proxy_mounts,
                        ),  # type: ignore
                    )
                    self.cache.set_cache(
                        key=cache_key,
                        value=_client,
                        ttl=client_ttl,
                        local_only=True,
                    )  # cache for 1 hr
                else:
                    _api_key = api_key
                    if _api_key is not None and isinstance(_api_key, str):
                        # only show first 8 chars of api_key
                        _api_key = _api_key[:8] + "*" * 15
                    verbose_router_logger.debug(
                        f"Initializing Azure OpenAI Client for {model_name}, Api Base: {str(api_base)}, Api Key:{_api_key}"
                    )
                    azure_client_params = {
                        "api_key": api_key,
                        "azure_endpoint": api_base,
                        "api_version": api_version,
                        "azure_ad_token": azure_ad_token,
                    }
                    from litellm.llms.azure import select_azure_base_url_or_endpoint

                    # this decides if we should set azure_endpoint or base_url on Azure OpenAI Client
                    # required to support GPT-4 vision enhancements, since base_url needs to be set on Azure OpenAI Client
                    azure_client_params = select_azure_base_url_or_endpoint(
                        azure_client_params
                    )

                    cache_key = f"{model_id}_async_client"
                    _client = openai.AsyncAzureOpenAI(  # type: ignore
                        **azure_client_params,
                        timeout=timeout,
                        max_retries=max_retries,
                        http_client=httpx.AsyncClient(
                            transport=AsyncCustomHTTPTransport(
                                limits=httpx.Limits(
                                    max_connections=1000, max_keepalive_connections=100
                                ),
                                verify=litellm.ssl_verify,
                            ),
                            mounts=async_proxy_mounts,
                        ),  # type: ignore
                    )
                    self.cache.set_cache(
                        key=cache_key,
                        value=_client,
                        ttl=client_ttl,
                        local_only=True,
                    )  # cache for 1 hr

                    cache_key = f"{model_id}_client"
                    _client = openai.AzureOpenAI(  # type: ignore
                        **azure_client_params,
                        timeout=timeout,
|
||||
max_retries=max_retries,
|
||||
http_client=httpx.Client(
|
||||
transport=CustomHTTPTransport(
|
||||
verify=litellm.ssl_verify,
|
||||
limits=httpx.Limits(
|
||||
max_connections=1000, max_keepalive_connections=100
|
||||
),
|
||||
),
|
||||
mounts=sync_proxy_mounts,
|
||||
), # type: ignore
|
||||
)
|
||||
self.cache.set_cache(
|
||||
key=cache_key,
|
||||
value=_client,
|
||||
ttl=client_ttl,
|
||||
local_only=True,
|
||||
) # cache for 1 hr
|
||||
|
||||
# streaming clients should have diff timeouts
|
||||
cache_key = f"{model_id}_stream_async_client"
|
||||
_client = openai.AsyncAzureOpenAI( # type: ignore
|
||||
**azure_client_params,
|
||||
timeout=stream_timeout,
|
||||
max_retries=max_retries,
|
||||
http_client=httpx.AsyncClient(
|
||||
transport=AsyncCustomHTTPTransport(
|
||||
limits=httpx.Limits(
|
||||
max_connections=1000, max_keepalive_connections=100
|
||||
),
|
||||
verify=litellm.ssl_verify,
|
||||
),
|
||||
mounts=async_proxy_mounts,
|
||||
),
|
||||
)
|
||||
self.cache.set_cache(
|
||||
key=cache_key,
|
||||
value=_client,
|
||||
ttl=client_ttl,
|
||||
local_only=True,
|
||||
) # cache for 1 hr
|
||||
|
||||
cache_key = f"{model_id}_stream_client"
|
||||
_client = openai.AzureOpenAI( # type: ignore
|
||||
**azure_client_params,
|
||||
timeout=stream_timeout,
|
||||
max_retries=max_retries,
|
||||
http_client=httpx.Client(
|
||||
transport=CustomHTTPTransport(
|
||||
limits=httpx.Limits(
|
||||
max_connections=1000, max_keepalive_connections=100
|
||||
),
|
||||
verify=litellm.ssl_verify,
|
||||
),
|
||||
mounts=sync_proxy_mounts,
|
||||
),
|
||||
)
|
||||
self.cache.set_cache(
|
||||
key=cache_key,
|
||||
value=_client,
|
||||
ttl=client_ttl,
|
||||
local_only=True,
|
||||
) # cache for 1 hr
|
||||
|
||||
else:
|
||||
_api_key = api_key # type: ignore
|
||||
if _api_key is not None and isinstance(_api_key, str):
|
||||
# only show first 5 chars of api_key
|
||||
_api_key = _api_key[:8] + "*" * 15
|
||||
verbose_router_logger.debug(
|
||||
f"Initializing OpenAI Client for {model_name}, Api Base:{str(api_base)}, Api Key:{_api_key}"
|
||||
)
|
||||
cache_key = f"{model_id}_async_client"
|
||||
_client = openai.AsyncOpenAI( # type: ignore
|
||||
api_key=api_key,
|
||||
base_url=api_base,
|
||||
timeout=timeout,
|
||||
max_retries=max_retries,
|
||||
organization=organization,
|
||||
http_client=httpx.AsyncClient(
|
||||
transport=AsyncCustomHTTPTransport(
|
||||
limits=httpx.Limits(
|
||||
max_connections=1000, max_keepalive_connections=100
|
||||
),
|
||||
verify=litellm.ssl_verify,
|
||||
),
|
||||
mounts=async_proxy_mounts,
|
||||
), # type: ignore
|
||||
)
|
||||
self.cache.set_cache(
|
||||
key=cache_key,
|
||||
value=_client,
|
||||
ttl=client_ttl,
|
||||
local_only=True,
|
||||
) # cache for 1 hr
|
||||
|
||||
cache_key = f"{model_id}_client"
|
||||
_client = openai.OpenAI( # type: ignore
|
||||
api_key=api_key,
|
||||
base_url=api_base,
|
||||
timeout=timeout,
|
||||
max_retries=max_retries,
|
||||
organization=organization,
|
||||
http_client=httpx.Client(
|
||||
transport=CustomHTTPTransport(
|
||||
limits=httpx.Limits(
|
||||
max_connections=1000, max_keepalive_connections=100
|
||||
),
|
||||
verify=litellm.ssl_verify,
|
||||
),
|
||||
mounts=sync_proxy_mounts,
|
||||
), # type: ignore
|
||||
)
|
||||
self.cache.set_cache(
|
||||
key=cache_key,
|
||||
value=_client,
|
||||
ttl=client_ttl,
|
||||
local_only=True,
|
||||
) # cache for 1 hr
|
||||
|
||||
# streaming clients should have diff timeouts
|
||||
cache_key = f"{model_id}_stream_async_client"
|
||||
_client = openai.AsyncOpenAI( # type: ignore
|
||||
api_key=api_key,
|
||||
base_url=api_base,
|
||||
timeout=stream_timeout,
|
||||
max_retries=max_retries,
|
||||
organization=organization,
|
||||
http_client=httpx.AsyncClient(
|
||||
transport=AsyncCustomHTTPTransport(
|
||||
limits=httpx.Limits(
|
||||
max_connections=1000, max_keepalive_connections=100
|
||||
),
|
||||
verify=litellm.ssl_verify,
|
||||
),
|
||||
mounts=async_proxy_mounts,
|
||||
), # type: ignore
|
||||
)
|
||||
self.cache.set_cache(
|
||||
key=cache_key,
|
||||
value=_client,
|
||||
ttl=client_ttl,
|
||||
local_only=True,
|
||||
) # cache for 1 hr
|
||||
|
||||
# streaming clients should have diff timeouts
|
||||
cache_key = f"{model_id}_stream_client"
|
||||
_client = openai.OpenAI( # type: ignore
|
||||
api_key=api_key,
|
||||
base_url=api_base,
|
||||
timeout=stream_timeout,
|
||||
max_retries=max_retries,
|
||||
organization=organization,
|
||||
http_client=httpx.Client(
|
||||
transport=CustomHTTPTransport(
|
||||
limits=httpx.Limits(
|
||||
max_connections=1000, max_keepalive_connections=100
|
||||
),
|
||||
verify=litellm.ssl_verify,
|
||||
),
|
||||
mounts=sync_proxy_mounts,
|
||||
), # type: ignore
|
||||
)
|
||||
self.cache.set_cache(
|
||||
key=cache_key,
|
||||
value=_client,
|
||||
ttl=client_ttl,
|
||||
local_only=True,
|
||||
) # cache for 1 hr
|
||||
|
||||
def _generate_model_id(self, model_group: str, litellm_params: dict):
|
||||
"""
|
||||
Helper function to consistently generate the same id for a deployment
|
||||
|
@@ -3904,7 +3396,9 @@ class Router:
            raise Exception(f"Unsupported provider - {custom_llm_provider}")

        # init OpenAI, Azure clients
        self.set_client(model=deployment.to_json(exclude_none=True))
        set_client(
            litellm_router_instance=self, model=deployment.to_json(exclude_none=True)
        )

        # set region (if azure model) ## PREVIEW FEATURE ##
        if litellm.enable_preview_features == True:

@@ -4432,7 +3926,7 @@ class Router:
                    """
                    Re-initialize the client
                    """
                    self.set_client(model=deployment)
                    set_client(litellm_router_instance=self, model=deployment)
                    client = self.cache.get_cache(key=cache_key, local_only=True)
                return client
            else:

@@ -4442,7 +3936,7 @@ class Router:
                    """
                    Re-initialize the client
                    """
                    self.set_client(model=deployment)
                    set_client(litellm_router_instance=self, model=deployment)
                    client = self.cache.get_cache(key=cache_key, local_only=True)
                return client
            else:

@@ -4453,7 +3947,7 @@ class Router:
                    """
                    Re-initialize the client
                    """
                    self.set_client(model=deployment)
                    set_client(litellm_router_instance=self, model=deployment)
                    client = self.cache.get_cache(key=cache_key)
                return client
            else:

@@ -4463,7 +3957,7 @@ class Router:
                    """
                    Re-initialize the client
                    """
                    self.set_client(model=deployment)
                    set_client(litellm_router_instance=self, model=deployment)
                    client = self.cache.get_cache(key=cache_key)
                return client

495 litellm/router_utils/client_initalization_utils.py Normal file

@@ -0,0 +1,495 @@
import asyncio
import os
import traceback
from typing import TYPE_CHECKING, Any

import httpx
import openai

import litellm
from litellm._logging import verbose_router_logger
from litellm.llms.azure import get_azure_ad_token_from_oidc
from litellm.utils import calculate_max_parallel_requests

if TYPE_CHECKING:
    from litellm.router import Router as _Router

    LitellmRouter = _Router
else:
    LitellmRouter = Any


def should_initialize_sync_client(
    litellm_router_instance: LitellmRouter,
) -> bool:
    """
    Returns if Sync OpenAI, Azure Clients should be initialized.

    Do not init sync clients when router.router_general_settings.async_only_mode is True

    """
    if litellm_router_instance is None:
        return False

    if litellm_router_instance.router_general_settings is not None:
        if (
            hasattr(litellm_router_instance, "router_general_settings")
            and hasattr(
                litellm_router_instance.router_general_settings, "async_only_mode"
            )
            and litellm_router_instance.router_general_settings.async_only_mode is True
        ):
            return False

    return True


def set_client(litellm_router_instance: LitellmRouter, model: dict):
    """
    - Initializes Azure/OpenAI clients. Stores them in cache, b/c of this - https://github.com/BerriAI/litellm/issues/1278
    - Initializes Semaphore for client w/ rpm. Stores them in cache. b/c of this - https://github.com/BerriAI/litellm/issues/2994
    """
    client_ttl = litellm_router_instance.client_ttl
    litellm_params = model.get("litellm_params", {})
    model_name = litellm_params.get("model")
    model_id = model["model_info"]["id"]
    # ### IF RPM SET - initialize a semaphore ###
    rpm = litellm_params.get("rpm", None)
    tpm = litellm_params.get("tpm", None)
    max_parallel_requests = litellm_params.get("max_parallel_requests", None)
    calculated_max_parallel_requests = calculate_max_parallel_requests(
        rpm=rpm,
        max_parallel_requests=max_parallel_requests,
        tpm=tpm,
        default_max_parallel_requests=litellm_router_instance.default_max_parallel_requests,
    )
    if calculated_max_parallel_requests:
        semaphore = asyncio.Semaphore(calculated_max_parallel_requests)
        cache_key = f"{model_id}_max_parallel_requests_client"
        litellm_router_instance.cache.set_cache(
            key=cache_key,
            value=semaphore,
            local_only=True,
        )

    #### for OpenAI / Azure we need to initialize the Client for High Traffic ########
    custom_llm_provider = litellm_params.get("custom_llm_provider")
    custom_llm_provider = custom_llm_provider or model_name.split("/", 1)[0] or ""
    default_api_base = None
    default_api_key = None
    if custom_llm_provider in litellm.openai_compatible_providers:
        _, custom_llm_provider, api_key, api_base = litellm.get_llm_provider(
            model=model_name
        )
        default_api_base = api_base
        default_api_key = api_key

    if (
        model_name in litellm.open_ai_chat_completion_models
        or custom_llm_provider in litellm.openai_compatible_providers
        or custom_llm_provider == "azure"
        or custom_llm_provider == "azure_text"
        or custom_llm_provider == "custom_openai"
        or custom_llm_provider == "openai"
        or custom_llm_provider == "text-completion-openai"
        or "ft:gpt-3.5-turbo" in model_name
        or model_name in litellm.open_ai_embedding_models
    ):
        is_azure_ai_studio_model: bool = False
        if custom_llm_provider == "azure":
            if litellm.utils._is_non_openai_azure_model(model_name):
                is_azure_ai_studio_model = True
                custom_llm_provider = "openai"
                # remove azure prefix from model_name
                model_name = model_name.replace("azure/", "")
        # glorified / complicated reading of configs
        # user can pass vars directly or they can pass os.environ/AZURE_API_KEY, in which case we will read the env
        # we do this here because we init clients for Azure, OpenAI and we need to set the right key
        api_key = litellm_params.get("api_key") or default_api_key
        if api_key and isinstance(api_key, str) and api_key.startswith("os.environ/"):
            api_key_env_name = api_key.replace("os.environ/", "")
            api_key = litellm.get_secret(api_key_env_name)
            litellm_params["api_key"] = api_key

        api_base = litellm_params.get("api_base")
        base_url = litellm_params.get("base_url")
        api_base = (
            api_base or base_url or default_api_base
        )  # allow users to pass in `api_base` or `base_url` for azure
        if api_base and api_base.startswith("os.environ/"):
            api_base_env_name = api_base.replace("os.environ/", "")
            api_base = litellm.get_secret(api_base_env_name)
            litellm_params["api_base"] = api_base

        ## AZURE AI STUDIO MISTRAL CHECK ##
        """
        Make sure api base ends in /v1/

        if not, add it - https://github.com/BerriAI/litellm/issues/2279
        """
        if (
            is_azure_ai_studio_model is True
            and api_base is not None
            and isinstance(api_base, str)
            and not api_base.endswith("/v1/")
        ):
            # check if it ends with a trailing slash
            if api_base.endswith("/"):
                api_base += "v1/"
            elif api_base.endswith("/v1"):
                api_base += "/"
            else:
                api_base += "/v1/"

        api_version = litellm_params.get("api_version")
        if api_version and api_version.startswith("os.environ/"):
            api_version_env_name = api_version.replace("os.environ/", "")
            api_version = litellm.get_secret(api_version_env_name)
            litellm_params["api_version"] = api_version

        timeout = litellm_params.pop("timeout", None) or litellm.request_timeout
        if isinstance(timeout, str) and timeout.startswith("os.environ/"):
            timeout_env_name = timeout.replace("os.environ/", "")
            timeout = litellm.get_secret(timeout_env_name)
            litellm_params["timeout"] = timeout

        stream_timeout = litellm_params.pop(
            "stream_timeout", timeout
        )  # if no stream_timeout is set, default to timeout
        if isinstance(stream_timeout, str) and stream_timeout.startswith("os.environ/"):
            stream_timeout_env_name = stream_timeout.replace("os.environ/", "")
            stream_timeout = litellm.get_secret(stream_timeout_env_name)
            litellm_params["stream_timeout"] = stream_timeout

        max_retries = litellm_params.pop("max_retries", 0)  # router handles retry logic
        if isinstance(max_retries, str) and max_retries.startswith("os.environ/"):
            max_retries_env_name = max_retries.replace("os.environ/", "")
            max_retries = litellm.get_secret(max_retries_env_name)
            litellm_params["max_retries"] = max_retries

        organization = litellm_params.get("organization", None)
        if isinstance(organization, str) and organization.startswith("os.environ/"):
            organization_env_name = organization.replace("os.environ/", "")
            organization = litellm.get_secret(organization_env_name)
            litellm_params["organization"] = organization

        if custom_llm_provider == "azure" or custom_llm_provider == "azure_text":
            if api_base is None or not isinstance(api_base, str):
                filtered_litellm_params = {
                    k: v for k, v in model["litellm_params"].items() if k != "api_key"
                }
                _filtered_model = {
                    "model_name": model["model_name"],
                    "litellm_params": filtered_litellm_params,
                }
                raise ValueError(
                    f"api_base is required for Azure OpenAI. Set it on your config. Model - {_filtered_model}"
                )
            azure_ad_token = litellm_params.get("azure_ad_token")
            if azure_ad_token is not None:
                if azure_ad_token.startswith("oidc/"):
                    azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
            if api_version is None:
                api_version = litellm.AZURE_DEFAULT_API_VERSION

            if "gateway.ai.cloudflare.com" in api_base:
                if not api_base.endswith("/"):
                    api_base += "/"
                azure_model = model_name.replace("azure/", "")
                api_base += f"{azure_model}"
                cache_key = f"{model_id}_async_client"
                _client = openai.AsyncAzureOpenAI(
                    api_key=api_key,
                    azure_ad_token=azure_ad_token,
                    base_url=api_base,
                    api_version=api_version,
                    timeout=timeout,
                    max_retries=max_retries,
                    http_client=httpx.AsyncClient(
                        limits=httpx.Limits(
                            max_connections=1000, max_keepalive_connections=100
                        ),
                        verify=litellm.ssl_verify,
                    ),  # type: ignore
                )
                litellm_router_instance.cache.set_cache(
                    key=cache_key,
                    value=_client,
                    ttl=client_ttl,
                    local_only=True,
                )  # cache for 1 hr

                if should_initialize_sync_client(
                    litellm_router_instance=litellm_router_instance
                ):
                    cache_key = f"{model_id}_client"
                    _client = openai.AzureOpenAI(  # type: ignore
                        api_key=api_key,
                        azure_ad_token=azure_ad_token,
                        base_url=api_base,
                        api_version=api_version,
                        timeout=timeout,
                        max_retries=max_retries,
                        http_client=httpx.Client(
                            limits=httpx.Limits(
                                max_connections=1000, max_keepalive_connections=100
                            ),
                            verify=litellm.ssl_verify,
                        ),  # type: ignore
                    )
                    litellm_router_instance.cache.set_cache(
                        key=cache_key,
                        value=_client,
                        ttl=client_ttl,
                        local_only=True,
                    )  # cache for 1 hr
                # streaming clients can have diff timeouts
                cache_key = f"{model_id}_stream_async_client"
                _client = openai.AsyncAzureOpenAI(  # type: ignore
                    api_key=api_key,
                    azure_ad_token=azure_ad_token,
                    base_url=api_base,
                    api_version=api_version,
                    timeout=stream_timeout,
                    max_retries=max_retries,
                    http_client=httpx.AsyncClient(
                        limits=httpx.Limits(
                            max_connections=1000, max_keepalive_connections=100
                        ),
                        verify=litellm.ssl_verify,
                    ),  # type: ignore
                )
                litellm_router_instance.cache.set_cache(
                    key=cache_key,
                    value=_client,
                    ttl=client_ttl,
                    local_only=True,
                )  # cache for 1 hr

                if should_initialize_sync_client(
                    litellm_router_instance=litellm_router_instance
                ):
                    cache_key = f"{model_id}_stream_client"
                    _client = openai.AzureOpenAI(  # type: ignore
                        api_key=api_key,
                        azure_ad_token=azure_ad_token,
                        base_url=api_base,
                        api_version=api_version,
                        timeout=stream_timeout,
                        max_retries=max_retries,
                        http_client=httpx.Client(
                            limits=httpx.Limits(
                                max_connections=1000, max_keepalive_connections=100
                            ),
                            verify=litellm.ssl_verify,
                        ),  # type: ignore
                    )
                    litellm_router_instance.cache.set_cache(
                        key=cache_key,
                        value=_client,
                        ttl=client_ttl,
                        local_only=True,
                    )  # cache for 1 hr
            else:
                _api_key = api_key
                if _api_key is not None and isinstance(_api_key, str):
                    # only show first 5 chars of api_key
                    _api_key = _api_key[:8] + "*" * 15
                verbose_router_logger.debug(
                    f"Initializing Azure OpenAI Client for {model_name}, Api Base: {str(api_base)}, Api Key:{_api_key}"
                )
                azure_client_params = {
                    "api_key": api_key,
                    "azure_endpoint": api_base,
                    "api_version": api_version,
                    "azure_ad_token": azure_ad_token,
                }
                from litellm.llms.azure import select_azure_base_url_or_endpoint

                # this decides if we should set azure_endpoint or base_url on Azure OpenAI Client
                # required to support GPT-4 vision enhancements, since base_url needs to be set on Azure OpenAI Client
                azure_client_params = select_azure_base_url_or_endpoint(
                    azure_client_params
                )

                cache_key = f"{model_id}_async_client"
                _client = openai.AsyncAzureOpenAI(  # type: ignore
                    **azure_client_params,
                    timeout=timeout,
                    max_retries=max_retries,
                    http_client=httpx.AsyncClient(
                        limits=httpx.Limits(
                            max_connections=1000, max_keepalive_connections=100
                        ),
                        verify=litellm.ssl_verify,
                    ),  # type: ignore
                )
                litellm_router_instance.cache.set_cache(
                    key=cache_key,
                    value=_client,
                    ttl=client_ttl,
                    local_only=True,
                )  # cache for 1 hr
                if should_initialize_sync_client(
                    litellm_router_instance=litellm_router_instance
                ):
                    cache_key = f"{model_id}_client"
                    _client = openai.AzureOpenAI(  # type: ignore
                        **azure_client_params,
                        timeout=timeout,
                        max_retries=max_retries,
                        http_client=httpx.Client(
                            limits=httpx.Limits(
                                max_connections=1000, max_keepalive_connections=100
                            ),
                            verify=litellm.ssl_verify,
                        ),  # type: ignore
                    )
                    litellm_router_instance.cache.set_cache(
                        key=cache_key,
                        value=_client,
                        ttl=client_ttl,
                        local_only=True,
                    )  # cache for 1 hr

                # streaming clients should have diff timeouts
                cache_key = f"{model_id}_stream_async_client"
                _client = openai.AsyncAzureOpenAI(  # type: ignore
                    **azure_client_params,
                    timeout=stream_timeout,
                    max_retries=max_retries,
                    http_client=httpx.AsyncClient(
                        limits=httpx.Limits(
                            max_connections=1000, max_keepalive_connections=100
                        ),
                        verify=litellm.ssl_verify,
                    ),
                )
                litellm_router_instance.cache.set_cache(
                    key=cache_key,
                    value=_client,
                    ttl=client_ttl,
                    local_only=True,
                )  # cache for 1 hr

                if should_initialize_sync_client(
                    litellm_router_instance=litellm_router_instance
                ):
                    cache_key = f"{model_id}_stream_client"
                    _client = openai.AzureOpenAI(  # type: ignore
                        **azure_client_params,
                        timeout=stream_timeout,
                        max_retries=max_retries,
                        http_client=httpx.Client(
                            limits=httpx.Limits(
                                max_connections=1000, max_keepalive_connections=100
                            ),
                            verify=litellm.ssl_verify,
                        ),
                    )
                    litellm_router_instance.cache.set_cache(
                        key=cache_key,
                        value=_client,
                        ttl=client_ttl,
                        local_only=True,
                    )  # cache for 1 hr

        else:
            _api_key = api_key  # type: ignore
            if _api_key is not None and isinstance(_api_key, str):
                # only show first 5 chars of api_key
                _api_key = _api_key[:8] + "*" * 15
            verbose_router_logger.debug(
                f"Initializing OpenAI Client for {model_name}, Api Base:{str(api_base)}, Api Key:{_api_key}"
            )
            cache_key = f"{model_id}_async_client"
            _client = openai.AsyncOpenAI(  # type: ignore
                api_key=api_key,
                base_url=api_base,
                timeout=timeout,
                max_retries=max_retries,
                organization=organization,
                http_client=httpx.AsyncClient(
                    limits=httpx.Limits(
                        max_connections=1000, max_keepalive_connections=100
                    ),
                    verify=litellm.ssl_verify,
                ),  # type: ignore
            )
            litellm_router_instance.cache.set_cache(
                key=cache_key,
                value=_client,
                ttl=client_ttl,
                local_only=True,
            )  # cache for 1 hr

            if should_initialize_sync_client(
                litellm_router_instance=litellm_router_instance
            ):
                cache_key = f"{model_id}_client"
                _client = openai.OpenAI(  # type: ignore
                    api_key=api_key,
                    base_url=api_base,
                    timeout=timeout,
                    max_retries=max_retries,
                    organization=organization,
                    http_client=httpx.Client(
                        limits=httpx.Limits(
                            max_connections=1000, max_keepalive_connections=100
                        ),
                        verify=litellm.ssl_verify,
                    ),  # type: ignore
                )
                litellm_router_instance.cache.set_cache(
                    key=cache_key,
                    value=_client,
                    ttl=client_ttl,
                    local_only=True,
                )  # cache for 1 hr

            # streaming clients should have diff timeouts
            cache_key = f"{model_id}_stream_async_client"
            _client = openai.AsyncOpenAI(  # type: ignore
                api_key=api_key,
                base_url=api_base,
                timeout=stream_timeout,
                max_retries=max_retries,
                organization=organization,
                http_client=httpx.AsyncClient(
                    limits=httpx.Limits(
                        max_connections=1000, max_keepalive_connections=100
                    ),
                    verify=litellm.ssl_verify,
                ),  # type: ignore
            )
            litellm_router_instance.cache.set_cache(
                key=cache_key,
                value=_client,
                ttl=client_ttl,
                local_only=True,
            )  # cache for 1 hr

            if should_initialize_sync_client(
                litellm_router_instance=litellm_router_instance
            ):
                # streaming clients should have diff timeouts
                cache_key = f"{model_id}_stream_client"
                _client = openai.OpenAI(  # type: ignore
                    api_key=api_key,
                    base_url=api_base,
                    timeout=stream_timeout,
                    max_retries=max_retries,
                    organization=organization,
                    http_client=httpx.Client(
                        limits=httpx.Limits(
                            max_connections=1000, max_keepalive_connections=100
                        ),
                        verify=litellm.ssl_verify,
                    ),  # type: ignore
                )
                litellm_router_instance.cache.set_cache(
                    key=cache_key,
                    value=_client,
                    ttl=client_ttl,
                    local_only=True,
                )  # cache for 1 hr
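A brief usage sketch, not part of the diff (the `router` instance and model id "1" are assumed): once set_client() has run for a deployment, the initialized clients can be fetched back from the router's in-memory cache using the same cache-key strings shown above.

# hypothetical illustration - keys mirror the cache_key strings used in set_client()
model_id = "1"
async_client = router.cache.get_cache(f"{model_id}_async_client")  # openai.AsyncOpenAI / AsyncAzureOpenAI
stream_async_client = router.cache.get_cache(f"{model_id}_stream_async_client")  # separate client, built with stream_timeout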
File diff suppressed because one or more lines are too long

@@ -1607,7 +1607,17 @@ def test_caching_redis_simple(caplog):
    print(m)
    print(time.time() - s2)

    redis_async_caching_error = False
    redis_service_logging_error = False
    captured_logs = [rec.message for rec in caplog.records]

    assert "LiteLLM Redis Caching: async set" not in captured_logs
    assert "ServiceLogging.async_service_success_hook" not in captured_logs
    print(f"captured_logs: {captured_logs}")
    for item in captured_logs:
        if "Error connecting to Async Redis client" in item:
            redis_async_caching_error = True

        if "ServiceLogging.async_service_success_hook" in item:
            redis_service_logging_error = True

    assert redis_async_caching_error is False
    assert redis_service_logging_error is False

@@ -712,7 +712,6 @@ def test_vertex_ai_claude_completion_cost():
    assert cost == predicted_cost


@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_completion_cost_hidden_params(sync_mode):

@@ -732,6 +731,7 @@ async def test_completion_cost_hidden_params(sync_mode):
    assert "response_cost" in response._hidden_params
    assert isinstance(response._hidden_params["response_cost"], float)


def test_vertex_ai_gemini_predict_cost():
    model = "gemini-1.5-flash"
    messages = [{"role": "user", "content": "Hey, hows it going???"}]

@@ -739,3 +739,16 @@ def test_vertex_ai_gemini_predict_cost():

    assert predictive_cost > 0


@pytest.mark.parametrize("model", ["openai/tts-1", "azure/tts-1"])
def test_completion_cost_tts(model):
    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    cost = completion_cost(
        model=model,
        prompt="the quick brown fox jumped over the lazy dogs",
        call_type="speech",
    )

    assert cost > 0

@@ -2,23 +2,30 @@
## Unit tests for ProxyConfig class


import sys, os
import os
import sys
import traceback

from dotenv import load_dotenv

load_dotenv()
import os, io
import io
import os

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import pytest, litellm
from pydantic import BaseModel, ConfigDict
from litellm.proxy.proxy_server import ProxyConfig
from litellm.proxy.utils import encrypt_value, ProxyLogging, DualCache
from litellm.types.router import Deployment, LiteLLM_Params, ModelInfo
from typing import Literal

import pytest
from pydantic import BaseModel, ConfigDict

import litellm
from litellm.proxy.common_utils.encrypt_decrypt_utils import encrypt_value
from litellm.proxy.proxy_server import ProxyConfig
from litellm.proxy.utils import DualCache, ProxyLogging
from litellm.types.router import Deployment, LiteLLM_Params, ModelInfo


class DBModel(BaseModel):
    model_id: str

@@ -28,6 +35,7 @@ class DBModel(BaseModel):

    model_config = ConfigDict(protected_namespaces=())


@pytest.mark.asyncio
async def test_delete_deployment():
    """

@@ -1,8 +1,13 @@
# What is this?
## Unit test for presidio pii masking
import sys, os, asyncio, time, random
from datetime import datetime
import asyncio
import os
import random
import sys
import time
import traceback
from datetime import datetime

from dotenv import load_dotenv

load_dotenv()

@@ -12,12 +17,40 @@ sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import pytest

import litellm
from litellm.proxy.hooks.presidio_pii_masking import _OPTIONAL_PresidioPIIMasking
from litellm import Router, mock_completion
from litellm.proxy.utils import ProxyLogging
from litellm.proxy._types import UserAPIKeyAuth
from litellm.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.hooks.presidio_pii_masking import _OPTIONAL_PresidioPIIMasking
from litellm.proxy.utils import ProxyLogging


@pytest.mark.parametrize(
    "base_url",
    [
        "presidio-analyzer-s3pa:10000",
        "https://presidio-analyzer-s3pa:10000",
        "http://presidio-analyzer-s3pa:10000",
    ],
)
def test_validate_environment_missing_http(base_url):
    pii_masking = _OPTIONAL_PresidioPIIMasking(mock_testing=True)

    os.environ["PRESIDIO_ANALYZER_API_BASE"] = f"{base_url}/analyze"
    os.environ["PRESIDIO_ANONYMIZER_API_BASE"] = f"{base_url}/anonymize"
    pii_masking.validate_environment()

    expected_url = base_url
    if not (base_url.startswith("https://") or base_url.startswith("http://")):
        expected_url = "http://" + base_url

    assert (
        pii_masking.presidio_anonymizer_api_base == f"{expected_url}/anonymize/"
    ), "Got={}, Expected={}".format(
        pii_masking.presidio_anonymizer_api_base, f"{expected_url}/anonymize/"
    )
    assert pii_masking.presidio_analyzer_api_base == f"{expected_url}/analyze/"


@pytest.mark.asyncio

@@ -1894,6 +1894,49 @@ async def test_router_model_usage(mock_response):
        raise e


@pytest.mark.skip(reason="Check if this is causing ci/cd issues.")
@pytest.mark.asyncio
async def test_is_proxy_set():
    """
    Assert if proxy is set
    """
    from httpx import AsyncHTTPTransport

    os.environ["HTTPS_PROXY"] = "https://proxy.example.com:8080"
    from openai import AsyncAzureOpenAI

    # Function to check if a proxy is set on the client
    def check_proxy(client: httpx.AsyncClient) -> bool:
        print(f"client._mounts: {client._mounts}")
        assert len(client._mounts) == 1
        for k, v in client._mounts.items():
            assert isinstance(v, AsyncHTTPTransport)
        return True

    llm_router = Router(
        model_list=[
            {
                "model_name": "gpt-4",
                "litellm_params": {
                    "model": "azure/gpt-3.5-turbo",
                    "api_key": "my-key",
                    "api_base": "my-base",
                    "mock_response": "hello world",
                },
                "model_info": {"id": "1"},
            }
        ]
    )

    _deployment = llm_router.get_deployment(model_id="1")
    model_client: AsyncAzureOpenAI = llm_router._get_client(
        deployment=_deployment, kwargs={}, client_type="async"
    )  # type: ignore

    assert check_proxy(client=model_client._client)


@pytest.mark.parametrize(
    "model, base_model, llm_provider",
    [

@@ -1,16 +1,22 @@
# this tests if the router is initialized correctly
import sys, os, time
import traceback, asyncio
import asyncio
import os
import sys
import time
import traceback

import pytest

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor

from dotenv import load_dotenv

import litellm
from litellm import Router
from concurrent.futures import ThreadPoolExecutor
from collections import defaultdict
from dotenv import load_dotenv

load_dotenv()

@@ -24,6 +30,7 @@ load_dotenv()
def test_init_clients():
    litellm.set_verbose = True
    import logging

    from litellm._logging import verbose_router_logger

    verbose_router_logger.setLevel(logging.DEBUG)

@@ -489,6 +496,7 @@ def test_init_clients_azure_command_r_plus():
    # For azure/command-r-plus we need to use openai.OpenAI because of how the Azure provider requires requests being sent
    litellm.set_verbose = True
    import logging

    from litellm._logging import verbose_router_logger

    verbose_router_logger.setLevel(logging.DEBUG)

@@ -585,3 +593,46 @@ async def test_text_completion_with_organization():

    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


def test_init_clients_async_mode():
    litellm.set_verbose = True
    import logging

    from litellm._logging import verbose_router_logger
    from litellm.types.router import RouterGeneralSettings

    verbose_router_logger.setLevel(logging.DEBUG)
    try:
        print("testing init 4 clients with diff timeouts")
        model_list = [
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": "azure/chatgpt-v-2",
                    "api_key": os.getenv("AZURE_API_KEY"),
                    "api_version": os.getenv("AZURE_API_VERSION"),
                    "api_base": os.getenv("AZURE_API_BASE"),
                    "timeout": 0.01,
                    "stream_timeout": 0.000_001,
                    "max_retries": 7,
                },
            },
        ]
        router = Router(
            model_list=model_list,
            set_verbose=True,
            router_general_settings=RouterGeneralSettings(async_only_mode=True),
        )
        for elem in router.model_list:
            model_id = elem["model_info"]["id"]

            # sync clients not initialized in async_only_mode=True
            assert router.cache.get_cache(f"{model_id}_client") is None
            assert router.cache.get_cache(f"{model_id}_stream_client") is None

            # only async clients initialized in async_only_mode=True
            assert router.cache.get_cache(f"{model_id}_async_client") is not None
            assert router.cache.get_cache(f"{model_id}_stream_async_client") is not None
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

@@ -1,15 +1,22 @@
import sys, os, time
import traceback, asyncio
import asyncio
import os
import sys
import time
import traceback

import pytest

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
from litellm import completion, stream_chunk_builder
import litellm
import os, dotenv
from openai import OpenAI
import os

import dotenv
import pytest
from openai import OpenAI

import litellm
from litellm import completion, stream_chunk_builder

dotenv.load_dotenv()

@@ -147,3 +154,45 @@ def test_stream_chunk_builder_litellm_tool_call_regular_message():

# test_stream_chunk_builder_litellm_tool_call_regular_message()


def test_stream_chunk_builder_litellm_usage_chunks():
    """
    Checks if stream_chunk_builder is able to correctly rebuild with given metadata from streaming chunks
    """
    messages = [
        {"role": "user", "content": "Tell me the funniest joke you know."},
        {
            "role": "assistant",
            "content": "Why did the chicken cross the road?\nYou will not guess this one I bet\n",
        },
        {"role": "user", "content": "I do not know, why?"},
        {"role": "assistant", "content": "uhhhh\n\n\nhmmmm.....\nthinking....\n"},
        {"role": "user", "content": "\nI am waiting...\n\n...\n"},
    ]
    # make a regular gemini call
    response = completion(
        model="gemini/gemini-1.5-flash",
        messages=messages,
    )

    usage: litellm.Usage = response.usage

    gemini_pt = usage.prompt_tokens

    # make a streaming gemini call
    response = completion(
        model="gemini/gemini-1.5-flash",
        messages=messages,
        stream=True,
        complete_response=True,
        stream_options={"include_usage": True},
    )

    usage: litellm.Usage = response.usage

    stream_rebuilt_pt = usage.prompt_tokens

    # assert prompt tokens are the same

    assert gemini_pt == stream_rebuilt_pt

@@ -12,6 +12,9 @@ from typing import Tuple
import pytest
from pydantic import BaseModel

import litellm.litellm_core_utils
import litellm.litellm_core_utils.litellm_logging

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path

@@ -1078,7 +1081,6 @@ def test_vertex_ai_stream(provider):
        print(f"completion_response: {complete_response}")
        assert is_finished == True

        assert False
    except litellm.RateLimitError as e:
        pass
    except Exception as e:

@@ -3034,8 +3036,11 @@ def test_completion_claude_3_function_call_with_streaming():
        pytest.fail(f"Error occurred: {e}")


@pytest.mark.parametrize(
    "model", ["gemini/gemini-1.5-flash"]
)  # "claude-3-opus-20240229",
@pytest.mark.asyncio
async def test_acompletion_claude_3_function_call_with_streaming():
async def test_acompletion_claude_3_function_call_with_streaming(model):
    litellm.set_verbose = True
    tools = [
        {

@@ -3066,7 +3071,7 @@ async def test_acompletion_claude_3_function_call_with_streaming():
    try:
        # test without max tokens
        response = await acompletion(
            model="claude-3-opus-20240229",
            model=model,
            messages=messages,
            tools=tools,
            tool_choice="required",

@@ -3453,3 +3458,55 @@ def test_aamazing_unit_test_custom_stream_wrapper_n():
    assert (
        chunk_dict == chunks[idx]
    ), f"idx={idx} translated chunk = {chunk_dict} != openai chunk = {chunks[idx]}"


def test_unit_test_custom_stream_wrapper_function_call():
    """
    Test if model returns a tool call, the finish reason is correctly set to 'tool_calls'
    """
    from litellm.types.llms.openai import ChatCompletionDeltaChunk

    litellm.set_verbose = False
    delta: ChatCompletionDeltaChunk = {
        "content": None,
        "role": "assistant",
        "tool_calls": [
            {
                "function": {"arguments": '"}'},
                "type": "function",
                "index": 0,
            }
        ],
    }
    chunk = {
        "id": "chatcmpl-123",
        "object": "chat.completion.chunk",
        "created": 1694268190,
        "model": "gpt-3.5-turbo-0125",
        "system_fingerprint": "fp_44709d6fcb",
        "choices": [{"index": 0, "delta": delta, "finish_reason": "stop"}],
    }
    chunk = litellm.ModelResponse(**chunk, stream=True)

    completion_stream = ModelResponseIterator(model_response=chunk)

    response = litellm.CustomStreamWrapper(
        completion_stream=completion_stream,
        model="gpt-3.5-turbo",
        custom_llm_provider="cached_response",
        logging_obj=litellm.litellm_core_utils.litellm_logging.Logging(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "Hey"}],
            stream=True,
            call_type="completion",
            start_time=time.time(),
            litellm_call_id="12345",
            function_id="1245",
        ),
    )

    finish_reason: Optional[str] = None
    for chunk in response:
        if chunk.choices[0].finish_reason is not None:
            finish_reason = chunk.choices[0].finish_reason
    assert finish_reason == "tool_calls"

@@ -300,7 +300,7 @@ class ListBatchRequest(TypedDict, total=False):
    timeout: Optional[float]


class ChatCompletionToolCallFunctionChunk(TypedDict):
class ChatCompletionToolCallFunctionChunk(TypedDict, total=False):
    name: Optional[str]
    arguments: str

@@ -312,7 +312,7 @@ class ChatCompletionToolCallChunk(TypedDict):
    index: int


class ChatCompletionDeltaToolCallChunk(TypedDict):
class ChatCompletionDeltaToolCallChunk(TypedDict, total=False):
    id: str
    type: Literal["function"]
    function: ChatCompletionToolCallFunctionChunk

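A short sketch of what the total=False change above permits (the partial value is invented): streamed tool-call deltas often arrive with only some keys populated, and the relaxed TypedDicts let such partial chunks type-check.

# hypothetical example - a mid-stream delta carrying only "arguments", no "name"
partial: ChatCompletionToolCallFunctionChunk = {"arguments": '"}'}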
@@ -324,7 +324,12 @@ class DeploymentTypedDict(TypedDict):
    litellm_params: LiteLLMParamsTypedDict


SPECIAL_MODEL_INFO_PARAMS = ["input_cost_per_token", "output_cost_per_token"]
SPECIAL_MODEL_INFO_PARAMS = [
    "input_cost_per_token",
    "output_cost_per_token",
    "input_cost_per_character",
    "output_cost_per_character",
]


class Deployment(BaseModel):
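Hedged illustration (where these keys are consumed is assumed, and the values are invented): the two per-character entries added above let per-character cost overrides be treated as special model-info params alongside the existing per-token ones.

# hypothetical override dict - keys taken from SPECIAL_MODEL_INFO_PARAMS above
cost_overrides = {
    "input_cost_per_character": 0.0000002,
    "output_cost_per_character": 0.0000006,
}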
@@ -517,3 +522,9 @@ class CustomRoutingStrategyBase:

    """
    pass


class RouterGeneralSettings(BaseModel):
    async_only_mode: bool = Field(
        default=False
    )  # this will only initialize async clients. Good for memory utils

@@ -42,6 +42,8 @@ import httpx
import openai
import requests
import tiktoken
from httpx import Proxy
from httpx._utils import get_environment_proxies
from pydantic import BaseModel
from tokenizers import Tokenizer

@@ -2555,6 +2557,24 @@ def get_optional_params(
            message=f"Function calling is not supported by {custom_llm_provider}.",
        )

    if "tools" in non_default_params:
        tools = non_default_params["tools"]
        for (
            tool
        ) in (
            tools
        ):  # clean out 'additionalProperties = False'. Causes vertexai/gemini OpenAI API Schema errors - https://github.com/langchain-ai/langchainjs/issues/5240
            tool_function = tool.get("function", {})
            parameters = tool_function.get("parameters", None)
            if parameters is not None:
                new_parameters = copy.deepcopy(parameters)
                if (
                    "additionalProperties" in new_parameters
                    and new_parameters["additionalProperties"] is False
                ):
                    new_parameters.pop("additionalProperties", None)
                tool_function["parameters"] = new_parameters

    def _check_valid_arg(supported_params):
        verbose_logger.debug(
            f"\nLiteLLM completion() model= {model}; provider = {custom_llm_provider}"

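An illustration of the cleanup above (the tool definition is invented): a tool schema carrying "additionalProperties": False is deep-copied and stripped before the request reaches vertexai/gemini.

# hypothetical input tool
tool = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "additionalProperties": False,  # popped by the loop above
        },
    },
}
# after the loop runs, tool["function"]["parameters"] no longer contains "additionalProperties"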
@@ -4707,7 +4727,9 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> Mod
        )
    except Exception:
        raise Exception(
            "This model isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
            "This model isn't mapped yet. model={}, custom_llm_provider={}. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json".format(
                model, custom_llm_provider
            )
        )

@@ -4893,6 +4915,34 @@ def get_provider_fields(custom_llm_provider: str) -> List[ProviderField]:
    return []


def create_proxy_transport_and_mounts():
    proxies = {
        key: None if url is None else Proxy(url=url)
        for key, url in get_environment_proxies().items()
    }

    sync_proxy_mounts = {}
    async_proxy_mounts = {}

    # Retrieve NO_PROXY environment variable
    no_proxy = os.getenv("NO_PROXY", None)
    no_proxy_urls = no_proxy.split(",") if no_proxy else []

    for key, proxy in proxies.items():
        if proxy is None:
            sync_proxy_mounts[key] = httpx.HTTPTransport()
            async_proxy_mounts[key] = httpx.AsyncHTTPTransport()
        else:
            sync_proxy_mounts[key] = httpx.HTTPTransport(proxy=proxy)
            async_proxy_mounts[key] = httpx.AsyncHTTPTransport(proxy=proxy)

    for url in no_proxy_urls:
        sync_proxy_mounts[url] = httpx.HTTPTransport()
        async_proxy_mounts[url] = httpx.AsyncHTTPTransport()

    return sync_proxy_mounts, async_proxy_mounts
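A minimal usage sketch (environment values assumed; the function lives in litellm's utils module per this diff): the returned mount dicts plug straight into httpx clients, so the proxy env vars apply to sync and async traffic alike.

# hypothetical setup - assumes HTTPS_PROXY / NO_PROXY are exported in the environment
sync_mounts, async_mounts = create_proxy_transport_and_mounts()
client = httpx.Client(mounts=sync_mounts)
aclient = httpx.AsyncClient(mounts=async_mounts)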

def validate_environment(model: Optional[str] = None) -> dict:
    """
    Checks if the environment variables are valid for the given model.

@@ -7519,7 +7569,7 @@ def exception_type(
        if original_exception.status_code == 400:
            exception_mapping_worked = True
            raise BadRequestError(
                message=f"{exception_provider} - {message}",
                message=f"{exception_provider} - {error_str}",
                llm_provider=custom_llm_provider,
                model=model,
                response=original_exception.response,

@@ -7528,7 +7578,7 @@ def exception_type(
        elif original_exception.status_code == 401:
            exception_mapping_worked = True
            raise AuthenticationError(
                message=f"AuthenticationError: {exception_provider} - {message}",
                message=f"AuthenticationError: {exception_provider} - {error_str}",
                llm_provider=custom_llm_provider,
                model=model,
                response=original_exception.response,

@@ -7537,7 +7587,7 @@ def exception_type(
        elif original_exception.status_code == 404:
            exception_mapping_worked = True
            raise NotFoundError(
                message=f"NotFoundError: {exception_provider} - {message}",
                message=f"NotFoundError: {exception_provider} - {error_str}",
                model=model,
                llm_provider=custom_llm_provider,
                response=original_exception.response,

@@ -7546,7 +7596,7 @@ def exception_type(
        elif original_exception.status_code == 408:
            exception_mapping_worked = True
            raise Timeout(
                message=f"Timeout Error: {exception_provider} - {message}",
                message=f"Timeout Error: {exception_provider} - {error_str}",
                model=model,
                llm_provider=custom_llm_provider,
                litellm_debug_info=extra_information,

@@ -7554,7 +7604,7 @@ def exception_type(
        elif original_exception.status_code == 422:
            exception_mapping_worked = True
            raise BadRequestError(
                message=f"BadRequestError: {exception_provider} - {message}",
                message=f"BadRequestError: {exception_provider} - {error_str}",
                model=model,
                llm_provider=custom_llm_provider,
                response=original_exception.response,

@@ -7563,7 +7613,7 @@ def exception_type(
        elif original_exception.status_code == 429:
            exception_mapping_worked = True
            raise RateLimitError(
                message=f"RateLimitError: {exception_provider} - {message}",
                message=f"RateLimitError: {exception_provider} - {error_str}",
                model=model,
                llm_provider=custom_llm_provider,
                response=original_exception.response,

@@ -7572,7 +7622,7 @@ def exception_type(
        elif original_exception.status_code == 503:
            exception_mapping_worked = True
            raise ServiceUnavailableError(
                message=f"ServiceUnavailableError: {exception_provider} - {message}",
                message=f"ServiceUnavailableError: {exception_provider} - {error_str}",
                model=model,
                llm_provider=custom_llm_provider,
                response=original_exception.response,

@@ -7581,7 +7631,7 @@ def exception_type(
        elif original_exception.status_code == 504:  # gateway timeout error
            exception_mapping_worked = True
            raise Timeout(
                message=f"Timeout Error: {exception_provider} - {message}",
                message=f"Timeout Error: {exception_provider} - {error_str}",
                model=model,
                llm_provider=custom_llm_provider,
                litellm_debug_info=extra_information,

@@ -7590,7 +7640,7 @@ def exception_type(
            exception_mapping_worked = True
            raise APIError(
                status_code=original_exception.status_code,
                message=f"APIError: {exception_provider} - {message}",
                message=f"APIError: {exception_provider} - {error_str}",
                llm_provider=custom_llm_provider,
                model=model,
                request=original_exception.request,

@@ -7599,7 +7649,7 @@ def exception_type(
        else:
            # if no status code then it is an APIConnectionError: https://github.com/openai/openai-python#handling-errors
            raise APIConnectionError(
                message=f"APIConnectionError: {exception_provider} - {message}",
                message=f"APIConnectionError: {exception_provider} - {error_str}",
                llm_provider=custom_llm_provider,
                model=model,
                litellm_debug_info=extra_information,

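Caller-side sketch (model name and handling invented): because exception_type() re-raises provider errors as typed litellm exceptions, callers can branch on status-code semantics instead of parsing message strings.

# hypothetical consumer of the mapping above
try:
    litellm.completion(model="azure/my-deployment", messages=[{"role": "user", "content": "hi"}])
except litellm.RateLimitError:  # provider returned 429
    pass  # back off and retry
except litellm.Timeout:  # provider returned 408/504
    pass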
@@ -7950,6 +8000,7 @@ class CustomStreamWrapper:
            )
        self.messages = getattr(logging_obj, "messages", None)
        self.sent_stream_usage = False
        self.tool_call = False
        self.chunks: List = (
            []
        )  # keep track of the returned chunks - used for calculating the input/output tokens for stream options

@@ -9192,9 +9243,16 @@ class CustomStreamWrapper:
                "is_finished": True,
                "finish_reason": chunk.choices[0].finish_reason,
                "original_chunk": chunk,
                "tool_calls": (
                    chunk.choices[0].delta.tool_calls
                    if hasattr(chunk.choices[0].delta, "tool_calls")
                    else None
                ),
            }

            completion_obj["content"] = response_obj["text"]
            if response_obj["tool_calls"] is not None:
                completion_obj["tool_calls"] = response_obj["tool_calls"]
            print_verbose(f"completion obj content: {completion_obj['content']}")
            if hasattr(chunk, "id"):
                model_response.id = chunk.id

@@ -9352,6 +9410,10 @@ class CustomStreamWrapper:
            )
            print_verbose(f"self.sent_first_chunk: {self.sent_first_chunk}")

            ## CHECK FOR TOOL USE
            if "tool_calls" in completion_obj and len(completion_obj["tool_calls"]) > 0:
                self.tool_call = True

            ## RETURN ARG
            if (
                "content" in completion_obj

@@ -9530,6 +9592,12 @@ class CustomStreamWrapper:
            )
        else:
            model_response.choices[0].finish_reason = "stop"

        ## if tool use
        if (
            model_response.choices[0].finish_reason == "stop" and self.tool_call
        ):  # don't overwrite for other - potential error finish reasons
            model_response.choices[0].finish_reason = "tool_calls"
        return model_response

    def __next__(self):

@@ -9583,7 +9651,7 @@ class CustomStreamWrapper:
            return response

        except StopIteration:
            if self.sent_last_chunk == True:
            if self.sent_last_chunk is True:
                if (
                    self.sent_stream_usage == False
                    and self.stream_options is not None

@@ -2022,10 +2022,10 @@
        "max_tokens": 8192,
        "max_input_tokens": 2097152,
        "max_output_tokens": 8192,
        "input_cost_per_token": 0.00000035,
        "input_cost_per_token_above_128k_tokens": 0.0000007,
        "output_cost_per_token": 0.00000105,
        "output_cost_per_token_above_128k_tokens": 0.0000021,
        "input_cost_per_token": 0.0000035,
        "input_cost_per_token_above_128k_tokens": 0.000007,
        "output_cost_per_token": 0.0000105,
        "output_cost_per_token_above_128k_tokens": 0.000021,
        "litellm_provider": "gemini",
        "mode": "chat",
        "supports_system_messages": true,

@@ -2033,16 +2033,16 @@
        "supports_vision": true,
        "supports_tool_choice": true,
        "supports_response_schema": true,
        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
        "source": "https://ai.google.dev/pricing"
    },
    "gemini/gemini-1.5-pro-latest": {
        "max_tokens": 8192,
        "max_input_tokens": 1048576,
        "max_output_tokens": 8192,
        "input_cost_per_token": 0.00000035,
        "input_cost_per_token_above_128k_tokens": 0.0000007,
        "input_cost_per_token": 0.0000035,
        "input_cost_per_token_above_128k_tokens": 0.000007,
        "output_cost_per_token": 0.00000105,
        "output_cost_per_token_above_128k_tokens": 0.0000021,
        "output_cost_per_token_above_128k_tokens": 0.000021,
        "litellm_provider": "gemini",
        "mode": "chat",
        "supports_system_messages": true,

@@ -2050,7 +2050,7 @@
        "supports_vision": true,
        "supports_tool_choice": true,
        "supports_response_schema": true,
        "source": "https://ai.google.dev/models/gemini"
        "source": "https://ai.google.dev/pricing"
    },
    "gemini/gemini-pro-vision": {
        "max_tokens": 2048,

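Quick sanity arithmetic on the corrected rates above (the token count is invented): the old values were off by a factor of ten; at the fixed rate, Gemini 1.5 Pro input under the 128k threshold works out to $3.50 per million tokens.

# hypothetical cost check using the corrected per-token price from this diff
input_tokens = 1_000_000
print(input_tokens * 0.0000035)  # -> 3.5 (USD)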
32 poetry.lock generated
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
 
 [[package]]
 name = "aiohttp"
@@ -2115,6 +2115,32 @@ dev = ["coverage[toml] (==5.0.4)", "cryptography (>=3.4.0)", "pre-commit", "pyte
 docs = ["sphinx (>=4.5.0,<5.0.0)", "sphinx-rtd-theme", "zope.interface"]
 tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"]
 
+[[package]]
+name = "pynacl"
+version = "1.5.0"
+description = "Python binding to the Networking and Cryptography (NaCl) library"
+optional = true
+python-versions = ">=3.6"
+files = [
+    {file = "PyNaCl-1.5.0-cp36-abi3-macosx_10_10_universal2.whl", hash = "sha256:401002a4aaa07c9414132aaed7f6836ff98f59277a234704ff66878c2ee4a0d1"},
+    {file = "PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:52cb72a79269189d4e0dc537556f4740f7f0a9ec41c1322598799b0bdad4ef92"},
+    {file = "PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a36d4a9dda1f19ce6e03c9a784a2921a4b726b02e1c736600ca9c22029474394"},
+    {file = "PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:0c84947a22519e013607c9be43706dd42513f9e6ae5d39d3613ca1e142fba44d"},
+    {file = "PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06b8f6fa7f5de8d5d2f7573fe8c863c051225a27b61e6860fd047b1775807858"},
+    {file = "PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:a422368fc821589c228f4c49438a368831cb5bbc0eab5ebe1d7fac9dded6567b"},
+    {file = "PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:61f642bf2378713e2c2e1de73444a3778e5f0a38be6fee0fe532fe30060282ff"},
+    {file = "PyNaCl-1.5.0-cp36-abi3-win32.whl", hash = "sha256:e46dae94e34b085175f8abb3b0aaa7da40767865ac82c928eeb9e57e1ea8a543"},
+    {file = "PyNaCl-1.5.0-cp36-abi3-win_amd64.whl", hash = "sha256:20f42270d27e1b6a29f54032090b972d97f0a1b0948cc52392041ef7831fee93"},
+    {file = "PyNaCl-1.5.0.tar.gz", hash = "sha256:8ac7448f09ab85811607bdd21ec2464495ac8b7c66d146bf545b0f08fb9220ba"},
+]
+
+[package.dependencies]
+cffi = ">=1.4.1"
+
+[package.extras]
+docs = ["sphinx (>=1.6.5)", "sphinx-rtd-theme"]
+tests = ["hypothesis (>=3.27.0)", "pytest (>=3.2.1,!=3.3.0)"]
+
 [[package]]
 name = "pytest"
 version = "7.4.4"
@@ -3381,10 +3407,10 @@ docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.link
 testing = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"]
 
 [extras]
-extra-proxy = ["azure-identity", "azure-keyvault-secrets", "google-cloud-kms", "prisma", "resend"]
+extra-proxy = ["azure-identity", "azure-keyvault-secrets", "google-cloud-kms", "prisma", "pynacl", "resend"]
 proxy = ["PyJWT", "apscheduler", "backoff", "cryptography", "fastapi", "fastapi-sso", "gunicorn", "orjson", "python-multipart", "pyyaml", "rq", "uvicorn"]
 
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.8.1,<4.0, !=3.9.7"
-content-hash = "925b604bed171282827c8b046191ad858ce37fa3b011a393345382f8ff86e68c"
+content-hash = "6025cae7749c94755d17362f77adf76f834863dba2126501cd3111d53a9c5779"
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "1.41.8"
+version = "1.41.11"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT"
@@ -46,6 +46,7 @@ azure-identity = {version = "^1.15.0", optional = true}
 azure-keyvault-secrets = {version = "^4.8.0", optional = true}
 google-cloud-kms = {version = "^2.21.3", optional = true}
 resend = {version = "^0.8.0", optional = true}
+pynacl = {version = "^1.5.0", optional = true}
 
 [tool.poetry.extras]
 proxy = [
@@ -90,7 +91,7 @@ requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.commitizen]
-version = "1.41.8"
+version = "1.41.11"
 version_files = [
     "pyproject.toml:^version"
 ]
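`pynacl` is added as an optional dependency and folded into the `extra-proxy` extra in both the lockfile and `pyproject.toml`. A hedged sketch of the primitive PyNaCl provides (authenticated symmetric encryption via `SecretBox`); how the proxy actually uses it is not shown in this diff:

```python
# PyNaCl ships authenticated symmetric encryption via SecretBox.
# This only demonstrates the library added to the extra-proxy extra;
# the proxy's actual usage is outside this diff.
import nacl.secret
import nacl.utils

key = nacl.utils.random(nacl.secret.SecretBox.KEY_SIZE)  # 32 random bytes
box = nacl.secret.SecretBox(key)

ciphertext = box.encrypt(b"sensitive payload")  # nonce is prepended automatically
assert box.decrypt(ciphertext) == b"sensitive payload"
```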
@@ -42,7 +42,7 @@ tokenizers==0.14.0 # for calculating usage
 click==8.1.7 # for proxy cli
 jinja2==3.1.4 # for prompt templates
 certifi==2024.7.4 # [TODO] clean up
-aiohttp==3.9.0 # for network calls
+aiohttp==3.9.4 # for network calls
 aioboto3==12.3.0 # for async sagemaker calls
 tenacity==8.2.3 # for retrying requests, when litellm.num_retries set
 pydantic==2.7.1 # proxy + openai req.
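The `aiohttp` pin moves from 3.9.0 to 3.9.4, a patch-level bump within the 3.9.x line. One quick way to confirm the installed version matches the pin (assuming the package is installed in the current environment):

```python
# Confirm the installed aiohttp matches the new pin.
from importlib.metadata import version

installed = version("aiohttp")
assert installed == "3.9.4", f"expected aiohttp==3.9.4, found {installed}"
```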
File diff suppressed because one or more lines are too long (8 files)
@@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-a8fd417ac0c6c8a5.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-a8fd417ac0c6c8a5.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/0f6908625573deae.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[48951,[\"665\",\"static/chunks/3014691f-589a5f4865c3822f.js\",\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"294\",\"static/chunks/294-0e35509d5ca95267.js\",\"131\",\"static/chunks/131-6a03368053f9d26d.js\",\"684\",\"static/chunks/684-bb2d2f93d92acb0b.js\",\"759\",\"static/chunks/759-83a8bdddfe32b5d9.js\",\"777\",\"static/chunks/777-f76791513e294b30.js\",\"931\",\"static/chunks/app/page-da7d95729f2529b5.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/0f6908625573deae.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"0gt3_bF2KkdKeE61mic4M\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-a8fd417ac0c6c8a5.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-a8fd417ac0c6c8a5.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/0f6908625573deae.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[48951,[\"665\",\"static/chunks/3014691f-589a5f4865c3822f.js\",\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"294\",\"static/chunks/294-0e35509d5ca95267.js\",\"131\",\"static/chunks/131-19b05e5ce40fa85d.js\",\"684\",\"static/chunks/684-bb2d2f93d92acb0b.js\",\"759\",\"static/chunks/759-d7572f2a46f911d5.js\",\"777\",\"static/chunks/777-906d7dd6a5bf7be4.js\",\"931\",\"static/chunks/app/page-567f85145e7f0f35.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/0f6908625573deae.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"RDLpeUaSstfmeQiKITNBo\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
@@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[48951,["665","static/chunks/3014691f-589a5f4865c3822f.js","936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-83a8bdddfe32b5d9.js","777","static/chunks/777-f76791513e294b30.js","931","static/chunks/app/page-da7d95729f2529b5.js"],""]
3:I[48951,["665","static/chunks/3014691f-589a5f4865c3822f.js","936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-19b05e5ce40fa85d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-d7572f2a46f911d5.js","777","static/chunks/777-906d7dd6a5bf7be4.js","931","static/chunks/app/page-567f85145e7f0f35.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["0gt3_bF2KkdKeE61mic4M",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["RDLpeUaSstfmeQiKITNBo",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null
File diff suppressed because one or more lines are too long
@@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[87494,["294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","777","static/chunks/777-f76791513e294b30.js","418","static/chunks/app/model_hub/page-ba7819b59161aa64.js"],""]
3:I[87494,["294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-19b05e5ce40fa85d.js","777","static/chunks/777-906d7dd6a5bf7be4.js","418","static/chunks/app/model_hub/page-ba7819b59161aa64.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["0gt3_bF2KkdKeE61mic4M",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["RDLpeUaSstfmeQiKITNBo",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null
File diff suppressed because one or more lines are too long
@@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-f76791513e294b30.js","461","static/chunks/app/onboarding/page-1ed08595d570934e.js"],""]
3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-906d7dd6a5bf7be4.js","461","static/chunks/app/onboarding/page-1ed08595d570934e.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["0gt3_bF2KkdKeE61mic4M",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["RDLpeUaSstfmeQiKITNBo",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null
@@ -743,7 +743,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
   }
 
   const fetchModelMap = async () => {
-    const data = await modelCostMap();
+    const data = await modelCostMap(accessToken);
     console.log(`received model cost map data: ${Object.keys(data)}`);
     setModelMap(data);
   };
@@ -12,11 +12,19 @@ export interface Model {
   model_info: Object | null;
 }
 
-export const modelCostMap = async () => {
+export const modelCostMap = async (
+  accessToken: string,
+) => {
   try {
     const url = proxyBaseUrl ? `${proxyBaseUrl}/get/litellm_model_cost_map` : `/get/litellm_model_cost_map`;
     const response = await fetch(
-      url
+      url, {
+        method: "GET",
+        headers: {
+          Authorization: `Bearer ${accessToken}`,
+          "Content-Type": "application/json",
+        },
+      }
     );
     const jsonData = await response.json();
     console.log(`received litellm model cost data: ${jsonData}`);
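`modelCostMap` now forwards the dashboard's bearer token, since `/get/litellm_model_cost_map` is served by the proxy and may sit behind auth. An equivalent request from Python, with the proxy URL and key as placeholders:

```python
# Equivalent of the updated modelCostMap() call, from Python.
# PROXY_BASE_URL and the sk-1234 key are placeholders.
import requests

PROXY_BASE_URL = "http://localhost:4000"
resp = requests.get(
    f"{PROXY_BASE_URL}/get/litellm_model_cost_map",
    headers={
        "Authorization": "Bearer sk-1234",
        "Content-Type": "application/json",
    },
)
resp.raise_for_status()
cost_map = resp.json()
print(f"{len(cost_map)} models in cost map")
```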
@@ -693,6 +701,9 @@ export const claimOnboardingToken = async (
     throw error;
   }
 };
+let ModelListerrorShown = false;
+let errorTimer: NodeJS.Timeout | null = null;
 
 export const modelInfoCall = async (
   accessToken: String,
   userID: String,
@@ -714,8 +725,21 @@ export const modelInfoCall = async (
   });
 
   if (!response.ok) {
-    const errorData = await response.text();
-    message.error(errorData, 10);
+    let errorData = await response.text();
+    errorData += `error shown=${ModelListerrorShown}`
+    if (!ModelListerrorShown) {
+      if (errorData.includes("No model list passed")) {
+        errorData = "No Models Exist. Click Add Model to get started.";
+      }
+      message.info(errorData, 10);
+      ModelListerrorShown = true;
+
+      if (errorTimer) clearTimeout(errorTimer);
+      errorTimer = setTimeout(() => {
+        ModelListerrorShown = false;
+      }, 10000);
+    }
+
     throw new Error("Network response was not ok");
   }
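The hunk above debounces the model-list error toast: `message.info` fires at most once, then a 10-second timer re-arms `ModelListerrorShown`. The same pattern, sketched in Python for clarity (the 10-second window comes from the diff; the threading details are illustrative):

```python
# Time-based dedup of a repeated notification, mirroring the
# ModelListerrorShown / errorTimer pattern above.
import threading

_error_shown = False

def _rearm() -> None:
    global _error_shown
    _error_shown = False

def notify_once(text: str, window_seconds: float = 10.0) -> None:
    """Show `text` at most once per `window_seconds`."""
    global _error_shown
    if _error_shown:
        return
    _error_shown = True
    print(text)  # stand-in for the UI toast (message.info)
    timer = threading.Timer(window_seconds, _rearm)
    timer.daemon = True
    timer.start()
```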
@@ -750,7 +774,6 @@ export const modelHubCall = async (accessToken: String) => {
 
   if (!response.ok) {
     const errorData = await response.text();
-    message.error(errorData, 10);
     throw new Error("Network response was not ok");
   }
 
@@ -32,7 +32,6 @@ import {
   allTagNamesCall,
   modelMetricsCall,
   modelAvailableCall,
   modelInfoCall,
   adminspendByProvider,
   adminGlobalActivity,
   adminGlobalActivityPerModel,