Merge branch 'BerriAI:main' into main

Commit d54d4b6734 by Simon S. Viloria, 2024-07-07 18:00:11 +02:00, committed by GitHub
GPG key ID: B5690EEEBB952194 (no known key found for this signature in database)
81 changed files with 2022 additions and 1231 deletions

View file

@@ -289,7 +289,8 @@ jobs:
               repo: context.repo.repo,
               release_id: process.env.RELEASE_ID,
             });
-            return response.data.body;
+            const formattedBody = JSON.stringify(response.data.body).slice(1, -1);
+            return formattedBody;
           } catch (error) {
             core.setFailed(error.message);
           }
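
The escaping added above matters because the step that follows splices the release-notes body into a hand-written JSON payload: raw newlines and quotes in GitHub release notes would break it. A minimal Python sketch of the same trick (the sample string is illustrative):

```python
import json

release_notes = 'Fixes:\n- bug A\n- "quoted" note'

# json.dumps escapes newlines/quotes and adds surrounding quotes;
# [1:-1] strips those quotes, mirroring JSON.stringify(...).slice(1, -1)
escaped = json.dumps(release_notes)[1:-1]

payload = '{"description": "%s"}' % escaped
print(json.loads(payload))  # parses cleanly; the raw string would not
```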
@@ -302,14 +303,15 @@ jobs:
           RELEASE_NOTES: ${{ steps.release-notes.outputs.result }}
         run: |
           curl -H "Content-Type: application/json" -X POST -d '{
-            "content": "New LiteLLM release ${{ env.RELEASE_TAG }}",
+            "content": "New LiteLLM release '"${RELEASE_TAG}"'",
             "username": "Release Changelog",
             "avatar_url": "https://cdn.discordapp.com/avatars/487431320314576937/bd64361e4ba6313d561d54e78c9e7171.png",
             "embeds": [
               {
-                "title": "Changelog for LiteLLM ${{ env.RELEASE_TAG }}",
-                "description": "${{ env.RELEASE_NOTES }}",
+                "title": "Changelog for LiteLLM '"${RELEASE_TAG}"'",
+                "description": "'"${RELEASE_NOTES}"'",
                 "color": 2105893
               }
             ]
           }' $WEBHOOK_URL

View file

@@ -25,6 +25,10 @@ repos:
         exclude: ^litellm/tests/|^litellm/proxy/tests/
         additional_dependencies: [flake8-print]
         files: litellm/.*\.py
+- repo: https://github.com/python-poetry/poetry
+  rev: 1.8.0
+  hooks:
+  - id: poetry-check
 - repo: local
   hooks:
   - id: check-files-match

View file

@@ -151,12 +151,9 @@ Navigate to the Usage Tab on the LiteLLM UI (found on https://your-proxy-endpoin
 </Tabs>

 ## ✨ (Enterprise) API Endpoints to get Spend

-#### Getting Spend Reports - To Charge Other Teams, Customers
-Use the `/global/spend/report` endpoint to get daily spend report per
-- Team
-- Customer [this is `user` passed to `/chat/completions` request](#how-to-track-spend-with-litellm)
-- [LiteLLM API key](virtual_keys.md)
+#### Getting Spend Reports - To Charge Other Teams, Customers, Users
+Use the `/global/spend/report` endpoint to get spend reports

 <Tabs>
@@ -285,6 +282,16 @@ Output from script
 <TabItem value="per customer" label="Spend Per Customer">

+:::info
+
+Customer This is the value of `user_id` passed when calling [`/key/generate`](https://litellm-api.up.railway.app/#/key%20management/generate_key_fn_key_generate_post)
+[this is `user` passed to `/chat/completions` request](#how-to-track-spend-with-litellm)
+- [LiteLLM API key](virtual_keys.md)
+
+:::

 ##### Example Request

 👉 Key Change: Specify `group_by=customer`
@@ -341,14 +348,14 @@ curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end
 </TabItem>

-<TabItem value="per key" label="Spend Per API Key">
+<TabItem value="per key" label="Spend for Specific API Key">

-👉 Key Change: Specify `group_by=api_key`
+👉 Key Change: Specify `api_key=sk-1234`

 ```shell
-curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end_date=2024-06-30&group_by=api_key' \
+curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end_date=2024-06-30&api_key=sk-1234' \
   -H 'Authorization: Bearer sk-1234'
 ```
@@ -357,32 +364,18 @@ curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end
 ```shell
 [
-    {
-        "api_key": "ad64768847d05d978d62f623d872bff0f9616cc14b9c1e651c84d14fe3b9f539",
-        "total_cost": 0.0002157,
-        "total_input_tokens": 45.0,
-        "total_output_tokens": 1375.0,
-        "model_details": [
-            {
-                "model": "gpt-3.5-turbo",
-                "total_cost": 0.0001095,
-                "total_input_tokens": 9,
-                "total_output_tokens": 70
-            },
-            {
-                "model": "llama3-8b-8192",
-                "total_cost": 0.0001062,
-                "total_input_tokens": 36,
-                "total_output_tokens": 1305
-            }
-        ]
-    },
     {
         "api_key": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",
-        "total_cost": 0.00012924,
+        "total_cost": 0.3201286305151999,
         "total_input_tokens": 36.0,
         "total_output_tokens": 1593.0,
         "model_details": [
+            {
+                "model": "dall-e-3",
+                "total_cost": 0.31999939051519993,
+                "total_input_tokens": 0,
+                "total_output_tokens": 0
+            },
             {
                 "model": "llama3-8b-8192",
                 "total_cost": 0.00012924,
@@ -396,6 +389,87 @@ curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end
 </TabItem>

+<TabItem value="per user" label="Spend for Internal User (Key Owner)">
+
+:::info
+
+Internal User (Key Owner): This is the value of `user_id` passed when calling [`/key/generate`](https://litellm-api.up.railway.app/#/key%20management/generate_key_fn_key_generate_post)
+
+:::
+
+👉 Key Change: Specify `internal_user_id=ishaan`
+
+```shell
+curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end_date=2024-12-30&internal_user_id=ishaan' \
+  -H 'Authorization: Bearer sk-1234'
+```
+
+##### Example Response
+
+```shell
+[
+    {
+        "api_key": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",
+        "total_cost": 0.00013132,
+        "total_input_tokens": 105.0,
+        "total_output_tokens": 872.0,
+        "model_details": [
+            {
+                "model": "gpt-3.5-turbo-instruct",
+                "total_cost": 5.85e-05,
+                "total_input_tokens": 15,
+                "total_output_tokens": 18
+            },
+            {
+                "model": "llama3-8b-8192",
+                "total_cost": 7.282000000000001e-05,
+                "total_input_tokens": 90,
+                "total_output_tokens": 854
+            }
+        ]
+    },
+    {
+        "api_key": "151e85e46ab8c9c7fad090793e3fe87940213f6ae665b543ca633b0b85ba6dc6",
+        "total_cost": 5.2699999999999993e-05,
+        "total_input_tokens": 26.0,
+        "total_output_tokens": 27.0,
+        "model_details": [
+            {
+                "model": "gpt-3.5-turbo",
+                "total_cost": 5.2499999999999995e-05,
+                "total_input_tokens": 24,
+                "total_output_tokens": 27
+            },
+            {
+                "model": "text-embedding-ada-002",
+                "total_cost": 2e-07,
+                "total_input_tokens": 2,
+                "total_output_tokens": 0
+            }
+        ]
+    },
+    {
+        "api_key": "60cb83a2dcbf13531bd27a25f83546ecdb25a1a6deebe62d007999dc00e1e32a",
+        "total_cost": 9.42e-06,
+        "total_input_tokens": 30.0,
+        "total_output_tokens": 99.0,
+        "model_details": [
+            {
+                "model": "llama3-8b-8192",
+                "total_cost": 9.42e-06,
+                "total_input_tokens": 30,
+                "total_output_tokens": 99
+            }
+        ]
+    }
+]
+```
+
+</TabItem>
 </Tabs>
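
The report endpoints above are plain REST calls, so any HTTP client works. A minimal Python sketch using `requests` (proxy URL and master key taken from the curl examples; not an official client):

```python
import requests

# Query the proxy's spend report, grouped per customer
# (swap the params for api_key=... or internal_user_id=... as shown above).
response = requests.get(
    "http://localhost:4000/global/spend/report",
    params={
        "start_date": "2024-04-01",
        "end_date": "2024-06-30",
        "group_by": "customer",
    },
    headers={"Authorization": "Bearer sk-1234"},
)
response.raise_for_status()

for row in response.json():
    print(row)
```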
 #### Allowing Non-Proxy Admins to access `/spend` endpoints

View file

@@ -1120,12 +1120,14 @@ This is a beta feature, and subject to changes.
 USE_AWS_KMS="True"
 ```

-**Step 2.** Add `aws_kms/` to encrypted keys in env
+**Step 2.** Add `LITELLM_SECRET_AWS_KMS_` to encrypted keys in env

 ```env
-DATABASE_URL="aws_kms/AQICAH.."
+LITELLM_SECRET_AWS_KMS_DATABASE_URL="AQICAH.."
 ```

+LiteLLM will find this and use the decrypted `DATABASE_URL="postgres://.."` value in runtime.
+
 **Step 3.** Start proxy

 ```
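
For intuition about Step 2, a hedged sketch of the decryption flow the docs describe: decrypt each `LITELLM_SECRET_AWS_KMS_*` value with AWS KMS and re-export it under the un-prefixed name. This is illustrative, not LiteLLM's actual implementation; the region lookup is an assumption:

```python
import base64
import os

import boto3

kms = boto3.client("kms", region_name=os.getenv("AWS_REGION_NAME"))  # region env var: assumed

PREFIX = "LITELLM_SECRET_AWS_KMS_"
for name, value in dict(os.environ).items():
    if not name.startswith(PREFIX):
        continue
    # decrypt the base64-encoded ciphertext and re-export it, e.g.
    # LITELLM_SECRET_AWS_KMS_DATABASE_URL -> DATABASE_URL
    plaintext = kms.decrypt(CiphertextBlob=base64.b64decode(value))["Plaintext"]
    os.environ[name[len(PREFIX):]] = plaintext.decode("utf-8")
```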

View file

@@ -1,7 +1,7 @@
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';

-# Use with Langchain, OpenAI SDK, LlamaIndex, Curl
+# Use with Langchain, OpenAI SDK, LlamaIndex, Instructor, Curl

 :::info
@@ -173,6 +173,37 @@ console.log(message);
 ```

+</TabItem>
+<TabItem value="instructor" label="Instructor">
+
+```python
+from openai import OpenAI
+import instructor
+from pydantic import BaseModel
+
+my_proxy_api_key = "" # e.g. sk-1234
+my_proxy_base_url = "" # e.g. http://0.0.0.0:4000
+
+# This enables response_model keyword
+# from client.chat.completions.create
+client = instructor.from_openai(OpenAI(api_key=my_proxy_api_key, base_url=my_proxy_base_url))
+
+class UserDetail(BaseModel):
+    name: str
+    age: int
+
+user = client.chat.completions.create(
+    model="gemini-pro-flash",
+    response_model=UserDetail,
+    messages=[
+        {"role": "user", "content": "Extract Jason is 25 years old"},
+    ]
+)
+
+assert isinstance(user, UserDetail)
+assert user.name == "Jason"
+assert user.age == 25
+```
+
 </TabItem>
 </Tabs>
@@ -205,6 +236,97 @@ console.log(message);
 ```

+### Function Calling
+
+Here's some examples of doing function calling with the proxy.
+
+You can use the proxy for function calling with **any** openai-compatible project.
+
+<Tabs>
+<TabItem value="curl" label="curl">
+
+```bash
+curl http://0.0.0.0:4000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer $OPTIONAL_YOUR_PROXY_KEY" \
+  -d '{
+    "model": "gpt-4-turbo",
+    "messages": [
+      {
+        "role": "user",
+        "content": "What'\''s the weather like in Boston today?"
+      }
+    ],
+    "tools": [
+      {
+        "type": "function",
+        "function": {
+          "name": "get_current_weather",
+          "description": "Get the current weather in a given location",
+          "parameters": {
+            "type": "object",
+            "properties": {
+              "location": {
+                "type": "string",
+                "description": "The city and state, e.g. San Francisco, CA"
+              },
+              "unit": {
+                "type": "string",
+                "enum": ["celsius", "fahrenheit"]
+              }
+            },
+            "required": ["location"]
+          }
+        }
+      }
+    ],
+    "tool_choice": "auto"
+  }'
+```
+
+</TabItem>
+<TabItem value="sdk" label="SDK">
+
+```python
+from openai import OpenAI
+client = OpenAI(
+    api_key="sk-1234", # [OPTIONAL] set if you set one on proxy, else set ""
+    base_url="http://0.0.0.0:4000",
+)
+
+tools = [
+  {
+    "type": "function",
+    "function": {
+      "name": "get_current_weather",
+      "description": "Get the current weather in a given location",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "location": {
+            "type": "string",
+            "description": "The city and state, e.g. San Francisco, CA",
+          },
+          "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
+        },
+        "required": ["location"],
+      },
+    }
+  }
+]
+messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]
+completion = client.chat.completions.create(
+    model="gpt-4o", # use 'model_name' from config.yaml
+    messages=messages,
+    tools=tools,
+    tool_choice="auto"
+)
+
+print(completion)
+```
+
+</TabItem>
+</Tabs>
+
 ## `/embeddings`

 ### Request Format

View file

@@ -248,9 +248,15 @@ class RedisCache(BaseCache):
             # asyncio.get_running_loop().create_task(self.ping())
             result = asyncio.get_running_loop().create_task(self.ping())
         except Exception as e:
-            verbose_logger.error(
-                "Error connecting to Async Redis client", extra={"error": str(e)}
-            )
+            if "no running event loop" in str(e):
+                verbose_logger.debug(
+                    "Ignoring async redis ping. No running event loop."
+                )
+            else:
+                verbose_logger.error(
+                    "Error connecting to Async Redis client - {}".format(str(e)),
+                    extra={"error": str(e)},
+                )

         ### SYNC HEALTH PING ###
         try:
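
Background for the new branch: `asyncio.get_running_loop()` raises `RuntimeError("no running event loop")` when `RedisCache` is constructed outside async code, so that case is expected noise rather than a real connection failure, hence debug instead of error. A quick standalone demonstration:

```python
import asyncio

try:
    asyncio.get_running_loop()
except RuntimeError as e:
    # outside a running event loop this raises "no running event loop"
    print("no running event loop" in str(e))  # True
```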

View file

@@ -4,6 +4,8 @@ import time
 import traceback
 from typing import List, Literal, Optional, Tuple, Union

+from pydantic import BaseModel
+
 import litellm
 import litellm._logging
 from litellm import verbose_logger
@@ -13,6 +15,10 @@ from litellm.litellm_core_utils.llm_cost_calc.google import (
 from litellm.litellm_core_utils.llm_cost_calc.google import (
     cost_per_token as google_cost_per_token,
 )
+from litellm.litellm_core_utils.llm_cost_calc.utils import _generic_cost_per_character
+from litellm.types.llms.openai import HttpxBinaryResponseContent
+from litellm.types.router import SPECIAL_MODEL_INFO_PARAMS
 from litellm.utils import (
     CallTypes,
     CostPerToken,
@@ -62,6 +68,23 @@ def cost_per_token(
     ### CUSTOM PRICING ###
     custom_cost_per_token: Optional[CostPerToken] = None,
     custom_cost_per_second: Optional[float] = None,
+    ### CALL TYPE ###
+    call_type: Literal[
+        "embedding",
+        "aembedding",
+        "completion",
+        "acompletion",
+        "atext_completion",
+        "text_completion",
+        "image_generation",
+        "aimage_generation",
+        "moderation",
+        "amoderation",
+        "atranscription",
+        "transcription",
+        "aspeech",
+        "speech",
+    ] = "completion",
 ) -> Tuple[float, float]:
     """
     Calculates the cost per token for a given model, prompt tokens, and completion tokens.
@@ -76,6 +99,7 @@ def cost_per_token(
         custom_llm_provider (str): The llm provider to whom the call was made (see init.py for full list)
         custom_cost_per_token: Optional[CostPerToken]: the cost per input + output token for the llm api call.
         custom_cost_per_second: Optional[float]: the cost per second for the llm api call.
+        call_type: Optional[str]: the call type

     Returns:
         tuple: A tuple containing the cost in USD dollars for prompt tokens and completion tokens, respectively.
@@ -159,6 +183,27 @@ def cost_per_token(
             prompt_tokens=prompt_tokens,
             completion_tokens=completion_tokens,
         )
+    elif call_type == "speech" or call_type == "aspeech":
+        prompt_cost, completion_cost = _generic_cost_per_character(
+            model=model_without_prefix,
+            custom_llm_provider=custom_llm_provider,
+            prompt_characters=prompt_characters,
+            completion_characters=completion_characters,
+            custom_prompt_cost=None,
+            custom_completion_cost=0,
+        )
+        if prompt_cost is None or completion_cost is None:
+            raise ValueError(
+                "cost for tts call is None. prompt_cost={}, completion_cost={}, model={}, custom_llm_provider={}, prompt_characters={}, completion_characters={}".format(
+                    prompt_cost,
+                    completion_cost,
+                    model_without_prefix,
+                    custom_llm_provider,
+                    prompt_characters,
+                    completion_characters,
+                )
+            )
+        return prompt_cost, completion_cost
     elif model in model_cost_ref:
         print_verbose(f"Success: model={model} in model_cost_map")
         print_verbose(
@@ -289,7 +334,7 @@ def cost_per_token(
         return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
     else:
         # if model is not in model_prices_and_context_window.json. Raise an exception-let users know
-        error_str = f"Model not in model_prices_and_context_window.json. You passed model={model}. Register pricing for model - https://docs.litellm.ai/docs/proxy/custom_pricing\n"
+        error_str = f"Model not in model_prices_and_context_window.json. You passed model={model}, custom_llm_provider={custom_llm_provider}. Register pricing for model - https://docs.litellm.ai/docs/proxy/custom_pricing\n"
         raise litellm.exceptions.NotFoundError(  # type: ignore
             message=error_str,
             model=model,
@@ -429,7 +474,10 @@ def completion_cost(
     prompt_characters = 0
     completion_tokens = 0
     completion_characters = 0
-    if completion_response is not None:
+    if completion_response is not None and (
+        isinstance(completion_response, BaseModel)
+        or isinstance(completion_response, dict)
+    ):  # tts returns a custom class
         # get input/output tokens from completion_response
         prompt_tokens = completion_response.get("usage", {}).get("prompt_tokens", 0)
         completion_tokens = completion_response.get("usage", {}).get(
@@ -535,6 +583,11 @@ def completion_cost(
                 raise Exception(
                     f"Model={image_gen_model_name} not found in completion cost model map"
                 )
+        elif (
+            call_type == CallTypes.speech.value or call_type == CallTypes.aspeech.value
+        ):
+            prompt_characters = litellm.utils._count_characters(text=prompt)
+
         # Calculate cost based on prompt_tokens, completion_tokens
         if (
             "togethercomputer" in model
@@ -591,6 +644,7 @@ def completion_cost(
             custom_cost_per_token=custom_cost_per_token,
             prompt_characters=prompt_characters,
             completion_characters=completion_characters,
+            call_type=call_type,
         )
         _final_cost = prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
         print_verbose(
@@ -608,6 +662,7 @@ def response_cost_calculator(
         ImageResponse,
         TranscriptionResponse,
         TextCompletionResponse,
+        HttpxBinaryResponseContent,
     ],
     model: str,
     custom_llm_provider: Optional[str],
@@ -641,7 +696,8 @@ def response_cost_calculator(
     if cache_hit is not None and cache_hit is True:
         response_cost = 0.0
     else:
-        response_object._hidden_params["optional_params"] = optional_params
+        if isinstance(response_object, BaseModel):
+            response_object._hidden_params["optional_params"] = optional_params
         if isinstance(response_object, ImageResponse):
             response_cost = completion_cost(
                 completion_response=response_object,
@@ -651,12 +707,11 @@ def response_cost_calculator(
             )
         else:
             if (
-                model in litellm.model_cost
-                and custom_pricing is not None
-                and custom_llm_provider is True
+                model in litellm.model_cost or custom_pricing is True
             ):  # override defaults if custom pricing is set
                 base_model = model
             # base_model defaults to None if not set on model_info
             response_cost = completion_cost(
                 completion_response=response_object,
                 call_type=call_type,

View file

@@ -32,6 +32,12 @@ class LangFuseLogger:
         self.langfuse_host = langfuse_host or os.getenv(
             "LANGFUSE_HOST", "https://cloud.langfuse.com"
         )
+        if not (
+            self.langfuse_host.startswith("http://")
+            or self.langfuse_host.startswith("https://")
+        ):
+            # add http:// if unset, assume communicating over private network - e.g. render
+            self.langfuse_host = "http://" + self.langfuse_host
         self.langfuse_release = os.getenv("LANGFUSE_RELEASE")
         self.langfuse_debug = os.getenv("LANGFUSE_DEBUG")

View file

@@ -29,6 +29,7 @@ else:
 LITELLM_TRACER_NAME = os.getenv("OTEL_TRACER_NAME", "litellm")
 LITELLM_RESOURCE = {
     "service.name": os.getenv("OTEL_SERVICE_NAME", "litellm"),
+    "deployment.environment": os.getenv("OTEL_ENVIRONMENT_NAME", "production"),
 }
 RAW_REQUEST_SPAN_NAME = "raw_gen_ai_request"
 LITELLM_REQUEST_SPAN_NAME = "litellm_request"

View file

@@ -24,6 +24,8 @@ from litellm.integrations.custom_logger import CustomLogger
 from litellm.litellm_core_utils.redact_messages import (
     redact_message_input_output_from_logging,
 )
+from litellm.types.llms.openai import HttpxBinaryResponseContent
+from litellm.types.router import SPECIAL_MODEL_INFO_PARAMS
 from litellm.types.utils import (
     CallTypes,
     EmbeddingResponse,
@@ -517,33 +519,36 @@ class Logging:
             self.model_call_details["cache_hit"] = cache_hit
             ## if model in model cost map - log the response cost
             ## else set cost to None
-            verbose_logger.debug(f"Model={self.model};")
             if (
-                result is not None
-                and (
+                result is not None and self.stream is not True
+            ):  # handle streaming separately
+                if (
                     isinstance(result, ModelResponse)
                     or isinstance(result, EmbeddingResponse)
                     or isinstance(result, ImageResponse)
                     or isinstance(result, TranscriptionResponse)
                     or isinstance(result, TextCompletionResponse)
-                )
-                and self.stream != True
-            ):  # handle streaming separately
-                self.model_call_details["response_cost"] = (
-                    litellm.response_cost_calculator(
-                        response_object=result,
-                        model=self.model,
-                        cache_hit=self.model_call_details.get("cache_hit", False),
-                        custom_llm_provider=self.model_call_details.get(
-                            "custom_llm_provider", None
-                        ),
-                        base_model=_get_base_model_from_metadata(
-                            model_call_details=self.model_call_details
-                        ),
-                        call_type=self.call_type,
-                        optional_params=self.optional_params,
+                    or isinstance(result, HttpxBinaryResponseContent)  # tts
+                ):
+                    custom_pricing = use_custom_pricing_for_model(
+                        litellm_params=self.litellm_params
+                    )
+                    self.model_call_details["response_cost"] = (
+                        litellm.response_cost_calculator(
+                            response_object=result,
+                            model=self.model,
+                            cache_hit=self.model_call_details.get("cache_hit", False),
+                            custom_llm_provider=self.model_call_details.get(
+                                "custom_llm_provider", None
+                            ),
+                            base_model=_get_base_model_from_metadata(
+                                model_call_details=self.model_call_details
+                            ),
+                            call_type=self.call_type,
+                            optional_params=self.optional_params,
+                            custom_pricing=custom_pricing,
+                        )
                     )
-                )
             else:  # streaming chunks + image gen.
                 self.model_call_details["response_cost"] = None
@@ -600,8 +605,7 @@ class Logging:
                 verbose_logger.error(
                     "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while building complete streaming response in success logging {}\n{}".format(
                         str(e), traceback.format_exc()
-                    ),
-                    log_level="ERROR",
+                    )
                 )
                 complete_streaming_response = None
             else:
@@ -626,7 +630,11 @@ class Logging:
                         model_call_details=self.model_call_details
                     ),
                     call_type=self.call_type,
-                    optional_params=self.optional_params,
+                    optional_params=(
+                        self.optional_params
+                        if hasattr(self, "optional_params")
+                        else {}
+                    ),
                 )
             )
         if self.dynamic_success_callbacks is not None and isinstance(
@@ -1795,7 +1803,6 @@ def set_callbacks(callback_list, function_id=None):
     try:
         for callback in callback_list:
-            print_verbose(f"init callback list: {callback}")
             if callback == "sentry":
                 try:
                     import sentry_sdk
@@ -2013,3 +2020,17 @@ def get_custom_logger_compatible_class(
         if isinstance(callback, _PROXY_DynamicRateLimitHandler):
             return callback  # type: ignore
     return None
+
+
+def use_custom_pricing_for_model(litellm_params: Optional[dict]) -> bool:
+    if litellm_params is None:
+        return False
+    metadata: Optional[dict] = litellm_params.get("metadata", {})
+    if metadata is None:
+        return False
+    model_info: Optional[dict] = metadata.get("model_info", {})
+    if model_info is not None:
+        for k, v in model_info.items():
+            if k in SPECIAL_MODEL_INFO_PARAMS:
+                return True
+    return False
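
A hedged sketch of the input this helper inspects: custom pricing set on a deployment surfaces under `litellm_params["metadata"]["model_info"]`. The exact keys in `SPECIAL_MODEL_INFO_PARAMS` are not shown in this diff; `input_cost_per_token` is an assumption based on the cost-calculator changes above:

```python
# Hypothetical input mirroring the helper's traversal:
litellm_params = {
    "metadata": {
        "model_info": {
            "id": "my-deployment-1",          # ignored: not a special param
            "input_cost_per_token": 0.00003,  # assumed SPECIAL_MODEL_INFO_PARAMS key
        }
    }
}
# use_custom_pricing_for_model(litellm_params) -> True (custom pricing present);
# with only {"id": ...} in model_info it would return False.
```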

View file

@@ -0,0 +1,85 @@
+# What is this?
+## Helper utilities for cost_per_token()
+import traceback
+from typing import List, Literal, Optional, Tuple
+
+import litellm
+from litellm import verbose_logger
+
+
+def _generic_cost_per_character(
+    model: str,
+    custom_llm_provider: str,
+    prompt_characters: float,
+    completion_characters: float,
+    custom_prompt_cost: Optional[float],
+    custom_completion_cost: Optional[float],
+) -> Tuple[Optional[float], Optional[float]]:
+    """
+    Generic function to help calculate cost per character.
+    """
+    """
+    Calculates the cost per character for a given model, input messages, and response object.
+
+    Input:
+        - model: str, the model name without provider prefix
+        - custom_llm_provider: str, "vertex_ai-*"
+        - prompt_characters: float, the number of input characters
+        - completion_characters: float, the number of output characters
+
+    Returns:
+        Tuple[Optional[float], Optional[float]] - prompt_cost_in_usd, completion_cost_in_usd.
+        - returns None if not able to calculate cost.
+
+    Raises:
+        Exception if 'input_cost_per_character' or 'output_cost_per_character' is missing from model_info
+    """
+    args = locals()
+    ## GET MODEL INFO
+    model_info = litellm.get_model_info(
+        model=model, custom_llm_provider=custom_llm_provider
+    )
+
+    ## CALCULATE INPUT COST
+    try:
+        if custom_prompt_cost is None:
+            assert (
+                "input_cost_per_character" in model_info
+                and model_info["input_cost_per_character"] is not None
+            ), "model info for model={} does not have 'input_cost_per_character'-pricing\nmodel_info={}".format(
+                model, model_info
+            )
+            custom_prompt_cost = model_info["input_cost_per_character"]
+
+        prompt_cost = prompt_characters * custom_prompt_cost
+    except Exception as e:
+        verbose_logger.error(
+            "litellm.litellm_core_utils.llm_cost_calc.utils.py::cost_per_character(): Exception occured - {}\n{}\nDefaulting to None".format(
+                str(e), traceback.format_exc()
+            )
+        )
+        prompt_cost = None
+
+    ## CALCULATE OUTPUT COST
+    try:
+        if custom_completion_cost is None:
+            assert (
+                "output_cost_per_character" in model_info
+                and model_info["output_cost_per_character"] is not None
+            ), "model info for model={} does not have 'output_cost_per_character'-pricing\nmodel_info={}".format(
+                model, model_info
+            )
+            custom_completion_cost = model_info["output_cost_per_character"]
+        completion_cost = completion_characters * custom_completion_cost
+    except Exception as e:
+        verbose_logger.error(
+            "litellm.litellm_core_utils.llm_cost_calc.utils.py::cost_per_character(): Exception occured - {}\n{}\nDefaulting to None".format(
+                str(e), traceback.format_exc()
+            )
+        )
+        completion_cost = None
+
+    return prompt_cost, completion_cost
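
A usage sketch for the helper above, with explicit per-character prices supplied so the `model_info` lookup is bypassed (the model name and prices are illustrative; `litellm.get_model_info` must still recognize the model):

```python
prompt_cost, completion_cost = _generic_cost_per_character(
    model="tts-1",                 # illustrative
    custom_llm_provider="openai",
    prompt_characters=1000.0,
    completion_characters=0.0,
    custom_prompt_cost=0.000015,   # assumed $15 per 1M input characters
    custom_completion_cost=0.0,
)
# prompt_cost == 1000.0 * 0.000015 == 0.015 USD; completion_cost == 0.0
```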

View file

@@ -55,7 +55,6 @@ from ..types.llms.openai import (
     Thread,
 )
 from .base import BaseLLM
-from .custom_httpx.azure_dall_e_2 import AsyncCustomHTTPTransport, CustomHTTPTransport

 azure_ad_cache = DualCache()
@@ -1718,9 +1717,7 @@ class AzureChatCompletion(BaseLLM):
         input: Optional[list] = None,
         prompt: Optional[str] = None,
     ) -> dict:
-        client_session = litellm.client_session or httpx.Client(
-            transport=CustomHTTPTransport(),  # handle dall-e-2 calls
-        )
+        client_session = litellm.client_session or httpx.Client()
         if "gateway.ai.cloudflare.com" in api_base:
             ## build base url - assume api base includes resource name
             if not api_base.endswith("/"):
@@ -1793,9 +1790,10 @@ class AzureChatCompletion(BaseLLM):
         input: Optional[list] = None,
         prompt: Optional[str] = None,
     ) -> dict:
-        client_session = litellm.aclient_session or httpx.AsyncClient(
-            transport=AsyncCustomHTTPTransport(),  # handle dall-e-2 calls
-        )
+        client_session = (
+            litellm.aclient_session or httpx.AsyncClient()
+        )  # handle dall-e-2 calls
         if "gateway.ai.cloudflare.com" in api_base:
             ## build base url - assume api base includes resource name
             if not api_base.endswith("/"):

View file

@@ -1,24 +1,27 @@
-from typing import Optional, Union, Any
-import types, requests  # type: ignore
-from .base import BaseLLM
-from litellm.utils import (
-    ModelResponse,
-    Choices,
-    Message,
-    CustomStreamWrapper,
-    convert_to_model_response_object,
-    TranscriptionResponse,
-    TextCompletionResponse,
-)
-from typing import Callable, Optional, BinaryIO
-from litellm import OpenAIConfig
-import litellm, json
-import httpx
-from .custom_httpx.azure_dall_e_2 import CustomHTTPTransport, AsyncCustomHTTPTransport
-from openai import AzureOpenAI, AsyncAzureOpenAI
-from ..llms.openai import OpenAITextCompletion, OpenAITextCompletionConfig
+import json
+import types  # type: ignore
 import uuid
-from .prompt_templates.factory import prompt_factory, custom_prompt
+from typing import Any, BinaryIO, Callable, Optional, Union
+
+import httpx
+import requests
+from openai import AsyncAzureOpenAI, AzureOpenAI
+
+import litellm
+from litellm import OpenAIConfig
+from litellm.utils import (
+    Choices,
+    CustomStreamWrapper,
+    Message,
+    ModelResponse,
+    TextCompletionResponse,
+    TranscriptionResponse,
+    convert_to_model_response_object,
+)
+
+from ..llms.openai import OpenAITextCompletion, OpenAITextCompletionConfig
+from .base import BaseLLM
+from .prompt_templates.factory import custom_prompt, prompt_factory

 openai_text_completion_config = OpenAITextCompletionConfig()

View file

@@ -1,143 +0,0 @@
-import asyncio
-import json
-import time
-
-import httpx
-
-
-class AsyncCustomHTTPTransport(httpx.AsyncHTTPTransport):
-    """
-    Async implementation of custom http transport
-    """
-
-    async def handle_async_request(self, request: httpx.Request) -> httpx.Response:
-        _api_version = request.url.params.get("api-version", "")
-        if (
-            "images/generations" in request.url.path
-            and _api_version
-            in [  # dall-e-3 starts from `2023-12-01-preview` so we should be able to avoid conflict
-                "2023-06-01-preview",
-                "2023-07-01-preview",
-                "2023-08-01-preview",
-                "2023-09-01-preview",
-                "2023-10-01-preview",
-            ]
-        ):
-            request.url = request.url.copy_with(
-                path="/openai/images/generations:submit"
-            )
-            response = await super().handle_async_request(request)
-            operation_location_url = response.headers["operation-location"]
-            request.url = httpx.URL(operation_location_url)
-            request.method = "GET"
-            response = await super().handle_async_request(request)
-            await response.aread()
-
-            timeout_secs: int = 120
-            start_time = time.time()
-            while response.json()["status"] not in ["succeeded", "failed"]:
-                if time.time() - start_time > timeout_secs:
-                    timeout = {
-                        "error": {
-                            "code": "Timeout",
-                            "message": "Operation polling timed out.",
-                        }
-                    }
-                    return httpx.Response(
-                        status_code=400,
-                        headers=response.headers,
-                        content=json.dumps(timeout).encode("utf-8"),
-                        request=request,
-                    )
-
-                await asyncio.sleep(int(response.headers.get("retry-after") or 10))
-                response = await super().handle_async_request(request)
-                await response.aread()
-
-            if response.json()["status"] == "failed":
-                error_data = response.json()
-                return httpx.Response(
-                    status_code=400,
-                    headers=response.headers,
-                    content=json.dumps(error_data).encode("utf-8"),
-                    request=request,
-                )
-
-            result = response.json()["result"]
-            return httpx.Response(
-                status_code=200,
-                headers=response.headers,
-                content=json.dumps(result).encode("utf-8"),
-                request=request,
-            )
-        return await super().handle_async_request(request)
-
-
-class CustomHTTPTransport(httpx.HTTPTransport):
-    """
-    This class was written as a workaround to support dall-e-2 on openai > v1.x
-    Refer to this issue for more: https://github.com/openai/openai-python/issues/692
-    """
-
-    def handle_request(
-        self,
-        request: httpx.Request,
-    ) -> httpx.Response:
-        _api_version = request.url.params.get("api-version", "")
-        if (
-            "images/generations" in request.url.path
-            and _api_version
-            in [  # dall-e-3 starts from `2023-12-01-preview` so we should be able to avoid conflict
-                "2023-06-01-preview",
-                "2023-07-01-preview",
-                "2023-08-01-preview",
-                "2023-09-01-preview",
-                "2023-10-01-preview",
-            ]
-        ):
-            request.url = request.url.copy_with(
-                path="/openai/images/generations:submit"
-            )
-            response = super().handle_request(request)
-            operation_location_url = response.headers["operation-location"]
-            request.url = httpx.URL(operation_location_url)
-            request.method = "GET"
-            response = super().handle_request(request)
-            response.read()
-
-            timeout_secs: int = 120
-            start_time = time.time()
-            while response.json()["status"] not in ["succeeded", "failed"]:
-                if time.time() - start_time > timeout_secs:
-                    timeout = {
-                        "error": {
-                            "code": "Timeout",
-                            "message": "Operation polling timed out.",
-                        }
-                    }
-                    return httpx.Response(
-                        status_code=400,
-                        headers=response.headers,
-                        content=json.dumps(timeout).encode("utf-8"),
-                        request=request,
-                    )
-                time.sleep(int(response.headers.get("retry-after", None) or 10))
-                response = super().handle_request(request)
-                response.read()
-
-            if response.json()["status"] == "failed":
-                error_data = response.json()
-                return httpx.Response(
-                    status_code=400,
-                    headers=response.headers,
-                    content=json.dumps(error_data).encode("utf-8"),
-                    request=request,
-                )
-
-            result = response.json()["result"]
-            return httpx.Response(
-                status_code=200,
-                headers=response.headers,
-                content=json.dumps(result).encode("utf-8"),
-                request=request,
-            )
-        return super().handle_request(request)

View file

@@ -26,30 +26,12 @@ class AsyncHTTPHandler:
         self, timeout: Optional[Union[float, httpx.Timeout]], concurrent_limit: int
     ) -> httpx.AsyncClient:
-        async_proxy_mounts = None
         # Check if the HTTP_PROXY and HTTPS_PROXY environment variables are set and use them accordingly.
-        http_proxy = os.getenv("HTTP_PROXY", None)
-        https_proxy = os.getenv("HTTPS_PROXY", None)
-        no_proxy = os.getenv("NO_PROXY", None)
         ssl_verify = bool(os.getenv("SSL_VERIFY", litellm.ssl_verify))
         cert = os.getenv(
             "SSL_CERTIFICATE", litellm.ssl_certificate
         )  # /path/to/client.pem

-        if http_proxy is not None and https_proxy is not None:
-            async_proxy_mounts = {
-                "http://": httpx.AsyncHTTPTransport(proxy=httpx.Proxy(url=http_proxy)),
-                "https://": httpx.AsyncHTTPTransport(
-                    proxy=httpx.Proxy(url=https_proxy)
-                ),
-            }
-            # assume no_proxy is a list of comma separated urls
-            if no_proxy is not None and isinstance(no_proxy, str):
-                no_proxy_urls = no_proxy.split(",")
-                for url in no_proxy_urls:  # set no-proxy support for specific urls
-                    async_proxy_mounts[url] = None  # type: ignore

         if timeout is None:
             timeout = _DEFAULT_TIMEOUT

         # Create a client with a connection pool
@@ -61,7 +43,6 @@ class AsyncHTTPHandler:
                 max_keepalive_connections=concurrent_limit,
             ),
             verify=ssl_verify,
-            mounts=async_proxy_mounts,
             cert=cert,
         )
@@ -163,27 +144,11 @@ class HTTPHandler:
             timeout = _DEFAULT_TIMEOUT

         # Check if the HTTP_PROXY and HTTPS_PROXY environment variables are set and use them accordingly.
-        http_proxy = os.getenv("HTTP_PROXY", None)
-        https_proxy = os.getenv("HTTPS_PROXY", None)
-        no_proxy = os.getenv("NO_PROXY", None)
         ssl_verify = bool(os.getenv("SSL_VERIFY", litellm.ssl_verify))
         cert = os.getenv(
             "SSL_CERTIFICATE", litellm.ssl_certificate
         )  # /path/to/client.pem

-        sync_proxy_mounts = None
-        if http_proxy is not None and https_proxy is not None:
-            sync_proxy_mounts = {
-                "http://": httpx.HTTPTransport(proxy=httpx.Proxy(url=http_proxy)),
-                "https://": httpx.HTTPTransport(proxy=httpx.Proxy(url=https_proxy)),
-            }
-            # assume no_proxy is a list of comma separated urls
-            if no_proxy is not None and isinstance(no_proxy, str):
-                no_proxy_urls = no_proxy.split(",")
-                for url in no_proxy_urls:  # set no-proxy support for specific urls
-                    sync_proxy_mounts[url] = None  # type: ignore

         if client is None:
             # Create a client with a connection pool
             self.client = httpx.Client(
@@ -193,7 +158,6 @@ class HTTPHandler:
                 max_keepalive_connections=concurrent_limit,
             ),
             verify=ssl_verify,
-            mounts=sync_proxy_mounts,
             cert=cert,
         )
     else:

View file

@@ -1330,17 +1330,30 @@ class ModelResponseIterator:
         gemini_chunk = processed_chunk["candidates"][0]

-        if (
-            "content" in gemini_chunk
-            and "text" in gemini_chunk["content"]["parts"][0]
-        ):
-            text = gemini_chunk["content"]["parts"][0]["text"]
+        if "content" in gemini_chunk:
+            if "text" in gemini_chunk["content"]["parts"][0]:
+                text = gemini_chunk["content"]["parts"][0]["text"]
+            elif "functionCall" in gemini_chunk["content"]["parts"][0]:
+                function_call = ChatCompletionToolCallFunctionChunk(
+                    name=gemini_chunk["content"]["parts"][0]["functionCall"][
+                        "name"
+                    ],
+                    arguments=json.dumps(
+                        gemini_chunk["content"]["parts"][0]["functionCall"]["args"]
+                    ),
+                )
+                tool_use = ChatCompletionToolCallChunk(
+                    id=str(uuid.uuid4()),
+                    type="function",
+                    function=function_call,
+                    index=0,
+                )

         if "finishReason" in gemini_chunk:
             finish_reason = map_finish_reason(
                 finish_reason=gemini_chunk["finishReason"]
             )
-            ## DO NOT SET 'finish_reason' = True
+            ## DO NOT SET 'is_finished' = True
             ## GEMINI SETS FINISHREASON ON EVERY CHUNK!

         if "usageMetadata" in processed_chunk:

View file

@ -896,7 +896,7 @@ def completion(
if ( if (
supports_system_message is not None supports_system_message is not None
and isinstance(supports_system_message, bool) and isinstance(supports_system_message, bool)
and supports_system_message == False and supports_system_message is False
): ):
messages = map_system_message_pt(messages=messages) messages = map_system_message_pt(messages=messages)
model_api_key = get_api_key( model_api_key = get_api_key(
@@ -5028,10 +5028,9 @@ def stream_chunk_builder(
     for chunk in chunks:
         if "usage" in chunk:
             if "prompt_tokens" in chunk["usage"]:
-                prompt_tokens += chunk["usage"].get("prompt_tokens", 0) or 0
+                prompt_tokens = chunk["usage"].get("prompt_tokens", 0) or 0
             if "completion_tokens" in chunk["usage"]:
-                completion_tokens += chunk["usage"].get("completion_tokens", 0) or 0
+                completion_tokens = chunk["usage"].get("completion_tokens", 0) or 0

     try:
         response["usage"]["prompt_tokens"] = prompt_tokens or token_counter(
             model=model, messages=messages
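
The `+=` → `=` change matters when a provider emits a `usage` block on more than one chunk: each block presumably already carries running totals for the request, so accumulating them double-counts. A toy illustration (chunk contents are hypothetical):

```python
chunks = [
    {"usage": {"prompt_tokens": 9, "completion_tokens": 40}},  # interim totals
    {"usage": {"prompt_tokens": 9, "completion_tokens": 70}},  # final totals
]

prompt_tokens = completion_tokens = 0
for chunk in chunks:
    if "usage" in chunk:
        if "prompt_tokens" in chunk["usage"]:
            prompt_tokens = chunk["usage"].get("prompt_tokens", 0) or 0  # was +=
        if "completion_tokens" in chunk["usage"]:
            completion_tokens = chunk["usage"].get("completion_tokens", 0) or 0

print(prompt_tokens, completion_tokens)  # 9 70 -- with += this would be 18 110
```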

View file

@@ -2022,10 +2022,10 @@
     "max_tokens": 8192,
     "max_input_tokens": 2097152,
     "max_output_tokens": 8192,
-    "input_cost_per_token": 0.00000035,
-    "input_cost_per_token_above_128k_tokens": 0.0000007,
-    "output_cost_per_token": 0.00000105,
-    "output_cost_per_token_above_128k_tokens": 0.0000021,
+    "input_cost_per_token": 0.0000035,
+    "input_cost_per_token_above_128k_tokens": 0.000007,
+    "output_cost_per_token": 0.0000105,
+    "output_cost_per_token_above_128k_tokens": 0.000021,
     "litellm_provider": "gemini",
     "mode": "chat",
     "supports_system_messages": true,
@@ -2033,16 +2033,16 @@
     "supports_vision": true,
     "supports_tool_choice": true,
     "supports_response_schema": true,
-    "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    "source": "https://ai.google.dev/pricing"
 },
 "gemini/gemini-1.5-pro-latest": {
     "max_tokens": 8192,
     "max_input_tokens": 1048576,
     "max_output_tokens": 8192,
-    "input_cost_per_token": 0.00000035,
-    "input_cost_per_token_above_128k_tokens": 0.0000007,
+    "input_cost_per_token": 0.0000035,
+    "input_cost_per_token_above_128k_tokens": 0.000007,
     "output_cost_per_token": 0.00000105,
-    "output_cost_per_token_above_128k_tokens": 0.0000021,
+    "output_cost_per_token_above_128k_tokens": 0.000021,
     "litellm_provider": "gemini",
     "mode": "chat",
     "supports_system_messages": true,
@@ -2050,7 +2050,7 @@
     "supports_vision": true,
     "supports_tool_choice": true,
     "supports_response_schema": true,
-    "source": "https://ai.google.dev/models/gemini"
+    "source": "https://ai.google.dev/pricing"
 },
 "gemini/gemini-pro-vision": {
     "max_tokens": 2048,

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@@ -1 +1 @@
Diff of the minified `index.html` abridged because the lines are too long: the two versions are identical except for hashed asset names and the build id — chunks `131-6a03368053f9d26d.js` → `131-19b05e5ce40fa85d.js`, `759-83a8bdddfe32b5d9.js` → `759-d7572f2a46f911d5.js`, `777-f76791513e294b30.js` → `777-906d7dd6a5bf7be4.js`, `app/page-da7d95729f2529b5.js` → `app/page-567f85145e7f0f35.js`, and buildId `0gt3_bF2KkdKeE61mic4M` → `RDLpeUaSstfmeQiKITNBo`.

View file

@@ -1,7 +1,7 @@
 2:I[77831,[],""]
-3:I[48951,["665","static/chunks/3014691f-589a5f4865c3822f.js","936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-83a8bdddfe32b5d9.js","777","static/chunks/777-f76791513e294b30.js","931","static/chunks/app/page-da7d95729f2529b5.js"],""]
+3:I[48951,["665","static/chunks/3014691f-589a5f4865c3822f.js","936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-19b05e5ce40fa85d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-d7572f2a46f911d5.js","777","static/chunks/777-906d7dd6a5bf7be4.js","931","static/chunks/app/page-567f85145e7f0f35.js"],""]
 4:I[5613,[],""]
 5:I[31778,[],""]
-0:["0gt3_bF2KkdKeE61mic4M", … (remainder of the RSC payload line abridged; identical in both versions)]
+0:["RDLpeUaSstfmeQiKITNBo", … (remainder of the RSC payload line abridged; identical in both versions)]
 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
 1:null

File diff suppressed because one or more lines are too long

View file

@ -1,7 +1,7 @@
2:I[77831,[],""] 2:I[77831,[],""]
3:I[87494,["294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","777","static/chunks/777-f76791513e294b30.js","418","static/chunks/app/model_hub/page-ba7819b59161aa64.js"],""] 3:I[87494,["294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-19b05e5ce40fa85d.js","777","static/chunks/777-906d7dd6a5bf7be4.js","418","static/chunks/app/model_hub/page-ba7819b59161aa64.js"],""]
4:I[5613,[],""] 4:I[5613,[],""]
5:I[31778,[],""] 5:I[31778,[],""]
0:["0gt3_bF2KkdKeE61mic4M",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]] 
0:["RDLpeUaSstfmeQiKITNBo",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null

File diff suppressed because one or more lines are too long

View file

@ -1,7 +1,7 @@
2:I[77831,[],""] 2:I[77831,[],""]
3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-f76791513e294b30.js","461","static/chunks/app/onboarding/page-1ed08595d570934e.js"],""] 3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-906d7dd6a5bf7be4.js","461","static/chunks/app/onboarding/page-1ed08595d570934e.js"],""]
4:I[5613,[],""] 4:I[5613,[],""]
5:I[31778,[],""] 5:I[31778,[],""]
0:["0gt3_bF2KkdKeE61mic4M",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]] 
0:["RDLpeUaSstfmeQiKITNBo",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null

View file

@ -1,12 +1,10 @@
model_list: model_list:
- model_name: "*" - model_name: tts
litellm_params: litellm_params:
model: "openai/*" model: "openai/*"
mock_response: "Hello world!" - model_name: gemini-1.5-flash
litellm_params:
litellm_settings: model: gemini/gemini-1.5-flash
success_callback: ["langfuse"]
failure_callback: ["langfuse"]
general_settings: general_settings:
alerting: ["slack"] alerting: ["slack"]
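For reference, a quick sketch of exercising the two models configured above through the proxy with an OpenAI-style client; the host and key below are placeholders, not part of this diff.

import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://localhost:4000")
resp = client.chat.completions.create(
    model="gemini-1.5-flash",  # routed per litellm_params to gemini/gemini-1.5-flash
    messages=[{"role": "user", "content": "hi"}],
)
print(resp.choices[0].message.content)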

View file

@ -0,0 +1,167 @@
import os
def show_missing_vars_in_env():
from fastapi.responses import HTMLResponse
from litellm.proxy.proxy_server import master_key, prisma_client
if prisma_client is None and master_key is None:
return HTMLResponse(
content=missing_keys_form(
missing_key_names="DATABASE_URL, LITELLM_MASTER_KEY"
),
status_code=200,
)
if prisma_client is None:
return HTMLResponse(
content=missing_keys_form(missing_key_names="DATABASE_URL"), status_code=200
)
if master_key is None:
return HTMLResponse(
content=missing_keys_form(missing_key_names="LITELLM_MASTER_KEY"),
status_code=200,
)
return None
# LiteLLM Admin UI - Non SSO Login
url_to_redirect_to = os.getenv("PROXY_BASE_URL", "")
url_to_redirect_to += "/login"
html_form = f"""
<!DOCTYPE html>
<html>
<head>
<title>LiteLLM Login</title>
<style>
body {{
font-family: Arial, sans-serif;
background-color: #f4f4f4;
margin: 0;
padding: 0;
display: flex;
justify-content: center;
align-items: center;
height: 100vh;
}}
form {{
background-color: #fff;
padding: 20px;
border-radius: 8px;
box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
}}
label {{
display: block;
margin-bottom: 8px;
}}
input {{
width: 100%;
padding: 8px;
margin-bottom: 16px;
box-sizing: border-box;
border: 1px solid #ccc;
border-radius: 4px;
}}
input[type="submit"] {{
background-color: #4caf50;
color: #fff;
cursor: pointer;
}}
input[type="submit"]:hover {{
background-color: #45a049;
}}
</style>
</head>
<body>
<form action="{url_to_redirect_to}" method="post">
<h2>LiteLLM Login</h2>
<p>By default, the Username is "admin" and the Password is the LiteLLM Proxy `MASTER_KEY` you set</p>
<p>To set UI credentials / use SSO, see the docs: <a href="https://docs.litellm.ai/docs/proxy/ui" target="_blank">https://docs.litellm.ai/docs/proxy/ui</a></p>
<br>
<label for="username">Username:</label>
<input type="text" id="username" name="username" required>
<label for="password">Password:</label>
<input type="password" id="password" name="password" required>
<input type="submit" value="Submit">
</form>
"""
def missing_keys_form(missing_key_names: str):
missing_keys_html_form = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<style>
body {{
font-family: Arial, sans-serif;
background-color: #f4f4f9;
color: #333;
margin: 20px;
line-height: 1.6;
}}
.container {{
max-width: 800px;
margin: auto;
padding: 20px;
background: #fff;
border: 1px solid #ddd;
border-radius: 5px;
box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
}}
h1 {{
font-size: 24px;
margin-bottom: 20px;
}}
pre {{
background: #f8f8f8;
padding: 10px;
border: 1px solid #ccc;
border-radius: 4px;
overflow-x: auto;
font-size: 14px;
}}
.env-var {{
font-weight: normal;
}}
.comment {{
font-weight: normal;
color: #777;
}}
</style>
<title>Environment Setup Instructions</title>
</head>
<body>
<div class="container">
<h1>Environment Setup Instructions</h1>
<p>Please add the following variables to your environment variables:</p>
<pre>
<span class="env-var">LITELLM_MASTER_KEY="sk-1234"</span> <span class="comment"># Your master key for the proxy server. Can use this to send /chat/completion requests etc</span>
<span class="env-var">LITELLM_SALT_KEY="sk-XXXXXXXX"</span> <span class="comment"># Can NOT CHANGE THIS ONCE SET - It is used to encrypt/decrypt credentials stored in DB. If value of 'LITELLM_SALT_KEY' changes your models cannot be retrieved from DB</span>
<span class="env-var">DATABASE_URL="postgres://..."</span> <span class="comment"># Need a postgres database? (Check out Supabase, Neon, etc)</span>
<span class="comment">## OPTIONAL ##</span>
<span class="env-var">PORT=4000</span> <span class="comment"># DO THIS FOR RENDER/RAILWAY</span>
<span class="env-var">STORE_MODEL_IN_DB="True"</span> <span class="comment"># Allow storing models in db</span>
</pre>
<h1>Missing Environment Variables</h1>
<p>{missing_keys}</p>
</div>
<div class="container">
<h1>Need Help? Support</h1>
<p>Discord: <a href="https://discord.com/invite/wuPM9dRgDw" target="_blank">https://discord.com/invite/wuPM9dRgDw</a></p>
<p>Docs: <a href="https://docs.litellm.ai/docs/" target="_blank">https://docs.litellm.ai/docs/</a></p>
</div>
</body>
</html>
"""
return missing_keys_html_form.format(missing_keys=missing_key_names)
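A minimal sketch of how the new helper renders, assuming the module is importable at the path used later in the proxy_server.py hunks:

from litellm.proxy.common_utils.admin_ui_utils import missing_keys_form

html = missing_keys_form(missing_key_names="DATABASE_URL, LITELLM_MASTER_KEY")
assert "Missing Environment Variables" in html
assert "DATABASE_URL, LITELLM_MASTER_KEY" in html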

View file

@ -0,0 +1,89 @@
import base64
import os
from litellm._logging import verbose_proxy_logger
LITELLM_SALT_KEY = os.getenv("LITELLM_SALT_KEY", None)
if LITELLM_SALT_KEY is None:
    verbose_proxy_logger.debug(
        "LITELLM_SALT_KEY is None, using master_key to encrypt/decrypt secrets stored in DB"
    )
def encrypt_value_helper(value: str):
from litellm.proxy.proxy_server import master_key
signing_key = LITELLM_SALT_KEY
if LITELLM_SALT_KEY is None:
signing_key = master_key
try:
if isinstance(value, str):
encrypted_value = encrypt_value(value=value, signing_key=signing_key) # type: ignore
encrypted_value = base64.b64encode(encrypted_value).decode("utf-8")
return encrypted_value
raise ValueError(
f"Invalid value type passed to encrypt_value: {type(value)} for Value: {value}\n Value must be a string"
)
except Exception as e:
raise e
def decrypt_value_helper(value: str):
from litellm.proxy.proxy_server import master_key
signing_key = LITELLM_SALT_KEY
if LITELLM_SALT_KEY is None:
signing_key = master_key
try:
if isinstance(value, str):
decoded_b64 = base64.b64decode(value)
value = decrypt_value(value=decoded_b64, signing_key=signing_key) # type: ignore
return value
except Exception as e:
verbose_proxy_logger.error(f"Error decrypting value: {value}\nError: {str(e)}")
# Non-blocking: failing to decrypt one value should not block decrypting the others
pass
def encrypt_value(value: str, signing_key: str):
import hashlib
import nacl.secret
import nacl.utils
# get 32 byte master key #
hash_object = hashlib.sha256(signing_key.encode())
hash_bytes = hash_object.digest()
# initialize secret box #
box = nacl.secret.SecretBox(hash_bytes)
# encode message #
value_bytes = value.encode("utf-8")
encrypted = box.encrypt(value_bytes)
return encrypted
def decrypt_value(value: bytes, signing_key: str) -> str:
import hashlib
import nacl.secret
import nacl.utils
# get 32 byte master key #
hash_object = hashlib.sha256(signing_key.encode())
hash_bytes = hash_object.digest()
# initialize secret box #
box = nacl.secret.SecretBox(hash_bytes)
# Convert the bytes object to a string
plaintext = box.decrypt(value)
plaintext = plaintext.decode("utf-8") # type: ignore
return plaintext # type: ignore
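For reference, a round-trip sketch of the scheme these helpers implement (PyNaCl SecretBox over a SHA-256-derived key, base64 for DB storage); the key and value below are placeholders:

import base64
import hashlib

import nacl.secret

signing_key = "sk-1234"  # placeholder; in the proxy this is LITELLM_SALT_KEY or master_key
box = nacl.secret.SecretBox(hashlib.sha256(signing_key.encode()).digest())

# what encrypt_value_helper stores: encrypt, then base64-encode
stored = base64.b64encode(box.encrypt("my-provider-api-key".encode("utf-8"))).decode("utf-8")

# what decrypt_value_helper recovers: base64-decode, then decrypt
assert box.decrypt(base64.b64decode(stored)).decode("utf-8") == "my-provider-api-key"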

View file

@ -31,10 +31,12 @@ def initialize_callbacks_on_proxy(
imported_list.append(callback) imported_list.append(callback)
elif isinstance(callback, str) and callback == "otel": elif isinstance(callback, str) and callback == "otel":
from litellm.integrations.opentelemetry import OpenTelemetry from litellm.integrations.opentelemetry import OpenTelemetry
from litellm.proxy import proxy_server
open_telemetry_logger = OpenTelemetry() open_telemetry_logger = OpenTelemetry()
imported_list.append(open_telemetry_logger) imported_list.append(open_telemetry_logger)
setattr(proxy_server, "open_telemetry_logger", open_telemetry_logger)
elif isinstance(callback, str) and callback == "presidio": elif isinstance(callback, str) and callback == "presidio":
from litellm.proxy.hooks.presidio_pii_masking import ( from litellm.proxy.hooks.presidio_pii_masking import (
_OPTIONAL_PresidioPIIMasking, _OPTIONAL_PresidioPIIMasking,

View file

@ -8,21 +8,26 @@
# Tell us how we can improve! - Krrish & Ishaan # Tell us how we can improve! - Krrish & Ishaan
import asyncio
import json
import traceback
import uuid
from typing import Optional, Union from typing import Optional, Union
import litellm, traceback, uuid, json # noqa: E401
from litellm.caching import DualCache import aiohttp
from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException from fastapi import HTTPException
import litellm # noqa: E401
from litellm._logging import verbose_proxy_logger from litellm._logging import verbose_proxy_logger
from litellm.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import UserAPIKeyAuth
from litellm.utils import ( from litellm.utils import (
ModelResponse,
EmbeddingResponse, EmbeddingResponse,
ImageResponse, ImageResponse,
ModelResponse,
StreamingChoices, StreamingChoices,
) )
import aiohttp
import asyncio
class _OPTIONAL_PresidioPIIMasking(CustomLogger): class _OPTIONAL_PresidioPIIMasking(CustomLogger):
@ -57,22 +62,41 @@ class _OPTIONAL_PresidioPIIMasking(CustomLogger):
f"An error occurred: {str(e)}, file_path={ad_hoc_recognizers}" f"An error occurred: {str(e)}, file_path={ad_hoc_recognizers}"
) )
self.presidio_analyzer_api_base = litellm.get_secret( self.validate_environment()
def validate_environment(self):
self.presidio_analyzer_api_base: Optional[str] = litellm.get_secret(
"PRESIDIO_ANALYZER_API_BASE", None "PRESIDIO_ANALYZER_API_BASE", None
) ) # type: ignore
self.presidio_anonymizer_api_base = litellm.get_secret( self.presidio_anonymizer_api_base: Optional[str] = litellm.get_secret(
"PRESIDIO_ANONYMIZER_API_BASE", None "PRESIDIO_ANONYMIZER_API_BASE", None
) ) # type: ignore
if self.presidio_analyzer_api_base is None: if self.presidio_analyzer_api_base is None:
raise Exception("Missing `PRESIDIO_ANALYZER_API_BASE` from environment") raise Exception("Missing `PRESIDIO_ANALYZER_API_BASE` from environment")
elif not self.presidio_analyzer_api_base.endswith("/"): if not self.presidio_analyzer_api_base.endswith("/"):
self.presidio_analyzer_api_base += "/" self.presidio_analyzer_api_base += "/"
if not (
self.presidio_analyzer_api_base.startswith("http://")
or self.presidio_analyzer_api_base.startswith("https://")
):
# add http:// if unset, assume communicating over private network - e.g. render
self.presidio_analyzer_api_base = (
"http://" + self.presidio_analyzer_api_base
)
if self.presidio_anonymizer_api_base is None: if self.presidio_anonymizer_api_base is None:
raise Exception("Missing `PRESIDIO_ANONYMIZER_API_BASE` from environment") raise Exception("Missing `PRESIDIO_ANONYMIZER_API_BASE` from environment")
elif not self.presidio_anonymizer_api_base.endswith("/"): if not self.presidio_anonymizer_api_base.endswith("/"):
self.presidio_anonymizer_api_base += "/" self.presidio_anonymizer_api_base += "/"
if not (
self.presidio_anonymizer_api_base.startswith("http://")
or self.presidio_anonymizer_api_base.startswith("https://")
):
# add http:// if unset, assume communicating over private network - e.g. render
self.presidio_anonymizer_api_base = (
"http://" + self.presidio_anonymizer_api_base
)
def print_verbose(self, print_statement): def print_verbose(self, print_statement):
try: try:
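The URL normalization added in validate_environment, restated standalone for a quick sanity check (the function name here is illustrative, not part of the diff):

def normalize_api_base(api_base: str) -> str:
    if not api_base.endswith("/"):
        api_base += "/"
    if not (api_base.startswith("http://") or api_base.startswith("https://")):
        # no scheme set: assume a private-network service (e.g. Render) and default to http
        api_base = "http://" + api_base
    return api_base

assert normalize_api_base("presidio-analyzer:3000") == "http://presidio-analyzer:3000/"
assert normalize_api_base("https://example.com/analyze") == "https://example.com/analyze/"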

View file

@ -176,6 +176,7 @@ async def add_litellm_data_to_request(
def _add_otel_traceparent_to_data(data: dict, request: Request): def _add_otel_traceparent_to_data(data: dict, request: Request):
from litellm.proxy.proxy_server import open_telemetry_logger from litellm.proxy.proxy_server import open_telemetry_logger
if data is None: if data is None:
return return
if open_telemetry_logger is None: if open_telemetry_logger is None:

View file

@ -35,6 +35,7 @@ general_settings:
LANGFUSE_SECRET_KEY: "os.environ/LANGFUSE_DEV_SK_KEY" LANGFUSE_SECRET_KEY: "os.environ/LANGFUSE_DEV_SK_KEY"
litellm_settings: litellm_settings:
callbacks: ["otel"]
guardrails: guardrails:
- prompt_injection: - prompt_injection:
callbacks: [lakera_prompt_injection, hide_secrets] callbacks: [lakera_prompt_injection, hide_secrets]

View file

@ -140,7 +140,15 @@ from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
## Import All Misc routes here ## ## Import All Misc routes here ##
from litellm.proxy.caching_routes import router as caching_router from litellm.proxy.caching_routes import router as caching_router
from litellm.proxy.common_utils.admin_ui_utils import (
html_form,
show_missing_vars_in_env,
)
from litellm.proxy.common_utils.debug_utils import router as debugging_endpoints_router from litellm.proxy.common_utils.debug_utils import router as debugging_endpoints_router
from litellm.proxy.common_utils.encrypt_decrypt_utils import (
decrypt_value_helper,
encrypt_value_helper,
)
from litellm.proxy.common_utils.http_parsing_utils import _read_request_body from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
from litellm.proxy.common_utils.init_callbacks import initialize_callbacks_on_proxy from litellm.proxy.common_utils.init_callbacks import initialize_callbacks_on_proxy
from litellm.proxy.common_utils.openai_endpoint_utils import ( from litellm.proxy.common_utils.openai_endpoint_utils import (
@ -186,13 +194,9 @@ from litellm.proxy.utils import (
_get_projected_spend_over_limit, _get_projected_spend_over_limit,
_is_projected_spend_over_limit, _is_projected_spend_over_limit,
_is_valid_team_configs, _is_valid_team_configs,
decrypt_value,
encrypt_value,
get_error_message_str, get_error_message_str,
get_instance_fn, get_instance_fn,
hash_token, hash_token,
html_form,
missing_keys_html_form,
reset_budget, reset_budget,
send_email, send_email,
update_spend, update_spend,
@ -207,6 +211,7 @@ from litellm.router import ModelInfo as RouterModelInfo
from litellm.router import updateDeployment from litellm.router import updateDeployment
from litellm.scheduler import DefaultPriorities, FlowItem, Scheduler from litellm.scheduler import DefaultPriorities, FlowItem, Scheduler
from litellm.types.llms.openai import HttpxBinaryResponseContent from litellm.types.llms.openai import HttpxBinaryResponseContent
from litellm.types.router import RouterGeneralSettings
try: try:
from litellm._version import version from litellm._version import version
@ -1242,6 +1247,7 @@ class ProxyConfig:
## DB ## DB
if prisma_client is not None and ( if prisma_client is not None and (
general_settings.get("store_model_in_db", False) == True general_settings.get("store_model_in_db", False) == True
or store_model_in_db is True
): ):
_tasks = [] _tasks = []
keys = [ keys = [
@ -1765,7 +1771,11 @@ class ProxyConfig:
if k in available_args: if k in available_args:
router_params[k] = v router_params[k] = v
router = litellm.Router( router = litellm.Router(
**router_params, assistants_config=assistants_config **router_params,
assistants_config=assistants_config,
router_general_settings=RouterGeneralSettings(
async_only_mode=True # only init async clients
),
) # type:ignore ) # type:ignore
return router, router.get_model_list(), general_settings return router, router.get_model_list(), general_settings
@ -1880,16 +1890,8 @@ class ProxyConfig:
# decrypt values # decrypt values
for k, v in _litellm_params.items(): for k, v in _litellm_params.items():
if isinstance(v, str): if isinstance(v, str):
# decode base64
try:
decoded_b64 = base64.b64decode(v)
except Exception as e:
verbose_proxy_logger.error(
"Error decoding value - {}".format(v)
)
continue
# decrypt value # decrypt value
_value = decrypt_value(value=decoded_b64, master_key=master_key) _value = decrypt_value_helper(value=v)
# sanity check if string > size 0 # sanity check if string > size 0
if len(_value) > 0: if len(_value) > 0:
_litellm_params[k] = _value _litellm_params[k] = _value
@ -1933,13 +1935,8 @@ class ProxyConfig:
if isinstance(_litellm_params, dict): if isinstance(_litellm_params, dict):
# decrypt values # decrypt values
for k, v in _litellm_params.items(): for k, v in _litellm_params.items():
if isinstance(v, str): decrypted_value = decrypt_value_helper(value=v)
# decode base64 _litellm_params[k] = decrypted_value
decoded_b64 = base64.b64decode(v)
# decrypt value
_litellm_params[k] = decrypt_value(
value=decoded_b64, master_key=master_key # type: ignore
)
_litellm_params = LiteLLM_Params(**_litellm_params) _litellm_params = LiteLLM_Params(**_litellm_params)
else: else:
verbose_proxy_logger.error( verbose_proxy_logger.error(
@ -1957,7 +1954,12 @@ class ProxyConfig:
) )
if len(_model_list) > 0: if len(_model_list) > 0:
verbose_proxy_logger.debug(f"_model_list: {_model_list}") verbose_proxy_logger.debug(f"_model_list: {_model_list}")
llm_router = litellm.Router(model_list=_model_list) llm_router = litellm.Router(
model_list=_model_list,
router_general_settings=RouterGeneralSettings(
async_only_mode=True # only init async clients
),
)
verbose_proxy_logger.debug(f"updated llm_router: {llm_router}") verbose_proxy_logger.debug(f"updated llm_router: {llm_router}")
else: else:
verbose_proxy_logger.debug(f"len new_models: {len(new_models)}") verbose_proxy_logger.debug(f"len new_models: {len(new_models)}")
@ -1995,10 +1997,8 @@ class ProxyConfig:
environment_variables = config_data.get("environment_variables", {}) environment_variables = config_data.get("environment_variables", {})
for k, v in environment_variables.items(): for k, v in environment_variables.items():
try: try:
if v is not None: decrypted_value = decrypt_value_helper(value=v)
decoded_b64 = base64.b64decode(v) os.environ[k] = decrypted_value
value = decrypt_value(value=decoded_b64, master_key=master_key) # type: ignore
os.environ[k] = value
except Exception as e: except Exception as e:
verbose_proxy_logger.error( verbose_proxy_logger.error(
"Error setting env variable: %s - %s", k, str(e) "Error setting env variable: %s - %s", k, str(e)
@ -2720,6 +2720,10 @@ async def chat_completion(
except: except:
data = json.loads(body_str) data = json.loads(body_str)
verbose_proxy_logger.debug(
"Request received by LiteLLM:\n{}".format(json.dumps(data, indent=4)),
)
data = await add_litellm_data_to_request( data = await add_litellm_data_to_request(
data=data, data=data,
request=request, request=request,
@ -3372,8 +3376,9 @@ async def embeddings(
) )
verbose_proxy_logger.debug(traceback.format_exc()) verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
message = get_error_message_str(e)
raise ProxyException( raise ProxyException(
message=getattr(e, "message", str(e)), message=message,
type=getattr(e, "type", "None"), type=getattr(e, "type", "None"),
param=getattr(e, "param", "None"), param=getattr(e, "param", "None"),
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST), code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
@ -5930,11 +5935,8 @@ async def add_new_model(
_litellm_params_dict = model_params.litellm_params.dict(exclude_none=True) _litellm_params_dict = model_params.litellm_params.dict(exclude_none=True)
_orignal_litellm_model_name = model_params.litellm_params.model _orignal_litellm_model_name = model_params.litellm_params.model
for k, v in _litellm_params_dict.items(): for k, v in _litellm_params_dict.items():
if isinstance(v, str): encrypted_value = encrypt_value_helper(value=v)
encrypted_value = encrypt_value(value=v, master_key=master_key) # type: ignore model_params.litellm_params[k] = encrypted_value
model_params.litellm_params[k] = base64.b64encode(
encrypted_value
).decode("utf-8")
_data: dict = { _data: dict = {
"model_id": model_params.model_info.id, "model_id": model_params.model_info.id,
"model_name": model_params.model_name, "model_name": model_params.model_name,
@ -6065,11 +6067,8 @@ async def update_model(
### ENCRYPT PARAMS ### ### ENCRYPT PARAMS ###
for k, v in _new_litellm_params_dict.items(): for k, v in _new_litellm_params_dict.items():
if isinstance(v, str): encrypted_value = encrypt_value_helper(value=v)
encrypted_value = encrypt_value(value=v, master_key=master_key) # type: ignore model_params.litellm_params[k] = encrypted_value
model_params.litellm_params[k] = base64.b64encode(
encrypted_value
).decode("utf-8")
### MERGE WITH EXISTING DATA ### ### MERGE WITH EXISTING DATA ###
merged_dictionary = {} merged_dictionary = {}
@ -7187,10 +7186,9 @@ async def google_login(request: Request):
) )
####### Detect DB + MASTER KEY in .env ####### ####### Detect DB + MASTER KEY in .env #######
if prisma_client is None or master_key is None: missing_env_vars = show_missing_vars_in_env()
from fastapi.responses import HTMLResponse if missing_env_vars is not None:
return missing_env_vars
return HTMLResponse(content=missing_keys_html_form, status_code=200)
# get url from request # get url from request
redirect_url = os.getenv("PROXY_BASE_URL", str(request.base_url)) redirect_url = os.getenv("PROXY_BASE_URL", str(request.base_url))
@ -8393,11 +8391,8 @@ async def update_config(config_info: ConfigYAML):
# encrypt updated_environment_variables # # encrypt updated_environment_variables #
for k, v in _updated_environment_variables.items(): for k, v in _updated_environment_variables.items():
if isinstance(v, str): encrypted_value = encrypt_value_helper(value=v)
encrypted_value = encrypt_value(value=v, master_key=master_key) # type: ignore _updated_environment_variables[k] = encrypted_value
_updated_environment_variables[k] = base64.b64encode(
encrypted_value
).decode("utf-8")
_existing_env_variables = config["environment_variables"] _existing_env_variables = config["environment_variables"]
@ -8814,11 +8809,8 @@ async def get_config():
env_vars_dict[_var] = None env_vars_dict[_var] = None
else: else:
# decode + decrypt the value # decode + decrypt the value
decoded_b64 = base64.b64decode(env_variable) decrypted_value = decrypt_value_helper(value=env_variable)
_decrypted_value = decrypt_value( env_vars_dict[_var] = decrypted_value
value=decoded_b64, master_key=master_key
)
env_vars_dict[_var] = _decrypted_value
_data_to_return.append({"name": _callback, "variables": env_vars_dict}) _data_to_return.append({"name": _callback, "variables": env_vars_dict})
elif _callback == "langfuse": elif _callback == "langfuse":
@ -8834,11 +8826,8 @@ async def get_config():
_langfuse_env_vars[_var] = None _langfuse_env_vars[_var] = None
else: else:
# decode + decrypt the value # decode + decrypt the value
decoded_b64 = base64.b64decode(env_variable) decrypted_value = decrypt_value_helper(value=env_variable)
_decrypted_value = decrypt_value( _langfuse_env_vars[_var] = decrypted_value
value=decoded_b64, master_key=master_key
)
_langfuse_env_vars[_var] = _decrypted_value
_data_to_return.append( _data_to_return.append(
{"name": _callback, "variables": _langfuse_env_vars} {"name": _callback, "variables": _langfuse_env_vars}
@ -8859,10 +8848,7 @@ async def get_config():
_slack_env_vars[_var] = _value _slack_env_vars[_var] = _value
else: else:
# decode + decrypt the value # decode + decrypt the value
decoded_b64 = base64.b64decode(env_variable) _decrypted_value = decrypt_value_helper(value=env_variable)
_decrypted_value = decrypt_value(
value=decoded_b64, master_key=master_key
)
_slack_env_vars[_var] = _decrypted_value _slack_env_vars[_var] = _decrypted_value
_alerting_types = proxy_logging_obj.slack_alerting_instance.alert_types _alerting_types = proxy_logging_obj.slack_alerting_instance.alert_types
@ -8898,10 +8884,7 @@ async def get_config():
_email_env_vars[_var] = None _email_env_vars[_var] = None
else: else:
# decode + decrypt the value # decode + decrypt the value
decoded_b64 = base64.b64decode(env_variable) _decrypted_value = decrypt_value_helper(value=env_variable)
_decrypted_value = decrypt_value(
value=decoded_b64, master_key=master_key
)
_email_env_vars[_var] = _decrypted_value _email_env_vars[_var] = _decrypted_value
alerting_data.append( alerting_data.append(
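Net effect of the Router changes in this file, as a sketch (the model entry is a placeholder): the proxy now constructs routers with RouterGeneralSettings(async_only_mode=True), so only async clients are initialized.

import litellm
from litellm.types.router import RouterGeneralSettings

router = litellm.Router(
    model_list=[
        {"model_name": "gpt-3.5-turbo", "litellm_params": {"model": "gpt-3.5-turbo"}}
    ],
    router_general_settings=RouterGeneralSettings(async_only_mode=True),  # skip sync clients
)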

View file

@ -821,6 +821,14 @@ async def get_global_spend_report(
default="team", default="team",
description="Group spend by internal team or customer or api_key", description="Group spend by internal team or customer or api_key",
), ),
api_key: Optional[str] = fastapi.Query(
default=None,
description="View spend for a specific api_key. Example api_key='sk-1234",
),
internal_user_id: Optional[str] = fastapi.Query(
default=None,
description="View spend for a specific internal_user_id. Example internal_user_id='1234",
),
): ):
""" """
Get Daily Spend per Team, based on specific startTime and endTime. Per team, view usage by each key, model Get Daily Spend per Team, based on specific startTime and endTime. Per team, view usage by each key, model
@ -873,6 +881,96 @@ async def get_global_spend_report(
raise ValueError( raise ValueError(
"/spend/report endpoint " + CommonProxyErrors.not_premium_user.value "/spend/report endpoint " + CommonProxyErrors.not_premium_user.value
) )
if api_key is not None:
verbose_proxy_logger.debug("Getting /spend for api_key: %s", api_key)
if api_key.startswith("sk-"):
api_key = hash_token(token=api_key)
sql_query = """
WITH SpendByModelApiKey AS (
SELECT
sl.api_key,
sl.model,
SUM(sl.spend) AS model_cost,
SUM(sl.prompt_tokens) AS model_input_tokens,
SUM(sl.completion_tokens) AS model_output_tokens
FROM
"LiteLLM_SpendLogs" sl
WHERE
sl."startTime" BETWEEN $1::date AND $2::date AND sl.api_key = $3
GROUP BY
sl.api_key,
sl.model
)
SELECT
api_key,
SUM(model_cost) AS total_cost,
SUM(model_input_tokens) AS total_input_tokens,
SUM(model_output_tokens) AS total_output_tokens,
jsonb_agg(jsonb_build_object(
'model', model,
'total_cost', model_cost,
'total_input_tokens', model_input_tokens,
'total_output_tokens', model_output_tokens
)) AS model_details
FROM
SpendByModelApiKey
GROUP BY
api_key
ORDER BY
total_cost DESC;
"""
db_response = await prisma_client.db.query_raw(
sql_query, start_date_obj, end_date_obj, api_key
)
if db_response is None:
return []
return db_response
elif internal_user_id is not None:
verbose_proxy_logger.debug(
"Getting /spend for internal_user_id: %s", internal_user_id
)
sql_query = """
WITH SpendByModelApiKey AS (
SELECT
sl.api_key,
sl.model,
SUM(sl.spend) AS model_cost,
SUM(sl.prompt_tokens) AS model_input_tokens,
SUM(sl.completion_tokens) AS model_output_tokens
FROM
"LiteLLM_SpendLogs" sl
WHERE
sl."startTime" BETWEEN $1::date AND $2::date AND sl.user = $3
GROUP BY
sl.api_key,
sl.model
)
SELECT
api_key,
SUM(model_cost) AS total_cost,
SUM(model_input_tokens) AS total_input_tokens,
SUM(model_output_tokens) AS total_output_tokens,
jsonb_agg(jsonb_build_object(
'model', model,
'total_cost', model_cost,
'total_input_tokens', model_input_tokens,
'total_output_tokens', model_output_tokens
)) AS model_details
FROM
SpendByModelApiKey
GROUP BY
api_key
ORDER BY
total_cost DESC;
"""
db_response = await prisma_client.db.query_raw(
sql_query, start_date_obj, end_date_obj, internal_user_id
)
if db_response is None:
return []
return db_response
if group_by == "team": if group_by == "team":
# first get data from spend logs -> SpendByModelApiKey # first get data from spend logs -> SpendByModelApiKey
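A sketch of calling the extended endpoint; the host and keys are placeholders, and per the hunk above, raw "sk-" keys are hashed server-side before the lookup:

import requests

resp = requests.get(
    "http://localhost:4000/global/spend/report",
    params={
        "start_date": "2024-06-01",
        "end_date": "2024-06-30",
        "api_key": "sk-1234",  # or internal_user_id="1234"
    },
    headers={"Authorization": "Bearer sk-<master-key>"},
)
print(resp.json())  # [{"api_key": ..., "total_cost": ..., "model_details": [...]}]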

View file

@ -353,7 +353,7 @@ class ProxyLogging:
raise HTTPException( raise HTTPException(
status_code=400, detail={"error": response} status_code=400, detail={"error": response}
) )
print_verbose(f"final data being sent to {call_type} call: {data}")
return data return data
except Exception as e: except Exception as e:
raise e raise e
@ -2705,178 +2705,6 @@ def _is_valid_team_configs(team_id=None, team_config=None, request_data=None):
return return
def encrypt_value(value: str, master_key: str):
import hashlib
import nacl.secret
import nacl.utils
# get 32 byte master key #
hash_object = hashlib.sha256(master_key.encode())
hash_bytes = hash_object.digest()
# initialize secret box #
box = nacl.secret.SecretBox(hash_bytes)
# encode message #
value_bytes = value.encode("utf-8")
encrypted = box.encrypt(value_bytes)
return encrypted
def decrypt_value(value: bytes, master_key: str) -> str:
import hashlib
import nacl.secret
import nacl.utils
# get 32 byte master key #
hash_object = hashlib.sha256(master_key.encode())
hash_bytes = hash_object.digest()
# initialize secret box #
box = nacl.secret.SecretBox(hash_bytes)
# Convert the bytes object to a string
plaintext = box.decrypt(value)
plaintext = plaintext.decode("utf-8") # type: ignore
return plaintext # type: ignore
# LiteLLM Admin UI - Non SSO Login
url_to_redirect_to = os.getenv("PROXY_BASE_URL", "")
url_to_redirect_to += "/login"
html_form = f"""
<!DOCTYPE html>
<html>
<head>
<title>LiteLLM Login</title>
<style>
body {{
font-family: Arial, sans-serif;
background-color: #f4f4f4;
margin: 0;
padding: 0;
display: flex;
justify-content: center;
align-items: center;
height: 100vh;
}}
form {{
background-color: #fff;
padding: 20px;
border-radius: 8px;
box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
}}
label {{
display: block;
margin-bottom: 8px;
}}
input {{
width: 100%;
padding: 8px;
margin-bottom: 16px;
box-sizing: border-box;
border: 1px solid #ccc;
border-radius: 4px;
}}
input[type="submit"] {{
background-color: #4caf50;
color: #fff;
cursor: pointer;
}}
input[type="submit"]:hover {{
background-color: #45a049;
}}
</style>
</head>
<body>
<form action="{url_to_redirect_to}" method="post">
<h2>LiteLLM Login</h2>
<p>By default Username is "admin" and Password is your set LiteLLM Proxy `MASTER_KEY`</p>
<p>If you need to set UI credentials / SSO docs here: <a href="https://docs.litellm.ai/docs/proxy/ui" target="_blank">https://docs.litellm.ai/docs/proxy/ui</a></p>
<br>
<label for="username">Username:</label>
<input type="text" id="username" name="username" required>
<label for="password">Password:</label>
<input type="password" id="password" name="password" required>
<input type="submit" value="Submit">
</form>
"""
missing_keys_html_form = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<style>
body {
font-family: Arial, sans-serif;
background-color: #f4f4f9;
color: #333;
margin: 20px;
line-height: 1.6;
}
.container {
max-width: 600px;
margin: auto;
padding: 20px;
background: #fff;
border: 1px solid #ddd;
border-radius: 5px;
box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
}
h1 {
font-size: 24px;
margin-bottom: 20px;
}
pre {
background: #f8f8f8;
padding: 10px;
border: 1px solid #ccc;
border-radius: 4px;
overflow-x: auto;
font-size: 14px;
}
.env-var {
font-weight: normal;
}
.comment {
font-weight: normal;
color: #777;
}
</style>
<title>Environment Setup Instructions</title>
</head>
<body>
<div class="container">
<h1>Environment Setup Instructions</h1>
<p>Please add the following configurations to your environment variables:</p>
<pre>
<span class="env-var">LITELLM_MASTER_KEY="sk-1234"</span> <span class="comment"># make this unique. must start with `sk-`.</span>
<span class="env-var">DATABASE_URL="postgres://..."</span> <span class="comment"># Need a postgres database? (Check out Supabase, Neon, etc)</span>
<span class="comment">## OPTIONAL ##</span>
<span class="env-var">PORT=4000</span> <span class="comment"># DO THIS FOR RENDER/RAILWAY</span>
<span class="env-var">STORE_MODEL_IN_DB="True"</span> <span class="comment"># Allow storing models in db</span>
</pre>
</div>
</body>
</html>
"""
def _to_ns(dt): def _to_ns(dt):
return int(dt.timestamp() * 1e9) return int(dt.timestamp() * 1e9)
@ -2888,6 +2716,11 @@ def get_error_message_str(e: Exception) -> str:
error_message = e.detail error_message = e.detail
elif isinstance(e.detail, dict): elif isinstance(e.detail, dict):
error_message = json.dumps(e.detail) error_message = json.dumps(e.detail)
elif hasattr(e, "message"):
if isinstance(e.message, str):
error_message = e.message
elif isinstance(e.message, dict):
error_message = json.dumps(e.message)
else: else:
error_message = str(e) error_message = str(e)
else: else:
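With the isinstance fix above, the extended fallback order in get_error_message_str is: e.detail (str) → e.detail (dict, JSON-dumped) → e.message (str) → e.message (dict, JSON-dumped) → str(e). A minimal illustration of the new branch (FakeError is hypothetical):

import json

class FakeError(Exception):
    message = {"error": "over budget"}

def message_fallback(e: Exception) -> str:
    # mirrors the new `elif hasattr(e, "message")` branch
    msg = getattr(e, "message", None)
    if isinstance(msg, str):
        return msg
    if isinstance(msg, dict):
        return json.dumps(msg)
    return str(e)

assert message_fallback(FakeError()) == '{"error": "over budget"}'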

View file

@ -46,15 +46,15 @@ from litellm._logging import verbose_router_logger
from litellm.caching import DualCache, InMemoryCache, RedisCache from litellm.caching import DualCache, InMemoryCache, RedisCache
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from litellm.llms.azure import get_azure_ad_token_from_oidc from litellm.llms.azure import get_azure_ad_token_from_oidc
from litellm.llms.custom_httpx.azure_dall_e_2 import (
AsyncCustomHTTPTransport,
CustomHTTPTransport,
)
from litellm.router_strategy.least_busy import LeastBusyLoggingHandler from litellm.router_strategy.least_busy import LeastBusyLoggingHandler
from litellm.router_strategy.lowest_cost import LowestCostLoggingHandler from litellm.router_strategy.lowest_cost import LowestCostLoggingHandler
from litellm.router_strategy.lowest_latency import LowestLatencyLoggingHandler from litellm.router_strategy.lowest_latency import LowestLatencyLoggingHandler
from litellm.router_strategy.lowest_tpm_rpm import LowestTPMLoggingHandler from litellm.router_strategy.lowest_tpm_rpm import LowestTPMLoggingHandler
from litellm.router_strategy.lowest_tpm_rpm_v2 import LowestTPMLoggingHandler_v2 from litellm.router_strategy.lowest_tpm_rpm_v2 import LowestTPMLoggingHandler_v2
from litellm.router_utils.client_initalization_utils import (
set_client,
should_initialize_sync_client,
)
from litellm.router_utils.handle_error import send_llm_exception_alert from litellm.router_utils.handle_error import send_llm_exception_alert
from litellm.scheduler import FlowItem, Scheduler from litellm.scheduler import FlowItem, Scheduler
from litellm.types.llms.openai import ( from litellm.types.llms.openai import (
@ -79,6 +79,7 @@ from litellm.types.router import (
ModelInfo, ModelInfo,
RetryPolicy, RetryPolicy,
RouterErrors, RouterErrors,
RouterGeneralSettings,
updateDeployment, updateDeployment,
updateLiteLLMParams, updateLiteLLMParams,
) )
@ -88,6 +89,7 @@ from litellm.utils import (
ModelResponse, ModelResponse,
_is_region_eu, _is_region_eu,
calculate_max_parallel_requests, calculate_max_parallel_requests,
create_proxy_transport_and_mounts,
get_utc_datetime, get_utc_datetime,
) )
@ -169,6 +171,7 @@ class Router:
routing_strategy_args: dict = {}, # just for latency-based routing routing_strategy_args: dict = {}, # just for latency-based routing
semaphore: Optional[asyncio.Semaphore] = None, semaphore: Optional[asyncio.Semaphore] = None,
alerting_config: Optional[AlertingConfig] = None, alerting_config: Optional[AlertingConfig] = None,
router_general_settings: Optional[RouterGeneralSettings] = None,
) -> None: ) -> None:
""" """
Initialize the Router class with the given parameters for caching, reliability, and routing strategy. Initialize the Router class with the given parameters for caching, reliability, and routing strategy.
@ -246,6 +249,9 @@ class Router:
verbose_router_logger.setLevel(logging.INFO) verbose_router_logger.setLevel(logging.INFO)
elif debug_level == "DEBUG": elif debug_level == "DEBUG":
verbose_router_logger.setLevel(logging.DEBUG) verbose_router_logger.setLevel(logging.DEBUG)
self.router_general_settings: Optional[RouterGeneralSettings] = (
router_general_settings
)
self.assistants_config = assistants_config self.assistants_config = assistants_config
self.deployment_names: List = ( self.deployment_names: List = (
@ -3247,520 +3253,6 @@ class Router:
except Exception as e: except Exception as e:
raise e raise e
def set_client(self, model: dict):
"""
- Initializes Azure/OpenAI clients. Stores them in cache, b/c of this - https://github.com/BerriAI/litellm/issues/1278
- Initializes Semaphore for client w/ rpm. Stores them in cache. b/c of this - https://github.com/BerriAI/litellm/issues/2994
"""
client_ttl = self.client_ttl
litellm_params = model.get("litellm_params", {})
model_name = litellm_params.get("model")
model_id = model["model_info"]["id"]
# ### IF RPM SET - initialize a semaphore ###
rpm = litellm_params.get("rpm", None)
tpm = litellm_params.get("tpm", None)
max_parallel_requests = litellm_params.get("max_parallel_requests", None)
calculated_max_parallel_requests = calculate_max_parallel_requests(
rpm=rpm,
max_parallel_requests=max_parallel_requests,
tpm=tpm,
default_max_parallel_requests=self.default_max_parallel_requests,
)
if calculated_max_parallel_requests:
semaphore = asyncio.Semaphore(calculated_max_parallel_requests)
cache_key = f"{model_id}_max_parallel_requests_client"
self.cache.set_cache(
key=cache_key,
value=semaphore,
local_only=True,
)
#### for OpenAI / Azure we need to initialize the Client for High Traffic ########
custom_llm_provider = litellm_params.get("custom_llm_provider")
custom_llm_provider = custom_llm_provider or model_name.split("/", 1)[0] or ""
default_api_base = None
default_api_key = None
if custom_llm_provider in litellm.openai_compatible_providers:
_, custom_llm_provider, api_key, api_base = litellm.get_llm_provider(
model=model_name
)
default_api_base = api_base
default_api_key = api_key
if (
model_name in litellm.open_ai_chat_completion_models
or custom_llm_provider in litellm.openai_compatible_providers
or custom_llm_provider == "azure"
or custom_llm_provider == "azure_text"
or custom_llm_provider == "custom_openai"
or custom_llm_provider == "openai"
or custom_llm_provider == "text-completion-openai"
or "ft:gpt-3.5-turbo" in model_name
or model_name in litellm.open_ai_embedding_models
):
is_azure_ai_studio_model: bool = False
if custom_llm_provider == "azure":
if litellm.utils._is_non_openai_azure_model(model_name):
is_azure_ai_studio_model = True
custom_llm_provider = "openai"
# remove azure prefix from model_name
model_name = model_name.replace("azure/", "")
# glorified / complicated reading of configs
# user can pass vars directly or they can pass os.environ/AZURE_API_KEY, in which case we will read the env
# we do this here because we init clients for Azure, OpenAI and we need to set the right key
api_key = litellm_params.get("api_key") or default_api_key
if (
api_key
and isinstance(api_key, str)
and api_key.startswith("os.environ/")
):
api_key_env_name = api_key.replace("os.environ/", "")
api_key = litellm.get_secret(api_key_env_name)
litellm_params["api_key"] = api_key
api_base = litellm_params.get("api_base")
base_url = litellm_params.get("base_url")
api_base = (
api_base or base_url or default_api_base
) # allow users to pass in `api_base` or `base_url` for azure
if api_base and api_base.startswith("os.environ/"):
api_base_env_name = api_base.replace("os.environ/", "")
api_base = litellm.get_secret(api_base_env_name)
litellm_params["api_base"] = api_base
## AZURE AI STUDIO MISTRAL CHECK ##
"""
Make sure api base ends in /v1/
if not, add it - https://github.com/BerriAI/litellm/issues/2279
"""
if (
is_azure_ai_studio_model is True
and api_base is not None
and isinstance(api_base, str)
and not api_base.endswith("/v1/")
):
# check if it ends with a trailing slash
if api_base.endswith("/"):
api_base += "v1/"
elif api_base.endswith("/v1"):
api_base += "/"
else:
api_base += "/v1/"
api_version = litellm_params.get("api_version")
if api_version and api_version.startswith("os.environ/"):
api_version_env_name = api_version.replace("os.environ/", "")
api_version = litellm.get_secret(api_version_env_name)
litellm_params["api_version"] = api_version
timeout = litellm_params.pop("timeout", None) or litellm.request_timeout
if isinstance(timeout, str) and timeout.startswith("os.environ/"):
timeout_env_name = timeout.replace("os.environ/", "")
timeout = litellm.get_secret(timeout_env_name)
litellm_params["timeout"] = timeout
stream_timeout = litellm_params.pop(
"stream_timeout", timeout
) # if no stream_timeout is set, default to timeout
if isinstance(stream_timeout, str) and stream_timeout.startswith(
"os.environ/"
):
stream_timeout_env_name = stream_timeout.replace("os.environ/", "")
stream_timeout = litellm.get_secret(stream_timeout_env_name)
litellm_params["stream_timeout"] = stream_timeout
max_retries = litellm_params.pop(
"max_retries", 0
) # router handles retry logic
if isinstance(max_retries, str) and max_retries.startswith("os.environ/"):
max_retries_env_name = max_retries.replace("os.environ/", "")
max_retries = litellm.get_secret(max_retries_env_name)
litellm_params["max_retries"] = max_retries
# proxy support
import os
import httpx
# Check if the HTTP_PROXY and HTTPS_PROXY environment variables are set and use them accordingly.
http_proxy = os.getenv("HTTP_PROXY", None)
https_proxy = os.getenv("HTTPS_PROXY", None)
no_proxy = os.getenv("NO_PROXY", None)
# Create the proxies dictionary only if the environment variables are set.
sync_proxy_mounts = None
async_proxy_mounts = None
if http_proxy is not None and https_proxy is not None:
sync_proxy_mounts = {
"http://": httpx.HTTPTransport(proxy=httpx.Proxy(url=http_proxy)),
"https://": httpx.HTTPTransport(proxy=httpx.Proxy(url=https_proxy)),
}
async_proxy_mounts = {
"http://": httpx.AsyncHTTPTransport(
proxy=httpx.Proxy(url=http_proxy)
),
"https://": httpx.AsyncHTTPTransport(
proxy=httpx.Proxy(url=https_proxy)
),
}
# assume no_proxy is a list of comma separated urls
if no_proxy is not None and isinstance(no_proxy, str):
no_proxy_urls = no_proxy.split(",")
for url in no_proxy_urls: # set no-proxy support for specific urls
sync_proxy_mounts[url] = None # type: ignore
async_proxy_mounts[url] = None # type: ignore
organization = litellm_params.get("organization", None)
if isinstance(organization, str) and organization.startswith("os.environ/"):
organization_env_name = organization.replace("os.environ/", "")
organization = litellm.get_secret(organization_env_name)
litellm_params["organization"] = organization
if custom_llm_provider == "azure" or custom_llm_provider == "azure_text":
if api_base is None or not isinstance(api_base, str):
filtered_litellm_params = {
k: v
for k, v in model["litellm_params"].items()
if k != "api_key"
}
_filtered_model = {
"model_name": model["model_name"],
"litellm_params": filtered_litellm_params,
}
raise ValueError(
f"api_base is required for Azure OpenAI. Set it on your config. Model - {_filtered_model}"
)
azure_ad_token = litellm_params.get("azure_ad_token")
if azure_ad_token is not None:
if azure_ad_token.startswith("oidc/"):
azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
if api_version is None:
api_version = litellm.AZURE_DEFAULT_API_VERSION
if "gateway.ai.cloudflare.com" in api_base:
if not api_base.endswith("/"):
api_base += "/"
azure_model = model_name.replace("azure/", "")
api_base += f"{azure_model}"
cache_key = f"{model_id}_async_client"
_client = openai.AsyncAzureOpenAI(
api_key=api_key,
azure_ad_token=azure_ad_token,
base_url=api_base,
api_version=api_version,
timeout=timeout,
max_retries=max_retries,
http_client=httpx.AsyncClient(
transport=AsyncCustomHTTPTransport(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
),
mounts=async_proxy_mounts,
), # type: ignore
)
self.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
cache_key = f"{model_id}_client"
_client = openai.AzureOpenAI( # type: ignore
api_key=api_key,
azure_ad_token=azure_ad_token,
base_url=api_base,
api_version=api_version,
timeout=timeout,
max_retries=max_retries,
http_client=httpx.Client(
transport=CustomHTTPTransport(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
),
mounts=sync_proxy_mounts,
), # type: ignore
)
self.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
# streaming clients can have diff timeouts
cache_key = f"{model_id}_stream_async_client"
_client = openai.AsyncAzureOpenAI( # type: ignore
api_key=api_key,
azure_ad_token=azure_ad_token,
base_url=api_base,
api_version=api_version,
timeout=stream_timeout,
max_retries=max_retries,
http_client=httpx.AsyncClient(
transport=AsyncCustomHTTPTransport(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
),
mounts=async_proxy_mounts,
), # type: ignore
)
self.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
cache_key = f"{model_id}_stream_client"
_client = openai.AzureOpenAI( # type: ignore
api_key=api_key,
azure_ad_token=azure_ad_token,
base_url=api_base,
api_version=api_version,
timeout=stream_timeout,
max_retries=max_retries,
http_client=httpx.Client(
transport=CustomHTTPTransport(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
),
mounts=sync_proxy_mounts,
), # type: ignore
)
self.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
else:
_api_key = api_key
if _api_key is not None and isinstance(_api_key, str):
# only show first 8 chars of api_key
_api_key = _api_key[:8] + "*" * 15
verbose_router_logger.debug(
f"Initializing Azure OpenAI Client for {model_name}, Api Base: {str(api_base)}, Api Key:{_api_key}"
)
azure_client_params = {
"api_key": api_key,
"azure_endpoint": api_base,
"api_version": api_version,
"azure_ad_token": azure_ad_token,
}
from litellm.llms.azure import select_azure_base_url_or_endpoint
# this decides if we should set azure_endpoint or base_url on Azure OpenAI Client
# required to support GPT-4 vision enhancements, since base_url needs to be set on Azure OpenAI Client
azure_client_params = select_azure_base_url_or_endpoint(
azure_client_params
)
cache_key = f"{model_id}_async_client"
_client = openai.AsyncAzureOpenAI( # type: ignore
**azure_client_params,
timeout=timeout,
max_retries=max_retries,
http_client=httpx.AsyncClient(
transport=AsyncCustomHTTPTransport(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
),
mounts=async_proxy_mounts,
), # type: ignore
)
self.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
cache_key = f"{model_id}_client"
_client = openai.AzureOpenAI( # type: ignore
**azure_client_params,
timeout=timeout,
max_retries=max_retries,
http_client=httpx.Client(
transport=CustomHTTPTransport(
verify=litellm.ssl_verify,
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
),
mounts=sync_proxy_mounts,
), # type: ignore
)
self.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
# streaming clients should have diff timeouts
cache_key = f"{model_id}_stream_async_client"
_client = openai.AsyncAzureOpenAI( # type: ignore
**azure_client_params,
timeout=stream_timeout,
max_retries=max_retries,
http_client=httpx.AsyncClient(
transport=AsyncCustomHTTPTransport(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
),
mounts=async_proxy_mounts,
),
)
self.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
cache_key = f"{model_id}_stream_client"
_client = openai.AzureOpenAI( # type: ignore
**azure_client_params,
timeout=stream_timeout,
max_retries=max_retries,
http_client=httpx.Client(
transport=CustomHTTPTransport(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
),
mounts=sync_proxy_mounts,
),
)
self.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
else:
_api_key = api_key # type: ignore
if _api_key is not None and isinstance(_api_key, str):
# only show first 8 chars of api_key
_api_key = _api_key[:8] + "*" * 15
verbose_router_logger.debug(
f"Initializing OpenAI Client for {model_name}, Api Base:{str(api_base)}, Api Key:{_api_key}"
)
cache_key = f"{model_id}_async_client"
_client = openai.AsyncOpenAI( # type: ignore
api_key=api_key,
base_url=api_base,
timeout=timeout,
max_retries=max_retries,
organization=organization,
http_client=httpx.AsyncClient(
transport=AsyncCustomHTTPTransport(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
),
mounts=async_proxy_mounts,
), # type: ignore
)
self.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
cache_key = f"{model_id}_client"
_client = openai.OpenAI( # type: ignore
api_key=api_key,
base_url=api_base,
timeout=timeout,
max_retries=max_retries,
organization=organization,
http_client=httpx.Client(
transport=CustomHTTPTransport(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
),
mounts=sync_proxy_mounts,
), # type: ignore
)
self.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
# streaming clients should have diff timeouts
cache_key = f"{model_id}_stream_async_client"
_client = openai.AsyncOpenAI( # type: ignore
api_key=api_key,
base_url=api_base,
timeout=stream_timeout,
max_retries=max_retries,
organization=organization,
http_client=httpx.AsyncClient(
transport=AsyncCustomHTTPTransport(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
),
mounts=async_proxy_mounts,
), # type: ignore
)
self.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
# streaming clients should have diff timeouts
cache_key = f"{model_id}_stream_client"
_client = openai.OpenAI( # type: ignore
api_key=api_key,
base_url=api_base,
timeout=stream_timeout,
max_retries=max_retries,
organization=organization,
http_client=httpx.Client(
transport=CustomHTTPTransport(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
),
mounts=sync_proxy_mounts,
), # type: ignore
)
self.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
    def _generate_model_id(self, model_group: str, litellm_params: dict):
        """
        Helper function to consistently generate the same id for a deployment
@@ -3904,7 +3396,9 @@ class Router:
            raise Exception(f"Unsupported provider - {custom_llm_provider}")

        # init OpenAI, Azure clients
-       self.set_client(model=deployment.to_json(exclude_none=True))
+       set_client(
+           litellm_router_instance=self, model=deployment.to_json(exclude_none=True)
+       )

        # set region (if azure model) ## PREVIEW FEATURE ##
        if litellm.enable_preview_features == True:
@@ -4432,7 +3926,7 @@ class Router:
                """
                Re-initialize the client
                """
-               self.set_client(model=deployment)
+               set_client(litellm_router_instance=self, model=deployment)
                client = self.cache.get_cache(key=cache_key, local_only=True)
                return client
            else:
@@ -4442,7 +3936,7 @@ class Router:
                """
                Re-initialize the client
                """
-               self.set_client(model=deployment)
+               set_client(litellm_router_instance=self, model=deployment)
                client = self.cache.get_cache(key=cache_key, local_only=True)
                return client
            else:
@@ -4453,7 +3947,7 @@ class Router:
                """
                Re-initialize the client
                """
-               self.set_client(model=deployment)
+               set_client(litellm_router_instance=self, model=deployment)
                client = self.cache.get_cache(key=cache_key)
                return client
            else:
@@ -4463,7 +3957,7 @@ class Router:
                """
                Re-initialize the client
                """
-               self.set_client(model=deployment)
+               set_client(litellm_router_instance=self, model=deployment)
                client = self.cache.get_cache(key=cache_key)
                return client
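Reviewer note: the refactor above moves `set_client` off the `Router` class into a free function that takes the router explicitly. A minimal sketch of the new call pattern, with the import path assumed since the diff viewer suppressed filenames:

from litellm.router_utils.client_initalization_utils import set_client  # assumed path

def reinitialize_deployment_client(router, deployment: dict):
    # rebuild the cached clients for one deployment, then read one back
    set_client(litellm_router_instance=router, model=deployment)
    model_id = deployment["model_info"]["id"]
    return router.cache.get_cache(key=f"{model_id}_async_client", local_only=True)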


@@ -0,0 +1,495 @@
import asyncio
import os
import traceback
from typing import TYPE_CHECKING, Any
import httpx
import openai
import litellm
from litellm._logging import verbose_router_logger
from litellm.llms.azure import get_azure_ad_token_from_oidc
from litellm.utils import calculate_max_parallel_requests
if TYPE_CHECKING:
from litellm.router import Router as _Router
LitellmRouter = _Router
else:
LitellmRouter = Any
def should_initialize_sync_client(
litellm_router_instance: LitellmRouter,
) -> bool:
"""
Returns if Sync OpenAI, Azure Clients should be initialized.
Do not init sync clients when router.router_general_settings.async_only_mode is True
"""
if litellm_router_instance is None:
return False
if litellm_router_instance.router_general_settings is not None:
if (
hasattr(litellm_router_instance, "router_general_settings")
and hasattr(
litellm_router_instance.router_general_settings, "async_only_mode"
)
and litellm_router_instance.router_general_settings.async_only_mode is True
):
return False
return True
def set_client(litellm_router_instance: LitellmRouter, model: dict):
"""
- Initializes Azure/OpenAI clients. Stores them in cache, b/c of this - https://github.com/BerriAI/litellm/issues/1278
- Initializes Semaphore for client w/ rpm. Stores them in cache. b/c of this - https://github.com/BerriAI/litellm/issues/2994
"""
client_ttl = litellm_router_instance.client_ttl
litellm_params = model.get("litellm_params", {})
model_name = litellm_params.get("model")
model_id = model["model_info"]["id"]
# ### IF RPM SET - initialize a semaphore ###
rpm = litellm_params.get("rpm", None)
tpm = litellm_params.get("tpm", None)
max_parallel_requests = litellm_params.get("max_parallel_requests", None)
calculated_max_parallel_requests = calculate_max_parallel_requests(
rpm=rpm,
max_parallel_requests=max_parallel_requests,
tpm=tpm,
default_max_parallel_requests=litellm_router_instance.default_max_parallel_requests,
)
if calculated_max_parallel_requests:
semaphore = asyncio.Semaphore(calculated_max_parallel_requests)
cache_key = f"{model_id}_max_parallel_requests_client"
litellm_router_instance.cache.set_cache(
key=cache_key,
value=semaphore,
local_only=True,
)
#### for OpenAI / Azure we need to initialize the Client for High Traffic ########
custom_llm_provider = litellm_params.get("custom_llm_provider")
custom_llm_provider = custom_llm_provider or model_name.split("/", 1)[0] or ""
default_api_base = None
default_api_key = None
if custom_llm_provider in litellm.openai_compatible_providers:
_, custom_llm_provider, api_key, api_base = litellm.get_llm_provider(
model=model_name
)
default_api_base = api_base
default_api_key = api_key
if (
model_name in litellm.open_ai_chat_completion_models
or custom_llm_provider in litellm.openai_compatible_providers
or custom_llm_provider == "azure"
or custom_llm_provider == "azure_text"
or custom_llm_provider == "custom_openai"
or custom_llm_provider == "openai"
or custom_llm_provider == "text-completion-openai"
or "ft:gpt-3.5-turbo" in model_name
or model_name in litellm.open_ai_embedding_models
):
is_azure_ai_studio_model: bool = False
if custom_llm_provider == "azure":
if litellm.utils._is_non_openai_azure_model(model_name):
is_azure_ai_studio_model = True
custom_llm_provider = "openai"
# remove azure prefix from model_name
model_name = model_name.replace("azure/", "")
# glorified / complicated reading of configs
# user can pass vars directly or they can pass os.environ/AZURE_API_KEY, in which case we will read the env
# we do this here because we init clients for Azure, OpenAI and we need to set the right key
api_key = litellm_params.get("api_key") or default_api_key
if api_key and isinstance(api_key, str) and api_key.startswith("os.environ/"):
api_key_env_name = api_key.replace("os.environ/", "")
api_key = litellm.get_secret(api_key_env_name)
litellm_params["api_key"] = api_key
api_base = litellm_params.get("api_base")
base_url = litellm_params.get("base_url")
api_base = (
api_base or base_url or default_api_base
) # allow users to pass in `api_base` or `base_url` for azure
if api_base and api_base.startswith("os.environ/"):
api_base_env_name = api_base.replace("os.environ/", "")
api_base = litellm.get_secret(api_base_env_name)
litellm_params["api_base"] = api_base
## AZURE AI STUDIO MISTRAL CHECK ##
"""
Make sure api base ends in /v1/
if not, add it - https://github.com/BerriAI/litellm/issues/2279
"""
if (
is_azure_ai_studio_model is True
and api_base is not None
and isinstance(api_base, str)
and not api_base.endswith("/v1/")
):
# check if it ends with a trailing slash
if api_base.endswith("/"):
api_base += "v1/"
elif api_base.endswith("/v1"):
api_base += "/"
else:
api_base += "/v1/"
api_version = litellm_params.get("api_version")
if api_version and api_version.startswith("os.environ/"):
api_version_env_name = api_version.replace("os.environ/", "")
api_version = litellm.get_secret(api_version_env_name)
litellm_params["api_version"] = api_version
timeout = litellm_params.pop("timeout", None) or litellm.request_timeout
if isinstance(timeout, str) and timeout.startswith("os.environ/"):
timeout_env_name = timeout.replace("os.environ/", "")
timeout = litellm.get_secret(timeout_env_name)
litellm_params["timeout"] = timeout
stream_timeout = litellm_params.pop(
"stream_timeout", timeout
) # if no stream_timeout is set, default to timeout
if isinstance(stream_timeout, str) and stream_timeout.startswith("os.environ/"):
stream_timeout_env_name = stream_timeout.replace("os.environ/", "")
stream_timeout = litellm.get_secret(stream_timeout_env_name)
litellm_params["stream_timeout"] = stream_timeout
max_retries = litellm_params.pop("max_retries", 0) # router handles retry logic
if isinstance(max_retries, str) and max_retries.startswith("os.environ/"):
max_retries_env_name = max_retries.replace("os.environ/", "")
max_retries = litellm.get_secret(max_retries_env_name)
litellm_params["max_retries"] = max_retries
organization = litellm_params.get("organization", None)
if isinstance(organization, str) and organization.startswith("os.environ/"):
organization_env_name = organization.replace("os.environ/", "")
organization = litellm.get_secret(organization_env_name)
litellm_params["organization"] = organization
if custom_llm_provider == "azure" or custom_llm_provider == "azure_text":
if api_base is None or not isinstance(api_base, str):
filtered_litellm_params = {
k: v for k, v in model["litellm_params"].items() if k != "api_key"
}
_filtered_model = {
"model_name": model["model_name"],
"litellm_params": filtered_litellm_params,
}
raise ValueError(
f"api_base is required for Azure OpenAI. Set it on your config. Model - {_filtered_model}"
)
azure_ad_token = litellm_params.get("azure_ad_token")
if azure_ad_token is not None:
if azure_ad_token.startswith("oidc/"):
azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
if api_version is None:
api_version = litellm.AZURE_DEFAULT_API_VERSION
if "gateway.ai.cloudflare.com" in api_base:
if not api_base.endswith("/"):
api_base += "/"
azure_model = model_name.replace("azure/", "")
api_base += f"{azure_model}"
cache_key = f"{model_id}_async_client"
_client = openai.AsyncAzureOpenAI(
api_key=api_key,
azure_ad_token=azure_ad_token,
base_url=api_base,
api_version=api_version,
timeout=timeout,
max_retries=max_retries,
http_client=httpx.AsyncClient(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
), # type: ignore
)
litellm_router_instance.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
if should_initialize_sync_client(
litellm_router_instance=litellm_router_instance
):
cache_key = f"{model_id}_client"
_client = openai.AzureOpenAI( # type: ignore
api_key=api_key,
azure_ad_token=azure_ad_token,
base_url=api_base,
api_version=api_version,
timeout=timeout,
max_retries=max_retries,
http_client=httpx.Client(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
), # type: ignore
)
litellm_router_instance.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
# streaming clients can have diff timeouts
cache_key = f"{model_id}_stream_async_client"
_client = openai.AsyncAzureOpenAI( # type: ignore
api_key=api_key,
azure_ad_token=azure_ad_token,
base_url=api_base,
api_version=api_version,
timeout=stream_timeout,
max_retries=max_retries,
http_client=httpx.AsyncClient(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
), # type: ignore
)
litellm_router_instance.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
if should_initialize_sync_client(
litellm_router_instance=litellm_router_instance
):
cache_key = f"{model_id}_stream_client"
_client = openai.AzureOpenAI( # type: ignore
api_key=api_key,
azure_ad_token=azure_ad_token,
base_url=api_base,
api_version=api_version,
timeout=stream_timeout,
max_retries=max_retries,
http_client=httpx.Client(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
), # type: ignore
)
litellm_router_instance.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
else:
_api_key = api_key
if _api_key is not None and isinstance(_api_key, str):
# only show first 8 chars of api_key
_api_key = _api_key[:8] + "*" * 15
verbose_router_logger.debug(
f"Initializing Azure OpenAI Client for {model_name}, Api Base: {str(api_base)}, Api Key:{_api_key}"
)
azure_client_params = {
"api_key": api_key,
"azure_endpoint": api_base,
"api_version": api_version,
"azure_ad_token": azure_ad_token,
}
from litellm.llms.azure import select_azure_base_url_or_endpoint
# this decides if we should set azure_endpoint or base_url on Azure OpenAI Client
# required to support GPT-4 vision enhancements, since base_url needs to be set on Azure OpenAI Client
azure_client_params = select_azure_base_url_or_endpoint(
azure_client_params
)
cache_key = f"{model_id}_async_client"
_client = openai.AsyncAzureOpenAI( # type: ignore
**azure_client_params,
timeout=timeout,
max_retries=max_retries,
http_client=httpx.AsyncClient(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
), # type: ignore
)
litellm_router_instance.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
if should_initialize_sync_client(
litellm_router_instance=litellm_router_instance
):
cache_key = f"{model_id}_client"
_client = openai.AzureOpenAI( # type: ignore
**azure_client_params,
timeout=timeout,
max_retries=max_retries,
http_client=httpx.Client(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
), # type: ignore
)
litellm_router_instance.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
# streaming clients should have diff timeouts
cache_key = f"{model_id}_stream_async_client"
_client = openai.AsyncAzureOpenAI( # type: ignore
**azure_client_params,
timeout=stream_timeout,
max_retries=max_retries,
http_client=httpx.AsyncClient(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
),
)
litellm_router_instance.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
if should_initialize_sync_client(
litellm_router_instance=litellm_router_instance
):
cache_key = f"{model_id}_stream_client"
_client = openai.AzureOpenAI( # type: ignore
**azure_client_params,
timeout=stream_timeout,
max_retries=max_retries,
http_client=httpx.Client(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
),
)
litellm_router_instance.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
else:
_api_key = api_key # type: ignore
if _api_key is not None and isinstance(_api_key, str):
# only show first 5 chars of api_key
_api_key = _api_key[:8] + "*" * 15
verbose_router_logger.debug(
f"Initializing OpenAI Client for {model_name}, Api Base:{str(api_base)}, Api Key:{_api_key}"
)
cache_key = f"{model_id}_async_client"
_client = openai.AsyncOpenAI( # type: ignore
api_key=api_key,
base_url=api_base,
timeout=timeout,
max_retries=max_retries,
organization=organization,
http_client=httpx.AsyncClient(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
), # type: ignore
)
litellm_router_instance.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
if should_initialize_sync_client(
litellm_router_instance=litellm_router_instance
):
cache_key = f"{model_id}_client"
_client = openai.OpenAI( # type: ignore
api_key=api_key,
base_url=api_base,
timeout=timeout,
max_retries=max_retries,
organization=organization,
http_client=httpx.Client(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
), # type: ignore
)
litellm_router_instance.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
# streaming clients should have diff timeouts
cache_key = f"{model_id}_stream_async_client"
_client = openai.AsyncOpenAI( # type: ignore
api_key=api_key,
base_url=api_base,
timeout=stream_timeout,
max_retries=max_retries,
organization=organization,
http_client=httpx.AsyncClient(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
), # type: ignore
)
litellm_router_instance.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
if should_initialize_sync_client(
litellm_router_instance=litellm_router_instance
):
# streaming clients should have diff timeouts
cache_key = f"{model_id}_stream_client"
_client = openai.OpenAI( # type: ignore
api_key=api_key,
base_url=api_base,
timeout=stream_timeout,
max_retries=max_retries,
organization=organization,
http_client=httpx.Client(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
), # type: ignore
)
litellm_router_instance.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
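Taken together, `set_client` leaves up to four entries per deployment in the router's in-memory cache: `{model_id}_client`, `{model_id}_async_client`, `{model_id}_stream_client`, and `{model_id}_stream_async_client`. A minimal sketch of inspecting them after Router init, mirroring the new router-init tests further down (all deployment values are placeholders):

from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
                "model": "azure/my-deployment",  # placeholder
                "api_key": "sk-placeholder",  # placeholder
                "api_base": "https://example.openai.azure.com",  # placeholder
                "api_version": "2024-02-01",
            },
        }
    ]
)

model_id = router.model_list[0]["model_info"]["id"]
for suffix in ("_client", "_async_client", "_stream_client", "_stream_async_client"):
    print(suffix, router.cache.get_cache(f"{model_id}{suffix}") is not None)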


@@ -1607,7 +1607,17 @@ def test_caching_redis_simple(caplog):
    print(m)
    print(time.time() - s2)

+   redis_async_caching_error = False
+   redis_service_logging_error = False
    captured_logs = [rec.message for rec in caplog.records]
-   assert "LiteLLM Redis Caching: async set" not in captured_logs
-   assert "ServiceLogging.async_service_success_hook" not in captured_logs
+   print(f"captured_logs: {captured_logs}")
+   for item in captured_logs:
+       if "Error connecting to Async Redis client" in item:
+           redis_async_caching_error = True
+       if "ServiceLogging.async_service_success_hook" in item:
+           redis_service_logging_error = True
+
+   assert redis_async_caching_error is False
+   assert redis_service_logging_error is False


@@ -712,7 +712,6 @@ def test_vertex_ai_claude_completion_cost():
    assert cost == predicted_cost

@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_completion_cost_hidden_params(sync_mode):
@@ -732,6 +731,7 @@ async def test_completion_cost_hidden_params(sync_mode):
    assert "response_cost" in response._hidden_params
    assert isinstance(response._hidden_params["response_cost"], float)

def test_vertex_ai_gemini_predict_cost():
    model = "gemini-1.5-flash"
    messages = [{"role": "user", "content": "Hey, hows it going???"}]
@@ -739,3 +739,16 @@ def test_vertex_ai_gemini_predict_cost():
    assert predictive_cost > 0
@pytest.mark.parametrize("model", ["openai/tts-1", "azure/tts-1"])
def test_completion_cost_tts(model):
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")
cost = completion_cost(
model=model,
prompt="the quick brown fox jumped over the lazy dogs",
call_type="speech",
)
assert cost > 0


@@ -2,23 +2,30 @@
## Unit tests for ProxyConfig class

-import sys, os
+import os
+import sys
import traceback

from dotenv import load_dotenv

load_dotenv()
-import os, io
+import io
+import os

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
-import pytest, litellm
-from pydantic import BaseModel, ConfigDict
-from litellm.proxy.proxy_server import ProxyConfig
-from litellm.proxy.utils import encrypt_value, ProxyLogging, DualCache
-from litellm.types.router import Deployment, LiteLLM_Params, ModelInfo
from typing import Literal

+import pytest
+from pydantic import BaseModel, ConfigDict
+
+import litellm
+from litellm.proxy.common_utils.encrypt_decrypt_utils import encrypt_value
+from litellm.proxy.proxy_server import ProxyConfig
+from litellm.proxy.utils import DualCache, ProxyLogging
+from litellm.types.router import Deployment, LiteLLM_Params, ModelInfo

class DBModel(BaseModel):
    model_id: str
@@ -28,6 +35,7 @@ class DBModel(BaseModel):
    model_config = ConfigDict(protected_namespaces=())

@pytest.mark.asyncio
async def test_delete_deployment():
    """


@@ -1,8 +1,13 @@
# What is this?
## Unit test for presidio pii masking
-import sys, os, asyncio, time, random
-from datetime import datetime
+import asyncio
+import os
+import random
+import sys
+import time
import traceback
+from datetime import datetime

from dotenv import load_dotenv

load_dotenv()
@@ -12,12 +17,40 @@ sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import pytest

import litellm
-from litellm.proxy.hooks.presidio_pii_masking import _OPTIONAL_PresidioPIIMasking
from litellm import Router, mock_completion
-from litellm.proxy.utils import ProxyLogging
-from litellm.proxy._types import UserAPIKeyAuth
from litellm.caching import DualCache
+from litellm.proxy._types import UserAPIKeyAuth
+from litellm.proxy.hooks.presidio_pii_masking import _OPTIONAL_PresidioPIIMasking
+from litellm.proxy.utils import ProxyLogging
@pytest.mark.parametrize(
"base_url",
[
"presidio-analyzer-s3pa:10000",
"https://presidio-analyzer-s3pa:10000",
"http://presidio-analyzer-s3pa:10000",
],
)
def test_validate_environment_missing_http(base_url):
pii_masking = _OPTIONAL_PresidioPIIMasking(mock_testing=True)
os.environ["PRESIDIO_ANALYZER_API_BASE"] = f"{base_url}/analyze"
os.environ["PRESIDIO_ANONYMIZER_API_BASE"] = f"{base_url}/anonymize"
pii_masking.validate_environment()
expected_url = base_url
if not (base_url.startswith("https://") or base_url.startswith("http://")):
expected_url = "http://" + base_url
assert (
pii_masking.presidio_anonymizer_api_base == f"{expected_url}/anonymize/"
), "Got={}, Expected={}".format(
pii_masking.presidio_anonymizer_api_base, f"{expected_url}/anonymize/"
)
assert pii_masking.presidio_analyzer_api_base == f"{expected_url}/analyze/"
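Put differently, `validate_environment` should default to `http://` when the scheme is missing and guarantee a trailing slash on each route. A standalone sketch of that rule (hypothetical helper, mirroring the assertions above):

def normalize_presidio_base(base_url: str, route: str) -> str:
    # default to http:// when no scheme is given, and always end the route with /
    if not (base_url.startswith("http://") or base_url.startswith("https://")):
        base_url = "http://" + base_url
    return f"{base_url}{route}/"

assert (
    normalize_presidio_base("presidio-analyzer-s3pa:10000", "/analyze")
    == "http://presidio-analyzer-s3pa:10000/analyze/"
)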
@pytest.mark.asyncio


@@ -1894,6 +1894,49 @@ async def test_router_model_usage(mock_response):
        raise e
@pytest.mark.skip(reason="Check if this is causing ci/cd issues.")
@pytest.mark.asyncio
async def test_is_proxy_set():
"""
Assert if proxy is set
"""
from httpx import AsyncHTTPTransport
os.environ["HTTPS_PROXY"] = "https://proxy.example.com:8080"
from openai import AsyncAzureOpenAI
    # Function to check if a proxy is set on the client
def check_proxy(client: httpx.AsyncClient) -> bool:
print(f"client._mounts: {client._mounts}")
assert len(client._mounts) == 1
for k, v in client._mounts.items():
assert isinstance(v, AsyncHTTPTransport)
return True
llm_router = Router(
model_list=[
{
"model_name": "gpt-4",
"litellm_params": {
"model": "azure/gpt-3.5-turbo",
"api_key": "my-key",
"api_base": "my-base",
"mock_response": "hello world",
},
"model_info": {"id": "1"},
}
]
)
_deployment = llm_router.get_deployment(model_id="1")
model_client: AsyncAzureOpenAI = llm_router._get_client(
deployment=_deployment, kwargs={}, client_type="async"
) # type: ignore
assert check_proxy(client=model_client._client)
@pytest.mark.parametrize(
    "model, base_model, llm_provider",
    [


@@ -1,16 +1,22 @@
# this tests if the router is initialized correctly
-import sys, os, time
-import traceback, asyncio
+import asyncio
+import os
+import sys
+import time
+import traceback

import pytest

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
+from collections import defaultdict
+from concurrent.futures import ThreadPoolExecutor
+
+from dotenv import load_dotenv

import litellm
from litellm import Router
-from concurrent.futures import ThreadPoolExecutor
-from collections import defaultdict
-from dotenv import load_dotenv

load_dotenv()
@@ -24,6 +30,7 @@ load_dotenv()
def test_init_clients():
    litellm.set_verbose = True
    import logging

    from litellm._logging import verbose_router_logger

    verbose_router_logger.setLevel(logging.DEBUG)
@@ -489,6 +496,7 @@ def test_init_clients_azure_command_r_plus():
    # For azure/command-r-plus we need to use openai.OpenAI because of how the Azure provider requires requests being sent
    litellm.set_verbose = True
    import logging

    from litellm._logging import verbose_router_logger

    verbose_router_logger.setLevel(logging.DEBUG)
@@ -585,3 +593,46 @@ async def test_text_completion_with_organization():
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
def test_init_clients_async_mode():
litellm.set_verbose = True
import logging
from litellm._logging import verbose_router_logger
from litellm.types.router import RouterGeneralSettings
verbose_router_logger.setLevel(logging.DEBUG)
try:
print("testing init 4 clients with diff timeouts")
model_list = [
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
"timeout": 0.01,
"stream_timeout": 0.000_001,
"max_retries": 7,
},
},
]
router = Router(
model_list=model_list,
set_verbose=True,
router_general_settings=RouterGeneralSettings(async_only_mode=True),
)
for elem in router.model_list:
model_id = elem["model_info"]["id"]
# sync clients not initialized in async_only_mode=True
assert router.cache.get_cache(f"{model_id}_client") is None
assert router.cache.get_cache(f"{model_id}_stream_client") is None
# only async clients initialized in async_only_mode=True
assert router.cache.get_cache(f"{model_id}_async_client") is not None
assert router.cache.get_cache(f"{model_id}_stream_async_client") is not None
except Exception as e:
pytest.fail(f"Error occurred: {e}")


@@ -1,15 +1,22 @@
-import sys, os, time
-import traceback, asyncio
+import asyncio
+import os
+import sys
+import time
+import traceback

import pytest

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
-from litellm import completion, stream_chunk_builder
-import litellm
-import os, dotenv
-from openai import OpenAI
+import os
+
+import dotenv
import pytest
+from openai import OpenAI
+
+import litellm
+from litellm import completion, stream_chunk_builder

dotenv.load_dotenv()
@@ -147,3 +154,45 @@ def test_stream_chunk_builder_litellm_tool_call_regular_message():

# test_stream_chunk_builder_litellm_tool_call_regular_message()
def test_stream_chunk_builder_litellm_usage_chunks():
"""
Checks if stream_chunk_builder is able to correctly rebuild with given metadata from streaming chunks
"""
messages = [
{"role": "user", "content": "Tell me the funniest joke you know."},
{
"role": "assistant",
"content": "Why did the chicken cross the road?\nYou will not guess this one I bet\n",
},
{"role": "user", "content": "I do not know, why?"},
{"role": "assistant", "content": "uhhhh\n\n\nhmmmm.....\nthinking....\n"},
{"role": "user", "content": "\nI am waiting...\n\n...\n"},
]
# make a regular gemini call
response = completion(
model="gemini/gemini-1.5-flash",
messages=messages,
)
usage: litellm.Usage = response.usage
gemini_pt = usage.prompt_tokens
# make a streaming gemini call
response = completion(
model="gemini/gemini-1.5-flash",
messages=messages,
stream=True,
complete_response=True,
stream_options={"include_usage": True},
)
usage: litellm.Usage = response.usage
stream_rebuilt_pt = usage.prompt_tokens
# assert prompt tokens are the same
assert gemini_pt == stream_rebuilt_pt


@@ -12,6 +12,9 @@ from typing import Tuple
import pytest
from pydantic import BaseModel

+import litellm.litellm_core_utils
+import litellm.litellm_core_utils.litellm_logging
+
sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
@@ -1078,7 +1081,6 @@ def test_vertex_ai_stream(provider):
        print(f"completion_response: {complete_response}")
        assert is_finished == True
-       assert False
    except litellm.RateLimitError as e:
        pass
    except Exception as e:
@@ -3034,8 +3036,11 @@ def test_completion_claude_3_function_call_with_streaming():
        pytest.fail(f"Error occurred: {e}")

+@pytest.mark.parametrize(
+    "model", ["gemini/gemini-1.5-flash"]
+)  # "claude-3-opus-20240229",
@pytest.mark.asyncio
-async def test_acompletion_claude_3_function_call_with_streaming():
+async def test_acompletion_claude_3_function_call_with_streaming(model):
    litellm.set_verbose = True
    tools = [
        {
@@ -3066,7 +3071,7 @@ async def test_acompletion_claude_3_function_call_with_streaming():
    try:
        # test without max tokens
        response = await acompletion(
-           model="claude-3-opus-20240229",
+           model=model,
            messages=messages,
            tools=tools,
            tool_choice="required",
@@ -3453,3 +3458,55 @@ def test_aamazing_unit_test_custom_stream_wrapper_n():
        assert (
            chunk_dict == chunks[idx]
        ), f"idx={idx} translated chunk = {chunk_dict} != openai chunk = {chunks[idx]}"
def test_unit_test_custom_stream_wrapper_function_call():
"""
Test if model returns a tool call, the finish reason is correctly set to 'tool_calls'
"""
from litellm.types.llms.openai import ChatCompletionDeltaChunk
litellm.set_verbose = False
delta: ChatCompletionDeltaChunk = {
"content": None,
"role": "assistant",
"tool_calls": [
{
"function": {"arguments": '"}'},
"type": "function",
"index": 0,
}
],
}
chunk = {
"id": "chatcmpl-123",
"object": "chat.completion.chunk",
"created": 1694268190,
"model": "gpt-3.5-turbo-0125",
"system_fingerprint": "fp_44709d6fcb",
"choices": [{"index": 0, "delta": delta, "finish_reason": "stop"}],
}
chunk = litellm.ModelResponse(**chunk, stream=True)
completion_stream = ModelResponseIterator(model_response=chunk)
response = litellm.CustomStreamWrapper(
completion_stream=completion_stream,
model="gpt-3.5-turbo",
custom_llm_provider="cached_response",
logging_obj=litellm.litellm_core_utils.litellm_logging.Logging(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "Hey"}],
stream=True,
call_type="completion",
start_time=time.time(),
litellm_call_id="12345",
function_id="1245",
),
)
finish_reason: Optional[str] = None
for chunk in response:
if chunk.choices[0].finish_reason is not None:
finish_reason = chunk.choices[0].finish_reason
assert finish_reason == "tool_calls"


@@ -300,7 +300,7 @@ class ListBatchRequest(TypedDict, total=False):
    timeout: Optional[float]

-class ChatCompletionToolCallFunctionChunk(TypedDict):
+class ChatCompletionToolCallFunctionChunk(TypedDict, total=False):
    name: Optional[str]
    arguments: str
@@ -312,7 +312,7 @@ class ChatCompletionToolCallChunk(TypedDict):
    index: int

-class ChatCompletionDeltaToolCallChunk(TypedDict):
+class ChatCompletionDeltaToolCallChunk(TypedDict, total=False):
    id: str
    type: Literal["function"]
    function: ChatCompletionToolCallFunctionChunk
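The move to `total=False` matters for streaming: a mid-stream delta often carries only the next slice of `arguments`, with `id`, `type`, and `name` absent. A small sketch (the import path is assumed from the test imports elsewhere in this commit):

from litellm.types.llms.openai import (
    ChatCompletionDeltaToolCallChunk,
    ChatCompletionToolCallFunctionChunk,
)

# Valid under total=False even though most keys are omitted mid-stream:
partial_fn: ChatCompletionToolCallFunctionChunk = {"arguments": '"}'}
partial_call: ChatCompletionDeltaToolCallChunk = {"function": partial_fn}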


@@ -324,7 +324,12 @@ class DeploymentTypedDict(TypedDict):
    litellm_params: LiteLLMParamsTypedDict

-SPECIAL_MODEL_INFO_PARAMS = ["input_cost_per_token", "output_cost_per_token"]
+SPECIAL_MODEL_INFO_PARAMS = [
+    "input_cost_per_token",
+    "output_cost_per_token",
+    "input_cost_per_character",
+    "output_cost_per_character",
+]

class Deployment(BaseModel):
@@ -517,3 +522,9 @@ class CustomRoutingStrategyBase:
    """
    pass
class RouterGeneralSettings(BaseModel):
async_only_mode: bool = Field(
default=False
) # this will only initialize async clients. Good for memory utils
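The flag is opt-in. A compact sketch of enabling it, same shape as the new `test_init_clients_async_mode` test earlier in this diff (placeholder params):

from litellm import Router
from litellm.types.router import RouterGeneralSettings

router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {"model": "gpt-3.5-turbo", "api_key": "sk-placeholder"},
        }
    ],
    router_general_settings=RouterGeneralSettings(async_only_mode=True),
)
# Only the {model_id}_async_client / {model_id}_stream_async_client entries are
# cached; the two sync clients are skipped, halving client objects per deployment.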


@@ -42,6 +42,8 @@ import httpx
import openai
import requests
import tiktoken
+from httpx import Proxy
+from httpx._utils import get_environment_proxies
from pydantic import BaseModel
from tokenizers import Tokenizer
@@ -2555,6 +2557,24 @@ def get_optional_params(
            message=f"Function calling is not supported by {custom_llm_provider}.",
        )

+   if "tools" in non_default_params:
+       tools = non_default_params["tools"]
+       for (
+           tool
+       ) in (
+           tools
+       ):  # clean out 'additionalProperties = False'. Causes vertexai/gemini OpenAI API Schema errors - https://github.com/langchain-ai/langchainjs/issues/5240
+           tool_function = tool.get("function", {})
+           parameters = tool_function.get("parameters", None)
+           if parameters is not None:
+               new_parameters = copy.deepcopy(parameters)
+               if (
+                   "additionalProperties" in new_parameters
+                   and new_parameters["additionalProperties"] is False
+               ):
+                   new_parameters.pop("additionalProperties", None)
+               tool_function["parameters"] = new_parameters
+
    def _check_valid_arg(supported_params):
        verbose_logger.debug(
            f"\nLiteLLM completion() model= {model}; provider = {custom_llm_provider}"
@@ -4707,7 +4727,9 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> Mod
        )
    except Exception:
        raise Exception(
-           "This model isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
+           "This model isn't mapped yet. model={}, custom_llm_provider={}. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json".format(
+               model, custom_llm_provider
+           )
        )
@@ -4893,6 +4915,34 @@ def get_provider_fields(custom_llm_provider: str) -> List[ProviderField]:
    return []

+def create_proxy_transport_and_mounts():
+    proxies = {
+        key: None if url is None else Proxy(url=url)
+        for key, url in get_environment_proxies().items()
+    }
+
+    sync_proxy_mounts = {}
+    async_proxy_mounts = {}
+
+    # Retrieve NO_PROXY environment variable
+    no_proxy = os.getenv("NO_PROXY", None)
+    no_proxy_urls = no_proxy.split(",") if no_proxy else []
+
+    for key, proxy in proxies.items():
+        if proxy is None:
+            sync_proxy_mounts[key] = httpx.HTTPTransport()
+            async_proxy_mounts[key] = httpx.AsyncHTTPTransport()
+        else:
+            sync_proxy_mounts[key] = httpx.HTTPTransport(proxy=proxy)
+            async_proxy_mounts[key] = httpx.AsyncHTTPTransport(proxy=proxy)
+
+    for url in no_proxy_urls:
+        sync_proxy_mounts[url] = httpx.HTTPTransport()
+        async_proxy_mounts[url] = httpx.AsyncHTTPTransport()
+
+    return sync_proxy_mounts, async_proxy_mounts
+
def validate_environment(model: Optional[str] = None) -> dict:
    """
    Checks if the environment variables are valid for the given model.
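A short sketch of how the returned mounts plug into httpx clients (assumes `create_proxy_transport_and_mounts` is exported from `litellm.utils`, and uses an example proxy URL):

import os
import httpx
from litellm.utils import create_proxy_transport_and_mounts  # assumed export

os.environ["HTTPS_PROXY"] = "https://proxy.example.com:8080"  # example value

sync_mounts, async_mounts = create_proxy_transport_and_mounts()
client = httpx.Client(mounts=sync_mounts)  # https:// traffic goes via the proxy
aclient = httpx.AsyncClient(mounts=async_mounts)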
@@ -7519,7 +7569,7 @@ def exception_type(
            if original_exception.status_code == 400:
                exception_mapping_worked = True
                raise BadRequestError(
-                   message=f"{exception_provider} - {message}",
+                   message=f"{exception_provider} - {error_str}",
                    llm_provider=custom_llm_provider,
                    model=model,
                    response=original_exception.response,
@@ -7528,7 +7578,7 @@ def exception_type(
            elif original_exception.status_code == 401:
                exception_mapping_worked = True
                raise AuthenticationError(
-                   message=f"AuthenticationError: {exception_provider} - {message}",
+                   message=f"AuthenticationError: {exception_provider} - {error_str}",
                    llm_provider=custom_llm_provider,
                    model=model,
                    response=original_exception.response,
@@ -7537,7 +7587,7 @@ def exception_type(
            elif original_exception.status_code == 404:
                exception_mapping_worked = True
                raise NotFoundError(
-                   message=f"NotFoundError: {exception_provider} - {message}",
+                   message=f"NotFoundError: {exception_provider} - {error_str}",
                    model=model,
                    llm_provider=custom_llm_provider,
                    response=original_exception.response,
@@ -7546,7 +7596,7 @@ def exception_type(
            elif original_exception.status_code == 408:
                exception_mapping_worked = True
                raise Timeout(
-                   message=f"Timeout Error: {exception_provider} - {message}",
+                   message=f"Timeout Error: {exception_provider} - {error_str}",
                    model=model,
                    llm_provider=custom_llm_provider,
                    litellm_debug_info=extra_information,
@@ -7554,7 +7604,7 @@ def exception_type(
            elif original_exception.status_code == 422:
                exception_mapping_worked = True
                raise BadRequestError(
-                   message=f"BadRequestError: {exception_provider} - {message}",
+                   message=f"BadRequestError: {exception_provider} - {error_str}",
                    model=model,
                    llm_provider=custom_llm_provider,
                    response=original_exception.response,
@@ -7563,7 +7613,7 @@ def exception_type(
            elif original_exception.status_code == 429:
                exception_mapping_worked = True
                raise RateLimitError(
-                   message=f"RateLimitError: {exception_provider} - {message}",
+                   message=f"RateLimitError: {exception_provider} - {error_str}",
                    model=model,
                    llm_provider=custom_llm_provider,
                    response=original_exception.response,
@@ -7572,7 +7622,7 @@ def exception_type(
            elif original_exception.status_code == 503:
                exception_mapping_worked = True
                raise ServiceUnavailableError(
-                   message=f"ServiceUnavailableError: {exception_provider} - {message}",
+                   message=f"ServiceUnavailableError: {exception_provider} - {error_str}",
                    model=model,
                    llm_provider=custom_llm_provider,
                    response=original_exception.response,
@@ -7581,7 +7631,7 @@ def exception_type(
            elif original_exception.status_code == 504:  # gateway timeout error
                exception_mapping_worked = True
                raise Timeout(
-                   message=f"Timeout Error: {exception_provider} - {message}",
+                   message=f"Timeout Error: {exception_provider} - {error_str}",
                    model=model,
                    llm_provider=custom_llm_provider,
                    litellm_debug_info=extra_information,
@@ -7590,7 +7640,7 @@ def exception_type(
                exception_mapping_worked = True
                raise APIError(
                    status_code=original_exception.status_code,
-                   message=f"APIError: {exception_provider} - {message}",
+                   message=f"APIError: {exception_provider} - {error_str}",
                    llm_provider=custom_llm_provider,
                    model=model,
                    request=original_exception.request,
@@ -7599,7 +7649,7 @@ def exception_type(
            else:
                # if no status code then it is an APIConnectionError: https://github.com/openai/openai-python#handling-errors
                raise APIConnectionError(
-                   message=f"APIConnectionError: {exception_provider} - {message}",
+                   message=f"APIConnectionError: {exception_provider} - {error_str}",
                    llm_provider=custom_llm_provider,
                    model=model,
                    litellm_debug_info=extra_information,
@@ -7950,6 +8000,7 @@ class CustomStreamWrapper:
        )
        self.messages = getattr(logging_obj, "messages", None)
        self.sent_stream_usage = False
+       self.tool_call = False
        self.chunks: List = (
            []
        )  # keep track of the returned chunks - used for calculating the input/output tokens for stream options
@@ -9192,9 +9243,16 @@ class CustomStreamWrapper:
                    "is_finished": True,
                    "finish_reason": chunk.choices[0].finish_reason,
                    "original_chunk": chunk,
+                   "tool_calls": (
+                       chunk.choices[0].delta.tool_calls
+                       if hasattr(chunk.choices[0].delta, "tool_calls")
+                       else None
+                   ),
                }
                completion_obj["content"] = response_obj["text"]
+               if response_obj["tool_calls"] is not None:
+                   completion_obj["tool_calls"] = response_obj["tool_calls"]
                print_verbose(f"completion obj content: {completion_obj['content']}")
                if hasattr(chunk, "id"):
                    model_response.id = chunk.id
@@ -9352,6 +9410,10 @@ class CustomStreamWrapper:
                )
                print_verbose(f"self.sent_first_chunk: {self.sent_first_chunk}")

+           ## CHECK FOR TOOL USE
+           if "tool_calls" in completion_obj and len(completion_obj["tool_calls"]) > 0:
+               self.tool_call = True
+
            ## RETURN ARG
            if (
                "content" in completion_obj
@@ -9530,6 +9592,12 @@ class CustomStreamWrapper:
                )
            else:
                model_response.choices[0].finish_reason = "stop"

+           ## if tool use
+           if (
+               model_response.choices[0].finish_reason == "stop" and self.tool_call
+           ):  # don't overwrite for other - potential error finish reasons
+               model_response.choices[0].finish_reason = "tool_calls"
+
            return model_response

    def __next__(self):
@@ -9583,7 +9651,7 @@ class CustomStreamWrapper:
                return response
            except StopIteration:
-               if self.sent_last_chunk == True:
+               if self.sent_last_chunk is True:
                    if (
                        self.sent_stream_usage == False
                        and self.stream_options is not None


@@ -2022,10 +2022,10 @@
        "max_tokens": 8192,
        "max_input_tokens": 2097152,
        "max_output_tokens": 8192,
-       "input_cost_per_token": 0.00000035,
-       "input_cost_per_token_above_128k_tokens": 0.0000007,
-       "output_cost_per_token": 0.00000105,
-       "output_cost_per_token_above_128k_tokens": 0.0000021,
+       "input_cost_per_token": 0.0000035,
+       "input_cost_per_token_above_128k_tokens": 0.000007,
+       "output_cost_per_token": 0.0000105,
+       "output_cost_per_token_above_128k_tokens": 0.000021,
        "litellm_provider": "gemini",
        "mode": "chat",
        "supports_system_messages": true,
@@ -2033,16 +2033,16 @@
        "supports_vision": true,
        "supports_tool_choice": true,
        "supports_response_schema": true,
-       "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+       "source": "https://ai.google.dev/pricing"
    },
    "gemini/gemini-1.5-pro-latest": {
        "max_tokens": 8192,
        "max_input_tokens": 1048576,
        "max_output_tokens": 8192,
-       "input_cost_per_token": 0.00000035,
-       "input_cost_per_token_above_128k_tokens": 0.0000007,
+       "input_cost_per_token": 0.0000035,
+       "input_cost_per_token_above_128k_tokens": 0.000007,
        "output_cost_per_token": 0.00000105,
-       "output_cost_per_token_above_128k_tokens": 0.0000021,
+       "output_cost_per_token_above_128k_tokens": 0.000021,
        "litellm_provider": "gemini",
        "mode": "chat",
        "supports_system_messages": true,
@@ -2050,7 +2050,7 @@
        "supports_vision": true,
        "supports_tool_choice": true,
        "supports_response_schema": true,
-       "source": "https://ai.google.dev/models/gemini"
+       "source": "https://ai.google.dev/pricing"
    },
    "gemini/gemini-pro-vision": {
        "max_tokens": 2048,

poetry.lock generated

@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.

[[package]]
name = "aiohttp"
@@ -2115,6 +2115,32 @@ dev = ["coverage[toml] (==5.0.4)", "cryptography (>=3.4.0)", "pre-commit", "pyte
docs = ["sphinx (>=4.5.0,<5.0.0)", "sphinx-rtd-theme", "zope.interface"]
tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"]
[[package]]
name = "pynacl"
version = "1.5.0"
description = "Python binding to the Networking and Cryptography (NaCl) library"
optional = true
python-versions = ">=3.6"
files = [
{file = "PyNaCl-1.5.0-cp36-abi3-macosx_10_10_universal2.whl", hash = "sha256:401002a4aaa07c9414132aaed7f6836ff98f59277a234704ff66878c2ee4a0d1"},
{file = "PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:52cb72a79269189d4e0dc537556f4740f7f0a9ec41c1322598799b0bdad4ef92"},
{file = "PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a36d4a9dda1f19ce6e03c9a784a2921a4b726b02e1c736600ca9c22029474394"},
{file = "PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:0c84947a22519e013607c9be43706dd42513f9e6ae5d39d3613ca1e142fba44d"},
{file = "PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06b8f6fa7f5de8d5d2f7573fe8c863c051225a27b61e6860fd047b1775807858"},
{file = "PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:a422368fc821589c228f4c49438a368831cb5bbc0eab5ebe1d7fac9dded6567b"},
{file = "PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:61f642bf2378713e2c2e1de73444a3778e5f0a38be6fee0fe532fe30060282ff"},
{file = "PyNaCl-1.5.0-cp36-abi3-win32.whl", hash = "sha256:e46dae94e34b085175f8abb3b0aaa7da40767865ac82c928eeb9e57e1ea8a543"},
{file = "PyNaCl-1.5.0-cp36-abi3-win_amd64.whl", hash = "sha256:20f42270d27e1b6a29f54032090b972d97f0a1b0948cc52392041ef7831fee93"},
{file = "PyNaCl-1.5.0.tar.gz", hash = "sha256:8ac7448f09ab85811607bdd21ec2464495ac8b7c66d146bf545b0f08fb9220ba"},
]
[package.dependencies]
cffi = ">=1.4.1"
[package.extras]
docs = ["sphinx (>=1.6.5)", "sphinx-rtd-theme"]
tests = ["hypothesis (>=3.27.0)", "pytest (>=3.2.1,!=3.3.0)"]
[[package]] [[package]]
name = "pytest" name = "pytest"
version = "7.4.4" version = "7.4.4"
@ -3381,10 +3407,10 @@ docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.link
testing = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"] testing = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"]
[extras] [extras]
extra-proxy = ["azure-identity", "azure-keyvault-secrets", "google-cloud-kms", "prisma", "resend"] extra-proxy = ["azure-identity", "azure-keyvault-secrets", "google-cloud-kms", "prisma", "pynacl", "resend"]
proxy = ["PyJWT", "apscheduler", "backoff", "cryptography", "fastapi", "fastapi-sso", "gunicorn", "orjson", "python-multipart", "pyyaml", "rq", "uvicorn"] proxy = ["PyJWT", "apscheduler", "backoff", "cryptography", "fastapi", "fastapi-sso", "gunicorn", "orjson", "python-multipart", "pyyaml", "rq", "uvicorn"]
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = ">=3.8.1,<4.0, !=3.9.7" python-versions = ">=3.8.1,<4.0, !=3.9.7"
content-hash = "925b604bed171282827c8b046191ad858ce37fa3b011a393345382f8ff86e68c" content-hash = "6025cae7749c94755d17362f77adf76f834863dba2126501cd3111d53a9c5779"
View file
@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.41.8" version = "1.41.11"
description = "Library to easily interface with LLM API providers" description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"] authors = ["BerriAI"]
license = "MIT" license = "MIT"
@ -46,6 +46,7 @@ azure-identity = {version = "^1.15.0", optional = true}
azure-keyvault-secrets = {version = "^4.8.0", optional = true} azure-keyvault-secrets = {version = "^4.8.0", optional = true}
google-cloud-kms = {version = "^2.21.3", optional = true} google-cloud-kms = {version = "^2.21.3", optional = true}
resend = {version = "^0.8.0", optional = true} resend = {version = "^0.8.0", optional = true}
+pynacl = {version = "^1.5.0", optional = true}
[tool.poetry.extras]
proxy = [
@@ -90,7 +91,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"

[tool.commitizen]
version = "1.41.8" version = "1.41.11"
version_files = [
    "pyproject.toml:^version"
]
View file
@@ -42,7 +42,7 @@ tokenizers==0.14.0 # for calculating usage
click==8.1.7 # for proxy cli
jinja2==3.1.4 # for prompt templates
certifi==2024.7.4 # [TODO] clean up
-aiohttp==3.9.0 # for network calls
+aiohttp==3.9.4 # for network calls
aioboto3==12.3.0 # for async sagemaker calls
tenacity==8.2.3 # for retrying requests, when litellm.num_retries set
pydantic==2.7.1 # proxy + openai req.
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
View file
File diff suppressed because one or more lines are too long
View file
@@ -1,7 +1,7 @@
2:I[77831,[],""]
-3:I[48951,["665","static/chunks/3014691f-589a5f4865c3822f.js","936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-83a8bdddfe32b5d9.js","777","static/chunks/777-f76791513e294b30.js","931","static/chunks/app/page-da7d95729f2529b5.js"],""]
+3:I[48951,["665","static/chunks/3014691f-589a5f4865c3822f.js","936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-19b05e5ce40fa85d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-d7572f2a46f911d5.js","777","static/chunks/777-906d7dd6a5bf7be4.js","931","static/chunks/app/page-567f85145e7f0f35.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
-0:["0gt3_bF2KkdKeE61mic4M",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
+0:["RDLpeUaSstfmeQiKITNBo",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null
File diff suppressed because one or more lines are too long
View file
@@ -1,7 +1,7 @@
2:I[77831,[],""]
-3:I[87494,["294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","777","static/chunks/777-f76791513e294b30.js","418","static/chunks/app/model_hub/page-ba7819b59161aa64.js"],""]
+3:I[87494,["294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-19b05e5ce40fa85d.js","777","static/chunks/777-906d7dd6a5bf7be4.js","418","static/chunks/app/model_hub/page-ba7819b59161aa64.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
-0:["0gt3_bF2KkdKeE61mic4M",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
+0:["RDLpeUaSstfmeQiKITNBo",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null
File diff suppressed because one or more lines are too long
View file
@@ -1,7 +1,7 @@
2:I[77831,[],""]
-3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-f76791513e294b30.js","461","static/chunks/app/onboarding/page-1ed08595d570934e.js"],""]
+3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-906d7dd6a5bf7be4.js","461","static/chunks/app/onboarding/page-1ed08595d570934e.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
-0:["0gt3_bF2KkdKeE61mic4M",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
+0:["RDLpeUaSstfmeQiKITNBo",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null
View file
@@ -743,7 +743,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
  }

  const fetchModelMap = async () => {
-    const data = await modelCostMap();
+    const data = await modelCostMap(accessToken);
    console.log(`received model cost map data: ${Object.keys(data)}`);
    setModelMap(data);
  };
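The call site above now threads the dashboard's accessToken into modelCostMap, whose new implementation appears in the networkMap.tsx hunks below. Read together, the request it ends up issuing looks roughly like this (a minimal sketch, not the verbatim source; the module-level `proxyBaseUrl` constant and the untyped JSON return are assumptions based on the surrounding diff):

// Sketch: modelCostMap now requires the caller's token and forwards it as a
// Bearer credential, so /get/litellm_model_cost_map can sit behind proxy auth.
declare const proxyBaseUrl: string | null; // assumed module-level constant

export const modelCostMap = async (accessToken: string): Promise<any> => {
  const url = proxyBaseUrl
    ? `${proxyBaseUrl}/get/litellm_model_cost_map`
    : `/get/litellm_model_cost_map`;
  const response = await fetch(url, {
    method: "GET",
    headers: {
      Authorization: `Bearer ${accessToken}`, // token threaded in from the dashboard
      "Content-Type": "application/json",
    },
  });
  return response.json();
};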
View file
@@ -12,11 +12,19 @@ export interface Model {
  model_info: Object | null;
}

-export const modelCostMap = async () => {
+export const modelCostMap = async (
+  accessToken: string,
+) => {
  try {
    const url = proxyBaseUrl ? `${proxyBaseUrl}/get/litellm_model_cost_map` : `/get/litellm_model_cost_map`;
    const response = await fetch(
-      url
+      url, {
+        method: "GET",
+        headers: {
+          Authorization: `Bearer ${accessToken}`,
+          "Content-Type": "application/json",
+        },
+      }
    );
    const jsonData = await response.json();
    console.log(`received litellm model cost data: ${jsonData}`);
@@ -693,6 +701,9 @@ export const claimOnboardingToken = async (
    throw error;
  }
};
+let ModelListerrorShown = false;
+let errorTimer: NodeJS.Timeout | null = null;
+
export const modelInfoCall = async (
  accessToken: String,
  userID: String,
@@ -714,8 +725,21 @@ export const modelInfoCall = async (
    });

    if (!response.ok) {
-      const errorData = await response.text();
-      message.error(errorData, 10);
+      let errorData = await response.text();
+      errorData += `error shown=${ModelListerrorShown}`
+      if (!ModelListerrorShown) {
+        if (errorData.includes("No model list passed")) {
+          errorData = "No Models Exist. Click Add Model to get started.";
+        }
+        message.info(errorData, 10);
+        ModelListerrorShown = true;
+        if (errorTimer) clearTimeout(errorTimer);
+        errorTimer = setTimeout(() => {
+          ModelListerrorShown = false;
+        }, 10000);
+      }
      throw new Error("Network response was not ok");
    }
@@ -750,7 +774,6 @@ export const modelHubCall = async (accessToken: String) => {
  if (!response.ok) {
    const errorData = await response.text();
-    message.error(errorData, 10);
    throw new Error("Network response was not ok");
  }
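The modelInfoCall hunk above guards the toast behind a module-level flag plus a reset timer, so a burst of failed requests surfaces at most one message per ten-second window (modelHubCall's unconditional message.error is dropped in the last hunk for the same reason). Distilled into a standalone helper, the pattern is roughly this (a minimal sketch; `notifyOnce` is a hypothetical name and the `notify` parameter stands in for antd's `message.info`):

// Fire at most one notification per cooldown window, then re-arm.
// Mirrors the ModelListerrorShown / errorTimer pair added in modelInfoCall.
let shown = false;
let timer: ReturnType<typeof setTimeout> | null = null;

export function notifyOnce(
  notify: (msg: string, durationSecs?: number) => void, // e.g. antd message.info
  msg: string,
  cooldownMs: number = 10000,
): void {
  if (shown) return; // still inside the cooldown window; drop the duplicate
  notify(msg, 10);
  shown = true;
  if (timer) clearTimeout(timer); // collapse overlapping timers
  timer = setTimeout(() => {
    shown = false; // allow the next failure to surface again
  }, cooldownMs);
}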
View file
@@ -32,7 +32,6 @@ import {
  allTagNamesCall,
  modelMetricsCall,
  modelAvailableCall,
-  modelInfoCall,
  adminspendByProvider,
  adminGlobalActivity,
  adminGlobalActivityPerModel,