Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-24 18:24:20 +00:00)

Commit 8c0bf06c87: Merge branch 'main' into litellm_dev_contributor_prs_03_10_2025_p1

91 changed files with 1480 additions and 503 deletions
.github/pull_request_template.md (vendored, 16 changes)

@@ -6,6 +6,16 @@

<!-- e.g. "Fixes #000" -->

## Pre-Submission checklist

**Please complete all items before asking a LiteLLM maintainer to review your PR**

- [ ] I have added testing in the `tests/litellm/` directory. **Adding at least 1 test is a hard requirement** - [see details](https://docs.litellm.ai/docs/extras/contributing_code)
- [ ] I have added a screenshot of my new test passing locally
- [ ] My PR passes all unit tests on [`make test-unit`](https://docs.litellm.ai/docs/extras/contributing_code)
- [ ] My PR's scope is as isolated as possible; it only solves 1 specific problem

## Type

<!-- Select the type of Pull Request -->

@@ -20,10 +30,4 @@

## Changes

<!-- List of changes -->

## [REQUIRED] Testing - Attach a screenshot of any new tests passing locally

If UI changes, send a screenshot/GIF of working UI fixes

<!-- Test procedure -->
.github/workflows/helm_unit_test.yml (vendored, new file, 27 lines)

@@ -0,0 +1,27 @@
name: Helm unit test

on:
  pull_request:
  push:
    branches:
      - main

jobs:
  unit-test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v2

      - name: Set up Helm 3.11.1
        uses: azure/setup-helm@v1
        with:
          version: '3.11.1'

      - name: Install Helm Unit Test Plugin
        run: |
          helm plugin install https://github.com/helm-unittest/helm-unittest --version v0.4.4

      - name: Run unit tests
        run:
          helm unittest -f 'tests/*.yaml' deploy/charts/litellm-helm
Makefile (new file, 21 lines)

@@ -0,0 +1,21 @@
# LiteLLM Makefile
# Simple Makefile for running tests and basic development tasks

.PHONY: help test test-unit test-integration

# Default target
help:
	@echo "Available commands:"
	@echo "  make test              - Run all tests"
	@echo "  make test-unit         - Run unit tests"
	@echo "  make test-integration  - Run integration tests"

# Testing
test:
	poetry run pytest tests/

test-unit:
	poetry run pytest tests/litellm/

test-integration:
	poetry run pytest tests/ -k "not litellm"
README.md (66 changes)

@@ -340,71 +340,7 @@ curl 'http://0.0.0.0:4000/key/generate' \

## Contributing

To contribute: Clone the repo locally -> Make a change -> Submit a PR with the change.

Here's how to modify the repo locally:

Step 1: Clone the repo

```
git clone https://github.com/BerriAI/litellm.git
```

Step 2: Install dependencies:

```
pip install -r requirements.txt
```

Step 3: Test your change:

a. Add a pytest test within `tests/litellm/`

   This folder follows the same directory structure as `litellm/`.

   If a corresponding test file does not exist, create one.

b. Run the test

```
cd tests/litellm # pwd: Documents/litellm/litellm/tests/litellm
pytest /path/to/test_file.py
```

Step 4: Submit a PR with your changes! 🚀

- push your fork to your GitHub repo
- submit a PR from there

### Building LiteLLM Docker Image

Follow these instructions if you want to build / run the LiteLLM Docker Image yourself.

Step 1: Clone the repo

```
git clone https://github.com/BerriAI/litellm.git
```

Step 2: Build the Docker Image

Build using Dockerfile.non_root

```
docker build -f docker/Dockerfile.non_root -t litellm_test_image .
```

Step 3: Run the Docker Image

Make sure config.yaml is present in the root directory. This is your litellm proxy config file.

```
docker run \
    -v $(pwd)/proxy_config.yaml:/app/config.yaml \
    -e DATABASE_URL="postgresql://xxxxxxxx" \
    -e LITELLM_MASTER_KEY="sk-1234" \
    -p 4000:4000 \
    litellm_test_image \
    --config /app/config.yaml --detailed_debug
```

Interested in contributing? Contributions to the LiteLLM Python SDK, Proxy Server, and LLM integrations are all accepted and highly encouraged! [See our Contribution Guide for more details](https://docs.litellm.ai/docs/extras/contributing_code)

# Enterprise

For companies that need better security, user management and professional support
deploy/charts/litellm-helm/tests/deployment_tests.yaml (new file, 54 lines)

@@ -0,0 +1,54 @@
suite: test deployment
templates:
  - deployment.yaml
  - configmap-litellm.yaml
tests:
  - it: should work
    template: deployment.yaml
    set:
      image.tag: test
    asserts:
      - isKind:
          of: Deployment
      - matchRegex:
          path: metadata.name
          pattern: -litellm$
      - equal:
          path: spec.template.spec.containers[0].image
          value: ghcr.io/berriai/litellm-database:test
  - it: should work with tolerations
    template: deployment.yaml
    set:
      tolerations:
        - key: node-role.kubernetes.io/master
          operator: Exists
          effect: NoSchedule
    asserts:
      - equal:
          path: spec.template.spec.tolerations[0].key
          value: node-role.kubernetes.io/master
      - equal:
          path: spec.template.spec.tolerations[0].operator
          value: Exists
  - it: should work with affinity
    template: deployment.yaml
    set:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: topology.kubernetes.io/zone
                    operator: In
                    values:
                      - antarctica-east1
    asserts:
      - equal:
          path: spec.template.spec.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].key
          value: topology.kubernetes.io/zone
      - equal:
          path: spec.template.spec.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].operator
          value: In
      - equal:
          path: spec.template.spec.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].values[0]
          value: antarctica-east1
docs/my-website/docs/extras/contributing_code.md (new file, 96 lines)

@@ -0,0 +1,96 @@
# Contributing Code

## **Checklist before submitting a PR**

Here are the core requirements for any PR submitted to LiteLLM:

- [ ] Add testing. **Adding at least 1 test is a hard requirement** - [see details](#2-adding-testing-to-your-pr)
- [ ] Ensure your PR passes the following tests:
    - [ ] [Unit Tests](#3-running-unit-tests)
    - [ ] Formatting / Linting Tests
- [ ] Keep scope as isolated as possible. As a general rule, your changes should address 1 specific problem at a time.

## Quick start

## 1. Setup your local dev environment

Here's how to modify the repo locally:

Step 1: Clone the repo

```shell
git clone https://github.com/BerriAI/litellm.git
```

Step 2: Install dev dependencies:

```shell
poetry install --with dev --extras proxy
```

That's it, your local dev environment is ready!

## 2. Adding Testing to your PR

- Add your test to the [`tests/litellm/` directory](https://github.com/BerriAI/litellm/tree/main/tests/litellm)
  - This directory 1:1 maps to the `litellm/` directory and can only contain mocked tests.
  - Do not add real LLM API calls to this directory.

### 2.1 File Naming Convention for `tests/litellm/`

The `tests/litellm/` directory follows the same directory structure as `litellm/`.

- `tests/litellm/proxy/test_caching_routes.py` maps to `litellm/proxy/caching_routes.py`
- `test_{filename}.py` maps to `litellm/{filename}.py`
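For example, a minimal mocked test might look like the sketch below (`mock_response` is litellm's built-in way to stub the provider call, so no real LLM API is hit; the file name is illustrative):

```python
# tests/litellm/test_main.py (illustrative name) -- maps to litellm/main.py
import litellm


def test_completion_returns_mocked_content():
    # mock_response short-circuits the provider call: no network, no API key needed
    response = litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi"}],
        mock_response="hello from the mock",
    )
    assert response.choices[0].message.content == "hello from the mock"
```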
## 3. Running Unit Tests

Run the following command from the root of the litellm directory:

```shell
make test-unit
```

## 4. Submit a PR with your changes!

- push your fork to your GitHub repo
- submit a PR from there

## Advanced

### Building LiteLLM Docker Image

Follow these instructions if you want to build / run the LiteLLM Docker Image yourself.

Step 1: Clone the repo

```shell
git clone https://github.com/BerriAI/litellm.git
```

Step 2: Build the Docker Image

Build using Dockerfile.non_root:

```shell
docker build -f docker/Dockerfile.non_root -t litellm_test_image .
```

Step 3: Run the Docker Image

Make sure config.yaml is present in the root directory. This is your litellm proxy config file.

```shell
docker run \
    -v $(pwd)/proxy_config.yaml:/app/config.yaml \
    -e DATABASE_URL="postgresql://xxxxxxxx" \
    -e LITELLM_MASTER_KEY="sk-1234" \
    -p 4000:4000 \
    litellm_test_image \
    --config /app/config.yaml --detailed_debug
```
@@ -78,6 +78,9 @@ Following are the allowed fields in metadata, their types, and their description

* `context: Optional[Union[dict, str]]` - This is the context used as information for the prompt. For RAG applications, this is the "retrieved" data. You may log context as a string or as an object (dictionary).
* `expected_response: Optional[str]` - This is the reference response to compare against for evaluation purposes. This is useful for segmenting inference calls by expected response.
* `user_query: Optional[str]` - This is the user's query. For conversational applications, this is the user's last message.
* `tags: Optional[list]` - This is a list of tags. This is useful for segmenting inference calls by tags.
* `user_feedback: Optional[str]` - The end user's feedback.
* `model_options: Optional[dict]` - This is a dictionary of model options. This is useful for getting insights into how model behavior affects your end users.
* `custom_attributes: Optional[dict]` - This is a dictionary of custom attributes. This is useful for additional information about the inference.
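As a hedged sketch of how these fields are attached in practice (standard litellm callback wiring; the model name and field values are illustrative):

```python
import litellm

litellm.success_callback = ["athina"]  # assumes ATHINA_API_KEY is set in the environment

response = litellm.completion(
    model="gpt-4o-mini",  # illustrative model choice
    messages=[{"role": "user", "content": "What is LiteLLM?"}],
    metadata={
        "user_query": "What is LiteLLM?",          # the user's last message
        "context": "LiteLLM is an LLM gateway.",   # "retrieved" data for RAG apps
        "expected_response": "An LLM gateway.",    # reference response for evals
        "tags": ["docs", "faq"],                   # segment inference calls by tags
        "custom_attributes": {"experiment": "A"},  # any additional information
    },
)
```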
## Using a self hosted deployment of Athina
@@ -63,9 +63,9 @@ model_list:

  - model_name: bedrock-claude-v1
    litellm_params:
      model: bedrock/anthropic.claude-instant-v1
      aws_access_key_id: os.environ/CUSTOM_AWS_ACCESS_KEY_ID
      aws_secret_access_key: os.environ/CUSTOM_AWS_SECRET_ACCESS_KEY
      aws_region_name: os.environ/CUSTOM_AWS_REGION_NAME
      aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID
      aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY
      aws_region_name: os.environ/AWS_REGION_NAME
```

All possible auth params:
@@ -1910,6 +1910,8 @@ curl http://0.0.0.0:4000/rerank \

      "Capital punishment has existed in the United States since before it was a country."
    ],
    "top_n": 3
  }'
```
@@ -404,14 +404,16 @@ curl http://localhost:4000/v1/chat/completions \

If this was your initial VertexAI Grounding code,

```python
import vertexai
import vertexai
from vertexai.generative_models import GenerativeModel, GenerationConfig, Tool, grounding


vertexai.init(project=project_id, location="us-central1")

model = GenerativeModel("gemini-1.5-flash-001")

# Use Google Search for grounding
tool = Tool.from_google_search_retrieval(grounding.GoogleSearchRetrieval(disable_attributon=False))
tool = Tool.from_google_search_retrieval(grounding.GoogleSearchRetrieval())

prompt = "When is the next total solar eclipse in US?"
response = model.generate_content(

@@ -428,7 +430,7 @@ print(response)

then, this is what it looks like now

```python
from litellm import completion
from litellm import completion


# !gcloud auth application-default login - run this to add vertex credentials to your env
@@ -17,12 +17,6 @@ This guide walks you through connecting OpenWeb UI to LiteLLM. Using LiteLLM wit

## 1. Start LiteLLM & OpenWebUI

Use this docker compose file to deploy both OpenWebUI and LiteLLM.

```bash
docker compose up -d
```

- OpenWebUI starts running on [http://localhost:3000](http://localhost:3000)
- LiteLLM starts running on [http://localhost:4000](http://localhost:4000)
@@ -374,13 +374,6 @@ const sidebars = {

        "load_test_rpm",
      ]
    },
    {
      type: "category",
      label: "Adding Providers",
      items: [
        "adding_provider/directory_structure",
        "adding_provider/new_rerank_provider"],
    },
    {
      type: "category",
      label: "Logging & Observability",

@@ -440,12 +433,26 @@ const sidebars = {

      },
      ]
    },

    {
      type: "category",
      label: "Contributing",
      items: [
        "extras/contributing_code",
        {
          type: "category",
          label: "Adding Providers",
          items: [
            "adding_provider/directory_structure",
            "adding_provider/new_rerank_provider"],
        },
        "extras/contributing",
        "contributing",
      ]
    },
    {
      type: "category",
      label: "Extras",
      items: [
        "extras/contributing",
        "data_security",
        "data_retention",
        "migration_policy",

@@ -481,7 +488,6 @@ const sidebars = {

        "projects/pgai",
      ],
    },
    "contributing",
    "proxy/pii_masking",
    "extras/code_quality",
    "rules",
@@ -118,6 +118,7 @@ class BadRequestError(openai.BadRequestError):  # type: ignore

        litellm_debug_info: Optional[str] = None,
        max_retries: Optional[int] = None,
        num_retries: Optional[int] = None,
        body: Optional[dict] = None,
    ):
        self.status_code = 400
        self.message = "litellm.BadRequestError: {}".format(message)

@@ -133,7 +134,7 @@ class BadRequestError(openai.BadRequestError):  # type: ignore

        self.max_retries = max_retries
        self.num_retries = num_retries
        super().__init__(
            self.message, response=response, body=None
            self.message, response=response, body=body
        )  # Call the base class constructor with the parameters it needs

    def __str__(self):
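A hedged sketch of what this `body` plumbing enables; the constructor arguments are inferred from the diff above and the payload is illustrative, so treat this as an approximation rather than the repo's own tests:

```python
import litellm

try:
    raise litellm.BadRequestError(
        message="Invalid request",
        model="gpt-4",         # illustrative values
        llm_provider="openai",
        body={"error": {"type": "invalid_request_error"}},  # raw provider payload
    )
except litellm.BadRequestError as e:
    # before this change, `body` was dropped (passed to the base class as None)
    assert e.body == {"error": {"type": "invalid_request_error"}}
```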
@@ -23,6 +23,9 @@ class AthinaLogger:

            "context",
            "expected_response",
            "user_query",
            "tags",
            "user_feedback",
            "model_options",
            "custom_attributes",
        ]

@@ -81,7 +84,6 @@ class AthinaLogger:

        for key in self.additional_keys:
            if key in metadata:
                data[key] = metadata[key]

        response = litellm.module_level_client.post(
            self.athina_logging_url,
            headers=self.headers,
@@ -331,6 +331,7 @@ def exception_type(  # type: ignore  # noqa: PLR0915

                    model=model,
                    response=getattr(original_exception, "response", None),
                    litellm_debug_info=extra_information,
                    body=getattr(original_exception, "body", None),
                )
            elif (
                "Web server is returning an unknown error" in error_str

@@ -421,6 +422,7 @@ def exception_type(  # type: ignore  # noqa: PLR0915

                    llm_provider=custom_llm_provider,
                    response=getattr(original_exception, "response", None),
                    litellm_debug_info=extra_information,
                    body=getattr(original_exception, "body", None),
                )
            elif original_exception.status_code == 429:
                exception_mapping_worked = True

@@ -1960,6 +1962,7 @@ def exception_type(  # type: ignore  # noqa: PLR0915

                    model=model,
                    litellm_debug_info=extra_information,
                    response=getattr(original_exception, "response", None),
                    body=getattr(original_exception, "body", None),
                )
            elif (
                "The api_key client option must be set either by passing api_key to the client or by setting"

@@ -1991,6 +1994,7 @@ def exception_type(  # type: ignore  # noqa: PLR0915

                    model=model,
                    litellm_debug_info=extra_information,
                    response=getattr(original_exception, "response", None),
                    body=getattr(original_exception, "body", None),
                )
            elif original_exception.status_code == 401:
                exception_mapping_worked = True
@@ -540,10 +540,14 @@ class AzureChatCompletion(BaseLLM):

            status_code = getattr(e, "status_code", 500)
            error_headers = getattr(e, "headers", None)
            error_response = getattr(e, "response", None)
            error_body = getattr(e, "body", None)
            if error_headers is None and error_response:
                error_headers = getattr(error_response, "headers", None)
            raise AzureOpenAIError(
                status_code=status_code, message=str(e), headers=error_headers
                status_code=status_code,
                message=str(e),
                headers=error_headers,
                body=error_body,
            )

    async def acompletion(

@@ -649,6 +653,7 @@ class AzureChatCompletion(BaseLLM):

                raise AzureOpenAIError(status_code=500, message=str(e))
        except Exception as e:
            message = getattr(e, "message", str(e))
            body = getattr(e, "body", None)
            ## LOGGING
            logging_obj.post_call(
                input=data["messages"],

@@ -659,7 +664,7 @@ class AzureChatCompletion(BaseLLM):

            if hasattr(e, "status_code"):
                raise e
            else:
                raise AzureOpenAIError(status_code=500, message=message)
                raise AzureOpenAIError(status_code=500, message=message, body=body)

    def streaming(
        self,

@@ -805,10 +810,14 @@ class AzureChatCompletion(BaseLLM):

            error_headers = getattr(e, "headers", None)
            error_response = getattr(e, "response", None)
            message = getattr(e, "message", str(e))
            error_body = getattr(e, "body", None)
            if error_headers is None and error_response:
                error_headers = getattr(error_response, "headers", None)
            raise AzureOpenAIError(
                status_code=status_code, message=message, headers=error_headers
                status_code=status_code,
                message=message,
                headers=error_headers,
                body=error_body,
            )

    async def aembedding(
@@ -17,6 +17,7 @@ class AzureOpenAIError(BaseLLMException):

        request: Optional[httpx.Request] = None,
        response: Optional[httpx.Response] = None,
        headers: Optional[Union[httpx.Headers, dict]] = None,
        body: Optional[dict] = None,
    ):
        super().__init__(
            status_code=status_code,

@@ -24,6 +25,7 @@ class AzureOpenAIError(BaseLLMException):

            request=request,
            response=response,
            headers=headers,
            body=body,
        )
@@ -16,10 +16,23 @@ from litellm.llms.openai.openai import OpenAIConfig

from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import AllMessageValues
from litellm.types.utils import ModelResponse, ProviderField
from litellm.utils import _add_path_to_api_base
from litellm.utils import _add_path_to_api_base, supports_tool_choice


class AzureAIStudioConfig(OpenAIConfig):
    def get_supported_openai_params(self, model: str) -> List:
        model_supports_tool_choice = True  # azure ai supports this by default
        if not supports_tool_choice(model=f"azure_ai/{model}"):
            model_supports_tool_choice = False
        supported_params = super().get_supported_openai_params(model)
        if not model_supports_tool_choice:
            filtered_supported_params = []
            for param in supported_params:
                if param != "tool_choice":
                    filtered_supported_params.append(param)
            return filtered_supported_params
        return supported_params

    def validate_environment(
        self,
        headers: dict,
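The filtering above reduces to a simple predicate over the supported-params list; a standalone sketch of the same behavior (names are local to this example, not the repo's API):

```python
from typing import List


def filter_params(supported_params: List[str], model_supports_tool_choice: bool) -> List[str]:
    # drop "tool_choice" when the model map says the model cannot honor it
    if model_supports_tool_choice:
        return supported_params
    return [p for p in supported_params if p != "tool_choice"]


assert filter_params(["temperature", "tool_choice"], False) == ["temperature"]
assert filter_params(["temperature", "tool_choice"], True) == ["temperature", "tool_choice"]
```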
@@ -51,6 +51,7 @@ class BaseLLMException(Exception):

        headers: Optional[Union[dict, httpx.Headers]] = None,
        request: Optional[httpx.Request] = None,
        response: Optional[httpx.Response] = None,
        body: Optional[dict] = None,
    ):
        self.status_code = status_code
        self.message: str = message

@@ -67,6 +68,7 @@ class BaseLLMException(Exception):

            self.response = httpx.Response(
                status_code=status_code, request=self.request
            )
        self.body = body
        super().__init__(
            self.message
        )  # Call the base class constructor with the parameters it needs
@@ -129,7 +129,6 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM):

        ## CREDENTIALS ##
        # pop aws_secret_access_key, aws_access_key_id, aws_session_token, aws_region_name from kwargs, since completion calls fail with them
        extra_headers = optional_params.get("extra_headers", None)
        aws_secret_access_key = optional_params.get("aws_secret_access_key", None)
        aws_access_key_id = optional_params.get("aws_access_key_id", None)
        aws_session_token = optional_params.get("aws_session_token", None)

@@ -155,9 +154,10 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM):

        )

        sigv4 = SigV4Auth(credentials, "bedrock", aws_region_name)
        headers = {"Content-Type": "application/json"}
        if extra_headers is not None:
            headers = {"Content-Type": "application/json", **extra_headers}
        if headers is not None:
            headers = {"Content-Type": "application/json", **headers}
        else:
            headers = {"Content-Type": "application/json"}

        request = AWSRequest(
            method="POST",

@@ -166,12 +166,13 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM):

            headers=headers,
        )
        sigv4.add_auth(request)
        if (
            extra_headers is not None and "Authorization" in extra_headers
        ):  # prevent sigv4 from overwriting the auth header
            request.headers["Authorization"] = extra_headers["Authorization"]

        return dict(request.headers)
        request_headers_dict = dict(request.headers)
        if (
            headers is not None and "Authorization" in headers
        ):  # prevent sigv4 from overwriting the auth header
            request_headers_dict["Authorization"] = headers["Authorization"]
        return request_headers_dict

    def transform_request(
        self,

@@ -443,7 +444,7 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM):

        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
    ) -> dict:
        return {}
        return headers

    def get_error_class(
        self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
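The sigv4 change boils down to: sign the request, then restore any caller-supplied Authorization header so the signature does not clobber it. The same precedence rule in isolation (a sketch; the header values are made up):

```python
from typing import Dict, Optional


def merge_signed_headers(signed: Dict[str, str], caller: Optional[Dict[str, str]]) -> Dict[str, str]:
    # prevent sigv4 from overwriting a caller-supplied auth header
    merged = dict(signed)
    if caller is not None and "Authorization" in caller:
        merged["Authorization"] = caller["Authorization"]
    return merged


signed = {"Authorization": "AWS4-HMAC-SHA256 ...", "Content-Type": "application/json"}
caller = {"Authorization": "Bearer my-gateway-token"}
assert merge_signed_headers(signed, caller)["Authorization"] == "Bearer my-gateway-token"
```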
@@ -873,7 +873,9 @@ class BaseLLMHTTPHandler:

        elif isinstance(audio_file, bytes):
            # Assume it's already binary data
            binary_data = audio_file
        elif isinstance(audio_file, io.BufferedReader) or isinstance(audio_file, io.BytesIO):
        elif isinstance(audio_file, io.BufferedReader) or isinstance(
            audio_file, io.BytesIO
        ):
            # Handle file-like objects
            binary_data = audio_file.read()
@@ -19,6 +19,7 @@ class OpenAIError(BaseLLMException):

        request: Optional[httpx.Request] = None,
        response: Optional[httpx.Response] = None,
        headers: Optional[Union[dict, httpx.Headers]] = None,
        body: Optional[dict] = None,
    ):
        self.status_code = status_code
        self.message = message

@@ -39,6 +40,7 @@ class OpenAIError(BaseLLMException):

            headers=self.headers,
            request=self.request,
            response=self.response,
            body=body,
        )
@@ -732,10 +732,14 @@ class OpenAIChatCompletion(BaseLLM):

            error_headers = getattr(e, "headers", None)
            error_text = getattr(e, "text", str(e))
            error_response = getattr(e, "response", None)
            error_body = getattr(e, "body", None)
            if error_headers is None and error_response:
                error_headers = getattr(error_response, "headers", None)
            raise OpenAIError(
                status_code=status_code, message=error_text, headers=error_headers
                status_code=status_code,
                message=error_text,
                headers=error_headers,
                body=error_body,
            )

    async def acompletion(

@@ -828,13 +832,17 @@ class OpenAIChatCompletion(BaseLLM):

        except Exception as e:
            exception_response = getattr(e, "response", None)
            status_code = getattr(e, "status_code", 500)
            exception_body = getattr(e, "body", None)
            error_headers = getattr(e, "headers", None)
            if error_headers is None and exception_response:
                error_headers = getattr(exception_response, "headers", None)
            message = getattr(e, "message", str(e))

            raise OpenAIError(
                status_code=status_code, message=message, headers=error_headers
                status_code=status_code,
                message=message,
                headers=error_headers,
                body=exception_body,
            )

    def streaming(

@@ -973,6 +981,7 @@ class OpenAIChatCompletion(BaseLLM):

            error_headers = getattr(e, "headers", None)
            status_code = getattr(e, "status_code", 500)
            error_response = getattr(e, "response", None)
            exception_body = getattr(e, "body", None)
            if error_headers is None and error_response:
                error_headers = getattr(error_response, "headers", None)
            if response is not None and hasattr(response, "text"):

@@ -980,6 +989,7 @@ class OpenAIChatCompletion(BaseLLM):

                    status_code=status_code,
                    message=f"{str(e)}\n\nOriginal Response: {response.text}",  # type: ignore
                    headers=error_headers,
                    body=exception_body,
                )
            else:
                if type(e).__name__ == "ReadTimeout":

@@ -987,16 +997,21 @@ class OpenAIChatCompletion(BaseLLM):

                        status_code=408,
                        message=f"{type(e).__name__}",
                        headers=error_headers,
                        body=exception_body,
                    )
                elif hasattr(e, "status_code"):
                    raise OpenAIError(
                        status_code=getattr(e, "status_code", 500),
                        message=str(e),
                        headers=error_headers,
                        body=exception_body,
                    )
                else:
                    raise OpenAIError(
                        status_code=500, message=f"{str(e)}", headers=error_headers
                        status_code=500,
                        message=f"{str(e)}",
                        headers=error_headers,
                        body=exception_body,
                    )

    def get_stream_options(
@@ -37,6 +37,7 @@ class PerplexityChatConfig(OpenAIGPTConfig):

            "response_format",
            "stream",
            "temperature",
            "top_p" "max_retries",
            "top_p",
            "max_retries",
            "extra_headers",
        ]
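The bug fixed here is Python's implicit concatenation of adjacent string literals, which had silently merged two intended entries into one; a quick illustration:

```python
# adjacent string literals concatenate at parse time
broken = ["top_p" "max_retries"]   # one element: "top_pmax_retries"
fixed = ["top_p", "max_retries"]   # two elements

assert broken == ["top_pmax_retries"]
assert len(fixed) == 2
```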
@@ -3,7 +3,7 @@ Translates from OpenAI's `/v1/chat/completions` endpoint to Triton's `/generate`

"""

import json
from typing import Any, Dict, List, Literal, Optional, Union
from typing import Any, AsyncIterator, Dict, Iterator, List, Literal, Optional, Union

from httpx import Headers, Response

@@ -67,6 +67,20 @@ class TritonConfig(BaseConfig):

                optional_params[param] = value
        return optional_params

    def get_complete_url(
        self,
        api_base: Optional[str],
        model: str,
        optional_params: dict,
        stream: Optional[bool] = None,
    ) -> str:
        if api_base is None:
            raise ValueError("api_base is required")
        llm_type = self._get_triton_llm_type(api_base)
        if llm_type == "generate" and stream:
            return api_base + "_stream"
        return api_base

    def transform_response(
        self,
        model: str,

@@ -149,6 +163,18 @@ class TritonConfig(BaseConfig):

        else:
            raise ValueError(f"Invalid Triton API base: {api_base}")

    def get_model_response_iterator(
        self,
        streaming_response: Union[Iterator[str], AsyncIterator[str], ModelResponse],
        sync_stream: bool,
        json_mode: Optional[bool] = False,
    ) -> Any:
        return TritonResponseIterator(
            streaming_response=streaming_response,
            sync_stream=sync_stream,
            json_mode=json_mode,
        )


class TritonGenerateConfig(TritonConfig):
    """

@@ -204,7 +230,7 @@ class TritonGenerateConfig(TritonConfig):

        return model_response


class TritonInferConfig(TritonGenerateConfig):
class TritonInferConfig(TritonConfig):
    """
    Transformations for triton /infer endpoint (this is the infer endpoint for a custom model on triton)
    """
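On the `get_complete_url` addition above: for a `/generate` endpoint, streaming just appends a `_stream` suffix. A standalone illustration of that URL logic (the endpoint shape is assumed; this is not the repo's code):

```python
def complete_url(api_base: str, stream: bool) -> str:
    # mirrors the llm_type == "generate" branch in TritonConfig.get_complete_url
    return api_base + "_stream" if stream else api_base


api_base = "http://localhost:8000/v2/models/llama/generate"  # assumed endpoint shape
assert complete_url(api_base, stream=True).endswith("/generate_stream")
assert complete_url(api_base, stream=False).endswith("/generate")
```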
@@ -3900,42 +3900,19 @@ async def atext_completion(

    ctx = contextvars.copy_context()
    func_with_context = partial(ctx.run, func)

    _, custom_llm_provider, _, _ = get_llm_provider(
        model=model, api_base=kwargs.get("api_base", None)
    )

    if (
        custom_llm_provider == "openai"
        or custom_llm_provider == "azure"
        or custom_llm_provider == "azure_text"
        or custom_llm_provider == "custom_openai"
        or custom_llm_provider == "anyscale"
        or custom_llm_provider == "mistral"
        or custom_llm_provider == "openrouter"
        or custom_llm_provider == "deepinfra"
        or custom_llm_provider == "perplexity"
        or custom_llm_provider == "groq"
        or custom_llm_provider == "nvidia_nim"
        or custom_llm_provider == "cerebras"
        or custom_llm_provider == "sambanova"
        or custom_llm_provider == "ai21_chat"
        or custom_llm_provider == "ai21"
        or custom_llm_provider == "volcengine"
        or custom_llm_provider == "text-completion-codestral"
        or custom_llm_provider == "deepseek"
        or custom_llm_provider == "text-completion-openai"
        or custom_llm_provider == "huggingface"
        or custom_llm_provider == "ollama"
        or custom_llm_provider == "vertex_ai"
        or custom_llm_provider in litellm.openai_compatible_providers
    ):  # currently implemented aiohttp calls for just azure and openai, soon all.
        # Await normally
        response = await loop.run_in_executor(None, func_with_context)
        if asyncio.iscoroutine(response):
            response = await response
    init_response = await loop.run_in_executor(None, func_with_context)
    if isinstance(init_response, dict) or isinstance(
        init_response, TextCompletionResponse
    ):  ## CACHING SCENARIO
        if isinstance(init_response, dict):
            response = TextCompletionResponse(**init_response)
        else:
            response = init_response
    elif asyncio.iscoroutine(init_response):
        response = await init_response
    else:
        # Call the synchronous function using run_in_executor
        response = await loop.run_in_executor(None, func_with_context)
        response = init_response  # type: ignore

    if (
        kwargs.get("stream", False) is True
        or isinstance(response, TextCompletionStreamWrapper)
@@ -6,7 +6,7 @@

        "input_cost_per_token": 0.0000,
        "output_cost_per_token": 0.000,
        "litellm_provider": "one of https://docs.litellm.ai/docs/providers",
        "mode": "one of chat, embedding, completion, image_generation, audio_transcription, audio_speech",
        "mode": "one of: chat, embedding, completion, image_generation, audio_transcription, audio_speech, moderation, rerank",
        "supports_function_calling": true,
        "supports_parallel_function_calling": true,
        "supports_vision": true,

@@ -931,7 +931,7 @@

        "input_cost_per_token": 0.000000,
        "output_cost_per_token": 0.000000,
        "litellm_provider": "openai",
        "mode": "moderations"
        "mode": "moderation"
    },
    "text-moderation-007": {
        "max_tokens": 32768,

@@ -940,7 +940,7 @@

        "input_cost_per_token": 0.000000,
        "output_cost_per_token": 0.000000,
        "litellm_provider": "openai",
        "mode": "moderations"
        "mode": "moderation"
    },
    "text-moderation-latest": {
        "max_tokens": 32768,

@@ -949,7 +949,7 @@

        "input_cost_per_token": 0.000000,
        "output_cost_per_token": 0.000000,
        "litellm_provider": "openai",
        "mode": "moderations"
        "mode": "moderation"
    },
    "256-x-256/dall-e-2": {
        "mode": "image_generation",

@@ -1625,13 +1625,23 @@

        "max_tokens": 8192,
        "max_input_tokens": 128000,
        "max_output_tokens": 8192,
        "input_cost_per_token": 0.0,
        "input_cost_per_token_cache_hit": 0.0,
        "output_cost_per_token": 0.0,
        "input_cost_per_token": 0.00000135,
        "output_cost_per_token": 0.0000054,
        "litellm_provider": "azure_ai",
        "mode": "chat",
        "supports_prompt_caching": true,
        "supports_tool_choice": true
        "supports_tool_choice": true,
        "source": "https://techcommunity.microsoft.com/blog/machinelearningblog/deepseek-r1-improved-performance-higher-limits-and-transparent-pricing/4386367"
    },
    "azure_ai/deepseek-v3": {
        "max_tokens": 8192,
        "max_input_tokens": 128000,
        "max_output_tokens": 8192,
        "input_cost_per_token": 0.00000114,
        "output_cost_per_token": 0.00000456,
        "litellm_provider": "azure_ai",
        "mode": "chat",
        "supports_tool_choice": true,
        "source": "https://techcommunity.microsoft.com/blog/machinelearningblog/announcing-deepseek-v3-on-azure-ai-foundry-and-github/4390438"
    },
    "azure_ai/jamba-instruct": {
        "max_tokens": 4096,

@@ -1643,6 +1653,17 @@

        "mode": "chat",
        "supports_tool_choice": true
    },
    "azure_ai/mistral-nemo": {
        "max_tokens": 4096,
        "max_input_tokens": 131072,
        "max_output_tokens": 4096,
        "input_cost_per_token": 0.00000015,
        "output_cost_per_token": 0.00000015,
        "litellm_provider": "azure_ai",
        "mode": "chat",
        "supports_function_calling": true,
        "source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/000-000.mistral-nemo-12b-2407?tab=PlansAndPrice"
    },
    "azure_ai/mistral-large": {
        "max_tokens": 8191,
        "max_input_tokens": 32000,

@@ -1770,10 +1791,34 @@

        "source": "https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-405b-instruct-offer?tab=PlansAndPrice",
        "supports_tool_choice": true
    },
    "azure_ai/Phi-4": {
    "azure_ai/Phi-4-mini-instruct": {
        "max_tokens": 4096,
        "max_input_tokens": 128000,
        "max_input_tokens": 131072,
        "max_output_tokens": 4096,
        "input_cost_per_token": 0,
        "output_cost_per_token": 0,
        "litellm_provider": "azure_ai",
        "mode": "chat",
        "supports_function_calling": true,
        "source": "https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/models-featured#microsoft"
    },
    "azure_ai/Phi-4-multimodal-instruct": {
        "max_tokens": 4096,
        "max_input_tokens": 131072,
        "max_output_tokens": 4096,
        "input_cost_per_token": 0,
        "output_cost_per_token": 0,
        "litellm_provider": "azure_ai",
        "mode": "chat",
        "supports_audio_input": true,
        "supports_function_calling": true,
        "supports_vision": true,
        "source": "https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/models-featured#microsoft"
    },
    "azure_ai/Phi-4": {
        "max_tokens": 16384,
        "max_input_tokens": 16384,
        "max_output_tokens": 16384,
        "input_cost_per_token": 0.000000125,
        "output_cost_per_token": 0.0000005,
        "litellm_provider": "azure_ai",

@@ -1994,8 +2039,8 @@

        "max_tokens": 8191,
        "max_input_tokens": 32000,
        "max_output_tokens": 8191,
        "input_cost_per_token": 0.000001,
        "output_cost_per_token": 0.000003,
        "input_cost_per_token": 0.0000001,
        "output_cost_per_token": 0.0000003,
        "litellm_provider": "mistral",
        "supports_function_calling": true,
        "mode": "chat",

@@ -2006,8 +2051,8 @@

        "max_tokens": 8191,
        "max_input_tokens": 32000,
        "max_output_tokens": 8191,
        "input_cost_per_token": 0.000001,
        "output_cost_per_token": 0.000003,
        "input_cost_per_token": 0.0000001,
        "output_cost_per_token": 0.0000003,
        "litellm_provider": "mistral",
        "supports_function_calling": true,
        "mode": "chat",

@@ -3892,31 +3937,6 @@

        "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing",
        "supports_tool_choice": true
    },
    "gemini/gemini-2.0-flash": {
        "max_tokens": 8192,
        "max_input_tokens": 1048576,
        "max_output_tokens": 8192,
        "max_images_per_prompt": 3000,
        "max_videos_per_prompt": 10,
        "max_video_length": 1,
        "max_audio_length_hours": 8.4,
        "max_audio_per_prompt": 1,
        "max_pdf_size_mb": 30,
        "input_cost_per_audio_token": 0.0000007,
        "input_cost_per_token": 0.0000001,
        "output_cost_per_token": 0.0000004,
        "litellm_provider": "gemini",
        "mode": "chat",
        "rpm": 10000,
        "tpm": 10000000,
        "supports_system_messages": true,
        "supports_function_calling": true,
        "supports_vision": true,
        "supports_response_schema": true,
        "supports_audio_output": true,
        "supports_tool_choice": true,
        "source": "https://ai.google.dev/pricing#2_0flash"
    },
    "gemini-2.0-flash-001": {
        "max_tokens": 8192,
        "max_input_tokens": 1048576,

@@ -4008,6 +4028,69 @@

        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
        "supports_tool_choice": true
    },
    "gemini/gemini-2.0-pro-exp-02-05": {
        "max_tokens": 8192,
        "max_input_tokens": 2097152,
        "max_output_tokens": 8192,
        "max_images_per_prompt": 3000,
        "max_videos_per_prompt": 10,
        "max_video_length": 1,
        "max_audio_length_hours": 8.4,
        "max_audio_per_prompt": 1,
        "max_pdf_size_mb": 30,
        "input_cost_per_image": 0,
        "input_cost_per_video_per_second": 0,
        "input_cost_per_audio_per_second": 0,
        "input_cost_per_token": 0,
        "input_cost_per_character": 0,
        "input_cost_per_token_above_128k_tokens": 0,
        "input_cost_per_character_above_128k_tokens": 0,
        "input_cost_per_image_above_128k_tokens": 0,
        "input_cost_per_video_per_second_above_128k_tokens": 0,
        "input_cost_per_audio_per_second_above_128k_tokens": 0,
        "output_cost_per_token": 0,
        "output_cost_per_character": 0,
        "output_cost_per_token_above_128k_tokens": 0,
        "output_cost_per_character_above_128k_tokens": 0,
        "litellm_provider": "gemini",
        "mode": "chat",
        "rpm": 2,
        "tpm": 1000000,
        "supports_system_messages": true,
        "supports_function_calling": true,
        "supports_vision": true,
        "supports_audio_input": true,
        "supports_video_input": true,
        "supports_pdf_input": true,
        "supports_response_schema": true,
        "supports_tool_choice": true,
        "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
    },
    "gemini/gemini-2.0-flash": {
        "max_tokens": 8192,
        "max_input_tokens": 1048576,
        "max_output_tokens": 8192,
        "max_images_per_prompt": 3000,
        "max_videos_per_prompt": 10,
        "max_video_length": 1,
        "max_audio_length_hours": 8.4,
        "max_audio_per_prompt": 1,
        "max_pdf_size_mb": 30,
        "input_cost_per_audio_token": 0.0000007,
        "input_cost_per_token": 0.0000001,
        "output_cost_per_token": 0.0000004,
        "litellm_provider": "gemini",
        "mode": "chat",
        "rpm": 10000,
        "tpm": 10000000,
        "supports_system_messages": true,
        "supports_function_calling": true,
        "supports_vision": true,
        "supports_response_schema": true,
        "supports_audio_output": true,
        "supports_tool_choice": true,
        "source": "https://ai.google.dev/pricing#2_0flash"
    },
    "gemini/gemini-2.0-flash-001": {
        "max_tokens": 8192,
        "max_input_tokens": 1048576,

@@ -4511,6 +4594,12 @@

        "mode": "image_generation",
        "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
    },
    "vertex_ai/imagen-3.0-generate-002": {
        "output_cost_per_image": 0.04,
        "litellm_provider": "vertex_ai-image-models",
        "mode": "image_generation",
        "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
    },
    "vertex_ai/imagen-3.0-generate-001": {
        "output_cost_per_image": 0.04,
        "litellm_provider": "vertex_ai-image-models",

@@ -6077,6 +6166,26 @@

        "mode": "chat",
        "supports_tool_choice": true
    },
    "jamba-large-1.6": {
        "max_tokens": 256000,
        "max_input_tokens": 256000,
        "max_output_tokens": 256000,
        "input_cost_per_token": 0.000002,
        "output_cost_per_token": 0.000008,
        "litellm_provider": "ai21",
        "mode": "chat",
        "supports_tool_choice": true
    },
    "jamba-mini-1.6": {
        "max_tokens": 256000,
        "max_input_tokens": 256000,
        "max_output_tokens": 256000,
        "input_cost_per_token": 0.0000002,
        "output_cost_per_token": 0.0000004,
        "litellm_provider": "ai21",
        "mode": "chat",
        "supports_tool_choice": true
    },
    "j2-mid": {
        "max_tokens": 8192,
        "max_input_tokens": 8192,

@@ -7463,6 +7572,18 @@

        "litellm_provider": "bedrock",
        "mode": "embedding"
    },
    "us.deepseek.r1-v1:0": {
        "max_tokens": 4096,
        "max_input_tokens": 128000,
        "max_output_tokens": 4096,
        "input_cost_per_token": 0.00000135,
        "output_cost_per_token": 0.0000054,
        "litellm_provider": "bedrock_converse",
        "mode": "chat",
        "supports_function_calling": false,
        "supports_tool_choice": false
    },
    "meta.llama3-3-70b-instruct-v1:0": {
        "max_tokens": 4096,
        "max_input_tokens": 128000,
File diff suppressed because one or more lines are too long (7 files)
|
@ -1 +1 @@
|
|||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-524b80e1a6b8bb06.js" async=""></script><script src="/ui/_next/static/chunks/117-883150efc583d711.js" async=""></script><script src="/ui/_next/static/chunks/main-app-475d6efe4080647d.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/f41c66e22715ab00.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[92222,[\"665\",\"static/chunks/3014691f-0b72c78cfebbd712.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"441\",\"static/chunks/441-79926bf2b9d89e04.js\",\"261\",\"static/chunks/261-e48c2ac6ff0b811c.js\",\"899\",\"static/chunks/899-354f59ecde307dfa.js\",\"914\",\"static/chunks/914-e17acab83d0eadb5.js\",\"250\",\"static/chunks/250-51513f2f6dabf571.js\",\"699\",\"static/chunks/699-6b82f8e7b98ca1a3.js\",\"931\",\"static/chunks/app/page-b36633214e76cfd1.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"rCxUxULLkHhl5KoPY9DHv\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/f41c66e22715ab00.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html>
|
||||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-524b80e1a6b8bb06.js" async=""></script><script src="/ui/_next/static/chunks/117-883150efc583d711.js" async=""></script><script src="/ui/_next/static/chunks/main-app-475d6efe4080647d.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/b6d997482399c7e1.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[62177,[\"665\",\"static/chunks/3014691f-0b72c78cfebbd712.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"441\",\"static/chunks/441-79926bf2b9d89e04.js\",\"261\",\"static/chunks/261-cb27c20c4f8ec4c6.js\",\"899\",\"static/chunks/899-354f59ecde307dfa.js\",\"157\",\"static/chunks/157-cf7bc8b3ae1b80ba.js\",\"250\",\"static/chunks/250-51513f2f6dabf571.js\",\"699\",\"static/chunks/699-6b82f8e7b98ca1a3.js\",\"931\",\"static/chunks/app/page-a25b75c267486fe2.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"i92Qc9kkJSCtCgV3DDmdu\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/b6d997482399c7e1.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html>
|
|
@ -1,7 +1,7 @@
(Regenerated Next.js RSC payload — only the build ID (rCxUxULLkHhl5KoPY9DHv → i92Qc9kkJSCtCgV3DDmdu), module/chunk references, and CSS hash (f41c66e22715ab00 → b6d997482399c7e1) changed; the rendered markup is unchanged.)
|
||||
|
|
|
@ -1,7 +1,7 @@
(Regenerated Next.js RSC payload for /model_hub — same build ID, chunk reference, and CSS hash updates as above; the rendered markup is unchanged.)
|
||||
|
|
1
litellm/proxy/_experimental/out/onboarding.html
Normal file
File diff suppressed because one or more lines are too long
|
@ -2,6 +2,6 @@
(Regenerated Next.js RSC payload for /onboarding — same build ID, chunk reference, and CSS hash updates as above; the rendered markup is unchanged.)
|
||||
|
|
|
@ -1,5 +1,17 @@
|
|||
model_list:
|
||||
- model_name: amazon.nova-canvas-v1:0
|
||||
- model_name: gpt-3.5-turbo
|
||||
litellm_params:
|
||||
model: bedrock/amazon.nova-canvas-v1:0
|
||||
aws_region_name: "us-east-1"
|
||||
model: gpt-3.5-turbo
|
||||
- model_name: gpt-4o
|
||||
litellm_params:
|
||||
model: azure/gpt-4o
|
||||
api_key: os.environ/AZURE_API_KEY
|
||||
api_base: os.environ/AZURE_API_BASE
|
||||
- model_name: fake-openai-endpoint-5
|
||||
litellm_params:
|
||||
model: openai/my-fake-model
|
||||
api_key: my-fake-key
|
||||
api_base: https://exampleopenaiendpoint-production.up.railway.app/
|
||||
timeout: 1
|
||||
litellm_settings:
|
||||
fallbacks: [{"gpt-3.5-turbo": ["gpt-4o"]}]
|
||||
|
|
|
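For context, a hedged sketch of what the `fallbacks` setting above does at the SDK level — the `Router` usage here is illustrative (the proxy wires this up internally from the YAML), with the model list trimmed from the config above:

```python
from litellm import Router

# Sketch: with the config above, a failed "gpt-3.5-turbo" call (e.g. the
# fake endpoint timing out after 1s) is retried against "gpt-4o".
router = Router(
    model_list=[
        {"model_name": "gpt-3.5-turbo", "litellm_params": {"model": "gpt-3.5-turbo"}},
        {
            "model_name": "gpt-4o",
            "litellm_params": {
                "model": "azure/gpt-4o",
                "api_key": "os.environ/AZURE_API_KEY",
                "api_base": "os.environ/AZURE_API_BASE",
            },
        },
    ],
    fallbacks=[{"gpt-3.5-turbo": ["gpt-4o"]}],
)

response = router.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
)
```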
@ -1994,13 +1994,14 @@ class ProxyException(Exception):
|
|||
message: str,
|
||||
type: str,
|
||||
param: Optional[str],
|
||||
code: Optional[Union[int, str]] = None,
|
||||
code: Optional[Union[int, str]] = None, # maps to status code
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
openai_code: Optional[str] = None, # maps to 'code' in openai
|
||||
):
|
||||
self.message = str(message)
|
||||
self.type = type
|
||||
self.param = param
|
||||
|
||||
self.openai_code = openai_code or code
|
||||
# Per the official Python OpenAI library, the code should be a string:
|
||||
# https://github.com/openai/openai-python/blob/195c05a64d39c87b2dfdf1eca2d339597f1fce03/src/openai/types/shared/error_object.py#L11
|
||||
# Related LiteLLM issue: https://github.com/BerriAI/litellm/discussions/4834
|
||||
|
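A minimal usage sketch of the updated constructor (argument values are illustrative; the fallback behavior is the `self.openai_code = openai_code or code` line shown above):

```python
from litellm.proxy._types import ProxyException

# `code` maps to the HTTP status code; `openai_code` maps to the string
# 'code' field OpenAI clients expect, and falls back to `code` when omitted.
exc = ProxyException(
    message="Invalid request",
    type="invalid_request_error",
    param="model",
    code=400,
)
assert exc.openai_code == 400  # defaulted from `code`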
@ -2054,6 +2055,7 @@ class ProxyErrorTypes(str, enum.Enum):
|
|||
budget_exceeded = "budget_exceeded"
|
||||
key_model_access_denied = "key_model_access_denied"
|
||||
team_model_access_denied = "team_model_access_denied"
|
||||
user_model_access_denied = "user_model_access_denied"
|
||||
expired_key = "expired_key"
|
||||
auth_error = "auth_error"
|
||||
internal_server_error = "internal_server_error"
|
||||
|
@ -2062,6 +2064,20 @@ class ProxyErrorTypes(str, enum.Enum):
|
|||
validation_error = "bad_request_error"
|
||||
cache_ping_error = "cache_ping_error"
|
||||
|
||||
@classmethod
|
||||
def get_model_access_error_type_for_object(
|
||||
cls, object_type: Literal["key", "user", "team"]
|
||||
) -> "ProxyErrorTypes":
|
||||
"""
|
||||
Get the model access error type for object_type
|
||||
"""
|
||||
if object_type == "key":
|
||||
return cls.key_model_access_denied
|
||||
elif object_type == "team":
|
||||
return cls.team_model_access_denied
|
||||
elif object_type == "user":
|
||||
return cls.user_model_access_denied
|
||||
|
||||
|
||||
DB_CONNECTION_ERROR_TYPES = (httpx.ConnectError, httpx.ReadError, httpx.ReadTimeout)
|
||||
|
||||
|
|
|
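Usage of the new classmethod is a straight mapping from the object that failed the check to its error variant:

```python
from litellm.proxy._types import ProxyErrorTypes

assert (
    ProxyErrorTypes.get_model_access_error_type_for_object(object_type="key")
    == ProxyErrorTypes.key_model_access_denied
)
assert (
    ProxyErrorTypes.get_model_access_error_type_for_object(object_type="team")
    == ProxyErrorTypes.team_model_access_denied
)
```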
@ -98,12 +98,19 @@ async def common_checks(
|
|||
)
|
||||
|
||||
# 2. If team can call model
|
||||
_team_model_access_check(
|
||||
team_object=team_object,
|
||||
model=_model,
|
||||
llm_router=llm_router,
|
||||
team_model_aliases=valid_token.team_model_aliases if valid_token else None,
|
||||
)
|
||||
if _model and team_object:
|
||||
if not await can_team_access_model(
|
||||
model=_model,
|
||||
team_object=team_object,
|
||||
llm_router=llm_router,
|
||||
team_model_aliases=valid_token.team_model_aliases if valid_token else None,
|
||||
):
|
||||
raise ProxyException(
|
||||
message=f"Team not allowed to access model. Team={team_object.team_id}, Model={_model}. Allowed team models = {team_object.models}",
|
||||
type=ProxyErrorTypes.team_model_access_denied,
|
||||
param="model",
|
||||
code=status.HTTP_401_UNAUTHORIZED,
|
||||
)
|
||||
|
||||
## 2.1 If user can call model (if personal key)
|
||||
if team_object is None and user_object is not None:
|
||||
|
@ -971,10 +978,18 @@ async def _can_object_call_model(
|
|||
llm_router: Optional[Router],
|
||||
models: List[str],
|
||||
team_model_aliases: Optional[Dict[str, str]] = None,
|
||||
object_type: Literal["user", "team", "key"] = "user",
|
||||
) -> Literal[True]:
|
||||
"""
|
||||
Checks if token can call a given model
|
||||
|
||||
Args:
|
||||
- model: str
|
||||
- llm_router: Optional[Router]
|
||||
- models: List[str]
|
||||
- team_model_aliases: Optional[Dict[str, str]]
|
||||
- object_type: Literal["user", "team", "key"]. We use the object type to raise the correct exception type
|
||||
|
||||
Returns:
|
||||
- True: if token allowed to call model
|
||||
|
||||
|
@ -1018,10 +1033,15 @@ async def _can_object_call_model(
|
|||
if (len(filtered_models) == 0 and len(models) == 0) or "*" in filtered_models:
|
||||
all_model_access = True
|
||||
|
||||
if SpecialModelNames.all_proxy_models.value in filtered_models:
|
||||
all_model_access = True
|
||||
|
||||
if model is not None and model not in filtered_models and all_model_access is False:
|
||||
raise ProxyException(
|
||||
message=f"API Key not allowed to access model. This token can only access models={models}. Tried to access {model}",
|
||||
type=ProxyErrorTypes.key_model_access_denied,
|
||||
message=f"{object_type} not allowed to access model. This {object_type} can only access models={models}. Tried to access {model}",
|
||||
type=ProxyErrorTypes.get_model_access_error_type_for_object(
|
||||
object_type=object_type
|
||||
),
|
||||
param="model",
|
||||
code=status.HTTP_401_UNAUTHORIZED,
|
||||
)
|
||||
|
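The decision above reduces to roughly the following standalone sketch — `sketch_can_access` is a hypothetical helper, and the access-group and team-alias branches of the full function are omitted:

```python
from typing import List, Optional

def sketch_can_access(model: Optional[str], allowed_models: List[str]) -> bool:
    # Empty allow-list, "*", or the all-proxy-models alias grants everything;
    # otherwise the model must be listed explicitly.
    all_model_access = (
        len(allowed_models) == 0
        or "*" in allowed_models
        or "all-proxy-models" in allowed_models
    )
    return all_model_access or model is None or model in allowed_models

assert sketch_can_access("gpt-4o", [])                     # no restrictions set
assert sketch_can_access("gpt-4o", ["*"])                  # wildcard access
assert not sketch_can_access("gpt-4o", ["gpt-3.5-turbo"])  # denied -> ProxyException
```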
@ -1072,6 +1092,26 @@ async def can_key_call_model(
|
|||
llm_router=llm_router,
|
||||
models=valid_token.models,
|
||||
team_model_aliases=valid_token.team_model_aliases,
|
||||
object_type="key",
|
||||
)
|
||||
|
||||
|
||||
async def can_team_access_model(
|
||||
model: str,
|
||||
team_object: Optional[LiteLLM_TeamTable],
|
||||
llm_router: Optional[Router],
|
||||
team_model_aliases: Optional[Dict[str, str]] = None,
|
||||
) -> Literal[True]:
|
||||
"""
|
||||
Returns True if the team can access a specific model.
|
||||
|
||||
"""
|
||||
return await _can_object_call_model(
|
||||
model=model,
|
||||
llm_router=llm_router,
|
||||
models=team_object.models if team_object else [],
|
||||
team_model_aliases=team_model_aliases,
|
||||
object_type="team",
|
||||
)
|
||||
|
||||
|
||||
|
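A hedged call-site sketch — the `LiteLLM_TeamTable` construction is illustrative, setting only the fields the check reads:

```python
import asyncio
from litellm.proxy._types import LiteLLM_TeamTable
from litellm.proxy.auth.auth_checks import can_team_access_model

async def main() -> None:
    team = LiteLLM_TeamTable(team_id="team-1", models=["gpt-4o"])
    # Returns True when allowed; raises ProxyException with
    # team_model_access_denied (HTTP 401) otherwise.
    allowed = await can_team_access_model(
        model="gpt-4o",
        team_object=team,
        llm_router=None,
        team_model_aliases=None,
    )
    print(allowed)

asyncio.run(main())
```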
@ -1096,6 +1136,7 @@ async def can_user_call_model(
|
|||
model=model,
|
||||
llm_router=llm_router,
|
||||
models=user_object.models,
|
||||
object_type="user",
|
||||
)
|
||||
|
||||
|
||||
|
@ -1248,53 +1289,6 @@ async def _team_max_budget_check(
|
|||
)
|
||||
|
||||
|
||||
def _team_model_access_check(
|
||||
model: Optional[str],
|
||||
team_object: Optional[LiteLLM_TeamTable],
|
||||
llm_router: Optional[Router],
|
||||
team_model_aliases: Optional[Dict[str, str]] = None,
|
||||
):
|
||||
"""
|
||||
Access check for team models
|
||||
Raises:
|
||||
Exception if the team is not allowed to call the `model`
|
||||
"""
|
||||
if (
|
||||
model is not None
|
||||
and team_object is not None
|
||||
and team_object.models is not None
|
||||
and len(team_object.models) > 0
|
||||
and model not in team_object.models
|
||||
):
|
||||
# this means the team has access to all models on the proxy
|
||||
if "all-proxy-models" in team_object.models or "*" in team_object.models:
|
||||
# this means the team has access to all models on the proxy
|
||||
pass
|
||||
# check if the team model is an access_group
|
||||
elif (
|
||||
model_in_access_group(
|
||||
model=model, team_models=team_object.models, llm_router=llm_router
|
||||
)
|
||||
is True
|
||||
):
|
||||
pass
|
||||
elif model and "*" in model:
|
||||
pass
|
||||
elif _model_in_team_aliases(model=model, team_model_aliases=team_model_aliases):
|
||||
pass
|
||||
elif _model_matches_any_wildcard_pattern_in_list(
|
||||
model=model, allowed_model_list=team_object.models
|
||||
):
|
||||
pass
|
||||
else:
|
||||
raise ProxyException(
|
||||
message=f"Team not allowed to access model. Team={team_object.team_id}, Model={model}. Allowed team models = {team_object.models}",
|
||||
type=ProxyErrorTypes.team_model_access_denied,
|
||||
param="model",
|
||||
code=status.HTTP_401_UNAUTHORIZED,
|
||||
)
|
||||
|
||||
|
||||
def is_model_allowed_by_pattern(model: str, allowed_model_pattern: str) -> bool:
|
||||
"""
|
||||
Check if a model matches an allowed pattern.
|
||||
|
|
|
@ -33,6 +33,7 @@ from litellm.proxy._types import (
|
|||
ScopeMapping,
|
||||
Span,
|
||||
)
|
||||
from litellm.proxy.auth.auth_checks import can_team_access_model
|
||||
from litellm.proxy.utils import PrismaClient, ProxyLogging
|
||||
|
||||
from .auth_checks import (
|
||||
|
@ -344,32 +345,38 @@ class JWTHandler:
|
|||
if keys_url is None:
|
||||
raise Exception("Missing JWT Public Key URL from environment.")
|
||||
|
||||
cached_keys = await self.user_api_key_cache.async_get_cache(
|
||||
"litellm_jwt_auth_keys"
|
||||
)
|
||||
if cached_keys is None:
|
||||
response = await self.http_handler.get(keys_url)
|
||||
keys_url_list = [url.strip() for url in keys_url.split(",")]
|
||||
|
||||
response_json = response.json()
|
||||
if "keys" in response_json:
|
||||
keys: JWKKeyValue = response.json()["keys"]
|
||||
for key_url in keys_url_list:
|
||||
|
||||
cache_key = f"litellm_jwt_auth_keys_{key_url}"
|
||||
|
||||
cached_keys = await self.user_api_key_cache.async_get_cache(cache_key)
|
||||
|
||||
if cached_keys is None:
|
||||
response = await self.http_handler.get(key_url)
|
||||
|
||||
response_json = response.json()
|
||||
if "keys" in response_json:
|
||||
keys: JWKKeyValue = response.json()["keys"]
|
||||
else:
|
||||
keys = response_json
|
||||
|
||||
await self.user_api_key_cache.async_set_cache(
|
||||
key=cache_key,
|
||||
value=keys,
|
||||
ttl=self.litellm_jwtauth.public_key_ttl, # cache for 10 mins
|
||||
)
|
||||
else:
|
||||
keys = response_json
|
||||
keys = cached_keys
|
||||
|
||||
await self.user_api_key_cache.async_set_cache(
|
||||
key="litellm_jwt_auth_keys",
|
||||
value=keys,
|
||||
ttl=self.litellm_jwtauth.public_key_ttl, # cache for 10 mins
|
||||
)
|
||||
else:
|
||||
keys = cached_keys
|
||||
public_key = self.parse_keys(keys=keys, kid=kid)
|
||||
if public_key is not None:
|
||||
return cast(dict, public_key)
|
||||
|
||||
public_key = self.parse_keys(keys=keys, kid=kid)
|
||||
if public_key is None:
|
||||
raise Exception(
|
||||
f"No matching public key found. kid={kid}, keys_url={keys_url}, cached_keys={cached_keys}, len(keys)={len(keys)}"
|
||||
)
|
||||
return cast(dict, public_key)
|
||||
raise Exception(
|
||||
f"No matching public key found. keys={keys_url_list}, kid={kid}"
|
||||
)
|
||||
|
||||
def parse_keys(self, keys: JWKKeyValue, kid: Optional[str]) -> Optional[JWTKeyItem]:
|
||||
public_key: Optional[JWTKeyItem] = None
|
||||
|
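The new per-URL caching, reduced to a standalone sketch — the cache is a plain dict here, `parse_keys` is collapsed into a `kid` lookup, and TTL handling is omitted:

```python
import httpx

async def get_public_key(keys_url: str, kid: str, cache: dict) -> dict:
    # Each URL in the comma-separated list gets its own cache entry, so one
    # issuer's keys no longer clobber another's under a shared cache key.
    for key_url in (url.strip() for url in keys_url.split(",")):
        cache_key = f"litellm_jwt_auth_keys_{key_url}"
        keys = cache.get(cache_key)
        if keys is None:
            async with httpx.AsyncClient() as client:
                response_json = (await client.get(key_url)).json()
            keys = response_json.get("keys", response_json)
            cache[cache_key] = keys  # the real code sets ttl=public_key_ttl
        for key in keys:
            if key.get("kid") == kid:
                return key
    raise Exception(f"No matching public key found. keys={keys_url}, kid={kid}")
```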
@ -723,8 +730,12 @@ class JWTAuthManager:
|
|||
team_models = team_object.models
|
||||
if isinstance(team_models, list) and (
|
||||
not requested_model
|
||||
or requested_model in team_models
|
||||
or "*" in team_models
|
||||
or can_team_access_model(
|
||||
model=requested_model,
|
||||
team_object=team_object,
|
||||
llm_router=None,
|
||||
team_model_aliases=None,
|
||||
)
|
||||
):
|
||||
is_allowed = allowed_routes_check(
|
||||
user_role=LitellmUserRoles.TEAM,
|
||||
|
|
|
@ -365,6 +365,8 @@ async def user_info(
|
|||
and user_api_key_dict.user_role == LitellmUserRoles.PROXY_ADMIN
|
||||
):
|
||||
return await _get_user_info_for_proxy_admin()
|
||||
elif user_id is None:
|
||||
user_id = user_api_key_dict.user_id
|
||||
## GET USER ROW ##
|
||||
if user_id is not None:
|
||||
user_info = await prisma_client.get_data(user_id=user_id)
|
||||
|
@ -373,10 +375,6 @@ async def user_info(
|
|||
## GET ALL TEAMS ##
|
||||
team_list = []
|
||||
team_id_list = []
|
||||
# get all teams user belongs to
|
||||
# teams_1 = await prisma_client.get_data(
|
||||
# user_id=user_id, table_name="team", query_type="find_all"
|
||||
# )
|
||||
from litellm.proxy.management_endpoints.team_endpoints import list_team
|
||||
|
||||
teams_1 = await list_team(
|
||||
|
|
|
@ -3716,6 +3716,7 @@ async def chat_completion( # noqa: PLR0915
|
|||
message=getattr(e, "message", error_msg),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
openai_code=getattr(e, "code", None),
|
||||
code=getattr(e, "status_code", 500),
|
||||
headers=headers,
|
||||
)
|
||||
|
@ -3929,6 +3930,7 @@ async def completion( # noqa: PLR0915
|
|||
message=getattr(e, "message", error_msg),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
openai_code=getattr(e, "code", None),
|
||||
code=getattr(e, "status_code", 500),
|
||||
)
|
||||
|
||||
|
@ -4138,6 +4140,7 @@ async def embeddings( # noqa: PLR0915
|
|||
message=getattr(e, "message", error_msg),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
openai_code=getattr(e, "code", None),
|
||||
code=getattr(e, "status_code", 500),
|
||||
)
|
||||
|
||||
|
@ -4257,6 +4260,7 @@ async def image_generation(
|
|||
message=getattr(e, "message", error_msg),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
openai_code=getattr(e, "code", None),
|
||||
code=getattr(e, "status_code", 500),
|
||||
)
|
||||
|
||||
|
@ -4518,6 +4522,7 @@ async def audio_transcriptions(
|
|||
message=getattr(e, "message", error_msg),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
openai_code=getattr(e, "code", None),
|
||||
code=getattr(e, "status_code", 500),
|
||||
)
|
||||
|
||||
|
@ -4667,6 +4672,7 @@ async def get_assistants(
|
|||
message=getattr(e, "message", error_msg),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
openai_code=getattr(e, "code", None),
|
||||
code=getattr(e, "status_code", 500),
|
||||
)
|
||||
|
||||
|
@ -4765,7 +4771,7 @@ async def create_assistant(
|
|||
message=getattr(e, "message", error_msg),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
code=getattr(e, "status_code", 500),
|
||||
code=getattr(e, "code", getattr(e, "status_code", 500)),
|
||||
)
|
||||
|
||||
|
||||
|
@ -4862,7 +4868,7 @@ async def delete_assistant(
|
|||
message=getattr(e, "message", error_msg),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
code=getattr(e, "status_code", 500),
|
||||
code=getattr(e, "code", getattr(e, "status_code", 500)),
|
||||
)
|
||||
|
||||
|
||||
|
@ -4959,7 +4965,7 @@ async def create_threads(
|
|||
message=getattr(e, "message", error_msg),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
code=getattr(e, "status_code", 500),
|
||||
code=getattr(e, "code", getattr(e, "status_code", 500)),
|
||||
)
|
||||
|
||||
|
||||
|
@ -5055,7 +5061,7 @@ async def get_thread(
|
|||
message=getattr(e, "message", error_msg),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
code=getattr(e, "status_code", 500),
|
||||
code=getattr(e, "code", getattr(e, "status_code", 500)),
|
||||
)
|
||||
|
||||
|
||||
|
@ -5154,7 +5160,7 @@ async def add_messages(
|
|||
message=getattr(e, "message", error_msg),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
code=getattr(e, "status_code", 500),
|
||||
code=getattr(e, "code", getattr(e, "status_code", 500)),
|
||||
)
|
||||
|
||||
|
||||
|
@ -5249,7 +5255,7 @@ async def get_messages(
|
|||
message=getattr(e, "message", error_msg),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
code=getattr(e, "status_code", 500),
|
||||
code=getattr(e, "code", getattr(e, "status_code", 500)),
|
||||
)
|
||||
|
||||
|
||||
|
@ -5358,7 +5364,7 @@ async def run_thread(
|
|||
message=getattr(e, "message", error_msg),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
code=getattr(e, "status_code", 500),
|
||||
code=getattr(e, "code", getattr(e, "status_code", 500)),
|
||||
)
|
||||
|
||||
|
||||
|
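Each handler above now prefers the exception's OpenAI-style `code` before its HTTP `status_code`; a tiny sketch of the fallback chain, with an illustrative exception shape:

```python
class FakeError(Exception):
    code = "invalid_request_error"  # OpenAI-style string code
    status_code = 400               # HTTP status

e = FakeError()
resolved = getattr(e, "code", getattr(e, "status_code", 500))
assert resolved == "invalid_request_error"
```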
|
|
@ -30,6 +30,8 @@ def get_azure_ad_token_provider() -> Callable[[], str]:
|
|||
client_secret=os.environ["AZURE_CLIENT_SECRET"],
|
||||
tenant_id=os.environ["AZURE_TENANT_ID"],
|
||||
)
|
||||
elif cred == "ManagedIdentityCredential":
|
||||
credential = cred_cls(client_id=os.environ["AZURE_CLIENT_ID"])
|
||||
else:
|
||||
credential = cred_cls()
|
||||
|
||||
|
|
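For reference, a hedged sketch of the credential selection this hunk touches (the `AZURE_CREDENTIAL` selector variable is an assumption; the change itself is that `ManagedIdentityCredential` now receives `AZURE_CLIENT_ID` for user-assigned identities):

```python
import os
from azure.identity import (
    ClientSecretCredential,
    DefaultAzureCredential,
    ManagedIdentityCredential,
)

cred = os.environ.get("AZURE_CREDENTIAL", "ClientSecretCredential")  # assumed selector
if cred == "ClientSecretCredential":
    credential = ClientSecretCredential(
        client_id=os.environ["AZURE_CLIENT_ID"],
        client_secret=os.environ["AZURE_CLIENT_SECRET"],
        tenant_id=os.environ["AZURE_TENANT_ID"],
    )
elif cred == "ManagedIdentityCredential":
    # User-assigned managed identity: client_id is now forwarded.
    credential = ManagedIdentityCredential(client_id=os.environ["AZURE_CLIENT_ID"])
else:
    credential = DefaultAzureCredential()
```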
|
@ -6,7 +6,7 @@
|
|||
"input_cost_per_token": 0.0000,
|
||||
"output_cost_per_token": 0.000,
|
||||
"litellm_provider": "one of https://docs.litellm.ai/docs/providers",
|
||||
"mode": "one of chat, embedding, completion, image_generation, audio_transcription, audio_speech",
|
||||
"mode": "one of: chat, embedding, completion, image_generation, audio_transcription, audio_speech, image_generation, moderation, rerank",
|
||||
"supports_function_calling": true,
|
||||
"supports_parallel_function_calling": true,
|
||||
"supports_vision": true,
|
||||
|
@ -931,7 +931,7 @@
|
|||
"input_cost_per_token": 0.000000,
|
||||
"output_cost_per_token": 0.000000,
|
||||
"litellm_provider": "openai",
|
||||
"mode": "moderations"
|
||||
"mode": "moderation"
|
||||
},
|
||||
"text-moderation-007": {
|
||||
"max_tokens": 32768,
|
||||
|
@ -940,7 +940,7 @@
|
|||
"input_cost_per_token": 0.000000,
|
||||
"output_cost_per_token": 0.000000,
|
||||
"litellm_provider": "openai",
|
||||
"mode": "moderations"
|
||||
"mode": "moderation"
|
||||
},
|
||||
"text-moderation-latest": {
|
||||
"max_tokens": 32768,
|
||||
|
@ -949,7 +949,7 @@
|
|||
"input_cost_per_token": 0.000000,
|
||||
"output_cost_per_token": 0.000000,
|
||||
"litellm_provider": "openai",
|
||||
"mode": "moderations"
|
||||
"mode": "moderation"
|
||||
},
|
||||
"256-x-256/dall-e-2": {
|
||||
"mode": "image_generation",
|
||||
|
@ -1625,13 +1625,23 @@
|
|||
"max_tokens": 8192,
|
||||
"max_input_tokens": 128000,
|
||||
"max_output_tokens": 8192,
|
||||
"input_cost_per_token": 0.0,
|
||||
"input_cost_per_token_cache_hit": 0.0,
|
||||
"output_cost_per_token": 0.0,
|
||||
"input_cost_per_token": 0.00000135,
|
||||
"output_cost_per_token": 0.0000054,
|
||||
"litellm_provider": "azure_ai",
|
||||
"mode": "chat",
|
||||
"supports_prompt_caching": true,
|
||||
"supports_tool_choice": true
|
||||
"supports_tool_choice": true,
|
||||
"source": "https://techcommunity.microsoft.com/blog/machinelearningblog/deepseek-r1-improved-performance-higher-limits-and-transparent-pricing/4386367"
|
||||
},
|
||||
"azure_ai/deepseek-v3": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 128000,
|
||||
"max_output_tokens": 8192,
|
||||
"input_cost_per_token": 0.00000114,
|
||||
"output_cost_per_token": 0.00000456,
|
||||
"litellm_provider": "azure_ai",
|
||||
"mode": "chat",
|
||||
"supports_tool_choice": true,
|
||||
"source": "https://techcommunity.microsoft.com/blog/machinelearningblog/announcing-deepseek-v3-on-azure-ai-foundry-and-github/4390438"
|
||||
},
|
||||
"azure_ai/jamba-instruct": {
|
||||
"max_tokens": 4096,
|
||||
|
@ -1643,6 +1653,17 @@
|
|||
"mode": "chat",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
"azure_ai/mistral-nemo": {
|
||||
"max_tokens": 4096,
|
||||
"max_input_tokens": 131072,
|
||||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0.00000015,
|
||||
"output_cost_per_token": 0.00000015,
|
||||
"litellm_provider": "azure_ai",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/000-000.mistral-nemo-12b-2407?tab=PlansAndPrice"
|
||||
},
|
||||
"azure_ai/mistral-large": {
|
||||
"max_tokens": 8191,
|
||||
"max_input_tokens": 32000,
|
||||
|
@ -1770,10 +1791,34 @@
|
|||
"source":"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-405b-instruct-offer?tab=PlansAndPrice",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
"azure_ai/Phi-4": {
|
||||
"azure_ai/Phi-4-mini-instruct": {
|
||||
"max_tokens": 4096,
|
||||
"max_input_tokens": 128000,
|
||||
"max_input_tokens": 131072,
|
||||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0,
|
||||
"output_cost_per_token": 0,
|
||||
"litellm_provider": "azure_ai",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"source": "https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/models-featured#microsoft"
|
||||
},
|
||||
"azure_ai/Phi-4-multimodal-instruct": {
|
||||
"max_tokens": 4096,
|
||||
"max_input_tokens": 131072,
|
||||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0,
|
||||
"output_cost_per_token": 0,
|
||||
"litellm_provider": "azure_ai",
|
||||
"mode": "chat",
|
||||
"supports_audio_input": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"source": "https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/models-featured#microsoft"
|
||||
},
|
||||
"azure_ai/Phi-4": {
|
||||
"max_tokens": 16384,
|
||||
"max_input_tokens": 16384,
|
||||
"max_output_tokens": 16384,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.0000005,
|
||||
"litellm_provider": "azure_ai",
|
||||
|
@ -1994,8 +2039,8 @@
|
|||
"max_tokens": 8191,
|
||||
"max_input_tokens": 32000,
|
||||
"max_output_tokens": 8191,
|
||||
"input_cost_per_token": 0.000001,
|
||||
"output_cost_per_token": 0.000003,
|
||||
"input_cost_per_token": 0.0000001,
|
||||
"output_cost_per_token": 0.0000003,
|
||||
"litellm_provider": "mistral",
|
||||
"supports_function_calling": true,
|
||||
"mode": "chat",
|
||||
|
@ -2006,8 +2051,8 @@
|
|||
"max_tokens": 8191,
|
||||
"max_input_tokens": 32000,
|
||||
"max_output_tokens": 8191,
|
||||
"input_cost_per_token": 0.000001,
|
||||
"output_cost_per_token": 0.000003,
|
||||
"input_cost_per_token": 0.0000001,
|
||||
"output_cost_per_token": 0.0000003,
|
||||
"litellm_provider": "mistral",
|
||||
"supports_function_calling": true,
|
||||
"mode": "chat",
|
||||
|
@ -3892,31 +3937,6 @@
|
|||
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
"gemini/gemini-2.0-flash": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 1048576,
|
||||
"max_output_tokens": 8192,
|
||||
"max_images_per_prompt": 3000,
|
||||
"max_videos_per_prompt": 10,
|
||||
"max_video_length": 1,
|
||||
"max_audio_length_hours": 8.4,
|
||||
"max_audio_per_prompt": 1,
|
||||
"max_pdf_size_mb": 30,
|
||||
"input_cost_per_audio_token": 0.0000007,
|
||||
"input_cost_per_token": 0.0000001,
|
||||
"output_cost_per_token": 0.0000004,
|
||||
"litellm_provider": "gemini",
|
||||
"mode": "chat",
|
||||
"rpm": 10000,
|
||||
"tpm": 10000000,
|
||||
"supports_system_messages": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_audio_output": true,
|
||||
"supports_tool_choice": true,
|
||||
"source": "https://ai.google.dev/pricing#2_0flash"
|
||||
},
|
||||
"gemini-2.0-flash-001": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 1048576,
|
||||
|
@ -4008,6 +4028,69 @@
|
|||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
"gemini/gemini-2.0-pro-exp-02-05": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 2097152,
|
||||
"max_output_tokens": 8192,
|
||||
"max_images_per_prompt": 3000,
|
||||
"max_videos_per_prompt": 10,
|
||||
"max_video_length": 1,
|
||||
"max_audio_length_hours": 8.4,
|
||||
"max_audio_per_prompt": 1,
|
||||
"max_pdf_size_mb": 30,
|
||||
"input_cost_per_image": 0,
|
||||
"input_cost_per_video_per_second": 0,
|
||||
"input_cost_per_audio_per_second": 0,
|
||||
"input_cost_per_token": 0,
|
||||
"input_cost_per_character": 0,
|
||||
"input_cost_per_token_above_128k_tokens": 0,
|
||||
"input_cost_per_character_above_128k_tokens": 0,
|
||||
"input_cost_per_image_above_128k_tokens": 0,
|
||||
"input_cost_per_video_per_second_above_128k_tokens": 0,
|
||||
"input_cost_per_audio_per_second_above_128k_tokens": 0,
|
||||
"output_cost_per_token": 0,
|
||||
"output_cost_per_character": 0,
|
||||
"output_cost_per_token_above_128k_tokens": 0,
|
||||
"output_cost_per_character_above_128k_tokens": 0,
|
||||
"litellm_provider": "gemini",
|
||||
"mode": "chat",
|
||||
"rpm": 2,
|
||||
"tpm": 1000000,
|
||||
"supports_system_messages": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"supports_audio_input": true,
|
||||
"supports_video_input": true,
|
||||
"supports_pdf_input": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_tool_choice": true,
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
|
||||
},
|
||||
"gemini/gemini-2.0-flash": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 1048576,
|
||||
"max_output_tokens": 8192,
|
||||
"max_images_per_prompt": 3000,
|
||||
"max_videos_per_prompt": 10,
|
||||
"max_video_length": 1,
|
||||
"max_audio_length_hours": 8.4,
|
||||
"max_audio_per_prompt": 1,
|
||||
"max_pdf_size_mb": 30,
|
||||
"input_cost_per_audio_token": 0.0000007,
|
||||
"input_cost_per_token": 0.0000001,
|
||||
"output_cost_per_token": 0.0000004,
|
||||
"litellm_provider": "gemini",
|
||||
"mode": "chat",
|
||||
"rpm": 10000,
|
||||
"tpm": 10000000,
|
||||
"supports_system_messages": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_audio_output": true,
|
||||
"supports_tool_choice": true,
|
||||
"source": "https://ai.google.dev/pricing#2_0flash"
|
||||
},
|
||||
"gemini/gemini-2.0-flash-001": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 1048576,
|
||||
|
@ -4511,6 +4594,12 @@
|
|||
"mode": "image_generation",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
|
||||
},
|
||||
"vertex_ai/imagen-3.0-generate-002": {
|
||||
"output_cost_per_image": 0.04,
|
||||
"litellm_provider": "vertex_ai-image-models",
|
||||
"mode": "image_generation",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
|
||||
},
|
||||
"vertex_ai/imagen-3.0-generate-001": {
|
||||
"output_cost_per_image": 0.04,
|
||||
"litellm_provider": "vertex_ai-image-models",
|
||||
|
@ -6077,6 +6166,26 @@
|
|||
"mode": "chat",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
"jamba-large-1.6": {
|
||||
"max_tokens": 256000,
|
||||
"max_input_tokens": 256000,
|
||||
"max_output_tokens": 256000,
|
||||
"input_cost_per_token": 0.000002,
|
||||
"output_cost_per_token": 0.000008,
|
||||
"litellm_provider": "ai21",
|
||||
"mode": "chat",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
"jamba-mini-1.6": {
|
||||
"max_tokens": 256000,
|
||||
"max_input_tokens": 256000,
|
||||
"max_output_tokens": 256000,
|
||||
"input_cost_per_token": 0.0000002,
|
||||
"output_cost_per_token": 0.0000004,
|
||||
"litellm_provider": "ai21",
|
||||
"mode": "chat",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
"j2-mid": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 8192,
|
||||
|
@ -7463,6 +7572,18 @@
|
|||
"litellm_provider": "bedrock",
|
||||
"mode": "embedding"
|
||||
},
|
||||
"us.deepseek.r1-v1:0": {
|
||||
"max_tokens": 4096,
|
||||
"max_input_tokens": 128000,
|
||||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0.00000135,
|
||||
"output_cost_per_token": 0.0000054,
|
||||
"litellm_provider": "bedrock_converse",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": false,
|
||||
"supports_tool_choice": false
|
||||
|
||||
},
|
||||
"meta.llama3-3-70b-instruct-v1:0": {
|
||||
"max_tokens": 4096,
|
||||
"max_input_tokens": 128000,
|
||||
|
|
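A quick arithmetic check against the `us.deepseek.r1-v1:0` pricing added above ($1.35 input / $5.40 output per million tokens):

```python
input_cost_per_token = 0.00000135   # $1.35 / 1M tokens
output_cost_per_token = 0.0000054   # $5.40 / 1M tokens

# e.g. a request with 1,000 prompt tokens and 500 completion tokens:
cost = 1_000 * input_cost_per_token + 500 * output_cost_per_token
assert round(cost, 5) == 0.00405    # $0.00405
```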
|
@ -1,6 +1,6 @@
|
|||
[tool.poetry]
|
||||
name = "litellm"
|
||||
version = "1.63.5"
|
||||
version = "1.63.6"
|
||||
description = "Library to easily interface with LLM API providers"
|
||||
authors = ["BerriAI"]
|
||||
license = "MIT"
|
||||
|
@ -96,7 +96,7 @@ requires = ["poetry-core", "wheel"]
|
|||
build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[tool.commitizen]
|
||||
version = "1.63.5"
|
||||
version = "1.63.6"
|
||||
version_files = [
|
||||
"pyproject.toml:^version"
|
||||
]
|
||||
|
|
|
@ -44,7 +44,7 @@ tiktoken==0.8.0 # for calculating usage
|
|||
importlib-metadata==6.8.0 # for random utils
|
||||
tokenizers==0.20.2 # for calculating usage
|
||||
click==8.1.7 # for proxy cli
|
||||
jinja2==3.1.4 # for prompt templates
|
||||
jinja2==3.1.6 # for prompt templates
|
||||
aiohttp==3.10.2 # for network calls
|
||||
aioboto3==12.3.0 # for async sagemaker calls
|
||||
tenacity==8.2.3 # for retrying requests, when litellm.num_retries set
|
||||
|
|
207
tests/litellm/integrations/test_athina.py
Normal file
|
@ -0,0 +1,207 @@
|
|||
import unittest
|
||||
from unittest.mock import patch, MagicMock, ANY
|
||||
import json
|
||||
import datetime
|
||||
import sys
|
||||
import os
|
||||
|
||||
sys.path.insert(
|
||||
0, os.path.abspath("../..")
|
||||
) # Adds the parent directory to the system path
|
||||
|
||||
from litellm.integrations.athina import AthinaLogger
|
||||
|
||||
class TestAthinaLogger(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
# Set up environment variables for testing
|
||||
self.env_patcher = patch.dict('os.environ', {
|
||||
'ATHINA_API_KEY': 'test-api-key',
|
||||
'ATHINA_BASE_URL': 'https://test.athina.ai'
|
||||
})
|
||||
self.env_patcher.start()
|
||||
self.logger = AthinaLogger()
|
||||
|
||||
# Setup common test variables
|
||||
self.start_time = datetime.datetime(2023, 1, 1, 12, 0, 0)
|
||||
self.end_time = datetime.datetime(2023, 1, 1, 12, 0, 1)
|
||||
self.print_verbose = MagicMock()
|
||||
|
||||
def tearDown(self):
|
||||
self.env_patcher.stop()
|
||||
|
||||
def test_init(self):
|
||||
"""Test the initialization of AthinaLogger"""
|
||||
self.assertEqual(self.logger.athina_api_key, 'test-api-key')
|
||||
self.assertEqual(self.logger.athina_logging_url, 'https://test.athina.ai/api/v1/log/inference')
|
||||
self.assertEqual(self.logger.headers, {
|
||||
'athina-api-key': 'test-api-key',
|
||||
'Content-Type': 'application/json'
|
||||
})
|
||||
|
||||
@patch('litellm.module_level_client.post')
|
||||
def test_log_event_success(self, mock_post):
|
||||
"""Test successful logging of an event"""
|
||||
# Setup mock response
|
||||
mock_response = MagicMock()
|
||||
mock_response.status_code = 200
|
||||
mock_response.text = "Success"
|
||||
mock_post.return_value = mock_response
|
||||
|
||||
# Create test data
|
||||
kwargs = {
|
||||
'model': 'gpt-4',
|
||||
'messages': [{'role': 'user', 'content': 'Hello'}],
|
||||
'stream': False,
|
||||
'litellm_params': {
|
||||
'metadata': {
|
||||
'environment': 'test-environment',
|
||||
'prompt_slug': 'test-prompt',
|
||||
'customer_id': 'test-customer',
|
||||
'customer_user_id': 'test-user',
|
||||
'session_id': 'test-session',
|
||||
'external_reference_id': 'test-ext-ref',
|
||||
'context': 'test-context',
|
||||
'expected_response': 'test-expected',
|
||||
'user_query': 'test-query',
|
||||
'tags': ['test-tag'],
|
||||
'user_feedback': 'test-feedback',
|
||||
'model_options': {'test-opt': 'test-val'},
|
||||
'custom_attributes': {'test-attr': 'test-val'}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
response_obj = MagicMock()
|
||||
response_obj.model_dump.return_value = {
|
||||
'id': 'resp-123',
|
||||
'choices': [{'message': {'content': 'Hi there'}}],
|
||||
'usage': {
|
||||
'prompt_tokens': 10,
|
||||
'completion_tokens': 5,
|
||||
'total_tokens': 15
|
||||
}
|
||||
}
|
||||
|
||||
# Call the method
|
||||
self.logger.log_event(kwargs, response_obj, self.start_time, self.end_time, self.print_verbose)
|
||||
|
||||
# Verify the results
|
||||
mock_post.assert_called_once()
|
||||
call_args = mock_post.call_args
|
||||
self.assertEqual(call_args[0][0], 'https://test.athina.ai/api/v1/log/inference')
|
||||
self.assertEqual(call_args[1]['headers'], self.logger.headers)
|
||||
|
||||
# Parse and verify the sent data
|
||||
sent_data = json.loads(call_args[1]['data'])
|
||||
self.assertEqual(sent_data['language_model_id'], 'gpt-4')
|
||||
self.assertEqual(sent_data['prompt'], kwargs['messages'])
|
||||
self.assertEqual(sent_data['prompt_tokens'], 10)
|
||||
self.assertEqual(sent_data['completion_tokens'], 5)
|
||||
self.assertEqual(sent_data['total_tokens'], 15)
|
||||
self.assertEqual(sent_data['response_time'], 1000) # 1 second = 1000ms
|
||||
self.assertEqual(sent_data['customer_id'], 'test-customer')
|
||||
self.assertEqual(sent_data['session_id'], 'test-session')
|
||||
self.assertEqual(sent_data['environment'], 'test-environment')
|
||||
self.assertEqual(sent_data['prompt_slug'], 'test-prompt')
|
||||
self.assertEqual(sent_data['external_reference_id'], 'test-ext-ref')
|
||||
self.assertEqual(sent_data['context'], 'test-context')
|
||||
self.assertEqual(sent_data['expected_response'], 'test-expected')
|
||||
self.assertEqual(sent_data['user_query'], 'test-query')
|
||||
self.assertEqual(sent_data['tags'], ['test-tag'])
|
||||
self.assertEqual(sent_data['user_feedback'], 'test-feedback')
|
||||
self.assertEqual(sent_data['model_options'], {'test-opt': 'test-val'})
|
||||
self.assertEqual(sent_data['custom_attributes'], {'test-attr': 'test-val'})
|
||||
# Verify the print_verbose was called
|
||||
self.print_verbose.assert_called_once_with("Athina Logger Succeeded - Success")
|
||||
|
||||
@patch('litellm.module_level_client.post')
|
||||
def test_log_event_error_response(self, mock_post):
|
||||
"""Test handling of error response from the API"""
|
||||
# Setup mock error response
|
||||
mock_response = MagicMock()
|
||||
mock_response.status_code = 400
|
||||
mock_response.text = "Bad Request"
|
||||
mock_post.return_value = mock_response
|
||||
|
||||
# Create test data
|
||||
kwargs = {
|
||||
'model': 'gpt-4',
|
||||
'messages': [{'role': 'user', 'content': 'Hello'}],
|
||||
'stream': False
|
||||
}
|
||||
|
||||
response_obj = MagicMock()
|
||||
response_obj.model_dump.return_value = {
|
||||
'id': 'resp-123',
|
||||
'choices': [{'message': {'content': 'Hi there'}}],
|
||||
'usage': {
|
||||
'prompt_tokens': 10,
|
||||
'completion_tokens': 5,
|
||||
'total_tokens': 15
|
||||
}
|
||||
}
|
||||
|
||||
# Call the method
|
||||
self.logger.log_event(kwargs, response_obj, self.start_time, self.end_time, self.print_verbose)
|
||||
|
||||
# Verify print_verbose was called with error message
|
||||
self.print_verbose.assert_called_once_with("Athina Logger Error - Bad Request, 400")
|
||||
|
||||
@patch('litellm.module_level_client.post')
|
||||
def test_log_event_exception(self, mock_post):
|
||||
"""Test handling of exceptions during logging"""
|
||||
# Setup mock to raise exception
|
||||
mock_post.side_effect = Exception("Test exception")
|
||||
|
||||
# Create test data
|
||||
kwargs = {
|
||||
'model': 'gpt-4',
|
||||
'messages': [{'role': 'user', 'content': 'Hello'}],
|
||||
'stream': False
|
||||
}
|
||||
|
||||
response_obj = MagicMock()
|
||||
response_obj.model_dump.return_value = {}
|
||||
|
||||
# Call the method
|
||||
self.logger.log_event(kwargs, response_obj, self.start_time, self.end_time, self.print_verbose)
|
||||
|
||||
# Verify print_verbose was called with exception info
|
||||
self.print_verbose.assert_called_once()
|
||||
self.assertIn("Athina Logger Error - Test exception", self.print_verbose.call_args[0][0])
|
||||
|
||||
@patch('litellm.module_level_client.post')
|
||||
def test_log_event_with_tools(self, mock_post):
|
||||
"""Test logging with tools/functions data"""
|
||||
# Setup mock response
|
||||
mock_response = MagicMock()
|
||||
mock_response.status_code = 200
|
||||
mock_post.return_value = mock_response
|
||||
|
||||
# Create test data with tools
|
||||
kwargs = {
|
||||
'model': 'gpt-4',
|
||||
'messages': [{'role': 'user', 'content': "What's the weather?"}],
|
||||
'stream': False,
|
||||
'optional_params': {
|
||||
'tools': [{'type': 'function', 'function': {'name': 'get_weather'}}]
|
||||
}
|
||||
}
|
||||
|
||||
response_obj = MagicMock()
|
||||
response_obj.model_dump.return_value = {
|
||||
'id': 'resp-123',
|
||||
'usage': {'prompt_tokens': 10, 'completion_tokens': 5, 'total_tokens': 15}
|
||||
}
|
||||
|
||||
# Call the method
|
||||
self.logger.log_event(kwargs, response_obj, self.start_time, self.end_time, self.print_verbose)
|
||||
|
||||
# Verify the results
|
||||
sent_data = json.loads(mock_post.call_args[1]['data'])
|
||||
self.assertEqual(sent_data['tools'], [{'type': 'function', 'function': {'name': 'get_weather'}}])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
108
tests/litellm/test_model_prices_and_context_window_schema.py
Normal file
|
@ -0,0 +1,108 @@
|
|||
import json
|
||||
from jsonschema import validate
|
||||
|
||||
def test_model_prices_and_context_window_json_is_valid():
|
||||
'''
|
||||
Validates the `model_prices_and_context_window.json` file.
|
||||
|
||||
If this test fails after you update the json, you need to update the schema or correct the change you made.
|
||||
'''
|
||||
|
||||
INTENDED_SCHEMA = {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"cache_creation_input_audio_token_cost": {"type": "number"},
|
||||
"cache_creation_input_token_cost": {"type": "number"},
|
||||
"cache_read_input_token_cost": {"type": "number"},
|
||||
"deprecation_date": {"type": "string"},
|
||||
"input_cost_per_audio_per_second": {"type": "number"},
|
||||
"input_cost_per_audio_per_second_above_128k_tokens": {"type": "number"},
|
||||
"input_cost_per_audio_token": {"type": "number"},
|
||||
"input_cost_per_character": {"type": "number"},
|
||||
"input_cost_per_character_above_128k_tokens": {"type": "number"},
|
||||
"input_cost_per_image": {"type": "number"},
|
||||
"input_cost_per_image_above_128k_tokens": {"type": "number"},
|
||||
"input_cost_per_pixel": {"type": "number"},
|
||||
"input_cost_per_query": {"type": "number"},
|
||||
"input_cost_per_request": {"type": "number"},
|
||||
"input_cost_per_second": {"type": "number"},
|
||||
"input_cost_per_token": {"type": "number"},
|
||||
"input_cost_per_token_above_128k_tokens": {"type": "number"},
|
||||
"input_cost_per_token_batch_requests": {"type": "number"},
|
||||
"input_cost_per_token_batches": {"type": "number"},
|
||||
"input_cost_per_token_cache_hit": {"type": "number"},
|
||||
"input_cost_per_video_per_second": {"type": "number"},
|
||||
"input_cost_per_video_per_second_above_128k_tokens": {"type": "number"},
|
||||
"input_dbu_cost_per_token": {"type": "number"},
|
||||
"litellm_provider": {"type": "string"},
|
||||
"max_audio_length_hours": {"type": "number"},
|
||||
"max_audio_per_prompt": {"type": "number"},
|
||||
"max_document_chunks_per_query": {"type": "number"},
|
||||
"max_images_per_prompt": {"type": "number"},
|
||||
"max_input_tokens": {"type": "number"},
|
||||
"max_output_tokens": {"type": "number"},
|
||||
"max_pdf_size_mb": {"type": "number"},
|
||||
"max_query_tokens": {"type": "number"},
|
||||
"max_tokens": {"type": "number"},
|
||||
"max_tokens_per_document_chunk": {"type": "number"},
|
||||
"max_video_length": {"type": "number"},
|
||||
"max_videos_per_prompt": {"type": "number"},
|
||||
"metadata": {"type": "object"},
|
||||
"mode": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"audio_speech",
|
||||
"audio_transcription",
|
||||
"chat",
|
||||
"completion",
|
||||
"embedding",
|
||||
"image_generation",
|
||||
"moderation",
|
||||
"rerank"
|
||||
],
|
||||
},
|
||||
"output_cost_per_audio_token": {"type": "number"},
|
||||
"output_cost_per_character": {"type": "number"},
|
||||
"output_cost_per_character_above_128k_tokens": {"type": "number"},
|
||||
"output_cost_per_image": {"type": "number"},
|
||||
"output_cost_per_pixel": {"type": "number"},
|
||||
"output_cost_per_second": {"type": "number"},
|
||||
"output_cost_per_token": {"type": "number"},
|
||||
"output_cost_per_token_above_128k_tokens": {"type": "number"},
|
||||
"output_cost_per_token_batches": {"type": "number"},
|
||||
"output_db_cost_per_token": {"type": "number"},
|
||||
"output_dbu_cost_per_token": {"type": "number"},
|
||||
"output_vector_size": {"type": "number"},
|
||||
"rpd": {"type": "number"},
|
||||
"rpm": {"type": "number"},
|
||||
"source": {"type": "string"},
|
||||
"supports_assistant_prefill": {"type": "boolean"},
|
||||
"supports_audio_input": {"type": "boolean"},
|
||||
"supports_audio_output": {"type": "boolean"},
|
||||
"supports_embedding_image_input": {"type": "boolean"},
|
||||
"supports_function_calling": {"type": "boolean"},
|
||||
"supports_image_input": {"type": "boolean"},
|
||||
"supports_parallel_function_calling": {"type": "boolean"},
|
||||
"supports_pdf_input": {"type": "boolean"},
|
||||
"supports_prompt_caching": {"type": "boolean"},
|
||||
"supports_response_schema": {"type": "boolean"},
|
||||
"supports_system_messages": {"type": "boolean"},
|
||||
"supports_tool_choice": {"type": "boolean"},
|
||||
"supports_video_input": {"type": "boolean"},
|
||||
"supports_vision": {"type": "boolean"},
|
||||
"tool_use_system_prompt_tokens": {"type": "number"},
|
||||
"tpm": {"type": "number"},
|
||||
},
|
||||
"additionalProperties": False,
|
||||
},
|
||||
}
|
||||
|
||||
with open("./model_prices_and_context_window.json", "r") as model_prices_file:
|
||||
actual_json = json.load(model_prices_file)
|
||||
assert isinstance(actual_json, dict)
|
||||
actual_json.pop('sample_spec', None) # remove the sample, whose schema is inconsistent with the real data
|
||||
|
||||
validate(actual_json, INTENDED_SCHEMA)
|
||||
|
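To illustrate what the enum guard catches — e.g. the `moderations` → `moderation` typo fixed earlier in this commit (schema trimmed for the example):

```python
from jsonschema import ValidationError, validate

schema = {
    "type": "object",
    "properties": {"mode": {"type": "string", "enum": ["chat", "moderation"]}},
}
try:
    validate({"mode": "moderations"}, schema)
except ValidationError as err:
    print(err.message)  # 'moderations' is not one of ['chat', 'moderation']
```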
|
@@ -956,7 +956,7 @@ def test_bedrock_ptu():


 @pytest.mark.asyncio
-async def test_bedrock_extra_headers():
+async def test_bedrock_custom_api_base():
     """
     Check if a url with 'modelId' passed in, is created correctly

@@ -994,6 +994,44 @@ async def test_bedrock_extra_headers():
         mock_client_post.assert_called_once()


+@pytest.mark.parametrize(
+    "model",
+    [
+        "anthropic.claude-3-sonnet-20240229-v1:0",
+        "bedrock/invoke/anthropic.claude-3-sonnet-20240229-v1:0",
+    ],
+)
+@pytest.mark.asyncio
+async def test_bedrock_extra_headers(model):
+    """
+    Relevant Issue: https://github.com/BerriAI/litellm/issues/9106
+    """
+    client = AsyncHTTPHandler()
+
+    with patch.object(client, "post", new=AsyncMock()) as mock_client_post:
+        litellm.set_verbose = True
+        from openai.types.chat import ChatCompletion
+
+        try:
+            response = await litellm.acompletion(
+                model=model,
+                messages=[{"role": "user", "content": "What's AWS?"}],
+                client=client,
+                extra_headers={"test": "hello world", "Authorization": "my-test-key"},
+            )
+        except Exception as e:
+            print(f"error: {e}")
+
+        print(f"mock_client_post.call_args.kwargs: {mock_client_post.call_args.kwargs}")
+        assert "test" in mock_client_post.call_args.kwargs["headers"]
+        assert mock_client_post.call_args.kwargs["headers"]["test"] == "hello world"
+        assert (
+            mock_client_post.call_args.kwargs["headers"]["Authorization"]
+            == "my-test-key"
+        )
+        mock_client_post.assert_called_once()
+
+
 @pytest.mark.asyncio
 async def test_bedrock_custom_prompt_template():
     """
@@ -49,16 +49,26 @@ def test_split_embedding_by_shape_fails_with_shape_value_error():
     )


-def test_completion_triton_generate_api():
+@pytest.mark.parametrize("stream", [True, False])
+def test_completion_triton_generate_api(stream):
     try:
         mock_response = MagicMock()
+        if stream:
+            def mock_iter_lines():
+                mock_output = ''.join([
+                    'data: {"model_name":"ensemble","model_version":"1","sequence_end":false,"sequence_id":0,"sequence_start":false,"text_output":"' + t + '"}\n\n'
+                    for t in ["I", " am", " an", " AI", " assistant"]
+                ])
+                for out in mock_output.split('\n'):
+                    yield out
+            mock_response.iter_lines = mock_iter_lines
+        else:
+            def return_val():
+                return {
+                    "text_output": "I am an AI assistant",
+                }

-        def return_val():
-            return {
-                "text_output": "I am an AI assistant",
-            }
-
-        mock_response.json = return_val
+            mock_response.json = return_val
         mock_response.status_code = 200

         with patch(

@@ -71,6 +81,7 @@ def test_completion_triton_generate_api():
             max_tokens=10,
             timeout=5,
             api_base="http://localhost:8000/generate",
+            stream=stream,
         )

         # Verify the call was made

@@ -81,7 +92,10 @@ def test_completion_triton_generate_api():
         call_kwargs = mock_post.call_args.kwargs  # Access kwargs directly

         # Verify URL
-        assert call_kwargs["url"] == "http://localhost:8000/generate"
+        if stream:
+            assert call_kwargs["url"] == "http://localhost:8000/generate_stream"
+        else:
+            assert call_kwargs["url"] == "http://localhost:8000/generate"

         # Parse the request data from the JSON string
         request_data = json.loads(call_kwargs["data"])

@@ -91,7 +105,15 @@ def test_completion_triton_generate_api():
         assert request_data["parameters"]["max_tokens"] == 10

         # Verify response
-        assert response.choices[0].message.content == "I am an AI assistant"
+        if stream:
+            tokens = ["I", " am", " an", " AI", " assistant", None]
+            idx = 0
+            for chunk in response:
+                assert chunk.choices[0].delta.content == tokens[idx]
+                idx += 1
+            assert idx == len(tokens)
+        else:
+            assert response.choices[0].message.content == "I am an AI assistant"

     except Exception as e:
         print("exception", e)
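For reference, the streaming branch above mocks Triton's `/generate_stream` server-sent events. A minimal sketch of how such `data:` lines decode into text fragments, illustrative only and assuming the payload shape used in the mock (`parse_triton_sse_lines` is a hypothetical helper, not a LiteLLM API):

```python
import json

def parse_triton_sse_lines(lines):
    """Yield text_output fragments from Triton /generate_stream SSE lines."""
    for line in lines:
        line = line.strip()
        if not line.startswith("data:"):
            continue  # skip the blank keep-alive lines between events
        payload = json.loads(line[len("data:"):].strip())
        yield payload["text_output"]

# Against the shape mocked above:
mock_lines = [
    'data: {"model_name":"ensemble","text_output":"I"}',
    "",
    'data: {"model_name":"ensemble","text_output":" am"}',
]
assert list(parse_triton_sse_lines(mock_lines)) == ["I", " am"]
```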
@@ -1205,3 +1205,35 @@ def test_context_window_exceeded_error_from_litellm_proxy():
     }
     with pytest.raises(litellm.ContextWindowExceededError):
         extract_and_raise_litellm_exception(**args)
+
+
+@pytest.mark.parametrize("sync_mode", [True, False])
+@pytest.mark.parametrize("stream_mode", [True, False])
+@pytest.mark.parametrize("model", ["azure/gpt-4o"])  # "gpt-4o-mini",
+@pytest.mark.asyncio
+async def test_exception_bubbling_up(sync_mode, stream_mode, model):
+    """
+    make sure code, param, and type are bubbled up
+    """
+    import litellm
+
+    litellm.set_verbose = True
+    with pytest.raises(Exception) as exc_info:
+        if sync_mode:
+            litellm.completion(
+                model=model,
+                messages=[{"role": "usera", "content": "hi"}],
+                stream=stream_mode,
+                sync_stream=sync_mode,
+            )
+        else:
+            await litellm.acompletion(
+                model=model,
+                messages=[{"role": "usera", "content": "hi"}],
+                stream=stream_mode,
+                sync_stream=sync_mode,
+            )
+
+    assert exc_info.value.code == "invalid_value"
+    assert exc_info.value.param is not None
+    assert exc_info.value.type == "invalid_request_error"
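The assertions above rely on LiteLLM exceptions exposing the OpenAI-style `code`, `param`, and `type` fields. A hedged sketch of how a caller might read them (illustrative only; actually raising this error requires valid Azure credentials, and the printed values are the ones asserted in the test):

```python
import litellm

try:
    litellm.completion(
        model="azure/gpt-4o",
        messages=[{"role": "usera", "content": "hi"}],  # invalid role triggers the error
    )
except Exception as err:
    # OpenAI-style fields bubbled up by LiteLLM, per the assertions above:
    print(err.code)   # "invalid_value"
    print(err.param)  # the offending parameter
    print(err.type)   # "invalid_request_error"
```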
@@ -9,7 +9,7 @@ from typing import Any, Optional, List, Literal
 async def generate_key(
     session, models: Optional[List[str]] = None, team_id: Optional[str] = None
 ):
-    """Helper function to generate a key with specific model access"""
+    """Helper function to generate a key with specific model access controls"""
     url = "http://0.0.0.0:4000/key/generate"
     headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
     data = {}

@@ -94,7 +94,7 @@ async def test_model_access_patterns(key_models, test_model, expect_success):
         assert _error_body["type"] == "key_model_access_denied"
         assert _error_body["param"] == "model"
         assert _error_body["code"] == "401"
-        assert "API Key not allowed to access model" in _error_body["message"]
+        assert "key not allowed to access model" in _error_body["message"]


 @pytest.mark.asyncio

@@ -159,12 +159,6 @@ async def test_model_access_update():
     "team_models, test_model, expect_success",
     [
-        (["openai/*"], "anthropic/claude-2", False),  # Non-matching model
-        (["gpt-4"], "gpt-4", True),  # Exact model match
-        (["bedrock/*"], "bedrock/anthropic.claude-3", True),  # Bedrock wildcard
-        (["bedrock/anthropic.*"], "bedrock/anthropic.claude-3", True),  # Pattern match
-        (["bedrock/anthropic.*"], "bedrock/amazon.titan", False),  # Pattern non-match
         (None, "gpt-4", True),  # No model restrictions
         ([], "gpt-4", True),  # Empty model list
     ],
 )
 @pytest.mark.asyncio

@@ -285,6 +279,6 @@ def _validate_model_access_exception(
     assert _error_body["param"] == "model"
     assert _error_body["code"] == "401"
     if expected_type == "key_model_access_denied":
-        assert "API Key not allowed to access model" in _error_body["message"]
+        assert "key not allowed to access model" in _error_body["message"]
     elif expected_type == "team_model_access_denied":
-        assert "Team not allowed to access model" in _error_body["message"]
+        assert "eam not allowed to access model" in _error_body["message"]
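The wildcard rows above (`openai/*`, `bedrock/anthropic.*`) behave like shell-style globs. A minimal sketch of the semantics with Python's `fnmatch`, an illustration only and not LiteLLM's actual matcher (`key_allows_model` is a hypothetical helper):

```python
from fnmatch import fnmatch

def key_allows_model(key_models, requested_model):
    # None or an empty list means no restriction, matching the
    # (None, "gpt-4", True) and ([], "gpt-4", True) rows above.
    if not key_models:
        return True
    return any(fnmatch(requested_model, pattern) for pattern in key_models)

assert key_allows_model(["bedrock/anthropic.*"], "bedrock/anthropic.claude-3")
assert not key_allows_model(["bedrock/anthropic.*"], "bedrock/amazon.titan")
assert not key_allows_model(["openai/*"], "anthropic/claude-2")
assert key_allows_model(None, "gpt-4")
```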
@@ -27,7 +27,7 @@ from litellm.proxy._types import (
 )
 from litellm.proxy.utils import PrismaClient
 from litellm.proxy.auth.auth_checks import (
-    _team_model_access_check,
+    can_team_access_model,
     _virtual_key_soft_budget_check,
 )
 from litellm.proxy.utils import ProxyLogging

@@ -427,9 +427,9 @@ async def test_virtual_key_max_budget_check(
     ],
 )
 @pytest.mark.asyncio
-async def test_team_model_access_check(model, team_models, expect_to_work):
+async def test_can_team_access_model(model, team_models, expect_to_work):
     """
-    Test cases for _team_model_access_check:
+    Test cases for can_team_access_model:
     1. Exact model match
     2. all-proxy-models access
     3. Wildcard (*) access

@@ -438,16 +438,16 @@ async def test_team_model_access_check(model, team_models, expect_to_work):
     6. Empty model list
     7. None model list
     """
-    team_object = LiteLLM_TeamTable(
-        team_id="test-team",
-        models=team_models,
-    )
-
     try:
-        _team_model_access_check(
+        team_object = LiteLLM_TeamTable(
+            team_id="test-team",
+            models=team_models,
+        )
+        result = await can_team_access_model(
             model=model,
             team_object=team_object,
             llm_router=None,
+            team_model_aliases=None,
         )
         if not expect_to_work:
             pytest.fail(
@@ -64,7 +64,7 @@ def test_load_config_with_custom_role_names():


 @pytest.mark.asyncio
-async def test_token_single_public_key():
+async def test_token_single_public_key(monkeypatch):
    import jwt

    jwt_handler = JWTHandler()

@@ -80,10 +80,15 @@ async def test_token_single_public_key():
         ]
     }

+    monkeypatch.setenv("JWT_PUBLIC_KEY_URL", "https://example.com/public-key")
+
     # set cache
     cache = DualCache()

-    await cache.async_set_cache(key="litellm_jwt_auth_keys", value=backend_keys["keys"])
+    await cache.async_set_cache(
+        key="litellm_jwt_auth_keys_https://example.com/public-key",
+        value=backend_keys["keys"],
+    )

     jwt_handler.user_api_key_cache = cache

@@ -99,7 +104,7 @@ async def test_token_single_public_key():

 @pytest.mark.parametrize("audience", [None, "litellm-proxy"])
 @pytest.mark.asyncio
-async def test_valid_invalid_token(audience):
+async def test_valid_invalid_token(audience, monkeypatch):
     """
     Tests
     - valid token

@@ -116,6 +121,8 @@ async def test_valid_invalid_token(audience):
     if audience:
         os.environ["JWT_AUDIENCE"] = audience

+    monkeypatch.setenv("JWT_PUBLIC_KEY_URL", "https://example.com/public-key")
+
     # Generate a private / public key pair using RSA algorithm
     key = rsa.generate_private_key(
         public_exponent=65537, key_size=2048, backend=default_backend()

@@ -145,7 +152,9 @@ async def test_valid_invalid_token(audience):
     # set cache
     cache = DualCache()

-    await cache.async_set_cache(key="litellm_jwt_auth_keys", value=[public_jwk])
+    await cache.async_set_cache(
+        key="litellm_jwt_auth_keys_https://example.com/public-key", value=[public_jwk]
+    )

     jwt_handler = JWTHandler()

@@ -294,7 +303,7 @@ def team_token_tuple():

 @pytest.mark.parametrize("audience", [None, "litellm-proxy"])
 @pytest.mark.asyncio
-async def test_team_token_output(prisma_client, audience):
+async def test_team_token_output(prisma_client, audience, monkeypatch):
     import json
     import uuid

@@ -316,6 +325,8 @@ async def test_team_token_output(prisma_client, audience):
     if audience:
         os.environ["JWT_AUDIENCE"] = audience

+    monkeypatch.setenv("JWT_PUBLIC_KEY_URL", "https://example.com/public-key")
+
     # Generate a private / public key pair using RSA algorithm
     key = rsa.generate_private_key(
         public_exponent=65537, key_size=2048, backend=default_backend()

@@ -345,7 +356,9 @@ async def test_team_token_output(prisma_client, audience):
     # set cache
     cache = DualCache()

-    await cache.async_set_cache(key="litellm_jwt_auth_keys", value=[public_jwk])
+    await cache.async_set_cache(
+        key="litellm_jwt_auth_keys_https://example.com/public-key", value=[public_jwk]
+    )

     jwt_handler = JWTHandler()

@@ -463,7 +476,7 @@ async def test_team_token_output(prisma_client, audience):
 @pytest.mark.parametrize("user_id_upsert", [True, False])
 @pytest.mark.asyncio
 async def aaaatest_user_token_output(
-    prisma_client, audience, team_id_set, default_team_id, user_id_upsert
+    prisma_client, audience, team_id_set, default_team_id, user_id_upsert, monkeypatch
 ):
     import uuid

@@ -528,10 +541,14 @@ async def aaaatest_user_token_output(

     assert isinstance(public_jwk, dict)

+    monkeypatch.setenv("JWT_PUBLIC_KEY_URL", "https://example.com/public-key")
+
     # set cache
     cache = DualCache()

-    await cache.async_set_cache(key="litellm_jwt_auth_keys", value=[public_jwk])
+    await cache.async_set_cache(
+        key="litellm_jwt_auth_keys_https://example.com/public-key", value=[public_jwk]
+    )

     jwt_handler = JWTHandler()

@@ -699,7 +716,9 @@ async def aaaatest_user_token_output(
 @pytest.mark.parametrize("admin_allowed_routes", [None, ["ui_routes"]])
 @pytest.mark.parametrize("audience", [None, "litellm-proxy"])
 @pytest.mark.asyncio
-async def test_allowed_routes_admin(prisma_client, audience, admin_allowed_routes):
+async def test_allowed_routes_admin(
+    prisma_client, audience, admin_allowed_routes, monkeypatch
+):
     """
     Add a check to make sure jwt proxy admin scope can access all allowed admin routes

@@ -723,6 +742,8 @@ async def test_allowed_routes_admin(prisma_client, audience, admin_allowed_route
     setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
     await litellm.proxy.proxy_server.prisma_client.connect()

+    monkeypatch.setenv("JWT_PUBLIC_KEY_URL", "https://example.com/public-key")
+
     os.environ.pop("JWT_AUDIENCE", None)
     if audience:
         os.environ["JWT_AUDIENCE"] = audience

@@ -756,7 +777,9 @@ async def test_allowed_routes_admin(prisma_client, audience, admin_allowed_route
     # set cache
     cache = DualCache()

-    await cache.async_set_cache(key="litellm_jwt_auth_keys", value=[public_jwk])
+    await cache.async_set_cache(
+        key="litellm_jwt_auth_keys_https://example.com/public-key", value=[public_jwk]
+    )

     jwt_handler = JWTHandler()

@@ -910,7 +933,9 @@ def mock_user_object(*args, **kwargs):
     "user_email, should_work", [("ishaan@berri.ai", True), ("krrish@tassle.xyz", False)]
 )
 @pytest.mark.asyncio
-async def test_allow_access_by_email(public_jwt_key, user_email, should_work):
+async def test_allow_access_by_email(
+    public_jwt_key, user_email, should_work, monkeypatch
+):
     """
     Allow anyone with an `@xyz.com` email make a request to the proxy.

@@ -925,10 +950,14 @@ async def test_allow_access_by_email(public_jwt_key, user_email, should_work):
     public_jwk = public_jwt_key["public_jwk"]
     private_key = public_jwt_key["private_key"]

+    monkeypatch.setenv("JWT_PUBLIC_KEY_URL", "https://example.com/public-key")
+
     # set cache
     cache = DualCache()

-    await cache.async_set_cache(key="litellm_jwt_auth_keys", value=[public_jwk])
+    await cache.async_set_cache(
+        key="litellm_jwt_auth_keys_https://example.com/public-key", value=[public_jwk]
+    )

     jwt_handler = JWTHandler()

@@ -1074,7 +1103,7 @@ async def test_end_user_jwt_auth(monkeypatch):
     ]

     cache.set_cache(
-        key="litellm_jwt_auth_keys",
+        key="litellm_jwt_auth_keys_https://example.com/public-key",
         value=keys,
     )

@@ -826,7 +826,7 @@ async def test_jwt_user_api_key_auth_builder_enforce_rbac(enforce_rbac, monkeypa
     ]

     local_cache.set_cache(
-        key="litellm_jwt_auth_keys",
+        key="litellm_jwt_auth_keys_my-fake-url",
         value=keys,
     )

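The recurring cache-key change in the two files above scopes cached JWKS material to the URL it was fetched from (`JWT_PUBLIC_KEY_URL`), so two issuers can no longer collide on a single shared `litellm_jwt_auth_keys` entry. A minimal sketch of the key scheme (`jwt_keys_cache_key` is a hypothetical helper shown only to illustrate the naming convention):

```python
def jwt_keys_cache_key(public_key_url: str) -> str:
    # One cache entry per JWKS URL instead of a single shared "litellm_jwt_auth_keys".
    return f"litellm_jwt_auth_keys_{public_key_url}"

# Matches the keys the updated tests set in the DualCache:
assert (
    jwt_keys_cache_key("https://example.com/public-key")
    == "litellm_jwt_auth_keys_https://example.com/public-key"
)
```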
@@ -308,7 +308,7 @@ async def test_chat_completion():
             model="gpt-4",
             messages=[{"role": "user", "content": "Hello!"}],
         )
-        assert "API Key not allowed to access model." in str(e)
+        assert "key not allowed to access model." in str(e)


 @pytest.mark.asyncio
File diff suppressed because one or more lines are too long (8 files)
@@ -1 +1 @@
[Minified Next.js HTML shell for the LiteLLM Dashboard, suppressed here; old vs. new differ only in hashed build artifacts: buildId rCxUxULLkHhl5KoPY9DHv → i92Qc9kkJSCtCgV3DDmdu, CSS f41c66e22715ab00.css → b6d997482399c7e1.css, chunks 261-e48c2ac6ff0b811c.js → 261-cb27c20c4f8ec4c6.js and 914-e17acab83d0eadb5.js → 157-cf7bc8b3ae1b80ba.js, page chunk page-b36633214e76cfd1.js → page-a25b75c267486fe2.js, page module 92222 → 62177]
@@ -1,7 +1,7 @@
 2:I[19107,[],"ClientPageRoot"]
-3:I[92222,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","441","static/chunks/441-79926bf2b9d89e04.js","261","static/chunks/261-e48c2ac6ff0b811c.js","899","static/chunks/899-354f59ecde307dfa.js","914","static/chunks/914-e17acab83d0eadb5.js","250","static/chunks/250-51513f2f6dabf571.js","699","static/chunks/699-6b82f8e7b98ca1a3.js","931","static/chunks/app/page-b36633214e76cfd1.js"],"default",1]
+3:I[62177,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","441","static/chunks/441-79926bf2b9d89e04.js","261","static/chunks/261-cb27c20c4f8ec4c6.js","899","static/chunks/899-354f59ecde307dfa.js","157","static/chunks/157-cf7bc8b3ae1b80ba.js","250","static/chunks/250-51513f2f6dabf571.js","699","static/chunks/699-6b82f8e7b98ca1a3.js","931","static/chunks/app/page-a25b75c267486fe2.js"],"default",1]
 4:I[4707,[],""]
 5:I[36423,[],""]
 [0: lines (RSC flight payload) suppressed here; old vs. new differ only in buildId rCxUxULLkHhl5KoPY9DHv → i92Qc9kkJSCtCgV3DDmdu and CSS hash f41c66e22715ab00.css → b6d997482399c7e1.css]
 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
 1:null
File diff suppressed because one or more lines are too long
@@ -1,7 +1,7 @@
 2:I[19107,[],"ClientPageRoot"]
-3:I[52829,["441","static/chunks/441-79926bf2b9d89e04.js","261","static/chunks/261-e48c2ac6ff0b811c.js","250","static/chunks/250-51513f2f6dabf571.js","699","static/chunks/699-6b82f8e7b98ca1a3.js","418","static/chunks/app/model_hub/page-6f97b95f1023b0e9.js"],"default",1]
+3:I[52829,["441","static/chunks/441-79926bf2b9d89e04.js","261","static/chunks/261-cb27c20c4f8ec4c6.js","250","static/chunks/250-51513f2f6dabf571.js","699","static/chunks/699-6b82f8e7b98ca1a3.js","418","static/chunks/app/model_hub/page-6f97b95f1023b0e9.js"],"default",1]
 4:I[4707,[],""]
 5:I[36423,[],""]
 [0: lines (RSC flight payload) suppressed here; old vs. new differ only in buildId rCxUxULLkHhl5KoPY9DHv → i92Qc9kkJSCtCgV3DDmdu and CSS hash f41c66e22715ab00.css → b6d997482399c7e1.css]
 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
 1:null
File diff suppressed because one or more lines are too long
@@ -2,6 +2,6 @@
 3:I[12011,["665","static/chunks/3014691f-0b72c78cfebbd712.js","441","static/chunks/441-79926bf2b9d89e04.js","899","static/chunks/899-354f59ecde307dfa.js","250","static/chunks/250-51513f2f6dabf571.js","461","static/chunks/app/onboarding/page-a31bc08c35f01c0a.js"],"default",1]
 4:I[4707,[],""]
 5:I[36423,[],""]
 [0: lines (RSC flight payload) suppressed here; old vs. new differ only in buildId rCxUxULLkHhl5KoPY9DHv → i92Qc9kkJSCtCgV3DDmdu and CSS hash f41c66e22715ab00.css → b6d997482399c7e1.css]
 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
 1:null
@@ -215,6 +215,7 @@ export default function CreateKeyPage() {
               userEmail={userEmail}
               setProxySettings={setProxySettings}
               proxySettings={proxySettings}
+              accessToken={accessToken}
             />
             <div className="flex flex-1 overflow-auto">
               <div className="mt-8">
@@ -23,7 +23,7 @@ const ProviderSpecificFields: React.FC<ProviderSpecificFieldsProps> = ({
   console.log(`type of selectedProviderEnum: ${typeof selectedProviderEnum}`);
   return (
     <>
-      {selectedProviderEnum === Providers.OpenAI && (
+      {(selectedProviderEnum === Providers.OpenAI || selectedProviderEnum === Providers.OpenAI_Text) && (
         <>
           <Form.Item
             label="API Base"

@@ -99,7 +99,8 @@ const ProviderSpecificFields: React.FC<ProviderSpecificFieldsProps> = ({

       {(selectedProviderEnum === Providers.Azure ||
         selectedProviderEnum === Providers.Azure_AI_Studio ||
-        selectedProviderEnum === Providers.OpenAI_Compatible
+        selectedProviderEnum === Providers.OpenAI_Compatible ||
+        selectedProviderEnum === Providers.OpenAI_Text_Compatible
       ) && (
         <Form.Item
           rules={[{ required: true, message: "Required" }]}
@@ -325,14 +325,15 @@ export function AllKeysTable({
       if (!allOrganizations || allOrganizations.length === 0) return [];

       const filteredOrgs = allOrganizations.filter(org =>
-        org.organization_id.toLowerCase().includes(searchText.toLowerCase()) ||
-        (org.organization_name && org.organization_name.toLowerCase().includes(searchText.toLowerCase()))
+        org.organization_id?.toLowerCase().includes(searchText.toLowerCase()) ?? false
       );

-      return filteredOrgs.map(org => ({
-        label: `${org.organization_name || 'Unknown'} (${org.organization_id})`,
-        value: org.organization_id
-      }));
+      return filteredOrgs
+        .filter(org => org.organization_id !== null && org.organization_id !== undefined)
+        .map(org => ({
+          label: `${org.organization_name || 'Unknown'} (${org.organization_id})`,
+          value: org.organization_id as string
+        }));
     }
   },
 ];
@@ -385,8 +386,8 @@ export function AllKeysTable({
     <div className="h-[75vh] overflow-auto">

       <DataTable
-        columns={columns.filter(col => col.id !== 'expander')}
-        data={filteredKeys}
+        columns={columns.filter(col => col.id !== 'expander') as any}
+        data={filteredKeys as any}
         isLoading={isLoading}
         getRowCanExpand={() => false}
         renderSubComponent={() => <></>}
@@ -39,6 +39,9 @@ import { InfoCircleOutlined } from '@ant-design/icons';
 import { Tooltip } from 'antd';
 import Createuser from "./create_user_button";
 import debounce from 'lodash/debounce';
+import { rolesWithWriteAccess } from '../utils/roles';
+
+
 const { Option } = Select;


@@ -335,9 +338,11 @@ const CreateKey: React.FC<CreateKeyProps> = ({

   return (
     <div>
-      <Button className="mx-auto" onClick={() => setIsModalVisible(true)}>
-        + Create New Key
-      </Button>
+      {userRole && rolesWithWriteAccess.includes(userRole) && (
+        <Button className="mx-auto" onClick={() => setIsModalVisible(true)}>
+          + Create New Key
+        </Button>
+      )}
       <Modal
         // title="Create Key"
         visible={isModalVisible}
@@ -21,6 +21,7 @@ import { KeyResponse } from "./key_team_helpers/key_list";
 import { Form, Input, InputNumber, message, Select } from "antd";
 import { KeyEditView } from "./key_edit_view";
 import { RegenerateKeyModal } from "./regenerate_key_modal";
+import { rolesWithWriteAccess } from '../utils/roles';

 interface KeyInfoViewProps {
   keyId: string;

@@ -128,24 +129,26 @@ export default function KeyInfoView({ keyId, onClose, keyData, accessToken, user
           <Title>{keyData.key_alias || "API Key"}</Title>
           <Text className="text-gray-500 font-mono">{keyData.token}</Text>
         </div>
-        <div className="flex gap-2">
-          <Button
-            icon={RefreshIcon}
-            variant="secondary"
-            onClick={() => setIsRegenerateModalOpen(true)}
-            className="flex items-center"
-          >
-            Regenerate Key
-          </Button>
-          <Button
-            icon={TrashIcon}
-            variant="secondary"
-            onClick={() => setIsDeleteModalOpen(true)}
-            className="flex items-center"
-          >
-            Delete Key
-          </Button>
-        </div>
+        {userRole && rolesWithWriteAccess.includes(userRole) && (
+          <div className="flex gap-2">
+            <Button
+              icon={RefreshIcon}
+              variant="secondary"
+              onClick={() => setIsRegenerateModalOpen(true)}
+              className="flex items-center"
+            >
+              Regenerate Key
+            </Button>
+            <Button
+              icon={TrashIcon}
+              variant="secondary"
+              onClick={() => setIsDeleteModalOpen(true)}
+              className="flex items-center"
+            >
+              Delete Key
+            </Button>
+          </div>
+        )}
       </div>

       {/* Add RegenerateKeyModal */}

@@ -246,7 +249,7 @@ export default function KeyInfoView({ keyId, onClose, keyData, accessToken, user
         <Card>
           <div className="flex justify-between items-center mb-4">
             <Title>Key Settings</Title>
-            {!isEditing && (
+            {!isEditing && userRole && rolesWithWriteAccess.includes(userRole) && (
               <Button variant="light" onClick={() => setIsEditing(true)}>
                 Edit Settings
               </Button>
@@ -27,7 +27,7 @@ export const fetchAllKeyAliases = async (accessToken: string | null): Promise<st

       // Extract aliases from this page
       const pageAliases = response.keys
-        .map(key => key.key_alias)
+        .map((key: any) => key.key_alias)
         .filter(Boolean) as string[];

       allAliases = [...allAliases, ...pageAliases];

@@ -41,7 +41,7 @@ export const fetchAllKeyAliases = async (accessToken: string | null): Promise<st
     }

     // Remove duplicates
-    return [...new Set(allAliases)];
+    return Array.from(new Set(allAliases));
   } catch (error) {
     console.error("Error fetching all key aliases:", error);
     return [];

@@ -66,8 +66,7 @@ export const fetchAllTeams = async (accessToken: string | null, organizationId?:
       const response = await teamListCall(
         accessToken,
         organizationId || null,
-        currentPage,
-        100 // larger page size to reduce number of requests
+        null,
       );

       // Add teams from this page

@@ -103,9 +102,7 @@ export const fetchAllOrganizations = async (accessToken: string | null): Promise

     while (hasMorePages) {
       const response = await organizationListCall(
-        accessToken,
-        currentPage,
-        100 // larger page size to reduce number of requests
+        accessToken
       );

       // Add organizations from this page
@@ -1,6 +1,7 @@
 import { useEffect, useState } from "react";
 import { KeyResponse } from "../key_team_helpers/key_list";
-import { Team, Organization } from "../networking";
+import { Organization } from "../networking";
+import { Team } from "../key_team_helpers/key_list";

 export interface FilterState {
   'Team ID': string;
@@ -21,7 +21,7 @@ import {
   ExperimentOutlined,
   ThunderboltOutlined,
 } from '@ant-design/icons';
-import { old_admin_roles, v2_admin_role_names, all_admin_roles, rolesAllowedToSeeUsage } from '../utils/roles';
+import { old_admin_roles, v2_admin_role_names, all_admin_roles, rolesAllowedToSeeUsage, rolesWithWriteAccess } from '../utils/roles';

 const { Sider } = Layout;


@@ -45,7 +45,7 @@ interface MenuItem {
 // Note: If a menu item does not have a role, it is visible to all roles.
 const menuItems: MenuItem[] = [
   { key: "1", page: "api-keys", label: "Virtual Keys", icon: <KeyOutlined /> },
-  { key: "3", page: "llm-playground", label: "Test Key", icon: <PlayCircleOutlined /> },
+  { key: "3", page: "llm-playground", label: "Test Key", icon: <PlayCircleOutlined />, roles: rolesWithWriteAccess },
   { key: "2", page: "models", label: "Models", icon: <BlockOutlined />, roles: all_admin_roles },
   { key: "4", page: "usage", label: "Usage", icon: <BarChartOutlined /> },
   { key: "6", page: "teams", label: "Teams", icon: <TeamOutlined /> },
@@ -1,5 +1,5 @@
 import Link from "next/link";
-import React from "react";
+import React, { useState, useEffect } from "react";
 import type { MenuProps } from "antd";
 import { Dropdown } from "antd";
 import { Organization } from "@/components/networking";

@@ -9,6 +9,8 @@ import {
   LogoutOutlined
 } from '@ant-design/icons';
 import { clearTokenCookies } from "@/utils/cookieUtils";
+import { fetchProxySettings } from "@/utils/proxyUtils";

 interface NavbarProps {
   userID: string | null;
   userEmail: string | null;

@@ -16,6 +18,7 @@ interface NavbarProps {
   premiumUser: boolean;
   setProxySettings: React.Dispatch<React.SetStateAction<any>>;
   proxySettings: any;
+  accessToken: string | null;
 }

 const Navbar: React.FC<NavbarProps> = ({

@@ -24,10 +27,30 @@ const Navbar: React.FC<NavbarProps> = ({
   userRole,
   premiumUser,
   proxySettings,
   setProxySettings,
+  accessToken,
 }) => {
   const isLocal = process.env.NODE_ENV === "development";
   const imageUrl = isLocal ? "http://localhost:4000/get_image" : "/get_image";
-  let logoutUrl = proxySettings?.PROXY_LOGOUT_URL || "";
+  const [logoutUrl, setLogoutUrl] = useState("");
+
+  useEffect(() => {
+    const initializeProxySettings = async () => {
+      if (accessToken) {
+        const settings = await fetchProxySettings(accessToken);
+        console.log("response from fetchProxySettings", settings);
+        if (settings) {
+          setProxySettings(settings);
+        }
+      }
+    };
+
+    initializeProxySettings();
+  }, [accessToken]);
+
+  useEffect(() => {
+    setLogoutUrl(proxySettings?.PROXY_LOGOUT_URL || "");
+  }, [proxySettings]);

   const handleLogout = () => {
     clearTokenCookies();
@@ -1,7 +1,11 @@
 import OpenAI from "openai";
+import React from "react";

 export enum Providers {
   OpenAI = "OpenAI",
+  OpenAI_Compatible = "OpenAI-Compatible Endpoints (Together AI, etc.)",
+  OpenAI_Text = "OpenAI Text Completion",
+  OpenAI_Text_Compatible = "OpenAI-Compatible Text Completion Models (Together AI, etc.)",
   Azure = "Azure",
   Azure_AI_Studio = "Azure AI Foundry (Studio)",
   Anthropic = "Anthropic",

@@ -11,7 +15,6 @@ export enum Providers {
   Groq = "Groq",
   MistralAI = "Mistral AI",
   Deepseek = "Deepseek",
-  OpenAI_Compatible = "OpenAI-Compatible Endpoints (Together AI, etc.)",
   Cohere = "Cohere",
   Databricks = "Databricks",
   Ollama = "Ollama",

@@ -28,6 +31,7 @@ export enum Providers {

 export const provider_map: Record<string, string> = {
   OpenAI: "openai",
+  OpenAI_Text: "text-completion-openai",
   Azure: "azure",
   Azure_AI_Studio: "azure_ai",
   Anthropic: "anthropic",

@@ -37,6 +41,7 @@ export const provider_map: Record<string, string> = {
   MistralAI: "mistral",
   Cohere: "cohere_chat",
   OpenAI_Compatible: "openai",
+  OpenAI_Text_Compatible: "text-completion-openai",
   Vertex_AI: "vertex_ai",
   Databricks: "databricks",
   xAI: "xai",

@@ -53,6 +58,9 @@ export const provider_map: Record<string, string> = {

 export const providerLogoMap: Record<string, string> = {
   [Providers.OpenAI]: "https://artificialanalysis.ai/img/logos/openai_small.svg",
+  [Providers.OpenAI_Text]: "https://artificialanalysis.ai/img/logos/openai_small.svg",
+  [Providers.OpenAI_Text_Compatible]: "https://artificialanalysis.ai/img/logos/openai_small.svg",
+  [Providers.OpenAI_Compatible]: "https://artificialanalysis.ai/img/logos/openai_small.svg",
   [Providers.Azure]: "https://upload.wikimedia.org/wikipedia/commons/a/a8/Microsoft_Azure_Logo.svg",
   [Providers.Azure_AI_Studio]: "https://upload.wikimedia.org/wikipedia/commons/a/a8/Microsoft_Azure_Logo.svg",
   [Providers.Anthropic]: "https://artificialanalysis.ai/img/logos/anthropic_small.svg",

@@ -61,7 +69,6 @@ export const providerLogoMap: Record<string, string> = {
   [Providers.Groq]: "https://artificialanalysis.ai/img/logos/groq_small.png",
   [Providers.MistralAI]: "https://artificialanalysis.ai/img/logos/mistral_small.png",
   [Providers.Cohere]: "https://artificialanalysis.ai/img/logos/cohere_small.png",
-  [Providers.OpenAI_Compatible]: "https://upload.wikimedia.org/wikipedia/commons/4/4e/OpenAI_Logo.svg",
   [Providers.Vertex_AI]: "https://artificialanalysis.ai/img/logos/google_small.svg",
   [Providers.Databricks]: "https://artificialanalysis.ai/img/logos/databricks_small.png",
   [Providers.Ollama]: "https://artificialanalysis.ai/img/logos/ollama_small.svg",
@@ -222,9 +222,13 @@ const TeamInfoView: React.FC<TeamInfoProps> = ({

       <TabGroup defaultIndex={editTeam ? 2 : 0}>
         <TabList className="mb-4">
-          <Tab>Overview</Tab>
-          <Tab>Members</Tab>
-          <Tab>Settings</Tab>
+          {[
+            <Tab key="overview">Overview</Tab>,
+            ...(canEditTeam ? [
+              <Tab key="members">Members</Tab>,
+              <Tab key="settings">Settings</Tab>
+            ] : [])
+          ]}
         </TabList>

         <TabPanels>
@@ -156,7 +156,7 @@ ${formattedBody}
         }}>
           <div style={{ marginBottom: '24px' }}>
             <h2 style={{ fontSize: '24px', fontWeight: 'bold', margin: '0 0 4px 0' }}>Original Request</h2>
-            <p style={{ color: '#666', margin: 0 }}>The request you would send to LiteLLM's `/chat/completions` endpoint.</p>
+            <p style={{ color: '#666', margin: 0 }}>The request you would send to LiteLLM's /chat/completions endpoint.</p>
           </div>

           <textarea
@@ -37,41 +37,47 @@ export const columns: ColumnDef<LogEntry>[] = [
     id: "expander",
     header: () => null,
     cell: ({ row }) => {
-      const [localExpanded, setLocalExpanded] = React.useState(row.getIsExpanded());
+      // Convert the cell function to a React component to properly use hooks
+      const ExpanderCell = () => {
+        const [localExpanded, setLocalExpanded] = React.useState(row.getIsExpanded());
 
-      // Memoize the toggle handler to prevent unnecessary re-renders
-      const toggleHandler = React.useCallback(() => {
-        setLocalExpanded((prev) => !prev);
-        row.getToggleExpandedHandler()();
-      }, [row]);
+        // Memoize the toggle handler to prevent unnecessary re-renders
+        const toggleHandler = React.useCallback(() => {
+          setLocalExpanded((prev) => !prev);
+          row.getToggleExpandedHandler()();
+        }, [row]);
 
-      return row.getCanExpand() ? (
-        <button
-          onClick={toggleHandler}
-          style={{ cursor: "pointer" }}
-          aria-label={localExpanded ? "Collapse row" : "Expand row"}
-          className="w-6 h-6 flex items-center justify-center focus:outline-none"
-        >
-          <svg
-            className={`w-4 h-4 transform transition-transform duration-75 ${
-              localExpanded ? 'rotate-90' : ''
-            }`}
-            fill="none"
-            stroke="currentColor"
-            viewBox="0 0 24 24"
-            xmlns="http://www.w3.org/2000/svg"
-          >
-            <path
-              strokeLinecap="round"
-              strokeLinejoin="round"
-              strokeWidth={2}
-              d="M9 5l7 7-7 7"
-            />
-          </svg>
-        </button>
-      ) : (
-        <span className="w-6 h-6 flex items-center justify-center">●</span>
-      );
+        return row.getCanExpand() ? (
+          <button
+            onClick={toggleHandler}
+            style={{ cursor: "pointer" }}
+            aria-label={localExpanded ? "Collapse row" : "Expand row"}
+            className="w-6 h-6 flex items-center justify-center focus:outline-none"
+          >
+            <svg
+              className={`w-4 h-4 transform transition-transform duration-75 ${
+                localExpanded ? 'rotate-90' : ''
+              }`}
+              fill="none"
+              stroke="currentColor"
+              viewBox="0 0 24 24"
+              xmlns="http://www.w3.org/2000/svg"
+            >
+              <path
+                strokeLinecap="round"
+                strokeLinejoin="round"
+                strokeWidth={2}
+                d="M9 5l7 7-7 7"
+              />
+            </svg>
+          </button>
+        ) : (
+          <span className="w-6 h-6 flex items-center justify-center">●</span>
+        );
+      };
+
+      // Return the component
+      return <ExpanderCell />;
     },
   },
   {
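The refactor above exists because React hooks may only be called from a component or another hook, while TanStack Table's `cell` renderer is a plain callback; wrapping the body in an inner component and rendering it makes the `useState`/`useCallback` calls legal. A stripped-down sketch of the pattern, with a simplified row type and icon that are stand-ins rather than the dashboard code:

```tsx
import React from "react";

// Hypothetical minimal stand-in for a TanStack Table row — only the two
// methods the expander actually uses.
type RowLike = {
  getIsExpanded: () => boolean;
  getToggleExpandedHandler: () => () => void;
};

const expanderCell = ({ row }: { row: RowLike }) => {
  // Hooks below are legal because ExpanderCell is invoked as a component,
  // not as a bare function.
  const ExpanderCell = () => {
    const [expanded, setExpanded] = React.useState(row.getIsExpanded());
    const toggle = React.useCallback(() => {
      setExpanded((prev) => !prev);
      row.getToggleExpandedHandler()();
    }, [row]);
    return <button onClick={toggle}>{expanded ? "▾" : "▸"}</button>;
  };
  return <ExpanderCell />;
};
```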
13 ui/litellm-dashboard/src/utils/proxyUtils.ts Normal file

@@ -0,0 +1,13 @@
+import { getProxyUISettings } from "@/components/networking";
+
+export const fetchProxySettings = async (accessToken: string | null) => {
+  if (!accessToken) return null;
+
+  try {
+    const proxySettings = await getProxyUISettings(accessToken);
+    return proxySettings;
+  } catch (error) {
+    console.error("Error fetching proxy settings:", error);
+    return null;
+  }
+};
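A possible call site for the new helper — the hook below is hypothetical and not part of the diff; it assumes only the `fetchProxySettings` signature above:

```tsx
import { useEffect, useState } from "react";
import { fetchProxySettings } from "@/utils/proxyUtils";

// Hypothetical hook wrapping the helper; the settings payload is left
// untyped here since its shape is opaque at this layer.
export function useProxySettings(accessToken: string | null) {
  const [settings, setSettings] = useState<unknown>(null);

  useEffect(() => {
    // fetchProxySettings resolves to null on missing tokens and on errors,
    // so no try/catch is needed at the call site.
    fetchProxySettings(accessToken).then(setSettings);
  }, [accessToken]);

  return settings;
}
```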
@@ -4,4 +4,5 @@ export const v2_admin_role_names = ["proxy_admin", "proxy_admin_viewer", "org_ad
 export const all_admin_roles = [...old_admin_roles, ...v2_admin_role_names];
 
 export const internalUserRoles = ["Internal User", "Internal Viewer"];
 export const rolesAllowedToSeeUsage = ["Admin", "Admin Viewer", "Internal User", "Internal Viewer"];
+export const rolesWithWriteAccess = ["Internal User", "Admin"];
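A sketch of how the new constant might be consumed; the `hasWriteAccess` helper and the import path are assumptions, not part of this diff:

```typescript
// Illustrative guard built on the new constant; only rolesWithWriteAccess
// comes from the diff above — the helper and path are hypothetical.
import { rolesWithWriteAccess } from "./roles"; // path assumed

export const hasWriteAccess = (userRole: string | null): boolean =>
  userRole !== null && rolesWithWriteAccess.includes(userRole);
```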