mirror of https://github.com/BerriAI/litellm.git
synced 2025-04-25 02:34:29 +00:00

Merge branch 'main' into litellm_dev_03_10_2025_p3

Commit 2d957a0ed9: 105 changed files with 3874 additions and 437 deletions
@@ -49,7 +49,7 @@ jobs:
pip install opentelemetry-api==1.25.0
pip install opentelemetry-sdk==1.25.0
pip install opentelemetry-exporter-otlp==1.25.0
pip install openai==1.54.0
pip install openai==1.66.1
pip install prisma==0.11.0
pip install "detect_secrets==1.5.0"
pip install "httpx==0.24.1"
@@ -168,7 +168,7 @@ jobs:
pip install opentelemetry-api==1.25.0
pip install opentelemetry-sdk==1.25.0
pip install opentelemetry-exporter-otlp==1.25.0
pip install openai==1.54.0
pip install openai==1.66.1
pip install prisma==0.11.0
pip install "detect_secrets==1.5.0"
pip install "httpx==0.24.1"
@@ -267,7 +267,7 @@ jobs:
pip install opentelemetry-api==1.25.0
pip install opentelemetry-sdk==1.25.0
pip install opentelemetry-exporter-otlp==1.25.0
pip install openai==1.54.0
pip install openai==1.66.1
pip install prisma==0.11.0
pip install "detect_secrets==1.5.0"
pip install "httpx==0.24.1"
@@ -511,7 +511,7 @@ jobs:
pip install opentelemetry-api==1.25.0
pip install opentelemetry-sdk==1.25.0
pip install opentelemetry-exporter-otlp==1.25.0
pip install openai==1.54.0
pip install openai==1.66.1
pip install prisma==0.11.0
pip install "detect_secrets==1.5.0"
pip install "httpx==0.24.1"
@@ -678,6 +678,48 @@ jobs:
          paths:
            - llm_translation_coverage.xml
            - llm_translation_coverage
  llm_responses_api_testing:
    docker:
      - image: cimg/python:3.11
        auth:
          username: ${DOCKERHUB_USERNAME}
          password: ${DOCKERHUB_PASSWORD}
    working_directory: ~/project

    steps:
      - checkout
      - run:
          name: Install Dependencies
          command: |
            python -m pip install --upgrade pip
            python -m pip install -r requirements.txt
            pip install "pytest==7.3.1"
            pip install "pytest-retry==1.6.3"
            pip install "pytest-cov==5.0.0"
            pip install "pytest-asyncio==0.21.1"
            pip install "respx==0.21.1"
      # Run pytest and generate JUnit XML report
      - run:
          name: Run tests
          command: |
            pwd
            ls
            python -m pytest -vv tests/llm_responses_api_testing --cov=litellm --cov-report=xml -x -s -v --junitxml=test-results/junit.xml --durations=5
          no_output_timeout: 120m
      - run:
          name: Rename the coverage files
          command: |
            mv coverage.xml llm_responses_api_coverage.xml
            mv .coverage llm_responses_api_coverage

      # Store test results
      - store_test_results:
          path: test-results
      - persist_to_workspace:
          root: .
          paths:
            - llm_responses_api_coverage.xml
            - llm_responses_api_coverage
  litellm_mapped_tests:
    docker:
      - image: cimg/python:3.11
@@ -1234,7 +1276,7 @@ jobs:
            pip install "aiodynamo==23.10.1"
            pip install "asyncio==3.4.3"
            pip install "PyGithub==1.59.1"
            pip install "openai==1.54.0 "
            pip install "openai==1.66.1"
      - run:
          name: Install Grype
          command: |
@@ -1309,7 +1351,7 @@ jobs:
          command: |
            pwd
            ls
            python -m pytest -s -vv tests/*.py -x --junitxml=test-results/junit.xml --durations=5 --ignore=tests/otel_tests --ignore=tests/pass_through_tests --ignore=tests/proxy_admin_ui_tests --ignore=tests/load_tests --ignore=tests/llm_translation --ignore=tests/image_gen_tests --ignore=tests/pass_through_unit_tests
            python -m pytest -s -vv tests/*.py -x --junitxml=test-results/junit.xml --durations=5 --ignore=tests/otel_tests --ignore=tests/pass_through_tests --ignore=tests/proxy_admin_ui_tests --ignore=tests/load_tests --ignore=tests/llm_translation --ignore=tests/llm_responses_api_testing --ignore=tests/image_gen_tests --ignore=tests/pass_through_unit_tests
          no_output_timeout: 120m

      # Store test results
@@ -1370,7 +1412,7 @@ jobs:
            pip install "aiodynamo==23.10.1"
            pip install "asyncio==3.4.3"
            pip install "PyGithub==1.59.1"
            pip install "openai==1.54.0 "
            pip install "openai==1.66.1"
      # Run pytest and generate JUnit XML report
      - run:
          name: Build Docker image
@@ -1492,7 +1534,7 @@ jobs:
            pip install "aiodynamo==23.10.1"
            pip install "asyncio==3.4.3"
            pip install "PyGithub==1.59.1"
            pip install "openai==1.54.0 "
            pip install "openai==1.66.1"
      - run:
          name: Build Docker image
          command: docker build -t my-app:latest -f ./docker/Dockerfile.database .
@@ -1921,7 +1963,7 @@ jobs:
            pip install "pytest-asyncio==0.21.1"
            pip install "google-cloud-aiplatform==1.43.0"
            pip install aiohttp
            pip install "openai==1.54.0 "
            pip install "openai==1.66.1"
            pip install "assemblyai==0.37.0"
            python -m pip install --upgrade pip
            pip install "pydantic==2.7.1"
@@ -2068,7 +2110,7 @@ jobs:
            python -m venv venv
            . venv/bin/activate
            pip install coverage
            coverage combine llm_translation_coverage logging_coverage litellm_router_coverage local_testing_coverage litellm_assistants_api_coverage auth_ui_unit_tests_coverage langfuse_coverage caching_coverage litellm_proxy_unit_tests_coverage image_gen_coverage pass_through_unit_tests_coverage batches_coverage litellm_proxy_security_tests_coverage
            coverage combine llm_translation_coverage llm_responses_api_coverage logging_coverage litellm_router_coverage local_testing_coverage litellm_assistants_api_coverage auth_ui_unit_tests_coverage langfuse_coverage caching_coverage litellm_proxy_unit_tests_coverage image_gen_coverage pass_through_unit_tests_coverage batches_coverage litellm_proxy_security_tests_coverage
            coverage xml
      - codecov/upload:
          file: ./coverage.xml
@@ -2197,7 +2239,7 @@ jobs:
            pip install "pytest-retry==1.6.3"
            pip install "pytest-asyncio==0.21.1"
            pip install aiohttp
            pip install "openai==1.54.0 "
            pip install "openai==1.66.1"
            python -m pip install --upgrade pip
            pip install "pydantic==2.7.1"
            pip install "pytest==7.3.1"
@@ -2429,6 +2471,12 @@ workflows:
            only:
              - main
              - /litellm_.*/
      - llm_responses_api_testing:
          filters:
            branches:
              only:
                - main
                - /litellm_.*/
      - litellm_mapped_tests:
          filters:
            branches:
@@ -2468,6 +2516,7 @@ workflows:
      - upload-coverage:
          requires:
            - llm_translation_testing
            - llm_responses_api_testing
            - litellm_mapped_tests
            - batches_testing
            - litellm_utils_testing
@@ -2526,6 +2575,7 @@ workflows:
            - load_testing
            - test_bad_database_url
            - llm_translation_testing
            - llm_responses_api_testing
            - litellm_mapped_tests
            - batches_testing
            - litellm_utils_testing
@@ -1,5 +1,5 @@
# used by CI/CD testing
openai==1.54.0
openai==1.66.1
python-dotenv
tiktoken
importlib_metadata
.github/pull_request_template.md (4 changes, vendored)

@@ -10,9 +10,9 @@

**Please complete all items before asking a LiteLLM maintainer to review your PR**

- [ ] I have Added testing in the `tests/litellm/` directory, **Adding at least 1 test is a hard requirement** - [see details](https://docs.litellm.ai/docs/contributing#2-adding-testing-to-your-pr)
- [ ] I have Added testing in the `tests/litellm/` directory, **Adding at least 1 test is a hard requirement** - [see details](https://docs.litellm.ai/docs/extras/contributing_code)
- [ ] I have added a screenshot of my new test passing locally
- [ ] My PR passes all unit tests on `make unit-test` [https://docs.litellm.ai/docs/contributing]
- [ ] My PR passes all unit tests on (`make test-unit`)[https://docs.litellm.ai/docs/extras/contributing_code]
- [ ] My PR's scope is as isolated as possible, it only solves 1 specific problem
.github/workflows/helm_unit_test.yml (new file, 27 lines, vendored)

@@ -0,0 +1,27 @@
name: Helm unit test

on:
  pull_request:
  push:
    branches:
      - main

jobs:
  unit-test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v2

      - name: Set up Helm 3.11.1
        uses: azure/setup-helm@v1
        with:
          version: '3.11.1'

      - name: Install Helm Unit Test Plugin
        run: |
          helm plugin install https://github.com/helm-unittest/helm-unittest --version v0.4.4

      - name: Run unit tests
        run:
          helm unittest -f 'tests/*.yaml' deploy/charts/litellm-helm
@@ -340,7 +340,7 @@ curl 'http://0.0.0.0:4000/key/generate' \

## Contributing

Interested in contributing? Contributions to LiteLLM Python SDK, Proxy Server, and contributing LLM integrations are both accepted and highly encouraged! [See our Contribution Guide for more details](https://docs.litellm.ai/docs/contributing)
Interested in contributing? Contributions to LiteLLM Python SDK, Proxy Server, and contributing LLM integrations are both accepted and highly encouraged! [See our Contribution Guide for more details](https://docs.litellm.ai/docs/extras/contributing_code)

# Enterprise
For companies that need better security, user management and professional support
deploy/charts/litellm-helm/tests/deployment_tests.yaml (new file, 54 lines)

@@ -0,0 +1,54 @@
suite: test deployment
templates:
  - deployment.yaml
  - configmap-litellm.yaml
tests:
  - it: should work
    template: deployment.yaml
    set:
      image.tag: test
    asserts:
      - isKind:
          of: Deployment
      - matchRegex:
          path: metadata.name
          pattern: -litellm$
      - equal:
          path: spec.template.spec.containers[0].image
          value: ghcr.io/berriai/litellm-database:test
  - it: should work with tolerations
    template: deployment.yaml
    set:
      tolerations:
        - key: node-role.kubernetes.io/master
          operator: Exists
          effect: NoSchedule
    asserts:
      - equal:
          path: spec.template.spec.tolerations[0].key
          value: node-role.kubernetes.io/master
      - equal:
          path: spec.template.spec.tolerations[0].operator
          value: Exists
  - it: should work with affinity
    template: deployment.yaml
    set:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: topology.kubernetes.io/zone
                    operator: In
                    values:
                      - antarctica-east1
    asserts:
      - equal:
          path: spec.template.spec.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].key
          value: topology.kubernetes.io/zone
      - equal:
          path: spec.template.spec.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].operator
          value: In
      - equal:
          path: spec.template.spec.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].values[0]
          value: antarctica-east1
@@ -48,7 +48,7 @@ The `tests/litellm/` directory follows the same directory structure as `litellm/`

- `litellm/proxy/test_caching_routes.py` maps to `litellm/proxy/caching_routes.py`
- `test_{filename}.py` maps to `litellm/{filename}.py`

### 3. Running Unit Tests
## 3. Running Unit Tests

run the following command on the root of the litellm directory
@@ -56,7 +56,7 @@ run the following command on the root of the litellm directory

make test-unit
```

### 4. Submit a PR with your changes!
## 4. Submit a PR with your changes!

- push your fork to your GitHub repo
- submit a PR from there
@@ -63,9 +63,9 @@ model_list:
  - model_name: bedrock-claude-v1
    litellm_params:
      model: bedrock/anthropic.claude-instant-v1
      aws_access_key_id: os.environ/CUSTOM_AWS_ACCESS_KEY_ID
      aws_secret_access_key: os.environ/CUSTOM_AWS_SECRET_ACCESS_KEY
      aws_region_name: os.environ/CUSTOM_AWS_REGION_NAME
      aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID
      aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY
      aws_region_name: os.environ/AWS_REGION_NAME
```

All possible auth params:
@@ -1792,10 +1792,14 @@ print(response)
### Advanced - [Pass model/provider-specific Params](https://docs.litellm.ai/docs/completion/provider_specific_params#proxy-usage)

## Image Generation
Use this for stable diffusion on bedrock
Use this for stable diffusion, and amazon nova canvas on bedrock


### Usage

<Tabs>
<TabItem value="sdk" label="SDK">

```python
import os
from litellm import image_generation
@@ -1830,6 +1834,41 @@ response = image_generation(
)
print(f"response: {response}")
```
</TabItem>
<TabItem value="proxy" label="PROXY">

1. Setup config.yaml

```yaml
model_list:
  - model_name: amazon.nova-canvas-v1:0
    litellm_params:
      model: bedrock/amazon.nova-canvas-v1:0
      aws_region_name: "us-east-1"
      aws_secret_access_key: my-key # OPTIONAL - all boto3 auth params supported
      aws_secret_access_id: my-id # OPTIONAL - all boto3 auth params supported
```

2. Start proxy

```bash
litellm --config /path/to/config.yaml
```

3. Test it!

```bash
curl -L -X POST 'http://0.0.0.0:4000/v1/images/generations' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer $LITELLM_VIRTUAL_KEY' \
-d '{
    "model": "amazon.nova-canvas-v1:0",
    "prompt": "A cute baby sea otter"
}'
```

</TabItem>
</Tabs>

## Supported AWS Bedrock Image Generation Models
@@ -1910,6 +1949,8 @@ curl http://0.0.0.0:4000/rerank \
      "Capital punishment has existed in the United States since before it was a country."
    ],
    "top_n": 3

  }'
```
@@ -903,6 +903,7 @@ from .llms.bedrock.chat.invoke_transformations.base_invoke_transformation import

from .llms.bedrock.image.amazon_stability1_transformation import AmazonStabilityConfig
from .llms.bedrock.image.amazon_stability3_transformation import AmazonStability3Config
from .llms.bedrock.image.amazon_nova_canvas_transformation import AmazonNovaCanvasConfig
from .llms.bedrock.embed.amazon_titan_g1_transformation import AmazonTitanG1Config
from .llms.bedrock.embed.amazon_titan_multimodal_transformation import (
    AmazonTitanMultimodalEmbeddingG1Config,
@@ -925,6 +926,7 @@ from .llms.groq.chat.transformation import GroqChatConfig
from .llms.voyage.embedding.transformation import VoyageEmbeddingConfig
from .llms.azure_ai.chat.transformation import AzureAIStudioConfig
from .llms.mistral.mistral_chat_transformation import MistralConfig
from .llms.openai.responses.transformation import OpenAIResponsesAPIConfig
from .llms.openai.chat.o_series_transformation import (
    OpenAIOSeriesConfig as OpenAIO1Config,  # maintain backwards compatibility
    OpenAIOSeriesConfig,
@@ -1014,6 +1016,7 @@ from .batches.main import *
from .batch_completion.main import *  # type: ignore
from .rerank_api.main import *
from .llms.anthropic.experimental_pass_through.messages.handler import *
from .responses.main import *
from .realtime_api.main import _arealtime
from .fine_tuning.main import *
from .files.main import *
@@ -18,6 +18,7 @@ SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000 # Minimum number of requests
REPEATED_STREAMING_CHUNK_LIMIT = 100 # catch if model starts looping the same chunk while streaming. Uses high default to prevent false positives.
#### Networking settings ####
request_timeout: float = 6000 # time in seconds
STREAM_SSE_DONE_STRING: str = "[DONE]"

LITELLM_CHAT_PROVIDERS = [
    "openai",
|
|
|
@ -44,7 +44,12 @@ from litellm.llms.vertex_ai.cost_calculator import cost_router as google_cost_ro
|
|||
from litellm.llms.vertex_ai.image_generation.cost_calculator import (
|
||||
cost_calculator as vertex_ai_image_cost_calculator,
|
||||
)
|
||||
from litellm.types.llms.openai import HttpxBinaryResponseContent
|
||||
from litellm.responses.utils import ResponseAPILoggingUtils
|
||||
from litellm.types.llms.openai import (
|
||||
HttpxBinaryResponseContent,
|
||||
ResponseAPIUsage,
|
||||
ResponsesAPIResponse,
|
||||
)
|
||||
from litellm.types.rerank import RerankBilledUnits, RerankResponse
|
||||
from litellm.types.utils import (
|
||||
CallTypesLiteral,
|
||||
|
@ -464,6 +469,13 @@ def _get_usage_object(
|
|||
return usage_obj
|
||||
|
||||
|
||||
def _is_known_usage_objects(usage_obj):
|
||||
"""Returns True if the usage obj is a known Usage type"""
|
||||
return isinstance(usage_obj, litellm.Usage) or isinstance(
|
||||
usage_obj, ResponseAPIUsage
|
||||
)
|
||||
|
||||
|
||||
def _infer_call_type(
|
||||
call_type: Optional[CallTypesLiteral], completion_response: Any
|
||||
) -> Optional[CallTypesLiteral]:
|
||||
|
@ -573,9 +585,7 @@ def completion_cost( # noqa: PLR0915
|
|||
base_model=base_model,
|
||||
)
|
||||
|
||||
verbose_logger.debug(
|
||||
f"completion_response _select_model_name_for_cost_calc: {model}"
|
||||
)
|
||||
verbose_logger.info(f"selected model name for cost calculation: {model}")
|
||||
|
||||
if completion_response is not None and (
|
||||
isinstance(completion_response, BaseModel)
|
||||
|
@ -587,8 +597,8 @@ def completion_cost( # noqa: PLR0915
|
|||
)
|
||||
else:
|
||||
usage_obj = getattr(completion_response, "usage", {})
|
||||
if isinstance(usage_obj, BaseModel) and not isinstance(
|
||||
usage_obj, litellm.Usage
|
||||
if isinstance(usage_obj, BaseModel) and not _is_known_usage_objects(
|
||||
usage_obj=usage_obj
|
||||
):
|
||||
setattr(
|
||||
completion_response,
|
||||
|
@ -601,6 +611,14 @@ def completion_cost( # noqa: PLR0915
|
|||
_usage = usage_obj.model_dump()
|
||||
else:
|
||||
_usage = usage_obj
|
||||
|
||||
if ResponseAPILoggingUtils._is_response_api_usage(_usage):
|
||||
_usage = (
|
||||
ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
|
||||
_usage
|
||||
).model_dump()
|
||||
)
|
||||
|
||||
# get input/output tokens from completion_response
|
||||
prompt_tokens = _usage.get("prompt_tokens", 0)
|
||||
completion_tokens = _usage.get("completion_tokens", 0)
|
||||
|
@ -799,6 +817,7 @@ def response_cost_calculator(
|
|||
TextCompletionResponse,
|
||||
HttpxBinaryResponseContent,
|
||||
RerankResponse,
|
||||
ResponsesAPIResponse,
|
||||
],
|
||||
model: str,
|
||||
custom_llm_provider: Optional[str],
|
||||
|
|
|
@ -118,6 +118,7 @@ class BadRequestError(openai.BadRequestError): # type: ignore
|
|||
litellm_debug_info: Optional[str] = None,
|
||||
max_retries: Optional[int] = None,
|
||||
num_retries: Optional[int] = None,
|
||||
body: Optional[dict] = None,
|
||||
):
|
||||
self.status_code = 400
|
||||
self.message = "litellm.BadRequestError: {}".format(message)
|
||||
|
@ -133,7 +134,7 @@ class BadRequestError(openai.BadRequestError): # type: ignore
|
|||
self.max_retries = max_retries
|
||||
self.num_retries = num_retries
|
||||
super().__init__(
|
||||
self.message, response=response, body=None
|
||||
self.message, response=response, body=body
|
||||
) # Call the base class constructor with the parameters it needs
|
||||
|
||||
def __str__(self):
|
||||
|
|
|
@ -331,6 +331,7 @@ def exception_type( # type: ignore # noqa: PLR0915
|
|||
model=model,
|
||||
response=getattr(original_exception, "response", None),
|
||||
litellm_debug_info=extra_information,
|
||||
body=getattr(original_exception, "body", None),
|
||||
)
|
||||
elif (
|
||||
"Web server is returning an unknown error" in error_str
|
||||
|
@ -421,6 +422,7 @@ def exception_type( # type: ignore # noqa: PLR0915
|
|||
llm_provider=custom_llm_provider,
|
||||
response=getattr(original_exception, "response", None),
|
||||
litellm_debug_info=extra_information,
|
||||
body=getattr(original_exception, "body", None),
|
||||
)
|
||||
elif original_exception.status_code == 429:
|
||||
exception_mapping_worked = True
|
||||
|
@ -1960,6 +1962,7 @@ def exception_type( # type: ignore # noqa: PLR0915
|
|||
model=model,
|
||||
litellm_debug_info=extra_information,
|
||||
response=getattr(original_exception, "response", None),
|
||||
body=getattr(original_exception, "body", None),
|
||||
)
|
||||
elif (
|
||||
"The api_key client option must be set either by passing api_key to the client or by setting"
|
||||
|
@ -1991,6 +1994,7 @@ def exception_type( # type: ignore # noqa: PLR0915
|
|||
model=model,
|
||||
litellm_debug_info=extra_information,
|
||||
response=getattr(original_exception, "response", None),
|
||||
body=getattr(original_exception, "body", None),
|
||||
)
|
||||
elif original_exception.status_code == 401:
|
||||
exception_mapping_worked = True
|
||||
|
|
|
@ -39,11 +39,14 @@ from litellm.litellm_core_utils.redact_messages import (
|
|||
redact_message_input_output_from_custom_logger,
|
||||
redact_message_input_output_from_logging,
|
||||
)
|
||||
from litellm.responses.utils import ResponseAPILoggingUtils
|
||||
from litellm.types.llms.openai import (
|
||||
AllMessageValues,
|
||||
Batch,
|
||||
FineTuningJob,
|
||||
HttpxBinaryResponseContent,
|
||||
ResponseCompletedEvent,
|
||||
ResponsesAPIResponse,
|
||||
)
|
||||
from litellm.types.rerank import RerankResponse
|
||||
from litellm.types.router import SPECIAL_MODEL_INFO_PARAMS
|
||||
|
@ -851,6 +854,8 @@ class Logging(LiteLLMLoggingBaseClass):
|
|||
RerankResponse,
|
||||
Batch,
|
||||
FineTuningJob,
|
||||
ResponsesAPIResponse,
|
||||
ResponseCompletedEvent,
|
||||
],
|
||||
cache_hit: Optional[bool] = None,
|
||||
) -> Optional[float]:
|
||||
|
@ -1000,7 +1005,7 @@ class Logging(LiteLLMLoggingBaseClass):
|
|||
standard_logging_object is None
|
||||
and result is not None
|
||||
and self.stream is not True
|
||||
): # handle streaming separately
|
||||
):
|
||||
if (
|
||||
isinstance(result, ModelResponse)
|
||||
or isinstance(result, ModelResponseStream)
|
||||
|
@ -1012,6 +1017,7 @@ class Logging(LiteLLMLoggingBaseClass):
|
|||
or isinstance(result, RerankResponse)
|
||||
or isinstance(result, FineTuningJob)
|
||||
or isinstance(result, LiteLLMBatch)
|
||||
or isinstance(result, ResponsesAPIResponse)
|
||||
):
|
||||
## HIDDEN PARAMS ##
|
||||
hidden_params = getattr(result, "_hidden_params", {})
|
||||
|
@ -1111,7 +1117,7 @@ class Logging(LiteLLMLoggingBaseClass):
|
|||
|
||||
## BUILD COMPLETE STREAMED RESPONSE
|
||||
complete_streaming_response: Optional[
|
||||
Union[ModelResponse, TextCompletionResponse]
|
||||
Union[ModelResponse, TextCompletionResponse, ResponsesAPIResponse]
|
||||
] = None
|
||||
if "complete_streaming_response" in self.model_call_details:
|
||||
return # break out of this.
|
||||
|
@ -1633,7 +1639,7 @@ class Logging(LiteLLMLoggingBaseClass):
|
|||
if "async_complete_streaming_response" in self.model_call_details:
|
||||
return # break out of this.
|
||||
complete_streaming_response: Optional[
|
||||
Union[ModelResponse, TextCompletionResponse]
|
||||
Union[ModelResponse, TextCompletionResponse, ResponsesAPIResponse]
|
||||
] = self._get_assembled_streaming_response(
|
||||
result=result,
|
||||
start_time=start_time,
|
||||
|
@ -2343,16 +2349,24 @@ class Logging(LiteLLMLoggingBaseClass):
|
|||
|
||||
def _get_assembled_streaming_response(
|
||||
self,
|
||||
result: Union[ModelResponse, TextCompletionResponse, ModelResponseStream, Any],
|
||||
result: Union[
|
||||
ModelResponse,
|
||||
TextCompletionResponse,
|
||||
ModelResponseStream,
|
||||
ResponseCompletedEvent,
|
||||
Any,
|
||||
],
|
||||
start_time: datetime.datetime,
|
||||
end_time: datetime.datetime,
|
||||
is_async: bool,
|
||||
streaming_chunks: List[Any],
|
||||
) -> Optional[Union[ModelResponse, TextCompletionResponse]]:
|
||||
) -> Optional[Union[ModelResponse, TextCompletionResponse, ResponsesAPIResponse]]:
|
||||
if isinstance(result, ModelResponse):
|
||||
return result
|
||||
elif isinstance(result, TextCompletionResponse):
|
||||
return result
|
||||
elif isinstance(result, ResponseCompletedEvent):
|
||||
return result.response
|
||||
elif isinstance(result, ModelResponseStream):
|
||||
complete_streaming_response: Optional[
|
||||
Union[ModelResponse, TextCompletionResponse]
|
||||
|
@ -3111,6 +3125,12 @@ class StandardLoggingPayloadSetup:
|
|||
elif isinstance(usage, Usage):
|
||||
return usage
|
||||
elif isinstance(usage, dict):
|
||||
if ResponseAPILoggingUtils._is_response_api_usage(usage):
|
||||
return (
|
||||
ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
|
||||
usage
|
||||
)
|
||||
)
|
||||
return Usage(**usage)
|
||||
|
||||
raise ValueError(f"usage is required, got={usage} of type {type(usage)}")
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
from typing import Coroutine, Iterable, Literal, Optional, Union
|
||||
from typing import Any, Coroutine, Dict, Iterable, Literal, Optional, Union
|
||||
|
||||
import httpx
|
||||
from openai import AsyncAzureOpenAI, AzureOpenAI
|
||||
|
@ -649,7 +649,7 @@ class AzureAssistantsAPI(BaseAzureLLM):
|
|||
assistant_id: str,
|
||||
additional_instructions: Optional[str],
|
||||
instructions: Optional[str],
|
||||
metadata: Optional[object],
|
||||
metadata: Optional[Dict],
|
||||
model: Optional[str],
|
||||
stream: Optional[bool],
|
||||
tools: Optional[Iterable[AssistantToolParam]],
|
||||
|
@ -692,13 +692,13 @@ class AzureAssistantsAPI(BaseAzureLLM):
|
|||
assistant_id: str,
|
||||
additional_instructions: Optional[str],
|
||||
instructions: Optional[str],
|
||||
metadata: Optional[object],
|
||||
metadata: Optional[Dict],
|
||||
model: Optional[str],
|
||||
tools: Optional[Iterable[AssistantToolParam]],
|
||||
event_handler: Optional[AssistantEventHandler],
|
||||
litellm_params: Optional[dict] = None,
|
||||
) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]:
|
||||
data = {
|
||||
data: Dict[str, Any] = {
|
||||
"thread_id": thread_id,
|
||||
"assistant_id": assistant_id,
|
||||
"additional_instructions": additional_instructions,
|
||||
|
@ -718,13 +718,13 @@ class AzureAssistantsAPI(BaseAzureLLM):
|
|||
assistant_id: str,
|
||||
additional_instructions: Optional[str],
|
||||
instructions: Optional[str],
|
||||
metadata: Optional[object],
|
||||
metadata: Optional[Dict],
|
||||
model: Optional[str],
|
||||
tools: Optional[Iterable[AssistantToolParam]],
|
||||
event_handler: Optional[AssistantEventHandler],
|
||||
litellm_params: Optional[dict] = None,
|
||||
) -> AssistantStreamManager[AssistantEventHandler]:
|
||||
data = {
|
||||
data: Dict[str, Any] = {
|
||||
"thread_id": thread_id,
|
||||
"assistant_id": assistant_id,
|
||||
"additional_instructions": additional_instructions,
|
||||
|
@ -746,7 +746,7 @@ class AzureAssistantsAPI(BaseAzureLLM):
|
|||
assistant_id: str,
|
||||
additional_instructions: Optional[str],
|
||||
instructions: Optional[str],
|
||||
metadata: Optional[object],
|
||||
metadata: Optional[Dict],
|
||||
model: Optional[str],
|
||||
stream: Optional[bool],
|
||||
tools: Optional[Iterable[AssistantToolParam]],
|
||||
|
@ -768,7 +768,7 @@ class AzureAssistantsAPI(BaseAzureLLM):
|
|||
assistant_id: str,
|
||||
additional_instructions: Optional[str],
|
||||
instructions: Optional[str],
|
||||
metadata: Optional[object],
|
||||
metadata: Optional[Dict],
|
||||
model: Optional[str],
|
||||
stream: Optional[bool],
|
||||
tools: Optional[Iterable[AssistantToolParam]],
|
||||
|
@ -791,7 +791,7 @@ class AzureAssistantsAPI(BaseAzureLLM):
|
|||
assistant_id: str,
|
||||
additional_instructions: Optional[str],
|
||||
instructions: Optional[str],
|
||||
metadata: Optional[object],
|
||||
metadata: Optional[Dict],
|
||||
model: Optional[str],
|
||||
stream: Optional[bool],
|
||||
tools: Optional[Iterable[AssistantToolParam]],
|
||||
|
|
|
@ -7,7 +7,11 @@ from pydantic import BaseModel
|
|||
import litellm
|
||||
from litellm.litellm_core_utils.audio_utils.utils import get_audio_file_name
|
||||
from litellm.types.utils import FileTypes
|
||||
from litellm.utils import TranscriptionResponse, convert_to_model_response_object
|
||||
from litellm.utils import (
|
||||
TranscriptionResponse,
|
||||
convert_to_model_response_object,
|
||||
extract_duration_from_srt_or_vtt,
|
||||
)
|
||||
|
||||
from .azure import AzureChatCompletion
|
||||
|
||||
|
@ -140,6 +144,8 @@ class AzureAudioTranscription(AzureChatCompletion):
|
|||
stringified_response = response.model_dump()
|
||||
else:
|
||||
stringified_response = TranscriptionResponse(text=response).model_dump()
|
||||
duration = extract_duration_from_srt_or_vtt(response)
|
||||
stringified_response["duration"] = duration
|
||||
|
||||
## LOGGING
|
||||
logging_obj.post_call(
|
||||
|
|
|
@ -430,10 +430,14 @@ class AzureChatCompletion(BaseAzureLLM, BaseLLM):
|
|||
status_code = getattr(e, "status_code", 500)
|
||||
error_headers = getattr(e, "headers", None)
|
||||
error_response = getattr(e, "response", None)
|
||||
error_body = getattr(e, "body", None)
|
||||
if error_headers is None and error_response:
|
||||
error_headers = getattr(error_response, "headers", None)
|
||||
raise AzureOpenAIError(
|
||||
status_code=status_code, message=str(e), headers=error_headers
|
||||
status_code=status_code,
|
||||
message=str(e),
|
||||
headers=error_headers,
|
||||
body=error_body,
|
||||
)
|
||||
|
||||
async def acompletion(
|
||||
|
@ -519,6 +523,7 @@ class AzureChatCompletion(BaseAzureLLM, BaseLLM):
|
|||
raise AzureOpenAIError(status_code=500, message=str(e))
|
||||
except Exception as e:
|
||||
message = getattr(e, "message", str(e))
|
||||
body = getattr(e, "body", None)
|
||||
## LOGGING
|
||||
logging_obj.post_call(
|
||||
input=data["messages"],
|
||||
|
@ -529,7 +534,7 @@ class AzureChatCompletion(BaseAzureLLM, BaseLLM):
|
|||
if hasattr(e, "status_code"):
|
||||
raise e
|
||||
else:
|
||||
raise AzureOpenAIError(status_code=500, message=message)
|
||||
raise AzureOpenAIError(status_code=500, message=message, body=body)
|
||||
|
||||
def streaming(
|
||||
self,
|
||||
|
@ -656,10 +661,14 @@ class AzureChatCompletion(BaseAzureLLM, BaseLLM):
|
|||
error_headers = getattr(e, "headers", None)
|
||||
error_response = getattr(e, "response", None)
|
||||
message = getattr(e, "message", str(e))
|
||||
error_body = getattr(e, "body", None)
|
||||
if error_headers is None and error_response:
|
||||
error_headers = getattr(error_response, "headers", None)
|
||||
raise AzureOpenAIError(
|
||||
status_code=status_code, message=message, headers=error_headers
|
||||
status_code=status_code,
|
||||
message=message,
|
||||
headers=error_headers,
|
||||
body=error_body,
|
||||
)
|
||||
|
||||
async def aembedding(
|
||||
|
|
|
@ -25,6 +25,7 @@ class AzureOpenAIError(BaseLLMException):
|
|||
request: Optional[httpx.Request] = None,
|
||||
response: Optional[httpx.Response] = None,
|
||||
headers: Optional[Union[httpx.Headers, dict]] = None,
|
||||
body: Optional[dict] = None,
|
||||
):
|
||||
super().__init__(
|
||||
status_code=status_code,
|
||||
|
@ -32,6 +33,7 @@ class AzureOpenAIError(BaseLLMException):
|
|||
request=request,
|
||||
response=response,
|
||||
headers=headers,
|
||||
body=body,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -16,10 +16,23 @@ from litellm.llms.openai.openai import OpenAIConfig
|
|||
from litellm.secret_managers.main import get_secret_str
|
||||
from litellm.types.llms.openai import AllMessageValues
|
||||
from litellm.types.utils import ModelResponse, ProviderField
|
||||
from litellm.utils import _add_path_to_api_base
|
||||
from litellm.utils import _add_path_to_api_base, supports_tool_choice
|
||||
|
||||
|
||||
class AzureAIStudioConfig(OpenAIConfig):
|
||||
def get_supported_openai_params(self, model: str) -> List:
|
||||
model_supports_tool_choice = True # azure ai supports this by default
|
||||
if not supports_tool_choice(model=f"azure_ai/{model}"):
|
||||
model_supports_tool_choice = False
|
||||
supported_params = super().get_supported_openai_params(model)
|
||||
if not model_supports_tool_choice:
|
||||
filtered_supported_params = []
|
||||
for param in supported_params:
|
||||
if param != "tool_choice":
|
||||
filtered_supported_params.append(param)
|
||||
return filtered_supported_params
|
||||
return supported_params
|
||||
|
||||
def validate_environment(
|
||||
self,
|
||||
headers: dict,
|
||||
|
|
|
@ -51,6 +51,7 @@ class BaseLLMException(Exception):
|
|||
headers: Optional[Union[dict, httpx.Headers]] = None,
|
||||
request: Optional[httpx.Request] = None,
|
||||
response: Optional[httpx.Response] = None,
|
||||
body: Optional[dict] = None,
|
||||
):
|
||||
self.status_code = status_code
|
||||
self.message: str = message
|
||||
|
@ -67,6 +68,7 @@ class BaseLLMException(Exception):
|
|||
self.response = httpx.Response(
|
||||
status_code=status_code, request=self.request
|
||||
)
|
||||
self.body = body
|
||||
super().__init__(
|
||||
self.message
|
||||
) # Call the base class constructor with the parameters it needs
|
||||
|
|
litellm/llms/base_llm/responses/transformation.py (new file, 133 lines)
|
@ -0,0 +1,133 @@
|
|||
import types
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import TYPE_CHECKING, Any, Dict, Optional, Union
|
||||
|
||||
import httpx
|
||||
|
||||
from litellm.types.llms.openai import (
|
||||
ResponseInputParam,
|
||||
ResponsesAPIOptionalRequestParams,
|
||||
ResponsesAPIRequestParams,
|
||||
ResponsesAPIResponse,
|
||||
ResponsesAPIStreamingResponse,
|
||||
)
|
||||
from litellm.types.router import GenericLiteLLMParams
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj
|
||||
|
||||
from ..chat.transformation import BaseLLMException as _BaseLLMException
|
||||
|
||||
LiteLLMLoggingObj = _LiteLLMLoggingObj
|
||||
BaseLLMException = _BaseLLMException
|
||||
else:
|
||||
LiteLLMLoggingObj = Any
|
||||
BaseLLMException = Any
|
||||
|
||||
|
||||
class BaseResponsesAPIConfig(ABC):
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def get_config(cls):
|
||||
return {
|
||||
k: v
|
||||
for k, v in cls.__dict__.items()
|
||||
if not k.startswith("__")
|
||||
and not k.startswith("_abc")
|
||||
and not isinstance(
|
||||
v,
|
||||
(
|
||||
types.FunctionType,
|
||||
types.BuiltinFunctionType,
|
||||
classmethod,
|
||||
staticmethod,
|
||||
),
|
||||
)
|
||||
and v is not None
|
||||
}
|
||||
|
||||
@abstractmethod
|
||||
def get_supported_openai_params(self, model: str) -> list:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def map_openai_params(
|
||||
self,
|
||||
response_api_optional_params: ResponsesAPIOptionalRequestParams,
|
||||
model: str,
|
||||
drop_params: bool,
|
||||
) -> Dict:
|
||||
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def validate_environment(
|
||||
self,
|
||||
headers: dict,
|
||||
model: str,
|
||||
api_key: Optional[str] = None,
|
||||
) -> dict:
|
||||
return {}
|
||||
|
||||
@abstractmethod
|
||||
def get_complete_url(
|
||||
self,
|
||||
api_base: Optional[str],
|
||||
model: str,
|
||||
stream: Optional[bool] = None,
|
||||
) -> str:
|
||||
"""
|
||||
OPTIONAL
|
||||
|
||||
Get the complete url for the request
|
||||
|
||||
Some providers need `model` in `api_base`
|
||||
"""
|
||||
if api_base is None:
|
||||
raise ValueError("api_base is required")
|
||||
return api_base
|
||||
|
||||
@abstractmethod
|
||||
def transform_responses_api_request(
|
||||
self,
|
||||
model: str,
|
||||
input: Union[str, ResponseInputParam],
|
||||
response_api_optional_request_params: Dict,
|
||||
litellm_params: GenericLiteLLMParams,
|
||||
headers: dict,
|
||||
) -> ResponsesAPIRequestParams:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def transform_response_api_response(
|
||||
self,
|
||||
model: str,
|
||||
raw_response: httpx.Response,
|
||||
logging_obj: LiteLLMLoggingObj,
|
||||
) -> ResponsesAPIResponse:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def transform_streaming_response(
|
||||
self,
|
||||
model: str,
|
||||
parsed_chunk: dict,
|
||||
logging_obj: LiteLLMLoggingObj,
|
||||
) -> ResponsesAPIStreamingResponse:
|
||||
"""
|
||||
Transform a parsed streaming response chunk into a ResponsesAPIStreamingResponse
|
||||
"""
|
||||
pass
|
||||
|
||||
def get_error_class(
|
||||
self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
|
||||
) -> BaseLLMException:
|
||||
from ..chat.transformation import BaseLLMException
|
||||
|
||||
raise BaseLLMException(
|
||||
status_code=status_code,
|
||||
message=error_message,
|
||||
headers=headers,
|
||||
)
|
|
@ -1231,7 +1231,9 @@ class AWSEventStreamDecoder:
|
|||
if len(self.content_blocks) == 0:
|
||||
return False
|
||||
|
||||
if "text" in self.content_blocks[0]:
|
||||
if (
|
||||
"toolUse" not in self.content_blocks[0]
|
||||
): # be explicit - only do this if tool use block, as this is to prevent json decoding errors
|
||||
return False
|
||||
|
||||
for block in self.content_blocks:
|
||||
|
|
|
@ -129,7 +129,6 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM):
|
|||
|
||||
## CREDENTIALS ##
|
||||
# pop aws_secret_access_key, aws_access_key_id, aws_session_token, aws_region_name from kwargs, since completion calls fail with them
|
||||
extra_headers = optional_params.get("extra_headers", None)
|
||||
aws_secret_access_key = optional_params.get("aws_secret_access_key", None)
|
||||
aws_access_key_id = optional_params.get("aws_access_key_id", None)
|
||||
aws_session_token = optional_params.get("aws_session_token", None)
|
||||
|
@ -155,9 +154,10 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM):
|
|||
)
|
||||
|
||||
sigv4 = SigV4Auth(credentials, "bedrock", aws_region_name)
|
||||
if headers is not None:
|
||||
headers = {"Content-Type": "application/json", **headers}
|
||||
else:
|
||||
headers = {"Content-Type": "application/json"}
|
||||
if extra_headers is not None:
|
||||
headers = {"Content-Type": "application/json", **extra_headers}
|
||||
|
||||
request = AWSRequest(
|
||||
method="POST",
|
||||
|
@ -166,12 +166,13 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM):
|
|||
headers=headers,
|
||||
)
|
||||
sigv4.add_auth(request)
|
||||
if (
|
||||
extra_headers is not None and "Authorization" in extra_headers
|
||||
): # prevent sigv4 from overwriting the auth header
|
||||
request.headers["Authorization"] = extra_headers["Authorization"]
|
||||
|
||||
return dict(request.headers)
|
||||
request_headers_dict = dict(request.headers)
|
||||
if (
|
||||
headers is not None and "Authorization" in headers
|
||||
): # prevent sigv4 from overwriting the auth header
|
||||
request_headers_dict["Authorization"] = headers["Authorization"]
|
||||
return request_headers_dict
|
||||
|
||||
def transform_request(
|
||||
self,
|
||||
|
@ -443,7 +444,7 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM):
|
|||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> dict:
|
||||
return {}
|
||||
return headers
|
||||
|
||||
def get_error_class(
|
||||
self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
|
||||
|
|
litellm/llms/bedrock/image/amazon_nova_canvas_transformation.py (new file, 106 lines)
|
@ -0,0 +1,106 @@
|
|||
import types
|
||||
from typing import List, Optional
|
||||
|
||||
from openai.types.image import Image
|
||||
|
||||
from litellm.types.llms.bedrock import (
|
||||
AmazonNovaCanvasTextToImageRequest, AmazonNovaCanvasTextToImageResponse,
|
||||
AmazonNovaCanvasTextToImageParams, AmazonNovaCanvasRequestBase,
|
||||
)
|
||||
from litellm.types.utils import ImageResponse
|
||||
|
||||
|
||||
class AmazonNovaCanvasConfig:
|
||||
"""
|
||||
Reference: https://us-east-1.console.aws.amazon.com/bedrock/home?region=us-east-1#/model-catalog/serverless/amazon.nova-canvas-v1:0
|
||||
|
||||
"""
|
||||
|
||||
@classmethod
|
||||
def get_config(cls):
|
||||
return {
|
||||
k: v
|
||||
for k, v in cls.__dict__.items()
|
||||
if not k.startswith("__")
|
||||
and not isinstance(
|
||||
v,
|
||||
(
|
||||
types.FunctionType,
|
||||
types.BuiltinFunctionType,
|
||||
classmethod,
|
||||
staticmethod,
|
||||
),
|
||||
)
|
||||
and v is not None
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def get_supported_openai_params(cls, model: Optional[str] = None) -> List:
|
||||
"""
|
||||
"""
|
||||
return ["n", "size", "quality"]
|
||||
|
||||
@classmethod
|
||||
def _is_nova_model(cls, model: Optional[str] = None) -> bool:
|
||||
"""
|
||||
Returns True if the model is a Nova Canvas model
|
||||
|
||||
Nova models follow this pattern:
|
||||
|
||||
"""
|
||||
if model:
|
||||
if "amazon.nova-canvas" in model:
|
||||
return True
|
||||
return False
|
||||
|
||||
@classmethod
|
||||
def transform_request_body(
|
||||
cls, text: str, optional_params: dict
|
||||
) -> AmazonNovaCanvasRequestBase:
|
||||
"""
|
||||
Transform the request body for Amazon Nova Canvas model
|
||||
"""
|
||||
task_type = optional_params.pop("taskType", "TEXT_IMAGE")
|
||||
image_generation_config = optional_params.pop("imageGenerationConfig", {})
|
||||
image_generation_config = {**image_generation_config, **optional_params}
|
||||
if task_type == "TEXT_IMAGE":
|
||||
text_to_image_params = image_generation_config.pop("textToImageParams", {})
|
||||
text_to_image_params = {"text" :text, **text_to_image_params}
|
||||
text_to_image_params = AmazonNovaCanvasTextToImageParams(**text_to_image_params)
|
||||
return AmazonNovaCanvasTextToImageRequest(textToImageParams=text_to_image_params, taskType=task_type,
|
||||
imageGenerationConfig=image_generation_config)
|
||||
raise NotImplementedError(f"Task type {task_type} is not supported")
|
||||
|
||||
@classmethod
|
||||
def map_openai_params(cls, non_default_params: dict, optional_params: dict) -> dict:
|
||||
"""
|
||||
Map the OpenAI params to the Bedrock params
|
||||
"""
|
||||
_size = non_default_params.get("size")
|
||||
if _size is not None:
|
||||
width, height = _size.split("x")
|
||||
optional_params["width"], optional_params["height"] = int(width), int(height)
|
||||
if non_default_params.get("n") is not None:
|
||||
optional_params["numberOfImages"] = non_default_params.get("n")
|
||||
if non_default_params.get("quality") is not None:
|
||||
if non_default_params.get("quality") in ("hd", "premium"):
|
||||
optional_params["quality"] = "premium"
|
||||
if non_default_params.get("quality") == "standard":
|
||||
optional_params["quality"] = "standard"
|
||||
return optional_params
|
||||
|
||||
@classmethod
|
||||
def transform_response_dict_to_openai_response(
|
||||
cls, model_response: ImageResponse, response_dict: dict
|
||||
) -> ImageResponse:
|
||||
"""
|
||||
Transform the response dict to the OpenAI response
|
||||
"""
|
||||
|
||||
nova_response = AmazonNovaCanvasTextToImageResponse(**response_dict)
|
||||
openai_images: List[Image] = []
|
||||
for _img in nova_response.get("images", []):
|
||||
openai_images.append(Image(b64_json=_img))
|
||||
|
||||
model_response.data = openai_images
|
||||
return model_response
|
|
@ -266,6 +266,8 @@ class BedrockImageGeneration(BaseAWSLLM):
|
|||
"text_prompts": [{"text": prompt, "weight": 1}],
|
||||
**inference_params,
|
||||
}
|
||||
elif provider == "amazon":
|
||||
return dict(litellm.AmazonNovaCanvasConfig.transform_request_body(text=prompt, optional_params=optional_params))
|
||||
else:
|
||||
raise BedrockError(
|
||||
status_code=422, message=f"Unsupported model={model}, passed in"
|
||||
|
@ -301,6 +303,7 @@ class BedrockImageGeneration(BaseAWSLLM):
|
|||
config_class = (
|
||||
litellm.AmazonStability3Config
|
||||
if litellm.AmazonStability3Config._is_stability_3_model(model=model)
|
||||
else litellm.AmazonNovaCanvasConfig if litellm.AmazonNovaCanvasConfig._is_nova_model(model=model)
|
||||
else litellm.AmazonStabilityConfig
|
||||
)
|
||||
config_class.transform_response_dict_to_openai_response(
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
import io
|
||||
import json
|
||||
from typing import TYPE_CHECKING, Any, Optional, Tuple, Union
|
||||
from typing import TYPE_CHECKING, Any, Coroutine, Dict, Optional, Tuple, Union
|
||||
|
||||
import httpx # type: ignore
|
||||
|
||||
|
@ -11,13 +11,21 @@ import litellm.types.utils
|
|||
from litellm.llms.base_llm.chat.transformation import BaseConfig
|
||||
from litellm.llms.base_llm.embedding.transformation import BaseEmbeddingConfig
|
||||
from litellm.llms.base_llm.rerank.transformation import BaseRerankConfig
|
||||
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
|
||||
from litellm.llms.custom_httpx.http_handler import (
|
||||
AsyncHTTPHandler,
|
||||
HTTPHandler,
|
||||
_get_httpx_client,
|
||||
get_async_httpx_client,
|
||||
)
|
||||
from litellm.responses.streaming_iterator import (
|
||||
BaseResponsesAPIStreamingIterator,
|
||||
ResponsesAPIStreamingIterator,
|
||||
SyncResponsesAPIStreamingIterator,
|
||||
)
|
||||
from litellm.types.llms.openai import ResponseInputParam, ResponsesAPIResponse
|
||||
from litellm.types.rerank import OptionalRerankParams, RerankResponse
|
||||
from litellm.types.router import GenericLiteLLMParams
|
||||
from litellm.types.utils import EmbeddingResponse, FileTypes, TranscriptionResponse
|
||||
from litellm.utils import CustomStreamWrapper, ModelResponse, ProviderConfigManager
|
||||
|
||||
|
@ -873,7 +881,9 @@ class BaseLLMHTTPHandler:
|
|||
elif isinstance(audio_file, bytes):
|
||||
# Assume it's already binary data
|
||||
binary_data = audio_file
|
||||
elif isinstance(audio_file, io.BufferedReader) or isinstance(audio_file, io.BytesIO):
|
||||
elif isinstance(audio_file, io.BufferedReader) or isinstance(
|
||||
audio_file, io.BytesIO
|
||||
):
|
||||
# Handle file-like objects
|
||||
binary_data = audio_file.read()
|
||||
|
||||
|
@ -950,8 +960,235 @@ class BaseLLMHTTPHandler:
|
|||
return returned_response
|
||||
return model_response
|
||||
|
||||
def response_api_handler(
|
||||
self,
|
||||
model: str,
|
||||
input: Union[str, ResponseInputParam],
|
||||
responses_api_provider_config: BaseResponsesAPIConfig,
|
||||
response_api_optional_request_params: Dict,
|
||||
custom_llm_provider: str,
|
||||
litellm_params: GenericLiteLLMParams,
|
||||
logging_obj: LiteLLMLoggingObj,
|
||||
extra_headers: Optional[Dict[str, Any]] = None,
|
||||
extra_body: Optional[Dict[str, Any]] = None,
|
||||
timeout: Optional[Union[float, httpx.Timeout]] = None,
|
||||
client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
|
||||
_is_async: bool = False,
|
||||
) -> Union[
|
||||
ResponsesAPIResponse,
|
||||
BaseResponsesAPIStreamingIterator,
|
||||
Coroutine[
|
||||
Any, Any, Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]
|
||||
],
|
||||
]:
|
||||
"""
|
||||
Handles responses API requests.
|
||||
When _is_async=True, returns a coroutine instead of making the call directly.
|
||||
"""
|
||||
if _is_async:
|
||||
# Return the async coroutine if called with _is_async=True
|
||||
return self.async_response_api_handler(
|
||||
model=model,
|
||||
input=input,
|
||||
responses_api_provider_config=responses_api_provider_config,
|
||||
response_api_optional_request_params=response_api_optional_request_params,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
litellm_params=litellm_params,
|
||||
logging_obj=logging_obj,
|
||||
extra_headers=extra_headers,
|
||||
extra_body=extra_body,
|
||||
timeout=timeout,
|
||||
client=client if isinstance(client, AsyncHTTPHandler) else None,
|
||||
)
|
||||
|
||||
if client is None or not isinstance(client, HTTPHandler):
|
||||
sync_httpx_client = _get_httpx_client(
|
||||
params={"ssl_verify": litellm_params.get("ssl_verify", None)}
|
||||
)
|
||||
else:
|
||||
sync_httpx_client = client
|
||||
|
||||
headers = responses_api_provider_config.validate_environment(
|
||||
api_key=litellm_params.api_key,
|
||||
headers=response_api_optional_request_params.get("extra_headers", {}) or {},
|
||||
model=model,
|
||||
)
|
||||
|
||||
if extra_headers:
|
||||
headers.update(extra_headers)
|
||||
|
||||
api_base = responses_api_provider_config.get_complete_url(
|
||||
api_base=litellm_params.api_base,
|
||||
model=model,
|
||||
)
|
||||
|
||||
data = responses_api_provider_config.transform_responses_api_request(
|
||||
model=model,
|
||||
input=input,
|
||||
response_api_optional_request_params=response_api_optional_request_params,
|
||||
litellm_params=litellm_params,
|
||||
headers=headers,
|
||||
)
|
||||
|
||||
## LOGGING
|
||||
logging_obj.pre_call(
|
||||
input=input,
|
||||
api_key="",
|
||||
additional_args={
|
||||
"complete_input_dict": data,
|
||||
"api_base": api_base,
|
||||
"headers": headers,
|
||||
},
|
||||
)
|
||||
|
||||
# Check if streaming is requested
|
||||
stream = response_api_optional_request_params.get("stream", False)
|
||||
|
||||
try:
|
||||
if stream:
|
||||
# For streaming, use stream=True in the request
|
||||
response = sync_httpx_client.post(
|
||||
url=api_base,
|
||||
headers=headers,
|
||||
data=json.dumps(data),
|
||||
timeout=timeout
|
||||
or response_api_optional_request_params.get("timeout"),
|
||||
stream=True,
|
||||
)
|
||||
|
||||
return SyncResponsesAPIStreamingIterator(
|
||||
response=response,
|
||||
model=model,
|
||||
logging_obj=logging_obj,
|
||||
responses_api_provider_config=responses_api_provider_config,
|
||||
)
|
||||
else:
|
||||
# For non-streaming requests
|
||||
response = sync_httpx_client.post(
|
||||
url=api_base,
|
||||
headers=headers,
|
||||
data=json.dumps(data),
|
||||
timeout=timeout
|
||||
or response_api_optional_request_params.get("timeout"),
|
||||
)
|
||||
except Exception as e:
|
||||
raise self._handle_error(
|
||||
e=e,
|
||||
provider_config=responses_api_provider_config,
|
||||
)
|
||||
|
||||
return responses_api_provider_config.transform_response_api_response(
|
||||
model=model,
|
||||
raw_response=response,
|
||||
logging_obj=logging_obj,
|
||||
)
|
||||
|
||||
async def async_response_api_handler(
|
||||
self,
|
||||
model: str,
|
||||
input: Union[str, ResponseInputParam],
|
||||
responses_api_provider_config: BaseResponsesAPIConfig,
|
||||
response_api_optional_request_params: Dict,
|
||||
custom_llm_provider: str,
|
||||
litellm_params: GenericLiteLLMParams,
|
||||
logging_obj: LiteLLMLoggingObj,
|
||||
extra_headers: Optional[Dict[str, Any]] = None,
|
||||
extra_body: Optional[Dict[str, Any]] = None,
|
||||
timeout: Optional[Union[float, httpx.Timeout]] = None,
|
||||
client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
|
||||
) -> Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]:
|
||||
"""
|
||||
Async version of the responses API handler.
|
||||
Uses async HTTP client to make requests.
|
||||
"""
|
||||
if client is None or not isinstance(client, AsyncHTTPHandler):
|
||||
async_httpx_client = get_async_httpx_client(
|
||||
llm_provider=litellm.LlmProviders(custom_llm_provider),
|
||||
params={"ssl_verify": litellm_params.get("ssl_verify", None)},
|
||||
)
|
||||
else:
|
||||
async_httpx_client = client
|
||||
|
||||
headers = responses_api_provider_config.validate_environment(
|
||||
api_key=litellm_params.api_key,
|
||||
headers=response_api_optional_request_params.get("extra_headers", {}) or {},
|
||||
model=model,
|
||||
)
|
||||
|
||||
if extra_headers:
|
||||
headers.update(extra_headers)
|
||||
|
||||
api_base = responses_api_provider_config.get_complete_url(
|
||||
api_base=litellm_params.api_base,
|
||||
model=model,
|
||||
)
|
||||
|
||||
data = responses_api_provider_config.transform_responses_api_request(
|
||||
model=model,
|
||||
input=input,
|
||||
response_api_optional_request_params=response_api_optional_request_params,
|
||||
litellm_params=litellm_params,
|
||||
headers=headers,
|
||||
)
|
||||
|
||||
## LOGGING
|
||||
logging_obj.pre_call(
|
||||
input=input,
|
||||
api_key="",
|
||||
additional_args={
|
||||
"complete_input_dict": data,
|
||||
"api_base": api_base,
|
||||
"headers": headers,
|
||||
},
|
||||
)
|
||||
|
||||
# Check if streaming is requested
|
||||
stream = response_api_optional_request_params.get("stream", False)
|
||||
|
||||
try:
|
||||
if stream:
|
||||
# For streaming, we need to use stream=True in the request
|
||||
response = await async_httpx_client.post(
|
||||
url=api_base,
|
||||
headers=headers,
|
||||
data=json.dumps(data),
|
||||
timeout=timeout
|
||||
or response_api_optional_request_params.get("timeout"),
|
||||
stream=True,
|
||||
)
|
||||
|
||||
# Return the streaming iterator
|
||||
return ResponsesAPIStreamingIterator(
|
||||
response=response,
|
||||
model=model,
|
||||
logging_obj=logging_obj,
|
||||
responses_api_provider_config=responses_api_provider_config,
|
||||
)
|
||||
else:
|
||||
# For non-streaming, proceed as before
|
||||
response = await async_httpx_client.post(
|
||||
url=api_base,
|
||||
headers=headers,
|
||||
data=json.dumps(data),
|
||||
timeout=timeout
|
||||
or response_api_optional_request_params.get("timeout"),
|
||||
)
|
||||
except Exception as e:
|
||||
raise self._handle_error(
|
||||
e=e,
|
||||
provider_config=responses_api_provider_config,
|
||||
)
|
||||
|
||||
return responses_api_provider_config.transform_response_api_response(
|
||||
model=model,
|
||||
raw_response=response,
|
||||
logging_obj=logging_obj,
|
||||
)
|
||||
|
||||
def _handle_error(
|
||||
self, e: Exception, provider_config: Union[BaseConfig, BaseRerankConfig]
|
||||
self,
|
||||
e: Exception,
|
||||
provider_config: Union[BaseConfig, BaseRerankConfig, BaseResponsesAPIConfig],
|
||||
):
|
||||
status_code = getattr(e, "status_code", 500)
|
||||
error_headers = getattr(e, "headers", None)
|
||||
|
|
|
@ -19,6 +19,7 @@ class OpenAIError(BaseLLMException):
|
|||
request: Optional[httpx.Request] = None,
|
||||
response: Optional[httpx.Response] = None,
|
||||
headers: Optional[Union[dict, httpx.Headers]] = None,
|
||||
body: Optional[dict] = None,
|
||||
):
|
||||
self.status_code = status_code
|
||||
self.message = message
|
||||
|
@ -39,6 +40,7 @@ class OpenAIError(BaseLLMException):
|
|||
headers=self.headers,
|
||||
request=self.request,
|
||||
response=self.response,
|
||||
body=body,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@@ -732,10 +732,14 @@ class OpenAIChatCompletion(BaseLLM):
            error_headers = getattr(e, "headers", None)
            error_text = getattr(e, "text", str(e))
            error_response = getattr(e, "response", None)
            error_body = getattr(e, "body", None)
            if error_headers is None and error_response:
                error_headers = getattr(error_response, "headers", None)
            raise OpenAIError(
                status_code=status_code, message=error_text, headers=error_headers
                status_code=status_code,
                message=error_text,
                headers=error_headers,
                body=error_body,
            )

    async def acompletion(

@@ -828,13 +832,17 @@ class OpenAIChatCompletion(BaseLLM):
        except Exception as e:
            exception_response = getattr(e, "response", None)
            status_code = getattr(e, "status_code", 500)
            exception_body = getattr(e, "body", None)
            error_headers = getattr(e, "headers", None)
            if error_headers is None and exception_response:
                error_headers = getattr(exception_response, "headers", None)
            message = getattr(e, "message", str(e))

            raise OpenAIError(
                status_code=status_code, message=message, headers=error_headers
                status_code=status_code,
                message=message,
                headers=error_headers,
                body=exception_body,
            )

    def streaming(

@@ -973,6 +981,7 @@ class OpenAIChatCompletion(BaseLLM):
            error_headers = getattr(e, "headers", None)
            status_code = getattr(e, "status_code", 500)
            error_response = getattr(e, "response", None)
            exception_body = getattr(e, "body", None)
            if error_headers is None and error_response:
                error_headers = getattr(error_response, "headers", None)
            if response is not None and hasattr(response, "text"):

@@ -980,6 +989,7 @@ class OpenAIChatCompletion(BaseLLM):
                    status_code=status_code,
                    message=f"{str(e)}\n\nOriginal Response: {response.text}",  # type: ignore
                    headers=error_headers,
                    body=exception_body,
                )
            else:
                if type(e).__name__ == "ReadTimeout":

@@ -987,16 +997,21 @@ class OpenAIChatCompletion(BaseLLM):
                        status_code=408,
                        message=f"{type(e).__name__}",
                        headers=error_headers,
                        body=exception_body,
                    )
                elif hasattr(e, "status_code"):
                    raise OpenAIError(
                        status_code=getattr(e, "status_code", 500),
                        message=str(e),
                        headers=error_headers,
                        body=exception_body,
                    )
                else:
                    raise OpenAIError(
                        status_code=500, message=f"{str(e)}", headers=error_headers
                        status_code=500,
                        message=f"{str(e)}",
                        headers=error_headers,
                        body=exception_body,
                    )

    def get_stream_options(
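The recurring change in the hunks above is that the caught exception's structured body is now forwarded into OpenAIError instead of being dropped. A rough sketch of the pattern, assuming an exception shaped like the OpenAI SDK's APIError (status_code, headers, body, response attributes); OpenAIError here is a stand-in with the same fields as in the diff, not the litellm class.

# Hypothetical illustration of preserving the provider error body while re-raising.
class OpenAIError(Exception):
    def __init__(self, status_code, message, headers=None, body=None):
        self.status_code = status_code
        self.message = message
        self.headers = headers
        self.body = body  # structured provider error, e.g. {"code": "...", "param": "..."}
        super().__init__(message)


def reraise_with_body(e: Exception) -> "OpenAIError":
    # Mirror the getattr() chain from the diff: fall back gracefully when fields are missing.
    status_code = getattr(e, "status_code", 500)
    error_headers = getattr(e, "headers", None)
    error_body = getattr(e, "body", None)
    error_response = getattr(e, "response", None)
    if error_headers is None and error_response is not None:
        error_headers = getattr(error_response, "headers", None)
    return OpenAIError(
        status_code=status_code,
        message=getattr(e, "message", str(e)),
        headers=error_headers,
        body=error_body,
    )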
@@ -2635,7 +2650,7 @@ class OpenAIAssistantsAPI(BaseLLM):
        assistant_id: str,
        additional_instructions: Optional[str],
        instructions: Optional[str],
        metadata: Optional[object],
        metadata: Optional[Dict],
        model: Optional[str],
        stream: Optional[bool],
        tools: Optional[Iterable[AssistantToolParam]],

@@ -2674,12 +2689,12 @@ class OpenAIAssistantsAPI(BaseLLM):
        assistant_id: str,
        additional_instructions: Optional[str],
        instructions: Optional[str],
        metadata: Optional[object],
        metadata: Optional[Dict],
        model: Optional[str],
        tools: Optional[Iterable[AssistantToolParam]],
        event_handler: Optional[AssistantEventHandler],
    ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]:
        data = {
        data: Dict[str, Any] = {
            "thread_id": thread_id,
            "assistant_id": assistant_id,
            "additional_instructions": additional_instructions,

@@ -2699,12 +2714,12 @@ class OpenAIAssistantsAPI(BaseLLM):
        assistant_id: str,
        additional_instructions: Optional[str],
        instructions: Optional[str],
        metadata: Optional[object],
        metadata: Optional[Dict],
        model: Optional[str],
        tools: Optional[Iterable[AssistantToolParam]],
        event_handler: Optional[AssistantEventHandler],
    ) -> AssistantStreamManager[AssistantEventHandler]:
        data = {
        data: Dict[str, Any] = {
            "thread_id": thread_id,
            "assistant_id": assistant_id,
            "additional_instructions": additional_instructions,

@@ -2726,7 +2741,7 @@ class OpenAIAssistantsAPI(BaseLLM):
        assistant_id: str,
        additional_instructions: Optional[str],
        instructions: Optional[str],
        metadata: Optional[object],
        metadata: Optional[Dict],
        model: Optional[str],
        stream: Optional[bool],
        tools: Optional[Iterable[AssistantToolParam]],

@@ -2748,7 +2763,7 @@ class OpenAIAssistantsAPI(BaseLLM):
        assistant_id: str,
        additional_instructions: Optional[str],
        instructions: Optional[str],
        metadata: Optional[object],
        metadata: Optional[Dict],
        model: Optional[str],
        stream: Optional[bool],
        tools: Optional[Iterable[AssistantToolParam]],

@@ -2771,7 +2786,7 @@ class OpenAIAssistantsAPI(BaseLLM):
        assistant_id: str,
        additional_instructions: Optional[str],
        instructions: Optional[str],
        metadata: Optional[object],
        metadata: Optional[Dict],
        model: Optional[str],
        stream: Optional[bool],
        tools: Optional[Iterable[AssistantToolParam]],
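The metadata and data changes above are typing-only; a small sketch of the motivation, assuming mypy-style inference (the names below are illustrative values, not the Assistants call itself).

# Without an explicit annotation, a checker may infer a narrow value type from the first keys
# and then reject values of other types added later; Dict[str, Any] avoids that.
from typing import Any, Dict, Optional

thread_id = "thread_123"
assistant_id = "asst_456"
additional_instructions: Optional[str] = None

data: Dict[str, Any] = {
    "thread_id": thread_id,
    "assistant_id": assistant_id,
    "additional_instructions": additional_instructions,
}
data["metadata"] = {"run": 1}  # a non-str value is fine under Dict[str, Any]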
litellm/llms/openai/responses/transformation.py (Normal file, 190 lines)

@@ -0,0 +1,190 @@
from typing import TYPE_CHECKING, Any, Dict, Optional, Union, cast

import httpx

import litellm
from litellm._logging import verbose_logger
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import *
from litellm.types.router import GenericLiteLLMParams

from ..common_utils import OpenAIError

if TYPE_CHECKING:
    from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj

    LiteLLMLoggingObj = _LiteLLMLoggingObj
else:
    LiteLLMLoggingObj = Any


class OpenAIResponsesAPIConfig(BaseResponsesAPIConfig):
    def get_supported_openai_params(self, model: str) -> list:
        """
        All OpenAI Responses API params are supported
        """
        return [
            "input",
            "model",
            "include",
            "instructions",
            "max_output_tokens",
            "metadata",
            "parallel_tool_calls",
            "previous_response_id",
            "reasoning",
            "store",
            "stream",
            "temperature",
            "text",
            "tool_choice",
            "tools",
            "top_p",
            "truncation",
            "user",
            "extra_headers",
            "extra_query",
            "extra_body",
            "timeout",
        ]

    def map_openai_params(
        self,
        response_api_optional_params: ResponsesAPIOptionalRequestParams,
        model: str,
        drop_params: bool,
    ) -> Dict:
        """No mapping applied since inputs are in OpenAI spec already"""
        return dict(response_api_optional_params)

    def transform_responses_api_request(
        self,
        model: str,
        input: Union[str, ResponseInputParam],
        response_api_optional_request_params: Dict,
        litellm_params: GenericLiteLLMParams,
        headers: dict,
    ) -> ResponsesAPIRequestParams:
        """No transform applied since inputs are in OpenAI spec already"""
        return ResponsesAPIRequestParams(
            model=model, input=input, **response_api_optional_request_params
        )

    def transform_response_api_response(
        self,
        model: str,
        raw_response: httpx.Response,
        logging_obj: LiteLLMLoggingObj,
    ) -> ResponsesAPIResponse:
        """No transform applied since outputs are in OpenAI spec already"""
        try:
            raw_response_json = raw_response.json()
        except Exception:
            raise OpenAIError(
                message=raw_response.text, status_code=raw_response.status_code
            )
        return ResponsesAPIResponse(**raw_response_json)

    def validate_environment(
        self,
        headers: dict,
        model: str,
        api_key: Optional[str] = None,
    ) -> dict:
        api_key = (
            api_key
            or litellm.api_key
            or litellm.openai_key
            or get_secret_str("OPENAI_API_KEY")
        )
        headers.update(
            {
                "Authorization": f"Bearer {api_key}",
            }
        )
        return headers

    def get_complete_url(
        self,
        api_base: Optional[str],
        model: str,
        stream: Optional[bool] = None,
    ) -> str:
        """
        Get the endpoint for OpenAI responses API
        """
        api_base = (
            api_base
            or litellm.api_base
            or get_secret_str("OPENAI_API_BASE")
            or "https://api.openai.com/v1"
        )

        # Remove trailing slashes
        api_base = api_base.rstrip("/")

        return f"{api_base}/responses"

    def transform_streaming_response(
        self,
        model: str,
        parsed_chunk: dict,
        logging_obj: LiteLLMLoggingObj,
    ) -> ResponsesAPIStreamingResponse:
        """
        Transform a parsed streaming response chunk into a ResponsesAPIStreamingResponse
        """
        # Convert the dictionary to a properly typed ResponsesAPIStreamingResponse
        verbose_logger.debug("Raw OpenAI Chunk=%s", parsed_chunk)
        event_type = str(parsed_chunk.get("type"))
        event_pydantic_model = OpenAIResponsesAPIConfig.get_event_model_class(
            event_type=event_type
        )
        return event_pydantic_model(**parsed_chunk)

    @staticmethod
    def get_event_model_class(event_type: str) -> Any:
        """
        Returns the appropriate event model class based on the event type.

        Args:
            event_type (str): The type of event from the response chunk

        Returns:
            Any: The corresponding event model class

        Raises:
            ValueError: If the event type is unknown
        """
        event_models = {
            ResponsesAPIStreamEvents.RESPONSE_CREATED: ResponseCreatedEvent,
            ResponsesAPIStreamEvents.RESPONSE_IN_PROGRESS: ResponseInProgressEvent,
            ResponsesAPIStreamEvents.RESPONSE_COMPLETED: ResponseCompletedEvent,
            ResponsesAPIStreamEvents.RESPONSE_FAILED: ResponseFailedEvent,
            ResponsesAPIStreamEvents.RESPONSE_INCOMPLETE: ResponseIncompleteEvent,
            ResponsesAPIStreamEvents.OUTPUT_ITEM_ADDED: OutputItemAddedEvent,
            ResponsesAPIStreamEvents.OUTPUT_ITEM_DONE: OutputItemDoneEvent,
            ResponsesAPIStreamEvents.CONTENT_PART_ADDED: ContentPartAddedEvent,
            ResponsesAPIStreamEvents.CONTENT_PART_DONE: ContentPartDoneEvent,
            ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA: OutputTextDeltaEvent,
            ResponsesAPIStreamEvents.OUTPUT_TEXT_ANNOTATION_ADDED: OutputTextAnnotationAddedEvent,
            ResponsesAPIStreamEvents.OUTPUT_TEXT_DONE: OutputTextDoneEvent,
            ResponsesAPIStreamEvents.REFUSAL_DELTA: RefusalDeltaEvent,
            ResponsesAPIStreamEvents.REFUSAL_DONE: RefusalDoneEvent,
            ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DELTA: FunctionCallArgumentsDeltaEvent,
            ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DONE: FunctionCallArgumentsDoneEvent,
            ResponsesAPIStreamEvents.FILE_SEARCH_CALL_IN_PROGRESS: FileSearchCallInProgressEvent,
            ResponsesAPIStreamEvents.FILE_SEARCH_CALL_SEARCHING: FileSearchCallSearchingEvent,
            ResponsesAPIStreamEvents.FILE_SEARCH_CALL_COMPLETED: FileSearchCallCompletedEvent,
            ResponsesAPIStreamEvents.WEB_SEARCH_CALL_IN_PROGRESS: WebSearchCallInProgressEvent,
            ResponsesAPIStreamEvents.WEB_SEARCH_CALL_SEARCHING: WebSearchCallSearchingEvent,
            ResponsesAPIStreamEvents.WEB_SEARCH_CALL_COMPLETED: WebSearchCallCompletedEvent,
            ResponsesAPIStreamEvents.ERROR: ErrorEvent,
        }

        model_class = event_models.get(cast(ResponsesAPIStreamEvents, event_type))
        if not model_class:
            raise ValueError(f"Unknown event type: {event_type}")

        return model_class
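A hedged usage sketch of the chunk-to-event dispatch added above: given a parsed SSE chunk with a "type" field, the registry picks the event class and the chunk is validated into it. The pattern can be reproduced standalone like this; the enum values and classes below are stand-ins, not the litellm types.

# Standalone illustration of the same enum -> model dispatch.
from dataclasses import dataclass
from enum import Enum


class StreamEvents(str, Enum):
    RESPONSE_CREATED = "response.created"
    OUTPUT_TEXT_DELTA = "response.output_text.delta"


@dataclass
class ResponseCreated:
    type: str


@dataclass
class OutputTextDelta:
    type: str
    delta: str = ""


EVENT_MODELS = {
    StreamEvents.RESPONSE_CREATED: ResponseCreated,
    StreamEvents.OUTPUT_TEXT_DELTA: OutputTextDelta,
}


def get_event_model_class(event_type: str):
    try:
        model_class = EVENT_MODELS.get(StreamEvents(event_type))
    except ValueError:
        model_class = None
    if not model_class:
        raise ValueError(f"Unknown event type: {event_type}")
    return model_class


chunk = {"type": "response.output_text.delta", "delta": "Hel"}
event = get_event_model_class(chunk["type"])(**chunk)
print(event)  # OutputTextDelta(type='response.output_text.delta', delta='Hel')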
@@ -3910,42 +3910,19 @@ async def atext_completion(
    ctx = contextvars.copy_context()
    func_with_context = partial(ctx.run, func)

    _, custom_llm_provider, _, _ = get_llm_provider(
        model=model, api_base=kwargs.get("api_base", None)
    )

    if (
        custom_llm_provider == "openai"
        or custom_llm_provider == "azure"
        or custom_llm_provider == "azure_text"
        or custom_llm_provider == "custom_openai"
        or custom_llm_provider == "anyscale"
        or custom_llm_provider == "mistral"
        or custom_llm_provider == "openrouter"
        or custom_llm_provider == "deepinfra"
        or custom_llm_provider == "perplexity"
        or custom_llm_provider == "groq"
        or custom_llm_provider == "nvidia_nim"
        or custom_llm_provider == "cerebras"
        or custom_llm_provider == "sambanova"
        or custom_llm_provider == "ai21_chat"
        or custom_llm_provider == "ai21"
        or custom_llm_provider == "volcengine"
        or custom_llm_provider == "text-completion-codestral"
        or custom_llm_provider == "deepseek"
        or custom_llm_provider == "text-completion-openai"
        or custom_llm_provider == "huggingface"
        or custom_llm_provider == "ollama"
        or custom_llm_provider == "vertex_ai"
        or custom_llm_provider in litellm.openai_compatible_providers
    ):  # currently implemented aiohttp calls for just azure and openai, soon all.
        # Await normally
        response = await loop.run_in_executor(None, func_with_context)
        if asyncio.iscoroutine(response):
            response = await response
    init_response = await loop.run_in_executor(None, func_with_context)
    if isinstance(init_response, dict) or isinstance(
        init_response, TextCompletionResponse
    ):  ## CACHING SCENARIO
        if isinstance(init_response, dict):
            response = TextCompletionResponse(**init_response)
        else:
            # Call the synchronous function using run_in_executor
            response = await loop.run_in_executor(None, func_with_context)
            response = init_response
    elif asyncio.iscoroutine(init_response):
        response = await init_response
    else:
        response = init_response  # type: ignore

    if (
        kwargs.get("stream", False) is True
        or isinstance(response, TextCompletionStreamWrapper)
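The rewritten branch above stops special-casing providers and instead inspects what run_in_executor returned: a dict or TextCompletionResponse (cache hit), a coroutine (async path), or an already-built response. A condensed sketch of that dispatch with stand-in types, assuming the same shapes as the diff:

import asyncio
from typing import Any


class TextCompletionResponse(dict):
    """Stand-in for litellm's TextCompletionResponse."""


async def resolve_executor_result(init_response: Any):
    # Cache hits may come back as a plain dict or an already-typed response object.
    if isinstance(init_response, (dict, TextCompletionResponse)):
        if isinstance(init_response, TextCompletionResponse):
            return init_response
        return TextCompletionResponse(**init_response)
    # The sync wrapper may hand back a coroutine that still needs awaiting.
    if asyncio.iscoroutine(init_response):
        return await init_response
    # Otherwise it is already a final response (e.g. a streaming wrapper).
    return init_response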
@@ -6,7 +6,7 @@
        "input_cost_per_token": 0.0000,
        "output_cost_per_token": 0.000,
        "litellm_provider": "one of https://docs.litellm.ai/docs/providers",
        "mode": "one of chat, embedding, completion, image_generation, audio_transcription, audio_speech",
        "mode": "one of: chat, embedding, completion, image_generation, audio_transcription, audio_speech, image_generation, moderation, rerank",
        "supports_function_calling": true,
        "supports_parallel_function_calling": true,
        "supports_vision": true,

@@ -931,7 +931,7 @@
        "input_cost_per_token": 0.000000,
        "output_cost_per_token": 0.000000,
        "litellm_provider": "openai",
        "mode": "moderations"
        "mode": "moderation"
    },
    "text-moderation-007": {
        "max_tokens": 32768,

@@ -940,7 +940,7 @@
        "input_cost_per_token": 0.000000,
        "output_cost_per_token": 0.000000,
        "litellm_provider": "openai",
        "mode": "moderations"
        "mode": "moderation"
    },
    "text-moderation-latest": {
        "max_tokens": 32768,

@@ -949,7 +949,7 @@
        "input_cost_per_token": 0.000000,
        "output_cost_per_token": 0.000000,
        "litellm_provider": "openai",
        "mode": "moderations"
        "mode": "moderation"
    },
    "256-x-256/dall-e-2": {
        "mode": "image_generation",

@@ -1625,13 +1625,23 @@
        "max_tokens": 8192,
        "max_input_tokens": 128000,
        "max_output_tokens": 8192,
        "input_cost_per_token": 0.0,
        "input_cost_per_token_cache_hit": 0.0,
        "output_cost_per_token": 0.0,
        "input_cost_per_token": 0.00000135,
        "output_cost_per_token": 0.0000054,
        "litellm_provider": "azure_ai",
        "mode": "chat",
        "supports_prompt_caching": true,
        "supports_tool_choice": true
        "supports_tool_choice": true,
        "source": "https://techcommunity.microsoft.com/blog/machinelearningblog/deepseek-r1-improved-performance-higher-limits-and-transparent-pricing/4386367"
    },
    "azure_ai/deepseek-v3": {
        "max_tokens": 8192,
        "max_input_tokens": 128000,
        "max_output_tokens": 8192,
        "input_cost_per_token": 0.00000114,
        "output_cost_per_token": 0.00000456,
        "litellm_provider": "azure_ai",
        "mode": "chat",
        "supports_tool_choice": true,
        "source": "https://techcommunity.microsoft.com/blog/machinelearningblog/announcing-deepseek-v3-on-azure-ai-foundry-and-github/4390438"
    },
    "azure_ai/jamba-instruct": {
        "max_tokens": 4096,

@@ -1643,6 +1653,17 @@
        "mode": "chat",
        "supports_tool_choice": true
    },
    "azure_ai/mistral-nemo": {
        "max_tokens": 4096,
        "max_input_tokens": 131072,
        "max_output_tokens": 4096,
        "input_cost_per_token": 0.00000015,
        "output_cost_per_token": 0.00000015,
        "litellm_provider": "azure_ai",
        "mode": "chat",
        "supports_function_calling": true,
        "source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/000-000.mistral-nemo-12b-2407?tab=PlansAndPrice"
    },
    "azure_ai/mistral-large": {
        "max_tokens": 8191,
        "max_input_tokens": 32000,

@@ -1770,10 +1791,34 @@
        "source":"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-405b-instruct-offer?tab=PlansAndPrice",
        "supports_tool_choice": true
    },
    "azure_ai/Phi-4": {
    "azure_ai/Phi-4-mini-instruct": {
        "max_tokens": 4096,
        "max_input_tokens": 128000,
        "max_input_tokens": 131072,
        "max_output_tokens": 4096,
        "input_cost_per_token": 0,
        "output_cost_per_token": 0,
        "litellm_provider": "azure_ai",
        "mode": "chat",
        "supports_function_calling": true,
        "source": "https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/models-featured#microsoft"
    },
    "azure_ai/Phi-4-multimodal-instruct": {
        "max_tokens": 4096,
        "max_input_tokens": 131072,
        "max_output_tokens": 4096,
        "input_cost_per_token": 0,
        "output_cost_per_token": 0,
        "litellm_provider": "azure_ai",
        "mode": "chat",
        "supports_audio_input": true,
        "supports_function_calling": true,
        "supports_vision": true,
        "source": "https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/models-featured#microsoft"
    },
    "azure_ai/Phi-4": {
        "max_tokens": 16384,
        "max_input_tokens": 16384,
        "max_output_tokens": 16384,
        "input_cost_per_token": 0.000000125,
        "output_cost_per_token": 0.0000005,
        "litellm_provider": "azure_ai",

@@ -3892,31 +3937,6 @@
        "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing",
        "supports_tool_choice": true
    },
    "gemini/gemini-2.0-flash": {
        "max_tokens": 8192,
        "max_input_tokens": 1048576,
        "max_output_tokens": 8192,
        "max_images_per_prompt": 3000,
        "max_videos_per_prompt": 10,
        "max_video_length": 1,
        "max_audio_length_hours": 8.4,
        "max_audio_per_prompt": 1,
        "max_pdf_size_mb": 30,
        "input_cost_per_audio_token": 0.0000007,
        "input_cost_per_token": 0.0000001,
        "output_cost_per_token": 0.0000004,
        "litellm_provider": "gemini",
        "mode": "chat",
        "rpm": 10000,
        "tpm": 10000000,
        "supports_system_messages": true,
        "supports_function_calling": true,
        "supports_vision": true,
        "supports_response_schema": true,
        "supports_audio_output": true,
        "supports_tool_choice": true,
        "source": "https://ai.google.dev/pricing#2_0flash"
    },
    "gemini-2.0-flash-001": {
        "max_tokens": 8192,
        "max_input_tokens": 1048576,

@@ -4008,6 +4028,69 @@
        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
        "supports_tool_choice": true
    },
    "gemini/gemini-2.0-pro-exp-02-05": {
        "max_tokens": 8192,
        "max_input_tokens": 2097152,
        "max_output_tokens": 8192,
        "max_images_per_prompt": 3000,
        "max_videos_per_prompt": 10,
        "max_video_length": 1,
        "max_audio_length_hours": 8.4,
        "max_audio_per_prompt": 1,
        "max_pdf_size_mb": 30,
        "input_cost_per_image": 0,
        "input_cost_per_video_per_second": 0,
        "input_cost_per_audio_per_second": 0,
        "input_cost_per_token": 0,
        "input_cost_per_character": 0,
        "input_cost_per_token_above_128k_tokens": 0,
        "input_cost_per_character_above_128k_tokens": 0,
        "input_cost_per_image_above_128k_tokens": 0,
        "input_cost_per_video_per_second_above_128k_tokens": 0,
        "input_cost_per_audio_per_second_above_128k_tokens": 0,
        "output_cost_per_token": 0,
        "output_cost_per_character": 0,
        "output_cost_per_token_above_128k_tokens": 0,
        "output_cost_per_character_above_128k_tokens": 0,
        "litellm_provider": "gemini",
        "mode": "chat",
        "rpm": 2,
        "tpm": 1000000,
        "supports_system_messages": true,
        "supports_function_calling": true,
        "supports_vision": true,
        "supports_audio_input": true,
        "supports_video_input": true,
        "supports_pdf_input": true,
        "supports_response_schema": true,
        "supports_tool_choice": true,
        "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
    },
    "gemini/gemini-2.0-flash": {
        "max_tokens": 8192,
        "max_input_tokens": 1048576,
        "max_output_tokens": 8192,
        "max_images_per_prompt": 3000,
        "max_videos_per_prompt": 10,
        "max_video_length": 1,
        "max_audio_length_hours": 8.4,
        "max_audio_per_prompt": 1,
        "max_pdf_size_mb": 30,
        "input_cost_per_audio_token": 0.0000007,
        "input_cost_per_token": 0.0000001,
        "output_cost_per_token": 0.0000004,
        "litellm_provider": "gemini",
        "mode": "chat",
        "rpm": 10000,
        "tpm": 10000000,
        "supports_system_messages": true,
        "supports_function_calling": true,
        "supports_vision": true,
        "supports_response_schema": true,
        "supports_audio_output": true,
        "supports_tool_choice": true,
        "source": "https://ai.google.dev/pricing#2_0flash"
    },
    "gemini/gemini-2.0-flash-001": {
        "max_tokens": 8192,
        "max_input_tokens": 1048576,

@@ -4511,6 +4594,12 @@
        "mode": "image_generation",
        "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
    },
    "vertex_ai/imagen-3.0-generate-002": {
        "output_cost_per_image": 0.04,
        "litellm_provider": "vertex_ai-image-models",
        "mode": "image_generation",
        "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
    },
    "vertex_ai/imagen-3.0-generate-001": {
        "output_cost_per_image": 0.04,
        "litellm_provider": "vertex_ai-image-models",

@@ -6547,6 +6636,12 @@
        "supports_prompt_caching": true,
        "supports_response_schema": true
    },
    "1024-x-1024/50-steps/bedrock/amazon.nova-canvas-v1:0": {
        "max_input_tokens": 2600,
        "output_cost_per_image": 0.06,
        "litellm_provider": "bedrock",
        "mode": "image_generation"
    },
    "eu.amazon.nova-pro-v1:0": {
        "max_tokens": 4096,
        "max_input_tokens": 300000,

@@ -7477,6 +7572,18 @@
        "litellm_provider": "bedrock",
        "mode": "embedding"
    },
    "us.deepseek.r1-v1:0": {
        "max_tokens": 4096,
        "max_input_tokens": 128000,
        "max_output_tokens": 4096,
        "input_cost_per_token": 0.00000135,
        "output_cost_per_token": 0.0000054,
        "litellm_provider": "bedrock_converse",
        "mode": "chat",
        "supports_function_calling": false,
        "supports_tool_choice": false
    },
    "meta.llama3-3-70b-instruct-v1:0": {
        "max_tokens": 4096,
        "max_input_tokens": 128000,
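For reference, per-token prices like the ones added above translate to request cost in the usual way; a small back-of-the-envelope check using the deepseek-r1 numbers from this hunk (0.00000135 USD per input token, 0.0000054 USD per output token; the token counts are made up for illustration):

# Cost sketch using values copied from the pricing entries above.
input_cost_per_token = 0.00000135
output_cost_per_token = 0.0000054

prompt_tokens = 1_200
completion_tokens = 400

cost = prompt_tokens * input_cost_per_token + completion_tokens * output_cost_per_token
print(f"${cost:.6f}")  # $0.003780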
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-524b80e1a6b8bb06.js" async=""></script><script src="/ui/_next/static/chunks/117-883150efc583d711.js" async=""></script><script src="/ui/_next/static/chunks/main-app-475d6efe4080647d.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/f41c66e22715ab00.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[92222,[\"665\",\"static/chunks/3014691f-0b72c78cfebbd712.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"441\",\"static/chunks/441-79926bf2b9d89e04.js\",\"261\",\"static/chunks/261-e48c2ac6ff0b811c.js\",\"899\",\"static/chunks/899-354f59ecde307dfa.js\",\"914\",\"static/chunks/914-e17acab83d0eadb5.js\",\"250\",\"static/chunks/250-51513f2f6dabf571.js\",\"699\",\"static/chunks/699-6b82f8e7b98ca1a3.js\",\"931\",\"static/chunks/app/page-b36633214e76cfd1.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"rCxUxULLkHhl5KoPY9DHv\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/f41c66e22715ab00.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-524b80e1a6b8bb06.js" async=""></script><script src="/ui/_next/static/chunks/117-883150efc583d711.js" async=""></script><script src="/ui/_next/static/chunks/main-app-475d6efe4080647d.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/b6d997482399c7e1.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[62177,[\"665\",\"static/chunks/3014691f-0b72c78cfebbd712.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"441\",\"static/chunks/441-79926bf2b9d89e04.js\",\"261\",\"static/chunks/261-cb27c20c4f8ec4c6.js\",\"899\",\"static/chunks/899-354f59ecde307dfa.js\",\"157\",\"static/chunks/157-cf7bc8b3ae1b80ba.js\",\"250\",\"static/chunks/250-51513f2f6dabf571.js\",\"699\",\"static/chunks/699-6b82f8e7b98ca1a3.js\",\"931\",\"static/chunks/app/page-a25b75c267486fe2.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"i92Qc9kkJSCtCgV3DDmdu\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/b6d997482399c7e1.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html>

@@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"]
3:I[92222,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","441","static/chunks/441-79926bf2b9d89e04.js","261","static/chunks/261-e48c2ac6ff0b811c.js","899","static/chunks/899-354f59ecde307dfa.js","914","static/chunks/914-e17acab83d0eadb5.js","250","static/chunks/250-51513f2f6dabf571.js","699","static/chunks/699-6b82f8e7b98ca1a3.js","931","static/chunks/app/page-b36633214e76cfd1.js"],"default",1]
3:I[62177,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","441","static/chunks/441-79926bf2b9d89e04.js","261","static/chunks/261-cb27c20c4f8ec4c6.js","899","static/chunks/899-354f59ecde307dfa.js","157","static/chunks/157-cf7bc8b3ae1b80ba.js","250","static/chunks/250-51513f2f6dabf571.js","699","static/chunks/699-6b82f8e7b98ca1a3.js","931","static/chunks/app/page-a25b75c267486fe2.js"],"default",1]
4:I[4707,[],""]
5:I[36423,[],""]
0:["rCxUxULLkHhl5KoPY9DHv",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/f41c66e22715ab00.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
0:["i92Qc9kkJSCtCgV3DDmdu",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/b6d997482399c7e1.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

@@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"]
3:I[52829,["441","static/chunks/441-79926bf2b9d89e04.js","261","static/chunks/261-e48c2ac6ff0b811c.js","250","static/chunks/250-51513f2f6dabf571.js","699","static/chunks/699-6b82f8e7b98ca1a3.js","418","static/chunks/app/model_hub/page-6f97b95f1023b0e9.js"],"default",1]
3:I[52829,["441","static/chunks/441-79926bf2b9d89e04.js","261","static/chunks/261-cb27c20c4f8ec4c6.js","250","static/chunks/250-51513f2f6dabf571.js","699","static/chunks/699-6b82f8e7b98ca1a3.js","418","static/chunks/app/model_hub/page-6f97b95f1023b0e9.js"],"default",1]
4:I[4707,[],""]
5:I[36423,[],""]
0:["rCxUxULLkHhl5KoPY9DHv",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/f41c66e22715ab00.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
0:["i92Qc9kkJSCtCgV3DDmdu",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/b6d997482399c7e1.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

@@ -2,6 +2,6 @@
3:I[12011,["665","static/chunks/3014691f-0b72c78cfebbd712.js","441","static/chunks/441-79926bf2b9d89e04.js","899","static/chunks/899-354f59ecde307dfa.js","250","static/chunks/250-51513f2f6dabf571.js","461","static/chunks/app/onboarding/page-a31bc08c35f01c0a.js"],"default",1]
4:I[4707,[],""]
5:I[36423,[],""]
0:["rCxUxULLkHhl5KoPY9DHv",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/f41c66e22715ab00.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
0:["i92Qc9kkJSCtCgV3DDmdu",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/b6d997482399c7e1.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

@@ -1,13 +1,5 @@
model_list:
  - model_name: gpt-4o
  - model_name: amazon.nova-canvas-v1:0
    litellm_params:
      model: azure/gpt-4o
      litellm_credential_name: default_azure_credential

credential_list:
  - credential_name: default_azure_credential
    credential_values:
      api_key: os.environ/AZURE_API_KEY
      api_base: os.environ/AZURE_API_BASE
    credential_info:
      description: "Default Azure credential"
      model: bedrock/amazon.nova-canvas-v1:0
      aws_region_name: "us-east-1"
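A hedged example of how a client might call the model added to this config through the proxy, assuming the proxy runs on localhost:4000 with the key "sk-1234" and exposes the OpenAI-compatible image endpoint; exact routing depends on the proxy setup, so treat this as a sketch rather than a verified call.

# Sketch only: assumes a locally running LiteLLM proxy with the config above loaded.
from openai import OpenAI

client = OpenAI(api_key="sk-1234", base_url="http://localhost:4000")

image = client.images.generate(
    model="amazon.nova-canvas-v1:0",  # model_name from the config above
    prompt="A watercolor painting of a lighthouse at dusk",
)
print(image.data[0] is not None)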
@@ -1994,13 +1994,14 @@ class ProxyException(Exception):
        message: str,
        type: str,
        param: Optional[str],
        code: Optional[Union[int, str]] = None,
        code: Optional[Union[int, str]] = None,  # maps to status code
        headers: Optional[Dict[str, str]] = None,
        openai_code: Optional[str] = None,  # maps to 'code' in openai
    ):
        self.message = str(message)
        self.type = type
        self.param = param

        self.openai_code = openai_code or code
        # If we look on official python OpenAI lib, the code should be a string:
        # https://github.com/openai/openai-python/blob/195c05a64d39c87b2dfdf1eca2d339597f1fce03/src/openai/types/shared/error_object.py#L11
        # Related LiteLLM issue: https://github.com/BerriAI/litellm/discussions/4834

@@ -2054,6 +2055,7 @@ class ProxyErrorTypes(str, enum.Enum):
    budget_exceeded = "budget_exceeded"
    key_model_access_denied = "key_model_access_denied"
    team_model_access_denied = "team_model_access_denied"
    user_model_access_denied = "user_model_access_denied"
    expired_key = "expired_key"
    auth_error = "auth_error"
    internal_server_error = "internal_server_error"

@@ -2062,6 +2064,20 @@ class ProxyErrorTypes(str, enum.Enum):
    validation_error = "bad_request_error"
    cache_ping_error = "cache_ping_error"

    @classmethod
    def get_model_access_error_type_for_object(
        cls, object_type: Literal["key", "user", "team"]
    ) -> "ProxyErrorTypes":
        """
        Get the model access error type for object_type
        """
        if object_type == "key":
            return cls.key_model_access_denied
        elif object_type == "team":
            return cls.team_model_access_denied
        elif object_type == "user":
            return cls.user_model_access_denied


DB_CONNECTION_ERROR_TYPES = (httpx.ConnectError, httpx.ReadError, httpx.ReadTimeout)

@@ -2283,6 +2299,7 @@ class SpecialHeaders(enum.Enum):
    azure_authorization = "API-Key"
    anthropic_authorization = "x-api-key"
    google_ai_studio_authorization = "x-goog-api-key"
    azure_apim_authorization = "Ocp-Apim-Subscription-Key"


class LitellmDataForBackendLLMCall(TypedDict, total=False):
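The new classmethod keeps the raised error type aligned with whichever object (key, team, or user) failed the model-access check; a short standalone usage sketch, re-declaring only the enum values shown in the diff:

import enum
from typing import Literal


class ProxyErrorTypes(str, enum.Enum):
    key_model_access_denied = "key_model_access_denied"
    team_model_access_denied = "team_model_access_denied"
    user_model_access_denied = "user_model_access_denied"

    @classmethod
    def get_model_access_error_type_for_object(
        cls, object_type: Literal["key", "user", "team"]
    ) -> "ProxyErrorTypes":
        if object_type == "key":
            return cls.key_model_access_denied
        elif object_type == "team":
            return cls.team_model_access_denied
        return cls.user_model_access_denied


print(ProxyErrorTypes.get_model_access_error_type_for_object("team"))
# ProxyErrorTypes.team_model_access_denied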
@ -98,11 +98,18 @@ async def common_checks(
|
|||
)
|
||||
|
||||
# 2. If team can call model
|
||||
_team_model_access_check(
|
||||
team_object=team_object,
|
||||
if _model and team_object:
|
||||
if not await can_team_access_model(
|
||||
model=_model,
|
||||
team_object=team_object,
|
||||
llm_router=llm_router,
|
||||
team_model_aliases=valid_token.team_model_aliases if valid_token else None,
|
||||
):
|
||||
raise ProxyException(
|
||||
message=f"Team not allowed to access model. Team={team_object.team_id}, Model={_model}. Allowed team models = {team_object.models}",
|
||||
type=ProxyErrorTypes.team_model_access_denied,
|
||||
param="model",
|
||||
code=status.HTTP_401_UNAUTHORIZED,
|
||||
)
|
||||
|
||||
## 2.1 If user can call model (if personal key)
|
||||
|
@ -971,10 +978,18 @@ async def _can_object_call_model(
|
|||
llm_router: Optional[Router],
|
||||
models: List[str],
|
||||
team_model_aliases: Optional[Dict[str, str]] = None,
|
||||
object_type: Literal["user", "team", "key"] = "user",
|
||||
) -> Literal[True]:
|
||||
"""
|
||||
Checks if token can call a given model
|
||||
|
||||
Args:
|
||||
- model: str
|
||||
- llm_router: Optional[Router]
|
||||
- models: List[str]
|
||||
- team_model_aliases: Optional[Dict[str, str]]
|
||||
- object_type: Literal["user", "team", "key"]. We use the object type to raise the correct exception type
|
||||
|
||||
Returns:
|
||||
- True: if token allowed to call model
|
||||
|
||||
|
@ -1018,10 +1033,15 @@ async def _can_object_call_model(
|
|||
if (len(filtered_models) == 0 and len(models) == 0) or "*" in filtered_models:
|
||||
all_model_access = True
|
||||
|
||||
if SpecialModelNames.all_proxy_models.value in filtered_models:
|
||||
all_model_access = True
|
||||
|
||||
if model is not None and model not in filtered_models and all_model_access is False:
|
||||
raise ProxyException(
|
||||
message=f"API Key not allowed to access model. This token can only access models={models}. Tried to access {model}",
|
||||
type=ProxyErrorTypes.key_model_access_denied,
|
||||
message=f"{object_type} not allowed to access model. This {object_type} can only access models={models}. Tried to access {model}",
|
||||
type=ProxyErrorTypes.get_model_access_error_type_for_object(
|
||||
object_type=object_type
|
||||
),
|
||||
param="model",
|
||||
code=status.HTTP_401_UNAUTHORIZED,
|
||||
)
|
||||
|
@ -1072,6 +1092,26 @@ async def can_key_call_model(
|
|||
llm_router=llm_router,
|
||||
models=valid_token.models,
|
||||
team_model_aliases=valid_token.team_model_aliases,
|
||||
object_type="key",
|
||||
)
|
||||
|
||||
|
||||
async def can_team_access_model(
|
||||
model: str,
|
||||
team_object: Optional[LiteLLM_TeamTable],
|
||||
llm_router: Optional[Router],
|
||||
team_model_aliases: Optional[Dict[str, str]] = None,
|
||||
) -> Literal[True]:
|
||||
"""
|
||||
Returns True if the team can access a specific model.
|
||||
|
||||
"""
|
||||
return await _can_object_call_model(
|
||||
model=model,
|
||||
llm_router=llm_router,
|
||||
models=team_object.models if team_object else [],
|
||||
team_model_aliases=team_model_aliases,
|
||||
object_type="team",
|
||||
)
|
||||
|
||||
|
||||
|
@ -1096,6 +1136,7 @@ async def can_user_call_model(
|
|||
model=model,
|
||||
llm_router=llm_router,
|
||||
models=user_object.models,
|
||||
object_type="user",
|
||||
)
|
||||
|
||||
|
||||
|
@ -1248,53 +1289,6 @@ async def _team_max_budget_check(
|
|||
)
|
||||
|
||||
|
||||
def _team_model_access_check(
|
||||
model: Optional[str],
|
||||
team_object: Optional[LiteLLM_TeamTable],
|
||||
llm_router: Optional[Router],
|
||||
team_model_aliases: Optional[Dict[str, str]] = None,
|
||||
):
|
||||
"""
|
||||
Access check for team models
|
||||
Raises:
|
||||
Exception if the team is not allowed to call the `model`
|
||||
"""
|
||||
if (
|
||||
model is not None
|
||||
and team_object is not None
|
||||
and team_object.models is not None
|
||||
and len(team_object.models) > 0
|
||||
and model not in team_object.models
|
||||
):
|
||||
# this means the team has access to all models on the proxy
|
||||
if "all-proxy-models" in team_object.models or "*" in team_object.models:
|
||||
# this means the team has access to all models on the proxy
|
||||
pass
|
||||
# check if the team model is an access_group
|
||||
elif (
|
||||
model_in_access_group(
|
||||
model=model, team_models=team_object.models, llm_router=llm_router
|
||||
)
|
||||
is True
|
||||
):
|
||||
pass
|
||||
elif model and "*" in model:
|
||||
pass
|
||||
elif _model_in_team_aliases(model=model, team_model_aliases=team_model_aliases):
|
||||
pass
|
||||
elif _model_matches_any_wildcard_pattern_in_list(
|
||||
model=model, allowed_model_list=team_object.models
|
||||
):
|
||||
pass
|
||||
else:
|
||||
raise ProxyException(
|
||||
message=f"Team not allowed to access model. Team={team_object.team_id}, Model={model}. Allowed team models = {team_object.models}",
|
||||
type=ProxyErrorTypes.team_model_access_denied,
|
||||
param="model",
|
||||
code=status.HTTP_401_UNAUTHORIZED,
|
||||
)
|
||||
|
||||
|
||||
def is_model_allowed_by_pattern(model: str, allowed_model_pattern: str) -> bool:
|
||||
"""
|
||||
Check if a model matches an allowed pattern.
|
||||
|
|
|
@ -33,6 +33,7 @@ from litellm.proxy._types import (
|
|||
ScopeMapping,
|
||||
Span,
|
||||
)
|
||||
from litellm.proxy.auth.auth_checks import can_team_access_model
|
||||
from litellm.proxy.utils import PrismaClient, ProxyLogging
|
||||
|
||||
from .auth_checks import (
|
||||
|
@ -344,11 +345,16 @@ class JWTHandler:
|
|||
if keys_url is None:
|
||||
raise Exception("Missing JWT Public Key URL from environment.")
|
||||
|
||||
cached_keys = await self.user_api_key_cache.async_get_cache(
|
||||
"litellm_jwt_auth_keys"
|
||||
)
|
||||
keys_url_list = [url.strip() for url in keys_url.split(",")]
|
||||
|
||||
for key_url in keys_url_list:
|
||||
|
||||
cache_key = f"litellm_jwt_auth_keys_{key_url}"
|
||||
|
||||
cached_keys = await self.user_api_key_cache.async_get_cache(cache_key)
|
||||
|
||||
if cached_keys is None:
|
||||
response = await self.http_handler.get(keys_url)
|
||||
response = await self.http_handler.get(key_url)
|
||||
|
||||
response_json = response.json()
|
||||
if "keys" in response_json:
|
||||
|
@ -357,7 +363,7 @@ class JWTHandler:
|
|||
keys = response_json
|
||||
|
||||
await self.user_api_key_cache.async_set_cache(
|
||||
key="litellm_jwt_auth_keys",
|
||||
key=cache_key,
|
||||
value=keys,
|
||||
ttl=self.litellm_jwtauth.public_key_ttl, # cache for 10 mins
|
||||
)
|
||||
|
@ -365,12 +371,13 @@ class JWTHandler:
|
|||
keys = cached_keys
|
||||
|
||||
public_key = self.parse_keys(keys=keys, kid=kid)
|
||||
if public_key is None:
|
||||
raise Exception(
|
||||
f"No matching public key found. kid={kid}, keys_url={keys_url}, cached_keys={cached_keys}, len(keys)={len(keys)}"
|
||||
)
|
||||
if public_key is not None:
|
||||
return cast(dict, public_key)
|
||||
|
||||
raise Exception(
|
||||
f"No matching public key found. keys={keys_url_list}, kid={kid}"
|
||||
)
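Roughly how the new per-URL cache keys behave when several JWKS endpoints are configured; the URLs below are made-up examples:

# Hypothetical: two JWKS endpoints supplied as a comma-separated env value
keys_url = "https://idp-a.example.com/jwks,https://idp-b.example.com/jwks"
keys_url_list = [url.strip() for url in keys_url.split(",")]
for key_url in keys_url_list:
    cache_key = f"litellm_jwt_auth_keys_{key_url}"
    # each endpoint's keys are cached under their own key instead of the single
    # shared "litellm_jwt_auth_keys" entry used before this change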
|
||||
|
||||
def parse_keys(self, keys: JWKKeyValue, kid: Optional[str]) -> Optional[JWTKeyItem]:
|
||||
public_key: Optional[JWTKeyItem] = None
|
||||
if len(keys) == 1:
|
||||
|
@ -723,8 +730,12 @@ class JWTAuthManager:
|
|||
team_models = team_object.models
|
||||
if isinstance(team_models, list) and (
|
||||
not requested_model
|
||||
or requested_model in team_models
|
||||
or "*" in team_models
|
||||
or can_team_access_model(
|
||||
model=requested_model,
|
||||
team_object=team_object,
|
||||
llm_router=None,
|
||||
team_model_aliases=None,
|
||||
)
|
||||
):
|
||||
is_allowed = allowed_routes_check(
|
||||
user_role=LitellmUserRoles.TEAM,
|
||||
|
|
|
@ -77,6 +77,11 @@ google_ai_studio_api_key_header = APIKeyHeader(
|
|||
auto_error=False,
|
||||
description="If google ai studio client used.",
|
||||
)
|
||||
azure_apim_header = APIKeyHeader(
|
||||
name=SpecialHeaders.azure_apim_authorization.value,
|
||||
auto_error=False,
|
||||
description="The default name of the subscription key header of Azure",
|
||||
)
|
||||
|
||||
|
||||
def _get_bearer_token(
|
||||
|
@ -301,6 +306,7 @@ async def _user_api_key_auth_builder( # noqa: PLR0915
|
|||
azure_api_key_header: str,
|
||||
anthropic_api_key_header: Optional[str],
|
||||
google_ai_studio_api_key_header: Optional[str],
|
||||
azure_apim_header: Optional[str],
|
||||
request_data: dict,
|
||||
) -> UserAPIKeyAuth:
|
||||
|
||||
|
@ -344,6 +350,8 @@ async def _user_api_key_auth_builder( # noqa: PLR0915
|
|||
api_key = anthropic_api_key_header
|
||||
elif isinstance(google_ai_studio_api_key_header, str):
|
||||
api_key = google_ai_studio_api_key_header
|
||||
elif isinstance(azure_apim_header, str):
|
||||
api_key = azure_apim_header
|
||||
elif pass_through_endpoints is not None:
|
||||
for endpoint in pass_through_endpoints:
|
||||
if endpoint.get("path", "") == route:
|
||||
|
@ -1165,6 +1173,7 @@ async def user_api_key_auth(
|
|||
google_ai_studio_api_key_header: Optional[str] = fastapi.Security(
|
||||
google_ai_studio_api_key_header
|
||||
),
|
||||
azure_apim_header: Optional[str] = fastapi.Security(azure_apim_header),
|
||||
) -> UserAPIKeyAuth:
|
||||
"""
|
||||
Parent function to authenticate user api key / jwt token.
|
||||
|
@ -1178,6 +1187,7 @@ async def user_api_key_auth(
|
|||
azure_api_key_header=azure_api_key_header,
|
||||
anthropic_api_key_header=anthropic_api_key_header,
|
||||
google_ai_studio_api_key_header=google_ai_studio_api_key_header,
|
||||
azure_apim_header=azure_apim_header,
|
||||
request_data=request_data,
|
||||
)
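A hedged client-side sketch of the new header support; the endpoint, key, and model are placeholders:

# Hypothetical request to a LiteLLM proxy - the Azure APIM subscription key
# header is now accepted as an API key source alongside the existing headers.
import httpx

resp = httpx.post(
    "http://localhost:4000/v1/chat/completions",
    headers={"Ocp-Apim-Subscription-Key": "sk-1234"},
    json={"model": "gpt-4o", "messages": [{"role": "user", "content": "hi"}]},
)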
|
||||
|
||||
|
|
|
@ -365,6 +365,8 @@ async def user_info(
|
|||
and user_api_key_dict.user_role == LitellmUserRoles.PROXY_ADMIN
|
||||
):
|
||||
return await _get_user_info_for_proxy_admin()
|
||||
elif user_id is None:
|
||||
user_id = user_api_key_dict.user_id
|
||||
## GET USER ROW ##
|
||||
if user_id is not None:
|
||||
user_info = await prisma_client.get_data(user_id=user_id)
|
||||
|
@ -373,10 +375,6 @@ async def user_info(
|
|||
## GET ALL TEAMS ##
|
||||
team_list = []
|
||||
team_id_list = []
|
||||
# get all teams user belongs to
|
||||
# teams_1 = await prisma_client.get_data(
|
||||
# user_id=user_id, table_name="team", query_type="find_all"
|
||||
# )
|
||||
from litellm.proxy.management_endpoints.team_endpoints import list_team
|
||||
|
||||
teams_1 = await list_team(
|
||||
|
|
|
@ -3,8 +3,8 @@ import asyncio
|
|||
import json
|
||||
from base64 import b64encode
|
||||
from datetime import datetime
|
||||
from typing import List, Optional
|
||||
from urllib.parse import urlparse
|
||||
from typing import Dict, List, Optional, Union
|
||||
from urllib.parse import parse_qs, urlencode, urlparse
|
||||
|
||||
import httpx
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request, Response, status
|
||||
|
@ -307,6 +307,21 @@ class HttpPassThroughEndpointHelpers:
|
|||
return EndpointType.ANTHROPIC
|
||||
return EndpointType.GENERIC
|
||||
|
||||
@staticmethod
|
||||
def get_merged_query_parameters(
|
||||
existing_url: httpx.URL, request_query_params: Dict[str, Union[str, list]]
|
||||
) -> Dict[str, Union[str, List[str]]]:
|
||||
# Get the existing query params from the target URL
|
||||
existing_query_string = existing_url.query.decode("utf-8")
|
||||
existing_query_params = parse_qs(existing_query_string)
|
||||
|
||||
# parse_qs returns a dict where each value is a list, so let's flatten it
|
||||
updated_existing_query_params = {
|
||||
k: v[0] if len(v) == 1 else v for k, v in existing_query_params.items()
|
||||
}
|
||||
# Merge the query params, giving priority to the existing ones
|
||||
return {**request_query_params, **updated_existing_query_params}
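A small worked example of the merge precedence; the URL and parameters are illustrative only:

# Hypothetical call to HttpPassThroughEndpointHelpers.get_merged_query_parameters
import httpx

existing_url = httpx.URL("https://generativelanguage.googleapis.com/v1beta/models?key=target-key")
request_query_params = {"key": "client-key", "alt": "sse"}
merged = HttpPassThroughEndpointHelpers.get_merged_query_parameters(
    existing_url=existing_url,
    request_query_params=request_query_params,
)
# -> {"key": "target-key", "alt": "sse"}  (params already on the target URL win)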
|
||||
|
||||
@staticmethod
|
||||
async def _make_non_streaming_http_request(
|
||||
request: Request,
|
||||
|
@ -346,6 +361,7 @@ async def pass_through_request( # noqa: PLR0915
|
|||
user_api_key_dict: UserAPIKeyAuth,
|
||||
custom_body: Optional[dict] = None,
|
||||
forward_headers: Optional[bool] = False,
|
||||
merge_query_params: Optional[bool] = False,
|
||||
query_params: Optional[dict] = None,
|
||||
stream: Optional[bool] = None,
|
||||
):
|
||||
|
@ -361,6 +377,18 @@ async def pass_through_request( # noqa: PLR0915
|
|||
request=request, headers=headers, forward_headers=forward_headers
|
||||
)
|
||||
|
||||
if merge_query_params:
|
||||
|
||||
# Create a new URL with the merged query params
|
||||
url = url.copy_with(
|
||||
query=urlencode(
|
||||
HttpPassThroughEndpointHelpers.get_merged_query_parameters(
|
||||
existing_url=url,
|
||||
request_query_params=dict(request.query_params),
|
||||
)
|
||||
).encode("ascii")
|
||||
)
|
||||
|
||||
endpoint_type: EndpointType = HttpPassThroughEndpointHelpers.get_endpoint_type(
|
||||
str(url)
|
||||
)
|
||||
|
@ -657,6 +685,7 @@ def create_pass_through_route(
|
|||
target: str,
|
||||
custom_headers: Optional[dict] = None,
|
||||
_forward_headers: Optional[bool] = False,
|
||||
_merge_query_params: Optional[bool] = False,
|
||||
dependencies: Optional[List] = None,
|
||||
):
|
||||
# check if target is an adapter.py or a url
|
||||
|
@ -703,6 +732,7 @@ def create_pass_through_route(
|
|||
custom_headers=custom_headers or {},
|
||||
user_api_key_dict=user_api_key_dict,
|
||||
forward_headers=_forward_headers,
|
||||
merge_query_params=_merge_query_params,
|
||||
query_params=query_params,
|
||||
stream=stream,
|
||||
custom_body=custom_body,
|
||||
|
@ -732,6 +762,7 @@ async def initialize_pass_through_endpoints(pass_through_endpoints: list):
|
|||
custom_headers=_custom_headers
|
||||
)
|
||||
_forward_headers = endpoint.get("forward_headers", None)
|
||||
_merge_query_params = endpoint.get("merge_query_params", None)
|
||||
_auth = endpoint.get("auth", None)
|
||||
_dependencies = None
|
||||
if _auth is not None and str(_auth).lower() == "true":
|
||||
|
@ -753,7 +784,12 @@ async def initialize_pass_through_endpoints(pass_through_endpoints: list):
|
|||
app.add_api_route( # type: ignore
|
||||
path=_path,
|
||||
endpoint=create_pass_through_route( # type: ignore
|
||||
_path, _target, _custom_headers, _forward_headers, _dependencies
|
||||
_path,
|
||||
_target,
|
||||
_custom_headers,
|
||||
_forward_headers,
|
||||
_merge_query_params,
|
||||
_dependencies,
|
||||
),
|
||||
methods=["GET", "POST", "PUT", "DELETE", "PATCH"],
|
||||
dependencies=_dependencies,
|
||||
|
|
|
@ -949,7 +949,9 @@ def _set_spend_logs_payload(
|
|||
spend_logs_url: Optional[str] = None,
|
||||
):
|
||||
verbose_proxy_logger.info(
|
||||
"Writing spend log to db - request_id: {}".format(payload.get("request_id"))
|
||||
"Writing spend log to db - request_id: {}, spend: {}".format(
|
||||
payload.get("request_id"), payload.get("spend")
|
||||
)
|
||||
)
|
||||
if prisma_client is not None and spend_logs_url is not None:
|
||||
if isinstance(payload["startTime"], datetime):
|
||||
|
@ -3759,6 +3761,7 @@ async def chat_completion( # noqa: PLR0915
|
|||
message=getattr(e, "message", error_msg),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
openai_code=getattr(e, "code", None),
|
||||
code=getattr(e, "status_code", 500),
|
||||
headers=headers,
|
||||
)
|
||||
|
@ -3972,6 +3975,7 @@ async def completion( # noqa: PLR0915
|
|||
message=getattr(e, "message", error_msg),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
openai_code=getattr(e, "code", None),
|
||||
code=getattr(e, "status_code", 500),
|
||||
)
|
||||
|
||||
|
@ -4181,6 +4185,7 @@ async def embeddings( # noqa: PLR0915
|
|||
message=getattr(e, "message", error_msg),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
openai_code=getattr(e, "code", None),
|
||||
code=getattr(e, "status_code", 500),
|
||||
)
|
||||
|
||||
|
@ -4300,6 +4305,7 @@ async def image_generation(
|
|||
message=getattr(e, "message", error_msg),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
openai_code=getattr(e, "code", None),
|
||||
code=getattr(e, "status_code", 500),
|
||||
)
|
||||
|
||||
|
@ -4561,6 +4567,7 @@ async def audio_transcriptions(
|
|||
message=getattr(e, "message", error_msg),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
openai_code=getattr(e, "code", None),
|
||||
code=getattr(e, "status_code", 500),
|
||||
)
|
||||
|
||||
|
@ -4710,6 +4717,7 @@ async def get_assistants(
|
|||
message=getattr(e, "message", error_msg),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
openai_code=getattr(e, "code", None),
|
||||
code=getattr(e, "status_code", 500),
|
||||
)
|
||||
|
||||
|
@ -4808,7 +4816,7 @@ async def create_assistant(
|
|||
message=getattr(e, "message", error_msg),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
code=getattr(e, "status_code", 500),
|
||||
code=getattr(e, "code", getattr(e, "status_code", 500)),
|
||||
)
|
||||
|
||||
|
||||
|
@ -4905,7 +4913,7 @@ async def delete_assistant(
|
|||
message=getattr(e, "message", error_msg),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
code=getattr(e, "status_code", 500),
|
||||
code=getattr(e, "code", getattr(e, "status_code", 500)),
|
||||
)
|
||||
|
||||
|
||||
|
@ -5002,7 +5010,7 @@ async def create_threads(
|
|||
message=getattr(e, "message", error_msg),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
code=getattr(e, "status_code", 500),
|
||||
code=getattr(e, "code", getattr(e, "status_code", 500)),
|
||||
)
|
||||
|
||||
|
||||
|
@ -5098,7 +5106,7 @@ async def get_thread(
|
|||
message=getattr(e, "message", error_msg),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
code=getattr(e, "status_code", 500),
|
||||
code=getattr(e, "code", getattr(e, "status_code", 500)),
|
||||
)
|
||||
|
||||
|
||||
|
@ -5197,7 +5205,7 @@ async def add_messages(
|
|||
message=getattr(e, "message", error_msg),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
code=getattr(e, "status_code", 500),
|
||||
code=getattr(e, "code", getattr(e, "status_code", 500)),
|
||||
)
|
||||
|
||||
|
||||
|
@ -5292,7 +5300,7 @@ async def get_messages(
|
|||
message=getattr(e, "message", error_msg),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
code=getattr(e, "status_code", 500),
|
||||
code=getattr(e, "code", getattr(e, "status_code", 500)),
|
||||
)
|
||||
|
||||
|
||||
|
@ -5401,7 +5409,7 @@ async def run_thread(
|
|||
message=getattr(e, "message", error_msg),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
code=getattr(e, "status_code", 500),
|
||||
code=getattr(e, "code", getattr(e, "status_code", 500)),
|
||||
)
|
||||
|
||||
|
||||
|
|
litellm/responses/main.py (new file, 217 lines)
|
@ -0,0 +1,217 @@
|
|||
import asyncio
|
||||
import contextvars
|
||||
from functools import partial
|
||||
from typing import Any, Dict, Iterable, List, Literal, Optional, Union
|
||||
|
||||
import httpx
|
||||
|
||||
import litellm
|
||||
from litellm.constants import request_timeout
|
||||
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
|
||||
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
|
||||
from litellm.llms.custom_httpx.llm_http_handler import BaseLLMHTTPHandler
|
||||
from litellm.responses.utils import ResponsesAPIRequestUtils
|
||||
from litellm.types.llms.openai import (
|
||||
Reasoning,
|
||||
ResponseIncludable,
|
||||
ResponseInputParam,
|
||||
ResponsesAPIOptionalRequestParams,
|
||||
ResponsesAPIResponse,
|
||||
ResponseTextConfigParam,
|
||||
ToolChoice,
|
||||
ToolParam,
|
||||
)
|
||||
from litellm.types.router import GenericLiteLLMParams
|
||||
from litellm.utils import ProviderConfigManager, client
|
||||
|
||||
from .streaming_iterator import BaseResponsesAPIStreamingIterator
|
||||
|
||||
####### ENVIRONMENT VARIABLES ###################
|
||||
# Initialize any necessary instances or variables here
|
||||
base_llm_http_handler = BaseLLMHTTPHandler()
|
||||
#################################################
|
||||
|
||||
|
||||
@client
|
||||
async def aresponses(
|
||||
input: Union[str, ResponseInputParam],
|
||||
model: str,
|
||||
include: Optional[List[ResponseIncludable]] = None,
|
||||
instructions: Optional[str] = None,
|
||||
max_output_tokens: Optional[int] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
parallel_tool_calls: Optional[bool] = None,
|
||||
previous_response_id: Optional[str] = None,
|
||||
reasoning: Optional[Reasoning] = None,
|
||||
store: Optional[bool] = None,
|
||||
stream: Optional[bool] = None,
|
||||
temperature: Optional[float] = None,
|
||||
text: Optional[ResponseTextConfigParam] = None,
|
||||
tool_choice: Optional[ToolChoice] = None,
|
||||
tools: Optional[Iterable[ToolParam]] = None,
|
||||
top_p: Optional[float] = None,
|
||||
truncation: Optional[Literal["auto", "disabled"]] = None,
|
||||
user: Optional[str] = None,
|
||||
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
||||
# The extra values given here take precedence over values defined on the client or passed to this method.
|
||||
extra_headers: Optional[Dict[str, Any]] = None,
|
||||
extra_query: Optional[Dict[str, Any]] = None,
|
||||
extra_body: Optional[Dict[str, Any]] = None,
|
||||
timeout: Optional[Union[float, httpx.Timeout]] = None,
|
||||
**kwargs,
|
||||
) -> Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]:
|
||||
"""
|
||||
Async: Handles responses API requests by reusing the synchronous function
|
||||
"""
|
||||
try:
|
||||
loop = asyncio.get_event_loop()
|
||||
kwargs["aresponses"] = True
|
||||
|
||||
func = partial(
|
||||
responses,
|
||||
input=input,
|
||||
model=model,
|
||||
include=include,
|
||||
instructions=instructions,
|
||||
max_output_tokens=max_output_tokens,
|
||||
metadata=metadata,
|
||||
parallel_tool_calls=parallel_tool_calls,
|
||||
previous_response_id=previous_response_id,
|
||||
reasoning=reasoning,
|
||||
store=store,
|
||||
stream=stream,
|
||||
temperature=temperature,
|
||||
text=text,
|
||||
tool_choice=tool_choice,
|
||||
tools=tools,
|
||||
top_p=top_p,
|
||||
truncation=truncation,
|
||||
user=user,
|
||||
extra_headers=extra_headers,
|
||||
extra_query=extra_query,
|
||||
extra_body=extra_body,
|
||||
timeout=timeout,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
ctx = contextvars.copy_context()
|
||||
func_with_context = partial(ctx.run, func)
|
||||
init_response = await loop.run_in_executor(None, func_with_context)
|
||||
|
||||
if asyncio.iscoroutine(init_response):
|
||||
response = await init_response
|
||||
else:
|
||||
response = init_response
|
||||
return response
|
||||
except Exception as e:
|
||||
raise e
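A minimal usage sketch of the async entrypoint, assuming it is re-exported at the package root like the other litellm entrypoints; model and prompt are placeholders:

# Hypothetical usage - requires OPENAI_API_KEY in the environment
import asyncio
import litellm

async def main():
    response = await litellm.aresponses(
        model="openai/gpt-4o",
        input="Write a one-line haiku about proxies.",
        max_output_tokens=100,
    )
    print(response.output)

asyncio.run(main())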
|
||||
|
||||
|
||||
@client
|
||||
def responses(
|
||||
input: Union[str, ResponseInputParam],
|
||||
model: str,
|
||||
include: Optional[List[ResponseIncludable]] = None,
|
||||
instructions: Optional[str] = None,
|
||||
max_output_tokens: Optional[int] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
parallel_tool_calls: Optional[bool] = None,
|
||||
previous_response_id: Optional[str] = None,
|
||||
reasoning: Optional[Reasoning] = None,
|
||||
store: Optional[bool] = None,
|
||||
stream: Optional[bool] = None,
|
||||
temperature: Optional[float] = None,
|
||||
text: Optional[ResponseTextConfigParam] = None,
|
||||
tool_choice: Optional[ToolChoice] = None,
|
||||
tools: Optional[Iterable[ToolParam]] = None,
|
||||
top_p: Optional[float] = None,
|
||||
truncation: Optional[Literal["auto", "disabled"]] = None,
|
||||
user: Optional[str] = None,
|
||||
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
||||
# The extra values given here take precedence over values defined on the client or passed to this method.
|
||||
extra_headers: Optional[Dict[str, Any]] = None,
|
||||
extra_query: Optional[Dict[str, Any]] = None,
|
||||
extra_body: Optional[Dict[str, Any]] = None,
|
||||
timeout: Optional[Union[float, httpx.Timeout]] = None,
|
||||
**kwargs,
|
||||
):
|
||||
"""
|
||||
Synchronous version of the Responses API.
|
||||
Uses the synchronous HTTP handler to make requests.
|
||||
"""
|
||||
litellm_logging_obj: LiteLLMLoggingObj = kwargs.get("litellm_logging_obj") # type: ignore
|
||||
litellm_call_id: Optional[str] = kwargs.get("litellm_call_id", None)
|
||||
_is_async = kwargs.pop("aresponses", False) is True
|
||||
|
||||
# get llm provider logic
|
||||
litellm_params = GenericLiteLLMParams(**kwargs)
|
||||
model, custom_llm_provider, dynamic_api_key, dynamic_api_base = (
|
||||
litellm.get_llm_provider(
|
||||
model=model,
|
||||
custom_llm_provider=kwargs.get("custom_llm_provider", None),
|
||||
api_base=litellm_params.api_base,
|
||||
api_key=litellm_params.api_key,
|
||||
)
|
||||
)
|
||||
|
||||
# get provider config
|
||||
responses_api_provider_config: Optional[BaseResponsesAPIConfig] = (
|
||||
ProviderConfigManager.get_provider_responses_api_config(
|
||||
model=model,
|
||||
provider=litellm.LlmProviders(custom_llm_provider),
|
||||
)
|
||||
)
|
||||
|
||||
if responses_api_provider_config is None:
|
||||
raise litellm.BadRequestError(
|
||||
model=model,
|
||||
llm_provider=custom_llm_provider,
|
||||
message=f"Responses API not available for custom_llm_provider={custom_llm_provider}, model: {model}",
|
||||
)
|
||||
|
||||
# Get all parameters using locals() and combine with kwargs
|
||||
local_vars = locals()
|
||||
local_vars.update(kwargs)
|
||||
# Get ResponsesAPIOptionalRequestParams with only valid parameters
|
||||
response_api_optional_params: ResponsesAPIOptionalRequestParams = (
|
||||
ResponsesAPIRequestUtils.get_requested_response_api_optional_param(local_vars)
|
||||
)
|
||||
|
||||
# Get optional parameters for the responses API
|
||||
responses_api_request_params: Dict = (
|
||||
ResponsesAPIRequestUtils.get_optional_params_responses_api(
|
||||
model=model,
|
||||
responses_api_provider_config=responses_api_provider_config,
|
||||
response_api_optional_params=response_api_optional_params,
|
||||
)
|
||||
)
|
||||
|
||||
# Pre Call logging
|
||||
litellm_logging_obj.update_environment_variables(
|
||||
model=model,
|
||||
user=user,
|
||||
optional_params=dict(responses_api_request_params),
|
||||
litellm_params={
|
||||
"litellm_call_id": litellm_call_id,
|
||||
**responses_api_request_params,
|
||||
},
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
)
|
||||
|
||||
# Call the handler with _is_async flag instead of directly calling the async handler
|
||||
response = base_llm_http_handler.response_api_handler(
|
||||
model=model,
|
||||
input=input,
|
||||
responses_api_provider_config=responses_api_provider_config,
|
||||
response_api_optional_request_params=responses_api_request_params,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
litellm_params=litellm_params,
|
||||
logging_obj=litellm_logging_obj,
|
||||
extra_headers=extra_headers,
|
||||
extra_body=extra_body,
|
||||
timeout=timeout or request_timeout,
|
||||
_is_async=_is_async,
|
||||
client=kwargs.get("client"),
|
||||
)
|
||||
|
||||
return response
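And a hedged sketch of the synchronous path with streaming enabled, under the same re-export assumption:

# Hypothetical sync usage - with stream=True the handler returns an iterator of
# ResponsesAPIStreamingResponse events instead of a single ResponsesAPIResponse
import litellm

stream = litellm.responses(
    model="openai/gpt-4o",
    input="Stream a short sentence.",
    stream=True,
)
for event in stream:
    print(event.type)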
|
litellm/responses/streaming_iterator.py (new file, 209 lines)
|
@ -0,0 +1,209 @@
|
|||
import asyncio
|
||||
import json
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from litellm.constants import STREAM_SSE_DONE_STRING
|
||||
from litellm.litellm_core_utils.asyncify import run_async_function
|
||||
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
|
||||
from litellm.litellm_core_utils.thread_pool_executor import executor
|
||||
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
|
||||
from litellm.types.llms.openai import (
|
||||
ResponsesAPIStreamEvents,
|
||||
ResponsesAPIStreamingResponse,
|
||||
)
|
||||
from litellm.utils import CustomStreamWrapper
|
||||
|
||||
|
||||
class BaseResponsesAPIStreamingIterator:
|
||||
"""
|
||||
Base class for streaming iterators that process responses from the Responses API.
|
||||
|
||||
This class contains shared logic for both synchronous and asynchronous iterators.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
response: httpx.Response,
|
||||
model: str,
|
||||
responses_api_provider_config: BaseResponsesAPIConfig,
|
||||
logging_obj: LiteLLMLoggingObj,
|
||||
):
|
||||
self.response = response
|
||||
self.model = model
|
||||
self.logging_obj = logging_obj
|
||||
self.finished = False
|
||||
self.responses_api_provider_config = responses_api_provider_config
|
||||
self.completed_response: Optional[ResponsesAPIStreamingResponse] = None
|
||||
self.start_time = datetime.now()
|
||||
|
||||
def _process_chunk(self, chunk):
|
||||
"""Process a single chunk of data from the stream"""
|
||||
if not chunk:
|
||||
return None
|
||||
|
||||
# Handle SSE format (data: {...})
|
||||
chunk = CustomStreamWrapper._strip_sse_data_from_chunk(chunk)
|
||||
if chunk is None:
|
||||
return None
|
||||
|
||||
# Handle "[DONE]" marker
|
||||
if chunk == STREAM_SSE_DONE_STRING:
|
||||
self.finished = True
|
||||
return None
|
||||
|
||||
try:
|
||||
# Parse the JSON chunk
|
||||
parsed_chunk = json.loads(chunk)
|
||||
|
||||
# Format as ResponsesAPIStreamingResponse
|
||||
if isinstance(parsed_chunk, dict):
|
||||
openai_responses_api_chunk = (
|
||||
self.responses_api_provider_config.transform_streaming_response(
|
||||
model=self.model,
|
||||
parsed_chunk=parsed_chunk,
|
||||
logging_obj=self.logging_obj,
|
||||
)
|
||||
)
|
||||
# Store the completed response
|
||||
if (
|
||||
openai_responses_api_chunk
|
||||
and openai_responses_api_chunk.type
|
||||
== ResponsesAPIStreamEvents.RESPONSE_COMPLETED
|
||||
):
|
||||
self.completed_response = openai_responses_api_chunk
|
||||
self._handle_logging_completed_response()
|
||||
|
||||
return openai_responses_api_chunk
|
||||
|
||||
return None
|
||||
except json.JSONDecodeError:
|
||||
# If we can't parse the chunk, continue
|
||||
return None
|
||||
|
||||
def _handle_logging_completed_response(self):
|
||||
"""Base implementation - should be overridden by subclasses"""
|
||||
pass
|
||||
|
||||
|
||||
class ResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator):
|
||||
"""
|
||||
Async iterator for processing streaming responses from the Responses API.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
response: httpx.Response,
|
||||
model: str,
|
||||
responses_api_provider_config: BaseResponsesAPIConfig,
|
||||
logging_obj: LiteLLMLoggingObj,
|
||||
):
|
||||
super().__init__(response, model, responses_api_provider_config, logging_obj)
|
||||
self.stream_iterator = response.aiter_lines()
|
||||
|
||||
def __aiter__(self):
|
||||
return self
|
||||
|
||||
async def __anext__(self) -> ResponsesAPIStreamingResponse:
|
||||
try:
|
||||
while True:
|
||||
# Get the next chunk from the stream
|
||||
try:
|
||||
chunk = await self.stream_iterator.__anext__()
|
||||
except StopAsyncIteration:
|
||||
self.finished = True
|
||||
raise StopAsyncIteration
|
||||
|
||||
result = self._process_chunk(chunk)
|
||||
|
||||
if self.finished:
|
||||
raise StopAsyncIteration
|
||||
elif result is not None:
|
||||
return result
|
||||
# If result is None, continue the loop to get the next chunk
|
||||
|
||||
except httpx.HTTPError as e:
|
||||
# Handle HTTP errors
|
||||
self.finished = True
|
||||
raise e
|
||||
|
||||
def _handle_logging_completed_response(self):
|
||||
"""Handle logging for completed responses in async context"""
|
||||
asyncio.create_task(
|
||||
self.logging_obj.async_success_handler(
|
||||
result=self.completed_response,
|
||||
start_time=self.start_time,
|
||||
end_time=datetime.now(),
|
||||
cache_hit=None,
|
||||
)
|
||||
)
|
||||
|
||||
executor.submit(
|
||||
self.logging_obj.success_handler,
|
||||
result=self.completed_response,
|
||||
cache_hit=None,
|
||||
start_time=self.start_time,
|
||||
end_time=datetime.now(),
|
||||
)
|
||||
|
||||
|
||||
class SyncResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator):
|
||||
"""
|
||||
Synchronous iterator for processing streaming responses from the Responses API.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
response: httpx.Response,
|
||||
model: str,
|
||||
responses_api_provider_config: BaseResponsesAPIConfig,
|
||||
logging_obj: LiteLLMLoggingObj,
|
||||
):
|
||||
super().__init__(response, model, responses_api_provider_config, logging_obj)
|
||||
self.stream_iterator = response.iter_lines()
|
||||
|
||||
def __iter__(self):
|
||||
return self
|
||||
|
||||
def __next__(self):
|
||||
try:
|
||||
while True:
|
||||
# Get the next chunk from the stream
|
||||
try:
|
||||
chunk = next(self.stream_iterator)
|
||||
except StopIteration:
|
||||
self.finished = True
|
||||
raise StopIteration
|
||||
|
||||
result = self._process_chunk(chunk)
|
||||
|
||||
if self.finished:
|
||||
raise StopIteration
|
||||
elif result is not None:
|
||||
return result
|
||||
# If result is None, continue the loop to get the next chunk
|
||||
|
||||
except httpx.HTTPError as e:
|
||||
# Handle HTTP errors
|
||||
self.finished = True
|
||||
raise e
|
||||
|
||||
def _handle_logging_completed_response(self):
|
||||
"""Handle logging for completed responses in sync context"""
|
||||
run_async_function(
|
||||
async_function=self.logging_obj.async_success_handler,
|
||||
result=self.completed_response,
|
||||
start_time=self.start_time,
|
||||
end_time=datetime.now(),
|
||||
cache_hit=None,
|
||||
)
|
||||
|
||||
executor.submit(
|
||||
self.logging_obj.success_handler,
|
||||
result=self.completed_response,
|
||||
cache_hit=None,
|
||||
start_time=self.start_time,
|
||||
end_time=datetime.now(),
|
||||
)
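For completeness, a rough sketch of consuming the async iterator; how the stream object is obtained is assumed, not shown in this file:

# Hypothetical consumption of ResponsesAPIStreamingIterator
async def drain(stream):
    async for event in stream:
        if event.type == "response.completed":
            # the completed event is also stored on stream.completed_response
            return event.response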
|
litellm/responses/utils.py (new file, 97 lines)
|
@ -0,0 +1,97 @@
|
|||
from typing import Any, Dict, cast, get_type_hints
|
||||
|
||||
import litellm
|
||||
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
|
||||
from litellm.types.llms.openai import (
|
||||
ResponseAPIUsage,
|
||||
ResponsesAPIOptionalRequestParams,
|
||||
)
|
||||
from litellm.types.utils import Usage
|
||||
|
||||
|
||||
class ResponsesAPIRequestUtils:
|
||||
"""Helper utils for constructing ResponseAPI requests"""
|
||||
|
||||
@staticmethod
|
||||
def get_optional_params_responses_api(
|
||||
model: str,
|
||||
responses_api_provider_config: BaseResponsesAPIConfig,
|
||||
response_api_optional_params: ResponsesAPIOptionalRequestParams,
|
||||
) -> Dict:
|
||||
"""
|
||||
Get optional parameters for the responses API.
|
||||
|
||||
Args:
|
||||
params: Dictionary of all parameters
|
||||
model: The model name
|
||||
responses_api_provider_config: The provider configuration for responses API
|
||||
|
||||
Returns:
|
||||
A dictionary of supported parameters for the responses API
|
||||
"""
|
||||
# Remove None values and internal parameters
|
||||
|
||||
# Get supported parameters for the model
|
||||
supported_params = responses_api_provider_config.get_supported_openai_params(
|
||||
model
|
||||
)
|
||||
|
||||
# Check for unsupported parameters
|
||||
unsupported_params = [
|
||||
param
|
||||
for param in response_api_optional_params
|
||||
if param not in supported_params
|
||||
]
|
||||
|
||||
if unsupported_params:
|
||||
raise litellm.UnsupportedParamsError(
|
||||
model=model,
|
||||
message=f"The following parameters are not supported for model {model}: {', '.join(unsupported_params)}",
|
||||
)
|
||||
|
||||
# Map parameters to provider-specific format
|
||||
mapped_params = responses_api_provider_config.map_openai_params(
|
||||
response_api_optional_params=response_api_optional_params,
|
||||
model=model,
|
||||
drop_params=litellm.drop_params,
|
||||
)
|
||||
|
||||
return mapped_params
|
||||
|
||||
@staticmethod
|
||||
def get_requested_response_api_optional_param(
|
||||
params: Dict[str, Any]
|
||||
) -> ResponsesAPIOptionalRequestParams:
|
||||
"""
|
||||
Filter parameters to only include those defined in ResponsesAPIOptionalRequestParams.
|
||||
|
||||
Args:
|
||||
params: Dictionary of parameters to filter
|
||||
|
||||
Returns:
|
||||
ResponsesAPIOptionalRequestParams instance with only the valid parameters
|
||||
"""
|
||||
valid_keys = get_type_hints(ResponsesAPIOptionalRequestParams).keys()
|
||||
filtered_params = {k: v for k, v in params.items() if k in valid_keys}
|
||||
return cast(ResponsesAPIOptionalRequestParams, filtered_params)
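A tiny worked example of the filtering behaviour; the keys are illustrative:

# Hypothetical input mixing a valid request param with internal kwargs
params = {"temperature": 0.2, "aresponses": True, "litellm_call_id": "abc"}
filtered = ResponsesAPIRequestUtils.get_requested_response_api_optional_param(params)
# -> {"temperature": 0.2}; keys not defined on ResponsesAPIOptionalRequestParams are dropped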
|
||||
|
||||
|
||||
class ResponseAPILoggingUtils:
|
||||
@staticmethod
|
||||
def _is_response_api_usage(usage: dict) -> bool:
|
||||
"""returns True if usage is from OpenAI Response API"""
|
||||
if "input_tokens" in usage and "output_tokens" in usage:
|
||||
return True
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def _transform_response_api_usage_to_chat_usage(usage: dict) -> Usage:
|
||||
"""Tranforms the ResponseAPIUsage object to a Usage object"""
|
||||
response_api_usage: ResponseAPIUsage = ResponseAPIUsage(**usage)
|
||||
prompt_tokens: int = response_api_usage.input_tokens or 0
|
||||
completion_tokens: int = response_api_usage.output_tokens or 0
|
||||
return Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
total_tokens=prompt_tokens + completion_tokens,
|
||||
)
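For example, with made-up token counts:

usage = {"input_tokens": 120, "output_tokens": 35}
chat_usage = ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(usage)
# -> Usage(prompt_tokens=120, completion_tokens=35, total_tokens=155)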
|
|
@ -30,6 +30,8 @@ def get_azure_ad_token_provider() -> Callable[[], str]:
|
|||
client_secret=os.environ["AZURE_CLIENT_SECRET"],
|
||||
tenant_id=os.environ["AZURE_TENANT_ID"],
|
||||
)
|
||||
elif cred == "ManagedIdentityCredential":
|
||||
credential = cred_cls(client_id=os.environ["AZURE_CLIENT_ID"])
|
||||
else:
|
||||
credential = cred_cls()
|
||||
|
||||
|
|
|
@ -365,6 +365,63 @@ class AmazonStability3TextToImageResponse(TypedDict, total=False):
|
|||
finish_reasons: List[str]
|
||||
|
||||
|
||||
class AmazonNovaCanvasRequestBase(TypedDict, total=False):
|
||||
"""
|
||||
Base class for Amazon Nova Canvas API requests
|
||||
"""
|
||||
|
||||
pass
|
||||
|
||||
|
||||
class AmazonNovaCanvasImageGenerationConfig(TypedDict, total=False):
|
||||
"""
|
||||
Config for Amazon Nova Canvas Text to Image API
|
||||
|
||||
Ref: https://docs.aws.amazon.com/nova/latest/userguide/image-gen-req-resp-structure.html
|
||||
"""
|
||||
|
||||
cfgScale: int
|
||||
seed: int
|
||||
quality: Literal["standard", "premium"]
|
||||
width: int
|
||||
height: int
|
||||
numberOfImages: int
|
||||
|
||||
|
||||
class AmazonNovaCanvasTextToImageParams(TypedDict, total=False):
|
||||
"""
|
||||
Params for Amazon Nova Canvas Text to Image API
|
||||
"""
|
||||
|
||||
text: str
|
||||
negativeText: str
|
||||
controlStrength: float
|
||||
controlMode: Literal["CANNY_EDIT", "SEGMENTATION"]
|
||||
conditionImage: str
|
||||
|
||||
|
||||
class AmazonNovaCanvasTextToImageRequest(AmazonNovaCanvasRequestBase, TypedDict, total=False):
|
||||
"""
|
||||
Request for Amazon Nova Canvas Text to Image API
|
||||
|
||||
Ref: https://docs.aws.amazon.com/nova/latest/userguide/image-gen-req-resp-structure.html
|
||||
"""
|
||||
|
||||
textToImageParams: AmazonNovaCanvasTextToImageParams
|
||||
taskType: Literal["TEXT_IMAGE"]
|
||||
imageGenerationConfig: AmazonNovaCanvasImageGenerationConfig
|
||||
|
||||
|
||||
class AmazonNovaCanvasTextToImageResponse(TypedDict, total=False):
|
||||
"""
|
||||
Response for Amazon Nova Canvas Text to Image API
|
||||
|
||||
Ref: https://docs.aws.amazon.com/nova/latest/userguide/image-gen-req-resp-structure.html
|
||||
"""
|
||||
|
||||
images: List[str]
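A hedged sketch of a request body assembled from these TypedDicts; all values are placeholders:

# Hypothetical Nova Canvas text-to-image payload
request: AmazonNovaCanvasTextToImageRequest = {
    "taskType": "TEXT_IMAGE",
    "textToImageParams": {"text": "a lighthouse at dusk", "negativeText": "blurry"},
    "imageGenerationConfig": {
        "width": 1024,
        "height": 1024,
        "numberOfImages": 1,
        "cfgScale": 8,
        "seed": 0,
        "quality": "standard",
    },
}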
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from botocore.awsrequest import AWSPreparedRequest
|
||||
else:
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
from enum import Enum
|
||||
from os import PathLike
|
||||
from typing import IO, Any, Iterable, List, Literal, Mapping, Optional, Tuple, Union
|
||||
|
||||
import httpx
|
||||
from openai._legacy_response import (
|
||||
HttpxBinaryResponseContent as _HttpxBinaryResponseContent,
|
||||
)
|
||||
|
@ -31,8 +33,24 @@ from openai.types.chat.chat_completion_prediction_content_param import (
|
|||
)
|
||||
from openai.types.embedding import Embedding as OpenAIEmbedding
|
||||
from openai.types.fine_tuning.fine_tuning_job import FineTuningJob
|
||||
from pydantic import BaseModel, Field
|
||||
from typing_extensions import Dict, Required, TypedDict, override
|
||||
from openai.types.responses.response import (
|
||||
IncompleteDetails,
|
||||
Response,
|
||||
ResponseOutputItem,
|
||||
ResponseTextConfig,
|
||||
Tool,
|
||||
ToolChoice,
|
||||
)
|
||||
from openai.types.responses.response_create_params import (
|
||||
Reasoning,
|
||||
ResponseIncludable,
|
||||
ResponseInputParam,
|
||||
ResponseTextConfigParam,
|
||||
ToolChoice,
|
||||
ToolParam,
|
||||
)
|
||||
from pydantic import BaseModel, Discriminator, Field, PrivateAttr
|
||||
from typing_extensions import Annotated, Dict, Required, TypedDict, override
|
||||
|
||||
FileContent = Union[IO[bytes], bytes, PathLike]
|
||||
|
||||
|
@ -684,3 +702,323 @@ OpenAIAudioTranscriptionOptionalParams = Literal[
|
|||
|
||||
|
||||
OpenAIImageVariationOptionalParams = Literal["n", "size", "response_format", "user"]
|
||||
|
||||
|
||||
class ResponsesAPIOptionalRequestParams(TypedDict, total=False):
|
||||
"""TypedDict for Optional parameters supported by the responses API."""
|
||||
|
||||
include: Optional[List[ResponseIncludable]]
|
||||
instructions: Optional[str]
|
||||
max_output_tokens: Optional[int]
|
||||
metadata: Optional[Dict[str, Any]]
|
||||
parallel_tool_calls: Optional[bool]
|
||||
previous_response_id: Optional[str]
|
||||
reasoning: Optional[Reasoning]
|
||||
store: Optional[bool]
|
||||
stream: Optional[bool]
|
||||
temperature: Optional[float]
|
||||
text: Optional[ResponseTextConfigParam]
|
||||
tool_choice: Optional[ToolChoice]
|
||||
tools: Optional[Iterable[ToolParam]]
|
||||
top_p: Optional[float]
|
||||
truncation: Optional[Literal["auto", "disabled"]]
|
||||
user: Optional[str]
|
||||
|
||||
|
||||
class ResponsesAPIRequestParams(ResponsesAPIOptionalRequestParams, total=False):
|
||||
"""TypedDict for request parameters supported by the responses API."""
|
||||
|
||||
input: Union[str, ResponseInputParam]
|
||||
model: str
|
||||
|
||||
|
||||
class BaseLiteLLMOpenAIResponseObject(BaseModel):
|
||||
def __getitem__(self, key):
|
||||
return self.__dict__[key]
|
||||
|
||||
def get(self, key, default=None):
|
||||
return self.__dict__.get(key, default)
|
||||
|
||||
def __contains__(self, key):
|
||||
return key in self.__dict__
|
||||
|
||||
|
||||
class OutputTokensDetails(BaseLiteLLMOpenAIResponseObject):
|
||||
reasoning_tokens: int
|
||||
|
||||
model_config = {"extra": "allow"}
|
||||
|
||||
|
||||
class ResponseAPIUsage(BaseLiteLLMOpenAIResponseObject):
|
||||
input_tokens: int
|
||||
"""The number of input tokens."""
|
||||
|
||||
output_tokens: int
|
||||
"""The number of output tokens."""
|
||||
|
||||
output_tokens_details: Optional[OutputTokensDetails]
|
||||
"""A detailed breakdown of the output tokens."""
|
||||
|
||||
total_tokens: int
|
||||
"""The total number of tokens used."""
|
||||
|
||||
model_config = {"extra": "allow"}
|
||||
|
||||
|
||||
class ResponsesAPIResponse(BaseLiteLLMOpenAIResponseObject):
|
||||
id: str
|
||||
created_at: float
|
||||
error: Optional[dict]
|
||||
incomplete_details: Optional[IncompleteDetails]
|
||||
instructions: Optional[str]
|
||||
metadata: Optional[Dict]
|
||||
model: Optional[str]
|
||||
object: Optional[str]
|
||||
output: List[ResponseOutputItem]
|
||||
parallel_tool_calls: bool
|
||||
temperature: Optional[float]
|
||||
tool_choice: ToolChoice
|
||||
tools: List[Tool]
|
||||
top_p: Optional[float]
|
||||
max_output_tokens: Optional[int]
|
||||
previous_response_id: Optional[str]
|
||||
reasoning: Optional[Reasoning]
|
||||
status: Optional[str]
|
||||
text: Optional[ResponseTextConfig]
|
||||
truncation: Optional[Literal["auto", "disabled"]]
|
||||
usage: Optional[ResponseAPIUsage]
|
||||
user: Optional[str]
|
||||
# Define private attributes using PrivateAttr
|
||||
_hidden_params: dict = PrivateAttr(default_factory=dict)
|
||||
|
||||
|
||||
class ResponsesAPIStreamEvents(str, Enum):
|
||||
"""
|
||||
Enum representing all supported OpenAI stream event types for the Responses API.
|
||||
|
||||
Inherits from str to allow direct string comparison and usage as dictionary keys.
|
||||
"""
|
||||
|
||||
# Response lifecycle events
|
||||
RESPONSE_CREATED = "response.created"
|
||||
RESPONSE_IN_PROGRESS = "response.in_progress"
|
||||
RESPONSE_COMPLETED = "response.completed"
|
||||
RESPONSE_FAILED = "response.failed"
|
||||
RESPONSE_INCOMPLETE = "response.incomplete"
|
||||
|
||||
# Output item events
|
||||
OUTPUT_ITEM_ADDED = "response.output_item.added"
|
||||
OUTPUT_ITEM_DONE = "response.output_item.done"
|
||||
|
||||
# Content part events
|
||||
CONTENT_PART_ADDED = "response.content_part.added"
|
||||
CONTENT_PART_DONE = "response.content_part.done"
|
||||
|
||||
# Output text events
|
||||
OUTPUT_TEXT_DELTA = "response.output_text.delta"
|
||||
OUTPUT_TEXT_ANNOTATION_ADDED = "response.output_text.annotation.added"
|
||||
OUTPUT_TEXT_DONE = "response.output_text.done"
|
||||
|
||||
# Refusal events
|
||||
REFUSAL_DELTA = "response.refusal.delta"
|
||||
REFUSAL_DONE = "response.refusal.done"
|
||||
|
||||
# Function call events
|
||||
FUNCTION_CALL_ARGUMENTS_DELTA = "response.function_call_arguments.delta"
|
||||
FUNCTION_CALL_ARGUMENTS_DONE = "response.function_call_arguments.done"
|
||||
|
||||
# File search events
|
||||
FILE_SEARCH_CALL_IN_PROGRESS = "response.file_search_call.in_progress"
|
||||
FILE_SEARCH_CALL_SEARCHING = "response.file_search_call.searching"
|
||||
FILE_SEARCH_CALL_COMPLETED = "response.file_search_call.completed"
|
||||
|
||||
# Web search events
|
||||
WEB_SEARCH_CALL_IN_PROGRESS = "response.web_search_call.in_progress"
|
||||
WEB_SEARCH_CALL_SEARCHING = "response.web_search_call.searching"
|
||||
WEB_SEARCH_CALL_COMPLETED = "response.web_search_call.completed"
|
||||
|
||||
# Error event
|
||||
ERROR = "error"
|
||||
|
||||
|
||||
class ResponseCreatedEvent(BaseLiteLLMOpenAIResponseObject):
|
||||
type: Literal[ResponsesAPIStreamEvents.RESPONSE_CREATED]
|
||||
response: ResponsesAPIResponse
|
||||
|
||||
|
||||
class ResponseInProgressEvent(BaseLiteLLMOpenAIResponseObject):
|
||||
type: Literal[ResponsesAPIStreamEvents.RESPONSE_IN_PROGRESS]
|
||||
response: ResponsesAPIResponse
|
||||
|
||||
|
||||
class ResponseCompletedEvent(BaseLiteLLMOpenAIResponseObject):
|
||||
type: Literal[ResponsesAPIStreamEvents.RESPONSE_COMPLETED]
|
||||
response: ResponsesAPIResponse
|
||||
_hidden_params: dict = PrivateAttr(default_factory=dict)
|
||||
|
||||
|
||||
class ResponseFailedEvent(BaseLiteLLMOpenAIResponseObject):
|
||||
type: Literal[ResponsesAPIStreamEvents.RESPONSE_FAILED]
|
||||
response: ResponsesAPIResponse
|
||||
|
||||
|
||||
class ResponseIncompleteEvent(BaseLiteLLMOpenAIResponseObject):
|
||||
type: Literal[ResponsesAPIStreamEvents.RESPONSE_INCOMPLETE]
|
||||
response: ResponsesAPIResponse
|
||||
|
||||
|
||||
class OutputItemAddedEvent(BaseLiteLLMOpenAIResponseObject):
|
||||
type: Literal[ResponsesAPIStreamEvents.OUTPUT_ITEM_ADDED]
|
||||
output_index: int
|
||||
item: dict
|
||||
|
||||
|
||||
class OutputItemDoneEvent(BaseLiteLLMOpenAIResponseObject):
|
||||
type: Literal[ResponsesAPIStreamEvents.OUTPUT_ITEM_DONE]
|
||||
output_index: int
|
||||
item: dict
|
||||
|
||||
|
||||
class ContentPartAddedEvent(BaseLiteLLMOpenAIResponseObject):
|
||||
type: Literal[ResponsesAPIStreamEvents.CONTENT_PART_ADDED]
|
||||
item_id: str
|
||||
output_index: int
|
||||
content_index: int
|
||||
part: dict
|
||||
|
||||
|
||||
class ContentPartDoneEvent(BaseLiteLLMOpenAIResponseObject):
|
||||
type: Literal[ResponsesAPIStreamEvents.CONTENT_PART_DONE]
|
||||
item_id: str
|
||||
output_index: int
|
||||
content_index: int
|
||||
part: dict
|
||||
|
||||
|
||||
class OutputTextDeltaEvent(BaseLiteLLMOpenAIResponseObject):
|
||||
type: Literal[ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA]
|
||||
item_id: str
|
||||
output_index: int
|
||||
content_index: int
|
||||
delta: str
|
||||
|
||||
|
||||
class OutputTextAnnotationAddedEvent(BaseLiteLLMOpenAIResponseObject):
|
||||
type: Literal[ResponsesAPIStreamEvents.OUTPUT_TEXT_ANNOTATION_ADDED]
|
||||
item_id: str
|
||||
output_index: int
|
||||
content_index: int
|
||||
annotation_index: int
|
||||
annotation: dict
|
||||
|
||||
|
||||
class OutputTextDoneEvent(BaseLiteLLMOpenAIResponseObject):
|
||||
type: Literal[ResponsesAPIStreamEvents.OUTPUT_TEXT_DONE]
|
||||
item_id: str
|
||||
output_index: int
|
||||
content_index: int
|
||||
text: str
|
||||
|
||||
|
||||
class RefusalDeltaEvent(BaseLiteLLMOpenAIResponseObject):
|
||||
type: Literal[ResponsesAPIStreamEvents.REFUSAL_DELTA]
|
||||
item_id: str
|
||||
output_index: int
|
||||
content_index: int
|
||||
delta: str
|
||||
|
||||
|
||||
class RefusalDoneEvent(BaseLiteLLMOpenAIResponseObject):
|
||||
type: Literal[ResponsesAPIStreamEvents.REFUSAL_DONE]
|
||||
item_id: str
|
||||
output_index: int
|
||||
content_index: int
|
||||
refusal: str
|
||||
|
||||
|
||||
class FunctionCallArgumentsDeltaEvent(BaseLiteLLMOpenAIResponseObject):
|
||||
type: Literal[ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DELTA]
|
||||
item_id: str
|
||||
output_index: int
|
||||
delta: str
|
||||
|
||||
|
||||
class FunctionCallArgumentsDoneEvent(BaseLiteLLMOpenAIResponseObject):
|
||||
type: Literal[ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DONE]
|
||||
item_id: str
|
||||
output_index: int
|
||||
arguments: str
|
||||
|
||||
|
||||
class FileSearchCallInProgressEvent(BaseLiteLLMOpenAIResponseObject):
|
||||
type: Literal[ResponsesAPIStreamEvents.FILE_SEARCH_CALL_IN_PROGRESS]
|
||||
output_index: int
|
||||
item_id: str
|
||||
|
||||
|
||||
class FileSearchCallSearchingEvent(BaseLiteLLMOpenAIResponseObject):
|
||||
type: Literal[ResponsesAPIStreamEvents.FILE_SEARCH_CALL_SEARCHING]
|
||||
output_index: int
|
||||
item_id: str
|
||||
|
||||
|
||||
class FileSearchCallCompletedEvent(BaseLiteLLMOpenAIResponseObject):
|
||||
type: Literal[ResponsesAPIStreamEvents.FILE_SEARCH_CALL_COMPLETED]
|
||||
output_index: int
|
||||
item_id: str
|
||||
|
||||
|
||||
class WebSearchCallInProgressEvent(BaseLiteLLMOpenAIResponseObject):
|
||||
type: Literal[ResponsesAPIStreamEvents.WEB_SEARCH_CALL_IN_PROGRESS]
|
||||
output_index: int
|
||||
item_id: str
|
||||
|
||||
|
||||
class WebSearchCallSearchingEvent(BaseLiteLLMOpenAIResponseObject):
|
||||
type: Literal[ResponsesAPIStreamEvents.WEB_SEARCH_CALL_SEARCHING]
|
||||
output_index: int
|
||||
item_id: str
|
||||
|
||||
|
||||
class WebSearchCallCompletedEvent(BaseLiteLLMOpenAIResponseObject):
|
||||
type: Literal[ResponsesAPIStreamEvents.WEB_SEARCH_CALL_COMPLETED]
|
||||
output_index: int
|
||||
item_id: str
|
||||
|
||||
|
||||
class ErrorEvent(BaseLiteLLMOpenAIResponseObject):
|
||||
type: Literal[ResponsesAPIStreamEvents.ERROR]
|
||||
code: Optional[str]
|
||||
message: str
|
||||
param: Optional[str]
|
||||
|
||||
|
||||
# Union type for all possible streaming responses
|
||||
ResponsesAPIStreamingResponse = Annotated[
|
||||
Union[
|
||||
ResponseCreatedEvent,
|
||||
ResponseInProgressEvent,
|
||||
ResponseCompletedEvent,
|
||||
ResponseFailedEvent,
|
||||
ResponseIncompleteEvent,
|
||||
OutputItemAddedEvent,
|
||||
OutputItemDoneEvent,
|
||||
ContentPartAddedEvent,
|
||||
ContentPartDoneEvent,
|
||||
OutputTextDeltaEvent,
|
||||
OutputTextAnnotationAddedEvent,
|
||||
OutputTextDoneEvent,
|
||||
RefusalDeltaEvent,
|
||||
RefusalDoneEvent,
|
||||
FunctionCallArgumentsDeltaEvent,
|
||||
FunctionCallArgumentsDoneEvent,
|
||||
FileSearchCallInProgressEvent,
|
||||
FileSearchCallSearchingEvent,
|
||||
FileSearchCallCompletedEvent,
|
||||
WebSearchCallInProgressEvent,
|
||||
WebSearchCallSearchingEvent,
|
||||
WebSearchCallCompletedEvent,
|
||||
ErrorEvent,
|
||||
],
|
||||
Discriminator("type"),
|
||||
]
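Because the union is discriminated on the type field, a raw chunk can be validated straight into the matching event class; a sketch assuming pydantic v2's TypeAdapter:

# Hypothetical validation of a parsed SSE chunk into a typed event
from pydantic import TypeAdapter

adapter = TypeAdapter(ResponsesAPIStreamingResponse)
event = adapter.validate_python(
    {
        "type": "response.output_text.delta",
        "item_id": "msg_1",
        "output_index": 0,
        "content_index": 0,
        "delta": "Hel",
    }
)
# -> OutputTextDeltaEvent(delta="Hel", ...)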
|
||||
|
|
|
@ -227,6 +227,8 @@ class CallTypes(Enum):
|
|||
list_fine_tuning_jobs = "list_fine_tuning_jobs"
|
||||
aretrieve_fine_tuning_job = "aretrieve_fine_tuning_job"
|
||||
retrieve_fine_tuning_job = "retrieve_fine_tuning_job"
|
||||
responses = "responses"
|
||||
aresponses = "aresponses"
|
||||
|
||||
|
||||
CallTypesLiteral = Literal[
|
||||
|
|
|
@ -211,6 +211,7 @@ from litellm.llms.base_llm.image_variations.transformation import (
|
|||
BaseImageVariationConfig,
|
||||
)
|
||||
from litellm.llms.base_llm.rerank.transformation import BaseRerankConfig
|
||||
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
|
||||
|
||||
from ._logging import _is_debugging_on, verbose_logger
|
||||
from .caching.caching import (
|
||||
|
@ -729,6 +730,11 @@ def function_setup( # noqa: PLR0915
|
|||
call_type == CallTypes.aspeech.value or call_type == CallTypes.speech.value
|
||||
):
|
||||
messages = kwargs.get("input", "speech")
|
||||
elif (
|
||||
call_type == CallTypes.aresponses.value
|
||||
or call_type == CallTypes.responses.value
|
||||
):
|
||||
messages = args[0] if len(args) > 0 else kwargs["input"]
|
||||
else:
|
||||
messages = "default-message-value"
|
||||
stream = True if "stream" in kwargs and kwargs["stream"] is True else False
|
||||
|
@ -2445,6 +2451,7 @@ def get_optional_params_image_gen(
|
|||
config_class = (
|
||||
litellm.AmazonStability3Config
|
||||
if litellm.AmazonStability3Config._is_stability_3_model(model=model)
|
||||
else litellm.AmazonNovaCanvasConfig if litellm.AmazonNovaCanvasConfig._is_nova_model(model=model)
|
||||
else litellm.AmazonStabilityConfig
|
||||
)
|
||||
supported_params = config_class.get_supported_openai_params(model=model)
|
||||
|
@ -5121,7 +5128,7 @@ def prompt_token_calculator(model, messages):
|
|||
from anthropic import AI_PROMPT, HUMAN_PROMPT, Anthropic
|
||||
|
||||
anthropic_obj = Anthropic()
|
||||
num_tokens = anthropic_obj.count_tokens(text)
|
||||
num_tokens = anthropic_obj.count_tokens(text) # type: ignore
|
||||
else:
|
||||
num_tokens = len(encoding.encode(text))
|
||||
return num_tokens
|
||||
|
@ -6293,6 +6300,15 @@ class ProviderConfigManager:
|
|||
return litellm.DeepgramAudioTranscriptionConfig()
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def get_provider_responses_api_config(
|
||||
model: str,
|
||||
provider: LlmProviders,
|
||||
) -> Optional[BaseResponsesAPIConfig]:
|
||||
if litellm.LlmProviders.OPENAI == provider:
|
||||
return litellm.OpenAIResponsesAPIConfig()
|
||||
return None
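Illustrative lookup; currently only OpenAI resolves to a config:

config = ProviderConfigManager.get_provider_responses_api_config(
    model="gpt-4o",
    provider=litellm.LlmProviders.OPENAI,
)
# -> litellm.OpenAIResponsesAPIConfig(); other providers return None for now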
|
||||
|
||||
@staticmethod
|
||||
def get_provider_text_completion_config(
|
||||
model: str,
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
"input_cost_per_token": 0.0000,
|
||||
"output_cost_per_token": 0.000,
|
||||
"litellm_provider": "one of https://docs.litellm.ai/docs/providers",
|
||||
"mode": "one of chat, embedding, completion, image_generation, audio_transcription, audio_speech",
|
||||
"mode": "one of: chat, embedding, completion, image_generation, audio_transcription, audio_speech, image_generation, moderation, rerank",
|
||||
"supports_function_calling": true,
|
||||
"supports_parallel_function_calling": true,
|
||||
"supports_vision": true,
|
||||
|
@ -931,7 +931,7 @@
|
|||
"input_cost_per_token": 0.000000,
|
||||
"output_cost_per_token": 0.000000,
|
||||
"litellm_provider": "openai",
|
||||
"mode": "moderations"
|
||||
"mode": "moderation"
|
||||
},
|
||||
"text-moderation-007": {
|
||||
"max_tokens": 32768,
|
||||
|
@ -940,7 +940,7 @@
|
|||
"input_cost_per_token": 0.000000,
|
||||
"output_cost_per_token": 0.000000,
|
||||
"litellm_provider": "openai",
|
||||
"mode": "moderations"
|
||||
"mode": "moderation"
|
||||
},
|
||||
"text-moderation-latest": {
|
||||
"max_tokens": 32768,
|
||||
|
@ -949,7 +949,7 @@
|
|||
"input_cost_per_token": 0.000000,
|
||||
"output_cost_per_token": 0.000000,
|
||||
"litellm_provider": "openai",
|
||||
"mode": "moderations"
|
||||
"mode": "moderation"
|
||||
},
|
||||
"256-x-256/dall-e-2": {
|
||||
"mode": "image_generation",
|
||||
|
@ -1625,13 +1625,23 @@
|
|||
"max_tokens": 8192,
|
||||
"max_input_tokens": 128000,
|
||||
"max_output_tokens": 8192,
|
||||
"input_cost_per_token": 0.0,
|
||||
"input_cost_per_token_cache_hit": 0.0,
|
||||
"output_cost_per_token": 0.0,
|
||||
"input_cost_per_token": 0.00000135,
|
||||
"output_cost_per_token": 0.0000054,
|
||||
"litellm_provider": "azure_ai",
|
||||
"mode": "chat",
|
||||
"supports_prompt_caching": true,
|
||||
"supports_tool_choice": true
|
||||
"supports_tool_choice": true,
|
||||
"source": "https://techcommunity.microsoft.com/blog/machinelearningblog/deepseek-r1-improved-performance-higher-limits-and-transparent-pricing/4386367"
|
||||
},
|
||||
"azure_ai/deepseek-v3": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 128000,
|
||||
"max_output_tokens": 8192,
|
||||
"input_cost_per_token": 0.00000114,
|
||||
"output_cost_per_token": 0.00000456,
|
||||
"litellm_provider": "azure_ai",
|
||||
"mode": "chat",
|
||||
"supports_tool_choice": true,
|
||||
"source": "https://techcommunity.microsoft.com/blog/machinelearningblog/announcing-deepseek-v3-on-azure-ai-foundry-and-github/4390438"
|
||||
},
|
||||
"azure_ai/jamba-instruct": {
|
||||
"max_tokens": 4096,
|
||||
|
@ -1643,6 +1653,17 @@
|
|||
"mode": "chat",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
"azure_ai/mistral-nemo": {
|
||||
"max_tokens": 4096,
|
||||
"max_input_tokens": 131072,
|
||||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0.00000015,
|
||||
"output_cost_per_token": 0.00000015,
|
||||
"litellm_provider": "azure_ai",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/000-000.mistral-nemo-12b-2407?tab=PlansAndPrice"
|
||||
},
|
||||
"azure_ai/mistral-large": {
|
||||
"max_tokens": 8191,
|
||||
"max_input_tokens": 32000,
|
||||
|
@ -1770,10 +1791,34 @@
|
|||
"source":"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-405b-instruct-offer?tab=PlansAndPrice",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
"azure_ai/Phi-4": {
|
||||
"azure_ai/Phi-4-mini-instruct": {
|
||||
"max_tokens": 4096,
|
||||
"max_input_tokens": 128000,
|
||||
"max_input_tokens": 131072,
|
||||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0,
|
||||
"output_cost_per_token": 0,
|
||||
"litellm_provider": "azure_ai",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"source": "https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/models-featured#microsoft"
|
||||
},
|
||||
"azure_ai/Phi-4-multimodal-instruct": {
|
||||
"max_tokens": 4096,
|
||||
"max_input_tokens": 131072,
|
||||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0,
|
||||
"output_cost_per_token": 0,
|
||||
"litellm_provider": "azure_ai",
|
||||
"mode": "chat",
|
||||
"supports_audio_input": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"source": "https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/models-featured#microsoft"
|
||||
},
|
||||
"azure_ai/Phi-4": {
|
||||
"max_tokens": 16384,
|
||||
"max_input_tokens": 16384,
|
||||
"max_output_tokens": 16384,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.0000005,
|
||||
"litellm_provider": "azure_ai",
|
||||
|
@ -3892,31 +3937,6 @@
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing",
"supports_tool_choice": true
},
"gemini/gemini-2.0-flash": {
"max_tokens": 8192,
"max_input_tokens": 1048576,
"max_output_tokens": 8192,
"max_images_per_prompt": 3000,
"max_videos_per_prompt": 10,
"max_video_length": 1,
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_audio_token": 0.0000007,
"input_cost_per_token": 0.0000001,
"output_cost_per_token": 0.0000004,
"litellm_provider": "gemini",
"mode": "chat",
"rpm": 10000,
"tpm": 10000000,
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"supports_response_schema": true,
"supports_audio_output": true,
"supports_tool_choice": true,
"source": "https://ai.google.dev/pricing#2_0flash"
},
"gemini-2.0-flash-001": {
"max_tokens": 8192,
"max_input_tokens": 1048576,

@ -4008,6 +4028,69 @@
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
"supports_tool_choice": true
},
"gemini/gemini-2.0-pro-exp-02-05": {
"max_tokens": 8192,
"max_input_tokens": 2097152,
"max_output_tokens": 8192,
"max_images_per_prompt": 3000,
"max_videos_per_prompt": 10,
"max_video_length": 1,
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_image": 0,
"input_cost_per_video_per_second": 0,
"input_cost_per_audio_per_second": 0,
"input_cost_per_token": 0,
"input_cost_per_character": 0,
"input_cost_per_token_above_128k_tokens": 0,
"input_cost_per_character_above_128k_tokens": 0,
"input_cost_per_image_above_128k_tokens": 0,
"input_cost_per_video_per_second_above_128k_tokens": 0,
"input_cost_per_audio_per_second_above_128k_tokens": 0,
"output_cost_per_token": 0,
"output_cost_per_character": 0,
"output_cost_per_token_above_128k_tokens": 0,
"output_cost_per_character_above_128k_tokens": 0,
"litellm_provider": "gemini",
"mode": "chat",
"rpm": 2,
"tpm": 1000000,
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"supports_audio_input": true,
"supports_video_input": true,
"supports_pdf_input": true,
"supports_response_schema": true,
"supports_tool_choice": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
},
"gemini/gemini-2.0-flash": {
"max_tokens": 8192,
"max_input_tokens": 1048576,
"max_output_tokens": 8192,
"max_images_per_prompt": 3000,
"max_videos_per_prompt": 10,
"max_video_length": 1,
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_audio_token": 0.0000007,
"input_cost_per_token": 0.0000001,
"output_cost_per_token": 0.0000004,
"litellm_provider": "gemini",
"mode": "chat",
"rpm": 10000,
"tpm": 10000000,
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"supports_response_schema": true,
"supports_audio_output": true,
"supports_tool_choice": true,
"source": "https://ai.google.dev/pricing#2_0flash"
},
"gemini/gemini-2.0-flash-001": {
"max_tokens": 8192,
"max_input_tokens": 1048576,

@ -4511,6 +4594,12 @@
"mode": "image_generation",
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
},
"vertex_ai/imagen-3.0-generate-002": {
"output_cost_per_image": 0.04,
"litellm_provider": "vertex_ai-image-models",
"mode": "image_generation",
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
},
"vertex_ai/imagen-3.0-generate-001": {
"output_cost_per_image": 0.04,
"litellm_provider": "vertex_ai-image-models",

@ -6547,6 +6636,12 @@
"supports_prompt_caching": true,
"supports_response_schema": true
},
"1024-x-1024/50-steps/bedrock/amazon.nova-canvas-v1:0": {
"max_input_tokens": 2600,
"output_cost_per_image": 0.06,
"litellm_provider": "bedrock",
"mode": "image_generation"
},
"eu.amazon.nova-pro-v1:0": {
"max_tokens": 4096,
"max_input_tokens": 300000,

@ -7477,6 +7572,18 @@
"litellm_provider": "bedrock",
"mode": "embedding"
},
"us.deepseek.r1-v1:0": {
"max_tokens": 4096,
"max_input_tokens": 128000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.00000135,
"output_cost_per_token": 0.0000054,
"litellm_provider": "bedrock_converse",
"mode": "chat",
"supports_function_calling": false,
"supports_tool_choice": false
},
"meta.llama3-3-70b-instruct-v1:0": {
"max_tokens": 4096,
"max_input_tokens": 128000,

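These registry entries are what litellm's cost tracking reads at runtime. A minimal sketch of checking the new Bedrock DeepSeek R1 prices locally, assuming litellm's `model_cost` dict and `cost_per_token` helper pick these entries up the same way they do for existing models:

import litellm

# Assumes this branch's registry, where the new Bedrock DeepSeek R1 entry exists.
entry = litellm.model_cost["us.deepseek.r1-v1:0"]
print(entry["input_cost_per_token"], entry["output_cost_per_token"])

# Rough cost estimate for a 1,000-in / 500-out token call at those prices.
prompt_cost, completion_cost = litellm.cost_per_token(
    model="us.deepseek.r1-v1:0",
    prompt_tokens=1000,
    completion_tokens=500,
)
print(f"estimated cost: ${prompt_cost + completion_cost:.6f}")
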
@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.63.5"
version = "1.63.7"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"

@ -96,7 +96,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"

[tool.commitizen]
version = "1.63.5"
version = "1.63.7"
version_files = [
    "pyproject.toml:^version"
]

@ -1,7 +1,7 @@
# LITELLM PROXY DEPENDENCIES #
anyio==4.4.0 # openai + http req.
httpx==0.27.0 # Pin Httpx dependency
openai==1.61.0 # openai req.
openai==1.66.1 # openai req.
fastapi==0.115.5 # server dep
backoff==2.2.1 # server dep
pyyaml==6.0.2 # server dep

@ -59,15 +59,15 @@ class BaseImageGenTest(ABC):

        await asyncio.sleep(1)

        assert response._hidden_params["response_cost"] is not None
        assert response._hidden_params["response_cost"] > 0
        print("response_cost", response._hidden_params["response_cost"])
        # assert response._hidden_params["response_cost"] is not None
        # assert response._hidden_params["response_cost"] > 0
        # print("response_cost", response._hidden_params["response_cost"])

        logged_standard_logging_payload = custom_logger.standard_logging_payload
        print("logged_standard_logging_payload", logged_standard_logging_payload)
        assert logged_standard_logging_payload is not None
        assert logged_standard_logging_payload["response_cost"] is not None
        assert logged_standard_logging_payload["response_cost"] > 0
        # assert logged_standard_logging_payload["response_cost"] is not None
        # assert logged_standard_logging_payload["response_cost"] > 0

        from openai.types.images_response import ImagesResponse

@ -130,6 +130,19 @@ class TestBedrockSd1(BaseImageGenTest):
        return {"model": "bedrock/stability.sd3-large-v1:0"}


class TestBedrockNovaCanvasTextToImage(BaseImageGenTest):
    def get_base_image_generation_call_args(self) -> dict:
        litellm.in_memory_llm_clients_cache = InMemoryCache()
        return {
            "model": "bedrock/amazon.nova-canvas-v1:0",
            "n": 1,
            "size": "320x320",
            "imageGenerationConfig": {"cfgScale": 6.5, "seed": 12},
            "taskType": "TEXT_IMAGE",
            "aws_region_name": "us-east-1",
        }


class TestOpenAIDalle3(BaseImageGenTest):
    def get_base_image_generation_call_args(self) -> dict:
        return {"model": "dall-e-3"}

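For reference, the call-args dict returned by the new Nova Canvas test class maps onto a direct image-generation call roughly like this. This is a sketch under the assumption that the base test forwards these kwargs unchanged and that AWS credentials come from the environment, not a verbatim excerpt of the test flow:

import litellm

response = litellm.image_generation(
    prompt="A cup of coffee on a wooden table",
    model="bedrock/amazon.nova-canvas-v1:0",
    n=1,
    size="320x320",
    imageGenerationConfig={"cfgScale": 6.5, "seed": 12},
    taskType="TEXT_IMAGE",
    aws_region_name="us-east-1",
)
print(response.data[0])  # generated image payload
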
@ -0,0 +1,239 @@
|
|||
import json
|
||||
import os
|
||||
import sys
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
sys.path.insert(
|
||||
0, os.path.abspath("../../../../..")
|
||||
) # Adds the parent directory to the system path
|
||||
|
||||
from litellm.llms.openai.responses.transformation import OpenAIResponsesAPIConfig
|
||||
from litellm.types.llms.openai import (
|
||||
OutputTextDeltaEvent,
|
||||
ResponseCompletedEvent,
|
||||
ResponsesAPIRequestParams,
|
||||
ResponsesAPIResponse,
|
||||
ResponsesAPIStreamEvents,
|
||||
)
|
||||
|
||||
|
||||
class TestOpenAIResponsesAPIConfig:
|
||||
def setup_method(self):
|
||||
self.config = OpenAIResponsesAPIConfig()
|
||||
self.model = "gpt-4o"
|
||||
self.logging_obj = MagicMock()
|
||||
|
||||
def test_map_openai_params(self):
|
||||
"""Test that parameters are correctly mapped"""
|
||||
test_params = {"input": "Hello world", "temperature": 0.7, "stream": True}
|
||||
|
||||
result = self.config.map_openai_params(
|
||||
response_api_optional_params=test_params,
|
||||
model=self.model,
|
||||
drop_params=False,
|
||||
)
|
||||
|
||||
# The function should return the params unchanged
|
||||
assert result == test_params
|
||||
|
||||
def validate_responses_api_request_params(self, params, expected_fields):
|
||||
"""
|
||||
Validate that the params dict has the expected structure of ResponsesAPIRequestParams
|
||||
|
||||
Args:
|
||||
params: The dict to validate
|
||||
expected_fields: Dict of field names and their expected values
|
||||
"""
|
||||
# Check that it's a dict
|
||||
assert isinstance(params, dict), "Result should be a dict"
|
||||
|
||||
# Check expected fields have correct values
|
||||
for field, value in expected_fields.items():
|
||||
assert field in params, f"Missing expected field: {field}"
|
||||
assert (
|
||||
params[field] == value
|
||||
), f"Field {field} has value {params[field]}, expected {value}"
|
||||
|
||||
def test_transform_responses_api_request(self):
|
||||
"""Test request transformation"""
|
||||
input_text = "What is the capital of France?"
|
||||
optional_params = {"temperature": 0.7, "stream": True}
|
||||
|
||||
result = self.config.transform_responses_api_request(
|
||||
model=self.model,
|
||||
input=input_text,
|
||||
response_api_optional_request_params=optional_params,
|
||||
litellm_params={},
|
||||
headers={},
|
||||
)
|
||||
|
||||
# Validate the result has the expected structure and values
|
||||
expected_fields = {
|
||||
"model": self.model,
|
||||
"input": input_text,
|
||||
"temperature": 0.7,
|
||||
"stream": True,
|
||||
}
|
||||
|
||||
self.validate_responses_api_request_params(result, expected_fields)
|
||||
|
||||
def test_transform_streaming_response(self):
|
||||
"""Test streaming response transformation"""
|
||||
# Test with a text delta event
|
||||
chunk = {
|
||||
"type": "response.output_text.delta",
|
||||
"item_id": "item_123",
|
||||
"output_index": 0,
|
||||
"content_index": 0,
|
||||
"delta": "Hello",
|
||||
}
|
||||
|
||||
result = self.config.transform_streaming_response(
|
||||
model=self.model, parsed_chunk=chunk, logging_obj=self.logging_obj
|
||||
)
|
||||
|
||||
assert isinstance(result, OutputTextDeltaEvent)
|
||||
assert result.type == ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA
|
||||
assert result.delta == "Hello"
|
||||
assert result.item_id == "item_123"
|
||||
|
||||
# Test with a completed event - providing all required fields
|
||||
completed_chunk = {
|
||||
"type": "response.completed",
|
||||
"response": {
|
||||
"id": "resp_123",
|
||||
"created_at": 1234567890,
|
||||
"model": "gpt-4o",
|
||||
"object": "response",
|
||||
"output": [],
|
||||
"parallel_tool_calls": False,
|
||||
"error": None,
|
||||
"incomplete_details": None,
|
||||
"instructions": None,
|
||||
"metadata": None,
|
||||
"temperature": 0.7,
|
||||
"tool_choice": "auto",
|
||||
"tools": [],
|
||||
"top_p": 1.0,
|
||||
"max_output_tokens": None,
|
||||
"previous_response_id": None,
|
||||
"reasoning": None,
|
||||
"status": "completed",
|
||||
"text": None,
|
||||
"truncation": "auto",
|
||||
"usage": None,
|
||||
"user": None,
|
||||
},
|
||||
}
|
||||
|
||||
# Mock the get_event_model_class to avoid validation issues in tests
|
||||
with patch.object(
|
||||
OpenAIResponsesAPIConfig, "get_event_model_class"
|
||||
) as mock_get_class:
|
||||
mock_get_class.return_value = ResponseCompletedEvent
|
||||
|
||||
result = self.config.transform_streaming_response(
|
||||
model=self.model,
|
||||
parsed_chunk=completed_chunk,
|
||||
logging_obj=self.logging_obj,
|
||||
)
|
||||
|
||||
assert result.type == ResponsesAPIStreamEvents.RESPONSE_COMPLETED
|
||||
assert result.response.id == "resp_123"
|
||||
|
||||
def test_validate_environment(self):
|
||||
"""Test that validate_environment correctly sets the Authorization header"""
|
||||
# Test with provided API key
|
||||
headers = {}
|
||||
api_key = "test_api_key"
|
||||
|
||||
result = self.config.validate_environment(
|
||||
headers=headers, model=self.model, api_key=api_key
|
||||
)
|
||||
|
||||
assert "Authorization" in result
|
||||
assert result["Authorization"] == f"Bearer {api_key}"
|
||||
|
||||
# Test with empty headers
|
||||
headers = {}
|
||||
|
||||
with patch("litellm.api_key", "litellm_api_key"):
|
||||
result = self.config.validate_environment(headers=headers, model=self.model)
|
||||
|
||||
assert "Authorization" in result
|
||||
assert result["Authorization"] == "Bearer litellm_api_key"
|
||||
|
||||
# Test with existing headers
|
||||
headers = {"Content-Type": "application/json"}
|
||||
|
||||
with patch("litellm.openai_key", "openai_key"):
|
||||
with patch("litellm.api_key", None):
|
||||
result = self.config.validate_environment(
|
||||
headers=headers, model=self.model
|
||||
)
|
||||
|
||||
assert "Authorization" in result
|
||||
assert result["Authorization"] == "Bearer openai_key"
|
||||
assert "Content-Type" in result
|
||||
assert result["Content-Type"] == "application/json"
|
||||
|
||||
# Test with environment variable
|
||||
headers = {}
|
||||
|
||||
with patch("litellm.api_key", None):
|
||||
with patch("litellm.openai_key", None):
|
||||
with patch(
|
||||
"litellm.llms.openai.responses.transformation.get_secret_str",
|
||||
return_value="env_api_key",
|
||||
):
|
||||
result = self.config.validate_environment(
|
||||
headers=headers, model=self.model
|
||||
)
|
||||
|
||||
assert "Authorization" in result
|
||||
assert result["Authorization"] == "Bearer env_api_key"
|
||||
|
||||
def test_get_complete_url(self):
|
||||
"""Test that get_complete_url returns the correct URL"""
|
||||
# Test with provided API base
|
||||
api_base = "https://custom-openai.example.com/v1"
|
||||
|
||||
result = self.config.get_complete_url(api_base=api_base, model=self.model)
|
||||
|
||||
assert result == "https://custom-openai.example.com/v1/responses"
|
||||
|
||||
# Test with litellm.api_base
|
||||
with patch("litellm.api_base", "https://litellm-api-base.example.com/v1"):
|
||||
result = self.config.get_complete_url(api_base=None, model=self.model)
|
||||
|
||||
assert result == "https://litellm-api-base.example.com/v1/responses"
|
||||
|
||||
# Test with environment variable
|
||||
with patch("litellm.api_base", None):
|
||||
with patch(
|
||||
"litellm.llms.openai.responses.transformation.get_secret_str",
|
||||
return_value="https://env-api-base.example.com/v1",
|
||||
):
|
||||
result = self.config.get_complete_url(api_base=None, model=self.model)
|
||||
|
||||
assert result == "https://env-api-base.example.com/v1/responses"
|
||||
|
||||
# Test with default API base
|
||||
with patch("litellm.api_base", None):
|
||||
with patch(
|
||||
"litellm.llms.openai.responses.transformation.get_secret_str",
|
||||
return_value=None,
|
||||
):
|
||||
result = self.config.get_complete_url(api_base=None, model=self.model)
|
||||
|
||||
assert result == "https://api.openai.com/v1/responses"
|
||||
|
||||
# Test with trailing slash in API base
|
||||
api_base = "https://custom-openai.example.com/v1/"
|
||||
|
||||
result = self.config.get_complete_url(api_base=api_base, model=self.model)
|
||||
|
||||
assert result == "https://custom-openai.example.com/v1/responses"
150  tests/litellm/responses/test_responses_utils.py  Normal file
|
@ -0,0 +1,150 @@
|
|||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
sys.path.insert(
|
||||
0, os.path.abspath("../../..")
|
||||
) # Adds the parent directory to the system path
|
||||
|
||||
import litellm
|
||||
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
|
||||
from litellm.llms.openai.responses.transformation import OpenAIResponsesAPIConfig
|
||||
from litellm.responses.utils import ResponseAPILoggingUtils, ResponsesAPIRequestUtils
|
||||
from litellm.types.llms.openai import ResponsesAPIOptionalRequestParams
|
||||
from litellm.types.utils import Usage
|
||||
|
||||
|
||||
class TestResponsesAPIRequestUtils:
|
||||
def test_get_optional_params_responses_api(self):
|
||||
"""Test that optional parameters are correctly processed for responses API"""
|
||||
# Setup
|
||||
model = "gpt-4o"
|
||||
config = OpenAIResponsesAPIConfig()
|
||||
optional_params = ResponsesAPIOptionalRequestParams(
|
||||
{"temperature": 0.7, "max_output_tokens": 100}
|
||||
)
|
||||
|
||||
# Execute
|
||||
result = ResponsesAPIRequestUtils.get_optional_params_responses_api(
|
||||
model=model,
|
||||
responses_api_provider_config=config,
|
||||
response_api_optional_params=optional_params,
|
||||
)
|
||||
|
||||
# Assert
|
||||
assert result == optional_params
|
||||
assert "temperature" in result
|
||||
assert result["temperature"] == 0.7
|
||||
assert "max_output_tokens" in result
|
||||
assert result["max_output_tokens"] == 100
|
||||
|
||||
def test_get_optional_params_responses_api_unsupported_param(self):
|
||||
"""Test that unsupported parameters raise an error"""
|
||||
# Setup
|
||||
model = "gpt-4o"
|
||||
config = OpenAIResponsesAPIConfig()
|
||||
optional_params = ResponsesAPIOptionalRequestParams(
|
||||
{"temperature": 0.7, "unsupported_param": "value"}
|
||||
)
|
||||
|
||||
# Execute and Assert
|
||||
with pytest.raises(litellm.UnsupportedParamsError) as excinfo:
|
||||
ResponsesAPIRequestUtils.get_optional_params_responses_api(
|
||||
model=model,
|
||||
responses_api_provider_config=config,
|
||||
response_api_optional_params=optional_params,
|
||||
)
|
||||
|
||||
assert "unsupported_param" in str(excinfo.value)
|
||||
assert model in str(excinfo.value)
|
||||
|
||||
def test_get_requested_response_api_optional_param(self):
|
||||
"""Test filtering parameters to only include those in ResponsesAPIOptionalRequestParams"""
|
||||
# Setup
|
||||
params = {
|
||||
"temperature": 0.7,
|
||||
"max_output_tokens": 100,
|
||||
"invalid_param": "value",
|
||||
"model": "gpt-4o", # This is not in ResponsesAPIOptionalRequestParams
|
||||
}
|
||||
|
||||
# Execute
|
||||
result = ResponsesAPIRequestUtils.get_requested_response_api_optional_param(
|
||||
params
|
||||
)
|
||||
|
||||
# Assert
|
||||
assert "temperature" in result
|
||||
assert "max_output_tokens" in result
|
||||
assert "invalid_param" not in result
|
||||
assert "model" not in result
|
||||
assert result["temperature"] == 0.7
|
||||
assert result["max_output_tokens"] == 100
|
||||
|
||||
|
||||
class TestResponseAPILoggingUtils:
|
||||
def test_is_response_api_usage_true(self):
|
||||
"""Test identification of Response API usage format"""
|
||||
# Setup
|
||||
usage = {"input_tokens": 10, "output_tokens": 20}
|
||||
|
||||
# Execute
|
||||
result = ResponseAPILoggingUtils._is_response_api_usage(usage)
|
||||
|
||||
# Assert
|
||||
assert result is True
|
||||
|
||||
def test_is_response_api_usage_false(self):
|
||||
"""Test identification of non-Response API usage format"""
|
||||
# Setup
|
||||
usage = {"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30}
|
||||
|
||||
# Execute
|
||||
result = ResponseAPILoggingUtils._is_response_api_usage(usage)
|
||||
|
||||
# Assert
|
||||
assert result is False
|
||||
|
||||
def test_transform_response_api_usage_to_chat_usage(self):
|
||||
"""Test transformation from Response API usage to Chat usage format"""
|
||||
# Setup
|
||||
usage = {
|
||||
"input_tokens": 10,
|
||||
"output_tokens": 20,
|
||||
"total_tokens": 30,
|
||||
"output_tokens_details": {"reasoning_tokens": 5},
|
||||
}
|
||||
|
||||
# Execute
|
||||
result = ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
|
||||
usage
|
||||
)
|
||||
|
||||
# Assert
|
||||
assert isinstance(result, Usage)
|
||||
assert result.prompt_tokens == 10
|
||||
assert result.completion_tokens == 20
|
||||
assert result.total_tokens == 30
|
||||
|
||||
def test_transform_response_api_usage_with_none_values(self):
|
||||
"""Test transformation handles None values properly"""
|
||||
# Setup
|
||||
usage = {
|
||||
"input_tokens": 0, # Changed from None to 0
|
||||
"output_tokens": 20,
|
||||
"total_tokens": 20,
|
||||
"output_tokens_details": {"reasoning_tokens": 5},
|
||||
}
|
||||
|
||||
# Execute
|
||||
result = ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
|
||||
usage
|
||||
)
|
||||
|
||||
# Assert
|
||||
assert result.prompt_tokens == 0
|
||||
assert result.completion_tokens == 20
|
||||
assert result.total_tokens == 20
|
108  tests/litellm/test_model_prices_and_context_window_schema.py  Normal file
|
@ -0,0 +1,108 @@
|
|||
import json
|
||||
from jsonschema import validate
|
||||
|
||||
def test_model_prices_and_context_window_json_is_valid():
|
||||
'''
|
||||
Validates the `model_prices_and_context_window.json` file.
|
||||
|
||||
If this test fails after you update the json, you need to update the schema or correct the change you made.
|
||||
'''
|
||||
|
||||
INTENDED_SCHEMA = {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"cache_creation_input_audio_token_cost": {"type": "number"},
|
||||
"cache_creation_input_token_cost": {"type": "number"},
|
||||
"cache_read_input_token_cost": {"type": "number"},
|
||||
"deprecation_date": {"type": "string"},
|
||||
"input_cost_per_audio_per_second": {"type": "number"},
|
||||
"input_cost_per_audio_per_second_above_128k_tokens": {"type": "number"},
|
||||
"input_cost_per_audio_token": {"type": "number"},
|
||||
"input_cost_per_character": {"type": "number"},
|
||||
"input_cost_per_character_above_128k_tokens": {"type": "number"},
|
||||
"input_cost_per_image": {"type": "number"},
|
||||
"input_cost_per_image_above_128k_tokens": {"type": "number"},
|
||||
"input_cost_per_pixel": {"type": "number"},
|
||||
"input_cost_per_query": {"type": "number"},
|
||||
"input_cost_per_request": {"type": "number"},
|
||||
"input_cost_per_second": {"type": "number"},
|
||||
"input_cost_per_token": {"type": "number"},
|
||||
"input_cost_per_token_above_128k_tokens": {"type": "number"},
|
||||
"input_cost_per_token_batch_requests": {"type": "number"},
|
||||
"input_cost_per_token_batches": {"type": "number"},
|
||||
"input_cost_per_token_cache_hit": {"type": "number"},
|
||||
"input_cost_per_video_per_second": {"type": "number"},
|
||||
"input_cost_per_video_per_second_above_128k_tokens": {"type": "number"},
|
||||
"input_dbu_cost_per_token": {"type": "number"},
|
||||
"litellm_provider": {"type": "string"},
|
||||
"max_audio_length_hours": {"type": "number"},
|
||||
"max_audio_per_prompt": {"type": "number"},
|
||||
"max_document_chunks_per_query": {"type": "number"},
|
||||
"max_images_per_prompt": {"type": "number"},
|
||||
"max_input_tokens": {"type": "number"},
|
||||
"max_output_tokens": {"type": "number"},
|
||||
"max_pdf_size_mb": {"type": "number"},
|
||||
"max_query_tokens": {"type": "number"},
|
||||
"max_tokens": {"type": "number"},
|
||||
"max_tokens_per_document_chunk": {"type": "number"},
|
||||
"max_video_length": {"type": "number"},
|
||||
"max_videos_per_prompt": {"type": "number"},
|
||||
"metadata": {"type": "object"},
|
||||
"mode": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"audio_speech",
|
||||
"audio_transcription",
|
||||
"chat",
|
||||
"completion",
|
||||
"embedding",
|
||||
"image_generation",
|
||||
"moderation",
|
||||
"rerank"
|
||||
],
|
||||
},
|
||||
"output_cost_per_audio_token": {"type": "number"},
|
||||
"output_cost_per_character": {"type": "number"},
|
||||
"output_cost_per_character_above_128k_tokens": {"type": "number"},
|
||||
"output_cost_per_image": {"type": "number"},
|
||||
"output_cost_per_pixel": {"type": "number"},
|
||||
"output_cost_per_second": {"type": "number"},
|
||||
"output_cost_per_token": {"type": "number"},
|
||||
"output_cost_per_token_above_128k_tokens": {"type": "number"},
|
||||
"output_cost_per_token_batches": {"type": "number"},
|
||||
"output_db_cost_per_token": {"type": "number"},
|
||||
"output_dbu_cost_per_token": {"type": "number"},
|
||||
"output_vector_size": {"type": "number"},
|
||||
"rpd": {"type": "number"},
|
||||
"rpm": {"type": "number"},
|
||||
"source": {"type": "string"},
|
||||
"supports_assistant_prefill": {"type": "boolean"},
|
||||
"supports_audio_input": {"type": "boolean"},
|
||||
"supports_audio_output": {"type": "boolean"},
|
||||
"supports_embedding_image_input": {"type": "boolean"},
|
||||
"supports_function_calling": {"type": "boolean"},
|
||||
"supports_image_input": {"type": "boolean"},
|
||||
"supports_parallel_function_calling": {"type": "boolean"},
|
||||
"supports_pdf_input": {"type": "boolean"},
|
||||
"supports_prompt_caching": {"type": "boolean"},
|
||||
"supports_response_schema": {"type": "boolean"},
|
||||
"supports_system_messages": {"type": "boolean"},
|
||||
"supports_tool_choice": {"type": "boolean"},
|
||||
"supports_video_input": {"type": "boolean"},
|
||||
"supports_vision": {"type": "boolean"},
|
||||
"tool_use_system_prompt_tokens": {"type": "number"},
|
||||
"tpm": {"type": "number"},
|
||||
},
|
||||
"additionalProperties": False,
|
||||
},
|
||||
}
|
||||
|
||||
with open("./model_prices_and_context_window.json", "r") as model_prices_file:
|
||||
actual_json = json.load(model_prices_file)
|
||||
assert isinstance(actual_json, dict)
|
||||
actual_json.pop('sample_spec', None) # remove the sample, whose schema is inconsistent with the real data
|
||||
|
||||
validate(actual_json, INTENDED_SCHEMA)
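The strictness comes from `additionalProperties: False`: a field added to a model entry but not to INTENDED_SCHEMA fails validation. A small self-contained illustration of that failure mode, using a reduced schema and a hypothetical field name rather than the real registry:

from jsonschema import ValidationError, validate

# Reduced schema mirroring the pattern above: additionalProperties is False.
schema = {
    "type": "object",
    "additionalProperties": {
        "type": "object",
        "properties": {"mode": {"type": "string"}},
        "additionalProperties": False,
    },
}
try:
    validate({"sample/model": {"mode": "chat", "made_up_field": 1}}, schema)
except ValidationError as e:
    print("schema update needed:", e.message)
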
63  tests/llm_responses_api_testing/conftest.py  Normal file
|
@ -0,0 +1,63 @@
|
|||
# conftest.py
|
||||
|
||||
import importlib
|
||||
import os
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(
|
||||
0, os.path.abspath("../..")
|
||||
) # Adds the parent directory to the system path
|
||||
import litellm
|
||||
|
||||
|
||||
@pytest.fixture(scope="function", autouse=True)
|
||||
def setup_and_teardown():
|
||||
"""
|
||||
This fixture reloads litellm before every function. To speed up testing by removing callbacks being chained.
|
||||
"""
|
||||
curr_dir = os.getcwd() # Get the current working directory
|
||||
sys.path.insert(
|
||||
0, os.path.abspath("../..")
|
||||
) # Adds the project directory to the system path
|
||||
|
||||
import litellm
|
||||
from litellm import Router
|
||||
|
||||
importlib.reload(litellm)
|
||||
|
||||
try:
|
||||
if hasattr(litellm, "proxy") and hasattr(litellm.proxy, "proxy_server"):
|
||||
import litellm.proxy.proxy_server
|
||||
|
||||
importlib.reload(litellm.proxy.proxy_server)
|
||||
except Exception as e:
|
||||
print(f"Error reloading litellm.proxy.proxy_server: {e}")
|
||||
|
||||
import asyncio
|
||||
|
||||
loop = asyncio.get_event_loop_policy().new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
print(litellm)
|
||||
# from litellm import Router, completion, aembedding, acompletion, embedding
|
||||
yield
|
||||
|
||||
# Teardown code (executes after the yield point)
|
||||
loop.close() # Close the loop created earlier
|
||||
asyncio.set_event_loop(None) # Remove the reference to the loop
|
||||
|
||||
|
||||
def pytest_collection_modifyitems(config, items):
|
||||
# Separate tests in 'test_amazing_proxy_custom_logger.py' and other tests
|
||||
custom_logger_tests = [
|
||||
item for item in items if "custom_logger" in item.parent.name
|
||||
]
|
||||
other_tests = [item for item in items if "custom_logger" not in item.parent.name]
|
||||
|
||||
# Sort tests based on their names
|
||||
custom_logger_tests.sort(key=lambda x: x.name)
|
||||
other_tests.sort(key=lambda x: x.name)
|
||||
|
||||
# Reorder the items list
|
||||
items[:] = custom_logger_tests + other_tests
|
505  tests/llm_responses_api_testing/test_openai_responses_api.py  Normal file
|
@ -0,0 +1,505 @@
|
|||
import os
|
||||
import sys
|
||||
import pytest
|
||||
import asyncio
|
||||
from typing import Optional
|
||||
|
||||
sys.path.insert(0, os.path.abspath("../.."))
|
||||
import litellm
|
||||
from litellm.integrations.custom_logger import CustomLogger
|
||||
import json
|
||||
from litellm.types.utils import StandardLoggingPayload
|
||||
from litellm.types.llms.openai import (
|
||||
ResponseCompletedEvent,
|
||||
ResponsesAPIResponse,
|
||||
ResponseTextConfig,
|
||||
ResponseAPIUsage,
|
||||
IncompleteDetails,
|
||||
)
|
||||
|
||||
|
||||
def validate_responses_api_response(response, final_chunk: bool = False):
|
||||
"""
|
||||
Validate that a response from litellm.responses() or litellm.aresponses()
|
||||
conforms to the expected ResponsesAPIResponse structure.
|
||||
|
||||
Args:
|
||||
response: The response object to validate
|
||||
|
||||
Raises:
|
||||
AssertionError: If the response doesn't match the expected structure
|
||||
"""
|
||||
# Validate response structure
|
||||
print("response=", json.dumps(response, indent=4, default=str))
|
||||
assert isinstance(
|
||||
response, ResponsesAPIResponse
|
||||
), "Response should be an instance of ResponsesAPIResponse"
|
||||
|
||||
# Required fields
|
||||
assert "id" in response and isinstance(
|
||||
response["id"], str
|
||||
), "Response should have a string 'id' field"
|
||||
assert "created_at" in response and isinstance(
|
||||
response["created_at"], (int, float)
|
||||
), "Response should have a numeric 'created_at' field"
|
||||
assert "output" in response and isinstance(
|
||||
response["output"], list
|
||||
), "Response should have a list 'output' field"
|
||||
assert "parallel_tool_calls" in response and isinstance(
|
||||
response["parallel_tool_calls"], bool
|
||||
), "Response should have a boolean 'parallel_tool_calls' field"
|
||||
|
||||
# Optional fields with their expected types
|
||||
optional_fields = {
|
||||
"error": (dict, type(None)), # error can be dict or None
|
||||
"incomplete_details": (IncompleteDetails, type(None)),
|
||||
"instructions": (str, type(None)),
|
||||
"metadata": dict,
|
||||
"model": str,
|
||||
"object": str,
|
||||
"temperature": (int, float),
|
||||
"tool_choice": (dict, str),
|
||||
"tools": list,
|
||||
"top_p": (int, float),
|
||||
"max_output_tokens": (int, type(None)),
|
||||
"previous_response_id": (str, type(None)),
|
||||
"reasoning": dict,
|
||||
"status": str,
|
||||
"text": ResponseTextConfig,
|
||||
"truncation": str,
|
||||
"usage": ResponseAPIUsage,
|
||||
"user": (str, type(None)),
|
||||
}
|
||||
if final_chunk is False:
|
||||
optional_fields["usage"] = type(None)
|
||||
|
||||
for field, expected_type in optional_fields.items():
|
||||
if field in response:
|
||||
assert isinstance(
|
||||
response[field], expected_type
|
||||
), f"Field '{field}' should be of type {expected_type}, but got {type(response[field])}"
|
||||
|
||||
# Check if output has at least one item
|
||||
if final_chunk is True:
|
||||
assert (
|
||||
len(response["output"]) > 0
|
||||
), "Response 'output' field should have at least one item"
|
||||
|
||||
return True # Return True if validation passes
|
||||
|
||||
|
||||
@pytest.mark.parametrize("sync_mode", [True, False])
|
||||
@pytest.mark.asyncio
|
||||
async def test_basic_openai_responses_api(sync_mode):
|
||||
litellm._turn_on_debug()
|
||||
|
||||
if sync_mode:
|
||||
response = litellm.responses(
|
||||
model="gpt-4o", input="Basic ping", max_output_tokens=20
|
||||
)
|
||||
else:
|
||||
response = await litellm.aresponses(
|
||||
model="gpt-4o", input="Basic ping", max_output_tokens=20
|
||||
)
|
||||
|
||||
print("litellm response=", json.dumps(response, indent=4, default=str))
|
||||
|
||||
# Use the helper function to validate the response
|
||||
validate_responses_api_response(response, final_chunk=True)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("sync_mode", [True])
|
||||
@pytest.mark.asyncio
|
||||
async def test_basic_openai_responses_api_streaming(sync_mode):
|
||||
litellm._turn_on_debug()
|
||||
|
||||
if sync_mode:
|
||||
response = litellm.responses(
|
||||
model="gpt-4o",
|
||||
input="Basic ping",
|
||||
stream=True,
|
||||
)
|
||||
for event in response:
|
||||
print("litellm response=", json.dumps(event, indent=4, default=str))
|
||||
else:
|
||||
response = await litellm.aresponses(
|
||||
model="gpt-4o",
|
||||
input="Basic ping",
|
||||
stream=True,
|
||||
)
|
||||
async for event in response:
|
||||
print("litellm response=", json.dumps(event, indent=4, default=str))
|
||||
|
||||
|
||||
class TestCustomLogger(CustomLogger):
|
||||
def __init__(
|
||||
self,
|
||||
):
|
||||
self.standard_logging_object: Optional[StandardLoggingPayload] = None
|
||||
|
||||
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
|
||||
print("in async_log_success_event")
|
||||
print("kwargs=", json.dumps(kwargs, indent=4, default=str))
|
||||
self.standard_logging_object = kwargs["standard_logging_object"]
|
||||
pass
|
||||
|
||||
|
||||
def validate_standard_logging_payload(
|
||||
slp: StandardLoggingPayload, response: ResponsesAPIResponse, request_model: str
|
||||
):
|
||||
"""
|
||||
Validate that a StandardLoggingPayload object matches the expected response
|
||||
|
||||
Args:
|
||||
slp (StandardLoggingPayload): The standard logging payload object to validate
|
||||
response (dict): The litellm response to compare against
|
||||
request_model (str): The model name that was requested
|
||||
"""
|
||||
# Validate payload exists
|
||||
assert slp is not None, "Standard logging payload should not be None"
|
||||
|
||||
# Validate token counts
|
||||
print("response=", json.dumps(response, indent=4, default=str))
|
||||
assert (
|
||||
slp["prompt_tokens"] == response["usage"]["input_tokens"]
|
||||
), "Prompt tokens mismatch"
|
||||
assert (
|
||||
slp["completion_tokens"] == response["usage"]["output_tokens"]
|
||||
), "Completion tokens mismatch"
|
||||
assert (
|
||||
slp["total_tokens"]
|
||||
== response["usage"]["input_tokens"] + response["usage"]["output_tokens"]
|
||||
), "Total tokens mismatch"
|
||||
|
||||
# Validate spend and response metadata
|
||||
assert slp["response_cost"] > 0, "Response cost should be greater than 0"
|
||||
assert slp["id"] == response["id"], "Response ID mismatch"
|
||||
assert slp["model"] == request_model, "Model name mismatch"
|
||||
|
||||
# Validate messages
|
||||
assert slp["messages"] == [{"content": "hi", "role": "user"}], "Messages mismatch"
|
||||
|
||||
# Validate complete response structure
|
||||
validate_responses_match(slp["response"], response)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_basic_openai_responses_api_streaming_with_logging():
|
||||
litellm._turn_on_debug()
|
||||
litellm.set_verbose = True
|
||||
test_custom_logger = TestCustomLogger()
|
||||
litellm.callbacks = [test_custom_logger]
|
||||
request_model = "gpt-4o"
|
||||
response = await litellm.aresponses(
|
||||
model=request_model,
|
||||
input="hi",
|
||||
stream=True,
|
||||
)
|
||||
final_response: Optional[ResponseCompletedEvent] = None
|
||||
async for event in response:
|
||||
if event.type == "response.completed":
|
||||
final_response = event
|
||||
print("litellm response=", json.dumps(event, indent=4, default=str))
|
||||
|
||||
print("sleeping for 2 seconds...")
|
||||
await asyncio.sleep(2)
|
||||
print(
|
||||
"standard logging payload=",
|
||||
json.dumps(test_custom_logger.standard_logging_object, indent=4, default=str),
|
||||
)
|
||||
|
||||
assert final_response is not None
|
||||
assert test_custom_logger.standard_logging_object is not None
|
||||
|
||||
validate_standard_logging_payload(
|
||||
slp=test_custom_logger.standard_logging_object,
|
||||
response=final_response.response,
|
||||
request_model=request_model,
|
||||
)
|
||||
|
||||
|
||||
def validate_responses_match(slp_response, litellm_response):
|
||||
"""Validate that the standard logging payload OpenAI response matches the litellm response"""
|
||||
# Validate core fields
|
||||
assert slp_response["id"] == litellm_response["id"], "ID mismatch"
|
||||
assert slp_response["model"] == litellm_response["model"], "Model mismatch"
|
||||
assert (
|
||||
slp_response["created_at"] == litellm_response["created_at"]
|
||||
), "Created at mismatch"
|
||||
|
||||
# Validate usage
|
||||
assert (
|
||||
slp_response["usage"]["input_tokens"]
|
||||
== litellm_response["usage"]["input_tokens"]
|
||||
), "Input tokens mismatch"
|
||||
assert (
|
||||
slp_response["usage"]["output_tokens"]
|
||||
== litellm_response["usage"]["output_tokens"]
|
||||
), "Output tokens mismatch"
|
||||
assert (
|
||||
slp_response["usage"]["total_tokens"]
|
||||
== litellm_response["usage"]["total_tokens"]
|
||||
), "Total tokens mismatch"
|
||||
|
||||
# Validate output/messages
|
||||
assert len(slp_response["output"]) == len(
|
||||
litellm_response["output"]
|
||||
), "Output length mismatch"
|
||||
for slp_msg, litellm_msg in zip(slp_response["output"], litellm_response["output"]):
|
||||
assert slp_msg["role"] == litellm_msg.role, "Message role mismatch"
|
||||
# Access the content's text field for the litellm response
|
||||
litellm_content = litellm_msg.content[0].text if litellm_msg.content else ""
|
||||
assert (
|
||||
slp_msg["content"][0]["text"] == litellm_content
|
||||
), f"Message content mismatch. Expected {litellm_content}, Got {slp_msg['content']}"
|
||||
assert slp_msg["status"] == litellm_msg.status, "Message status mismatch"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_basic_openai_responses_api_non_streaming_with_logging():
|
||||
litellm._turn_on_debug()
|
||||
litellm.set_verbose = True
|
||||
test_custom_logger = TestCustomLogger()
|
||||
litellm.callbacks = [test_custom_logger]
|
||||
request_model = "gpt-4o"
|
||||
response = await litellm.aresponses(
|
||||
model=request_model,
|
||||
input="hi",
|
||||
)
|
||||
|
||||
print("litellm response=", json.dumps(response, indent=4, default=str))
|
||||
print("response hidden params=", response._hidden_params)
|
||||
|
||||
print("sleeping for 2 seconds...")
|
||||
await asyncio.sleep(2)
|
||||
print(
|
||||
"standard logging payload=",
|
||||
json.dumps(test_custom_logger.standard_logging_object, indent=4, default=str),
|
||||
)
|
||||
|
||||
assert response is not None
|
||||
assert test_custom_logger.standard_logging_object is not None
|
||||
|
||||
validate_standard_logging_payload(
|
||||
test_custom_logger.standard_logging_object, response, request_model
|
||||
)
|
||||
|
||||
|
||||
def validate_stream_event(event):
|
||||
"""
|
||||
Validate that a streaming event from litellm.responses() or litellm.aresponses()
|
||||
with stream=True conforms to the expected structure based on its event type.
|
||||
|
||||
Args:
|
||||
event: The streaming event object to validate
|
||||
|
||||
Raises:
|
||||
AssertionError: If the event doesn't match the expected structure for its type
|
||||
"""
|
||||
# Common validation for all event types
|
||||
assert hasattr(event, "type"), "Event should have a 'type' attribute"
|
||||
|
||||
# Type-specific validation
|
||||
if event.type == "response.created" or event.type == "response.in_progress":
|
||||
assert hasattr(
|
||||
event, "response"
|
||||
), f"{event.type} event should have a 'response' attribute"
|
||||
validate_responses_api_response(event.response, final_chunk=False)
|
||||
|
||||
elif event.type == "response.completed":
|
||||
assert hasattr(
|
||||
event, "response"
|
||||
), "response.completed event should have a 'response' attribute"
|
||||
validate_responses_api_response(event.response, final_chunk=True)
|
||||
# Usage is guaranteed only on the completed event
|
||||
assert (
|
||||
"usage" in event.response
|
||||
), "response.completed event should have usage information"
|
||||
print("Usage in event.response=", event.response["usage"])
|
||||
assert isinstance(event.response["usage"], ResponseAPIUsage)
|
||||
elif event.type == "response.failed" or event.type == "response.incomplete":
|
||||
assert hasattr(
|
||||
event, "response"
|
||||
), f"{event.type} event should have a 'response' attribute"
|
||||
|
||||
elif (
|
||||
event.type == "response.output_item.added"
|
||||
or event.type == "response.output_item.done"
|
||||
):
|
||||
assert hasattr(
|
||||
event, "output_index"
|
||||
), f"{event.type} event should have an 'output_index' attribute"
|
||||
assert hasattr(
|
||||
event, "item"
|
||||
), f"{event.type} event should have an 'item' attribute"
|
||||
|
||||
elif (
|
||||
event.type == "response.content_part.added"
|
||||
or event.type == "response.content_part.done"
|
||||
):
|
||||
assert hasattr(
|
||||
event, "item_id"
|
||||
), f"{event.type} event should have an 'item_id' attribute"
|
||||
assert hasattr(
|
||||
event, "output_index"
|
||||
), f"{event.type} event should have an 'output_index' attribute"
|
||||
assert hasattr(
|
||||
event, "content_index"
|
||||
), f"{event.type} event should have a 'content_index' attribute"
|
||||
assert hasattr(
|
||||
event, "part"
|
||||
), f"{event.type} event should have a 'part' attribute"
|
||||
|
||||
elif event.type == "response.output_text.delta":
|
||||
assert hasattr(
|
||||
event, "item_id"
|
||||
), f"{event.type} event should have an 'item_id' attribute"
|
||||
assert hasattr(
|
||||
event, "output_index"
|
||||
), f"{event.type} event should have an 'output_index' attribute"
|
||||
assert hasattr(
|
||||
event, "content_index"
|
||||
), f"{event.type} event should have a 'content_index' attribute"
|
||||
assert hasattr(
|
||||
event, "delta"
|
||||
), f"{event.type} event should have a 'delta' attribute"
|
||||
|
||||
elif event.type == "response.output_text.annotation.added":
|
||||
assert hasattr(
|
||||
event, "item_id"
|
||||
), f"{event.type} event should have an 'item_id' attribute"
|
||||
assert hasattr(
|
||||
event, "output_index"
|
||||
), f"{event.type} event should have an 'output_index' attribute"
|
||||
assert hasattr(
|
||||
event, "content_index"
|
||||
), f"{event.type} event should have a 'content_index' attribute"
|
||||
assert hasattr(
|
||||
event, "annotation_index"
|
||||
), f"{event.type} event should have an 'annotation_index' attribute"
|
||||
assert hasattr(
|
||||
event, "annotation"
|
||||
), f"{event.type} event should have an 'annotation' attribute"
|
||||
|
||||
elif event.type == "response.output_text.done":
|
||||
assert hasattr(
|
||||
event, "item_id"
|
||||
), f"{event.type} event should have an 'item_id' attribute"
|
||||
assert hasattr(
|
||||
event, "output_index"
|
||||
), f"{event.type} event should have an 'output_index' attribute"
|
||||
assert hasattr(
|
||||
event, "content_index"
|
||||
), f"{event.type} event should have a 'content_index' attribute"
|
||||
assert hasattr(
|
||||
event, "text"
|
||||
), f"{event.type} event should have a 'text' attribute"
|
||||
|
||||
elif event.type == "response.refusal.delta":
|
||||
assert hasattr(
|
||||
event, "item_id"
|
||||
), f"{event.type} event should have an 'item_id' attribute"
|
||||
assert hasattr(
|
||||
event, "output_index"
|
||||
), f"{event.type} event should have an 'output_index' attribute"
|
||||
assert hasattr(
|
||||
event, "content_index"
|
||||
), f"{event.type} event should have a 'content_index' attribute"
|
||||
assert hasattr(
|
||||
event, "delta"
|
||||
), f"{event.type} event should have a 'delta' attribute"
|
||||
|
||||
elif event.type == "response.refusal.done":
|
||||
assert hasattr(
|
||||
event, "item_id"
|
||||
), f"{event.type} event should have an 'item_id' attribute"
|
||||
assert hasattr(
|
||||
event, "output_index"
|
||||
), f"{event.type} event should have an 'output_index' attribute"
|
||||
assert hasattr(
|
||||
event, "content_index"
|
||||
), f"{event.type} event should have a 'content_index' attribute"
|
||||
assert hasattr(
|
||||
event, "refusal"
|
||||
), f"{event.type} event should have a 'refusal' attribute"
|
||||
|
||||
elif event.type == "response.function_call_arguments.delta":
|
||||
assert hasattr(
|
||||
event, "item_id"
|
||||
), f"{event.type} event should have an 'item_id' attribute"
|
||||
assert hasattr(
|
||||
event, "output_index"
|
||||
), f"{event.type} event should have an 'output_index' attribute"
|
||||
assert hasattr(
|
||||
event, "delta"
|
||||
), f"{event.type} event should have a 'delta' attribute"
|
||||
|
||||
elif event.type == "response.function_call_arguments.done":
|
||||
assert hasattr(
|
||||
event, "item_id"
|
||||
), f"{event.type} event should have an 'item_id' attribute"
|
||||
assert hasattr(
|
||||
event, "output_index"
|
||||
), f"{event.type} event should have an 'output_index' attribute"
|
||||
assert hasattr(
|
||||
event, "arguments"
|
||||
), f"{event.type} event should have an 'arguments' attribute"
|
||||
|
||||
elif event.type in [
|
||||
"response.file_search_call.in_progress",
|
||||
"response.file_search_call.searching",
|
||||
"response.file_search_call.completed",
|
||||
"response.web_search_call.in_progress",
|
||||
"response.web_search_call.searching",
|
||||
"response.web_search_call.completed",
|
||||
]:
|
||||
assert hasattr(
|
||||
event, "output_index"
|
||||
), f"{event.type} event should have an 'output_index' attribute"
|
||||
assert hasattr(
|
||||
event, "item_id"
|
||||
), f"{event.type} event should have an 'item_id' attribute"
|
||||
|
||||
elif event.type == "error":
|
||||
assert hasattr(
|
||||
event, "message"
|
||||
), "Error event should have a 'message' attribute"
|
||||
return True # Return True if validation passes
|
||||
|
||||
|
||||
@pytest.mark.parametrize("sync_mode", [True, False])
|
||||
@pytest.mark.asyncio
|
||||
async def test_openai_responses_api_streaming_validation(sync_mode):
|
||||
"""Test that validates each streaming event from the responses API"""
|
||||
litellm._turn_on_debug()
|
||||
|
||||
event_types_seen = set()
|
||||
|
||||
if sync_mode:
|
||||
response = litellm.responses(
|
||||
model="gpt-4o",
|
||||
input="Tell me about artificial intelligence in 3 sentences.",
|
||||
stream=True,
|
||||
)
|
||||
for event in response:
|
||||
print(f"Validating event type: {event.type}")
|
||||
validate_stream_event(event)
|
||||
event_types_seen.add(event.type)
|
||||
else:
|
||||
response = await litellm.aresponses(
|
||||
model="gpt-4o",
|
||||
input="Tell me about artificial intelligence in 3 sentences.",
|
||||
stream=True,
|
||||
)
|
||||
async for event in response:
|
||||
print(f"Validating event type: {event.type}")
|
||||
validate_stream_event(event)
|
||||
event_types_seen.add(event.type)
|
||||
|
||||
# At minimum, we should see these core event types
|
||||
required_events = {"response.created", "response.completed"}
|
||||
|
||||
missing_events = required_events - event_types_seen
|
||||
assert not missing_events, f"Missing required event types: {missing_events}"
|
||||
|
||||
print(f"Successfully validated all event types: {event_types_seen}")
|
|
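The logging pattern these tests rely on is the same one an application can use. A minimal sketch (class name is illustrative) of wiring a CustomLogger to read the standard logging payload, exactly as TestCustomLogger does above:

import litellm
from litellm.integrations.custom_logger import CustomLogger

class CostLogger(CustomLogger):
    # Same hook the TestCustomLogger above uses to capture the payload.
    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        slp = kwargs["standard_logging_object"]
        print(slp["model"], slp["response_cost"])

litellm.callbacks = [CostLogger()]
# Subsequent litellm.responses(...) / litellm.aresponses(...) calls are logged through CostLogger.
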
@ -992,8 +992,8 @@ def test_anthropic_thinking_output(model):
|
|||
@pytest.mark.parametrize(
|
||||
"model",
|
||||
[
|
||||
"anthropic/claude-3-7-sonnet-20250219",
|
||||
# "bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0",
|
||||
# "anthropic/claude-3-7-sonnet-20250219",
|
||||
"bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0",
|
||||
# "bedrock/invoke/us.anthropic.claude-3-7-sonnet-20250219-v1:0",
|
||||
],
|
||||
)
|
||||
|
@ -1011,8 +1011,11 @@ def test_anthropic_thinking_output_stream(model):
|
|||
|
||||
reasoning_content_exists = False
|
||||
signature_block_exists = False
|
||||
tool_call_exists = False
|
||||
for chunk in resp:
|
||||
print(f"chunk 2: {chunk}")
|
||||
if chunk.choices[0].delta.tool_calls:
|
||||
tool_call_exists = True
|
||||
if (
|
||||
hasattr(chunk.choices[0].delta, "thinking_blocks")
|
||||
and chunk.choices[0].delta.thinking_blocks is not None
|
||||
|
@ -1025,6 +1028,7 @@ def test_anthropic_thinking_output_stream(model):
|
|||
print(chunk.choices[0].delta.thinking_blocks[0])
|
||||
if chunk.choices[0].delta.thinking_blocks[0].get("signature"):
|
||||
signature_block_exists = True
|
||||
assert not tool_call_exists
|
||||
assert reasoning_content_exists
|
||||
assert signature_block_exists
|
||||
except litellm.Timeout:
|
||||
|
|
|
@ -956,7 +956,7 @@ def test_bedrock_ptu():
|
|||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_bedrock_extra_headers():
|
||||
async def test_bedrock_custom_api_base():
|
||||
"""
|
||||
Check if a url with 'modelId' passed in, is created correctly
|
||||
|
||||
|
@ -994,6 +994,44 @@ async def test_bedrock_extra_headers():
|
|||
mock_client_post.assert_called_once()
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"model",
|
||||
[
|
||||
"anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
"bedrock/invoke/anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
],
|
||||
)
|
||||
@pytest.mark.asyncio
|
||||
async def test_bedrock_extra_headers(model):
|
||||
"""
|
||||
Relevant Issue: https://github.com/BerriAI/litellm/issues/9106
|
||||
"""
|
||||
client = AsyncHTTPHandler()
|
||||
|
||||
with patch.object(client, "post", new=AsyncMock()) as mock_client_post:
|
||||
litellm.set_verbose = True
|
||||
from openai.types.chat import ChatCompletion
|
||||
|
||||
try:
|
||||
response = await litellm.acompletion(
|
||||
model=model,
|
||||
messages=[{"role": "user", "content": "What's AWS?"}],
|
||||
client=client,
|
||||
extra_headers={"test": "hello world", "Authorization": "my-test-key"},
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"error: {e}")
|
||||
|
||||
print(f"mock_client_post.call_args.kwargs: {mock_client_post.call_args.kwargs}")
|
||||
assert "test" in mock_client_post.call_args.kwargs["headers"]
|
||||
assert mock_client_post.call_args.kwargs["headers"]["test"] == "hello world"
|
||||
assert (
|
||||
mock_client_post.call_args.kwargs["headers"]["Authorization"]
|
||||
== "my-test-key"
|
||||
)
|
||||
mock_client_post.assert_called_once()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_bedrock_custom_prompt_template():
|
||||
"""
|
||||
|
|
|
@ -1205,3 +1205,35 @@ def test_context_window_exceeded_error_from_litellm_proxy():
|
|||
}
|
||||
with pytest.raises(litellm.ContextWindowExceededError):
|
||||
extract_and_raise_litellm_exception(**args)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("sync_mode", [True, False])
|
||||
@pytest.mark.parametrize("stream_mode", [True, False])
|
||||
@pytest.mark.parametrize("model", ["azure/gpt-4o"]) # "gpt-4o-mini",
|
||||
@pytest.mark.asyncio
|
||||
async def test_exception_bubbling_up(sync_mode, stream_mode, model):
|
||||
"""
|
||||
make sure code, param, and type are bubbled up
|
||||
"""
|
||||
import litellm
|
||||
|
||||
litellm.set_verbose = True
|
||||
with pytest.raises(Exception) as exc_info:
|
||||
if sync_mode:
|
||||
litellm.completion(
|
||||
model=model,
|
||||
messages=[{"role": "usera", "content": "hi"}],
|
||||
stream=stream_mode,
|
||||
sync_stream=sync_mode,
|
||||
)
|
||||
else:
|
||||
await litellm.acompletion(
|
||||
model=model,
|
||||
messages=[{"role": "usera", "content": "hi"}],
|
||||
stream=stream_mode,
|
||||
sync_stream=sync_mode,
|
||||
)
|
||||
|
||||
assert exc_info.value.code == "invalid_value"
|
||||
assert exc_info.value.param is not None
|
||||
assert exc_info.value.type == "invalid_request_error"
|
||||
|
|
|
@ -329,3 +329,71 @@ async def test_aaapass_through_endpoint_pass_through_keys_langfuse(
|
|||
setattr(
|
||||
litellm.proxy.proxy_server, "proxy_logging_obj", original_proxy_logging_obj
|
||||
)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_pass_through_endpoint_bing(client, monkeypatch):
|
||||
import litellm
|
||||
|
||||
captured_requests = []
|
||||
|
||||
async def mock_bing_request(*args, **kwargs):
|
||||
|
||||
captured_requests.append((args, kwargs))
|
||||
mock_response = httpx.Response(
|
||||
200,
|
||||
json={
|
||||
"_type": "SearchResponse",
|
||||
"queryContext": {"originalQuery": "bob barker"},
|
||||
"webPages": {
|
||||
"webSearchUrl": "https://www.bing.com/search?q=bob+barker",
|
||||
"totalEstimatedMatches": 12000000,
|
||||
"value": [],
|
||||
},
|
||||
},
|
||||
)
|
||||
mock_response.request = Mock(spec=httpx.Request)
|
||||
return mock_response
|
||||
|
||||
monkeypatch.setattr("httpx.AsyncClient.request", mock_bing_request)
|
||||
|
||||
# Define a pass-through endpoint
|
||||
pass_through_endpoints = [
|
||||
{
|
||||
"path": "/bing/search",
|
||||
"target": "https://api.bing.microsoft.com/v7.0/search?setLang=en-US&mkt=en-US",
|
||||
"headers": {"Ocp-Apim-Subscription-Key": "XX"},
|
||||
"forward_headers": True,
|
||||
# Additional settings
|
||||
"merge_query_params": True,
|
||||
"auth": True,
|
||||
},
|
||||
{
|
||||
"path": "/bing/search-no-merge-params",
|
||||
"target": "https://api.bing.microsoft.com/v7.0/search?setLang=en-US&mkt=en-US",
|
||||
"headers": {"Ocp-Apim-Subscription-Key": "XX"},
|
||||
"forward_headers": True,
|
||||
},
|
||||
]
|
||||
|
||||
# Initialize the pass-through endpoint
|
||||
await initialize_pass_through_endpoints(pass_through_endpoints)
|
||||
general_settings: Optional[dict] = (
|
||||
getattr(litellm.proxy.proxy_server, "general_settings", {}) or {}
|
||||
)
|
||||
general_settings.update({"pass_through_endpoints": pass_through_endpoints})
|
||||
setattr(litellm.proxy.proxy_server, "general_settings", general_settings)
|
||||
|
||||
# Make 2 requests thru the pass-through endpoint
|
||||
client.get("/bing/search?q=bob+barker")
|
||||
client.get("/bing/search-no-merge-params?q=bob+barker")
|
||||
|
||||
first_transformed_url = captured_requests[0][1]["url"]
|
||||
second_transformed_url = captured_requests[1][1]["url"]
|
||||
|
||||
# Assert the response
|
||||
assert (
|
||||
first_transformed_url
|
||||
== "https://api.bing.microsoft.com/v7.0/search?q=bob+barker&setLang=en-US&mkt=en-US"
|
||||
and second_transformed_url
|
||||
== "https://api.bing.microsoft.com/v7.0/search?setLang=en-US&mkt=en-US"
|
||||
)
|
||||
|
|
|
@ -9,7 +9,7 @@ from typing import Any, Optional, List, Literal
|
|||
async def generate_key(
|
||||
session, models: Optional[List[str]] = None, team_id: Optional[str] = None
|
||||
):
|
||||
"""Helper function to generate a key with specific model access"""
|
||||
"""Helper function to generate a key with specific model access controls"""
|
||||
url = "http://0.0.0.0:4000/key/generate"
|
||||
headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
|
||||
data = {}
|
||||
|
@ -94,7 +94,7 @@ async def test_model_access_patterns(key_models, test_model, expect_success):
|
|||
assert _error_body["type"] == "key_model_access_denied"
|
||||
assert _error_body["param"] == "model"
|
||||
assert _error_body["code"] == "401"
|
||||
assert "API Key not allowed to access model" in _error_body["message"]
|
||||
assert "key not allowed to access model" in _error_body["message"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
@ -159,12 +159,6 @@ async def test_model_access_update():
|
|||
"team_models, test_model, expect_success",
|
||||
[
|
||||
(["openai/*"], "anthropic/claude-2", False), # Non-matching model
|
||||
(["gpt-4"], "gpt-4", True), # Exact model match
|
||||
(["bedrock/*"], "bedrock/anthropic.claude-3", True), # Bedrock wildcard
|
||||
(["bedrock/anthropic.*"], "bedrock/anthropic.claude-3", True), # Pattern match
|
||||
(["bedrock/anthropic.*"], "bedrock/amazon.titan", False), # Pattern non-match
|
||||
(None, "gpt-4", True), # No model restrictions
|
||||
([], "gpt-4", True), # Empty model list
|
||||
],
|
||||
)
|
||||
@pytest.mark.asyncio
|
||||
|
@ -285,6 +279,6 @@ def _validate_model_access_exception(
|
|||
assert _error_body["param"] == "model"
|
||||
assert _error_body["code"] == "401"
|
||||
if expected_type == "key_model_access_denied":
|
||||
assert "API Key not allowed to access model" in _error_body["message"]
|
||||
assert "key not allowed to access model" in _error_body["message"]
|
||||
elif expected_type == "team_model_access_denied":
|
||||
assert "Team not allowed to access model" in _error_body["message"]
|
||||
assert "eam not allowed to access model" in _error_body["message"]
|
||||
|
|
|
@@ -27,7 +27,7 @@ from litellm.proxy._types import (
)
from litellm.proxy.utils import PrismaClient
from litellm.proxy.auth.auth_checks import (
_team_model_access_check,
can_team_access_model,
_virtual_key_soft_budget_check,
)
from litellm.proxy.utils import ProxyLogging

@@ -427,9 +427,9 @@ async def test_virtual_key_max_budget_check(
],
)
@pytest.mark.asyncio
async def test_team_model_access_check(model, team_models, expect_to_work):
async def test_can_team_access_model(model, team_models, expect_to_work):
"""
Test cases for _team_model_access_check:
Test cases for can_team_access_model:
1. Exact model match
2. all-proxy-models access
3. Wildcard (*) access

@@ -438,16 +438,16 @@ async def test_team_model_access_check(model, team_models, expect_to_work):
6. Empty model list
7. None model list
"""
try:
team_object = LiteLLM_TeamTable(
team_id="test-team",
models=team_models,
)

try:
_team_model_access_check(
result = await can_team_access_model(
model=model,
team_object=team_object,
llm_router=None,
team_model_aliases=None,
)
if not expect_to_work:
pytest.fail(

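The hunks above rename the team-level check from `_team_model_access_check` to `can_team_access_model` and make it awaitable. Assuming a local litellm install, the call pattern from the test can be reproduced outside pytest roughly as below; return and raise behaviour on denial follows the proxy's exception types, which the test handles in its try/except:

import asyncio

from litellm.proxy._types import LiteLLM_TeamTable
from litellm.proxy.auth.auth_checks import can_team_access_model


async def main() -> None:
    team = LiteLLM_TeamTable(team_id="test-team", models=["bedrock/*"])
    # Awaitable helper, same keyword arguments as in the test above.
    result = await can_team_access_model(
        model="bedrock/anthropic.claude-3",
        team_object=team,
        llm_router=None,
        team_model_aliases=None,
    )
    print(result)


asyncio.run(main())
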
@@ -64,7 +64,7 @@ def test_load_config_with_custom_role_names():


@pytest.mark.asyncio
async def test_token_single_public_key():
async def test_token_single_public_key(monkeypatch):
import jwt

jwt_handler = JWTHandler()

@@ -80,10 +80,15 @@ async def test_token_single_public_key():
]
}

monkeypatch.setenv("JWT_PUBLIC_KEY_URL", "https://example.com/public-key")

# set cache
cache = DualCache()

await cache.async_set_cache(key="litellm_jwt_auth_keys", value=backend_keys["keys"])
await cache.async_set_cache(
key="litellm_jwt_auth_keys_https://example.com/public-key",
value=backend_keys["keys"],
)

jwt_handler.user_api_key_cache = cache

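The cache writes above move from one shared entry, `litellm_jwt_auth_keys`, to an entry scoped by the configured `JWT_PUBLIC_KEY_URL`, so JWKS documents fetched from different issuer URLs no longer overwrite each other. A small sketch of how such a scoped key can be composed; the helper name is illustrative, not JWTHandler's actual method:

import os


def jwks_cache_key(public_key_url: str) -> str:
    # One cache entry per JWKS endpoint, matching the keys used in the tests above.
    return f"litellm_jwt_auth_keys_{public_key_url}"


os.environ["JWT_PUBLIC_KEY_URL"] = "https://example.com/public-key"
assert (
    jwks_cache_key(os.environ["JWT_PUBLIC_KEY_URL"])
    == "litellm_jwt_auth_keys_https://example.com/public-key"
)
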
@@ -99,7 +104,7 @@ async def test_token_single_public_key():

@pytest.mark.parametrize("audience", [None, "litellm-proxy"])
@pytest.mark.asyncio
async def test_valid_invalid_token(audience):
async def test_valid_invalid_token(audience, monkeypatch):
"""
Tests
- valid token

@@ -116,6 +121,8 @@ async def test_valid_invalid_token(audience):
if audience:
os.environ["JWT_AUDIENCE"] = audience

monkeypatch.setenv("JWT_PUBLIC_KEY_URL", "https://example.com/public-key")

# Generate a private / public key pair using RSA algorithm
key = rsa.generate_private_key(
public_exponent=65537, key_size=2048, backend=default_backend()

@@ -145,7 +152,9 @@ async def test_valid_invalid_token(audience):
# set cache
cache = DualCache()

await cache.async_set_cache(key="litellm_jwt_auth_keys", value=[public_jwk])
await cache.async_set_cache(
key="litellm_jwt_auth_keys_https://example.com/public-key", value=[public_jwk]
)

jwt_handler = JWTHandler()

@@ -294,7 +303,7 @@ def team_token_tuple():

@pytest.mark.parametrize("audience", [None, "litellm-proxy"])
@pytest.mark.asyncio
async def test_team_token_output(prisma_client, audience):
async def test_team_token_output(prisma_client, audience, monkeypatch):
import json
import uuid

@@ -316,6 +325,8 @@ async def test_team_token_output(prisma_client, audience):
if audience:
os.environ["JWT_AUDIENCE"] = audience

monkeypatch.setenv("JWT_PUBLIC_KEY_URL", "https://example.com/public-key")

# Generate a private / public key pair using RSA algorithm
key = rsa.generate_private_key(
public_exponent=65537, key_size=2048, backend=default_backend()

@@ -345,7 +356,9 @@ async def test_team_token_output(prisma_client, audience):
# set cache
cache = DualCache()

await cache.async_set_cache(key="litellm_jwt_auth_keys", value=[public_jwk])
await cache.async_set_cache(
key="litellm_jwt_auth_keys_https://example.com/public-key", value=[public_jwk]
)

jwt_handler = JWTHandler()

@@ -463,7 +476,7 @@ async def test_team_token_output(prisma_client, audience):
@pytest.mark.parametrize("user_id_upsert", [True, False])
@pytest.mark.asyncio
async def aaaatest_user_token_output(
prisma_client, audience, team_id_set, default_team_id, user_id_upsert
prisma_client, audience, team_id_set, default_team_id, user_id_upsert, monkeypatch
):
import uuid

@@ -528,10 +541,14 @@ async def aaaatest_user_token_output(

assert isinstance(public_jwk, dict)

monkeypatch.setenv("JWT_PUBLIC_KEY_URL", "https://example.com/public-key")

# set cache
cache = DualCache()

await cache.async_set_cache(key="litellm_jwt_auth_keys", value=[public_jwk])
await cache.async_set_cache(
key="litellm_jwt_auth_keys_https://example.com/public-key", value=[public_jwk]
)

jwt_handler = JWTHandler()

@@ -699,7 +716,9 @@ async def aaaatest_user_token_output(
@pytest.mark.parametrize("admin_allowed_routes", [None, ["ui_routes"]])
@pytest.mark.parametrize("audience", [None, "litellm-proxy"])
@pytest.mark.asyncio
async def test_allowed_routes_admin(prisma_client, audience, admin_allowed_routes):
async def test_allowed_routes_admin(
prisma_client, audience, admin_allowed_routes, monkeypatch
):
"""
Add a check to make sure jwt proxy admin scope can access all allowed admin routes

@@ -723,6 +742,8 @@ async def test_allowed_routes_admin(prisma_client, audience, admin_allowed_route
setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
await litellm.proxy.proxy_server.prisma_client.connect()

monkeypatch.setenv("JWT_PUBLIC_KEY_URL", "https://example.com/public-key")

os.environ.pop("JWT_AUDIENCE", None)
if audience:
os.environ["JWT_AUDIENCE"] = audience

@@ -756,7 +777,9 @@ async def test_allowed_routes_admin(prisma_client, audience, admin_allowed_route
# set cache
cache = DualCache()

await cache.async_set_cache(key="litellm_jwt_auth_keys", value=[public_jwk])
await cache.async_set_cache(
key="litellm_jwt_auth_keys_https://example.com/public-key", value=[public_jwk]
)

jwt_handler = JWTHandler()

@@ -910,7 +933,9 @@ def mock_user_object(*args, **kwargs):
"user_email, should_work", [("ishaan@berri.ai", True), ("krrish@tassle.xyz", False)]
)
@pytest.mark.asyncio
async def test_allow_access_by_email(public_jwt_key, user_email, should_work):
async def test_allow_access_by_email(
public_jwt_key, user_email, should_work, monkeypatch
):
"""
Allow anyone with an `@xyz.com` email make a request to the proxy.

@@ -925,10 +950,14 @@ async def test_allow_access_by_email(public_jwt_key, user_email, should_work):
public_jwk = public_jwt_key["public_jwk"]
private_key = public_jwt_key["private_key"]

monkeypatch.setenv("JWT_PUBLIC_KEY_URL", "https://example.com/public-key")

# set cache
cache = DualCache()

await cache.async_set_cache(key="litellm_jwt_auth_keys", value=[public_jwk])
await cache.async_set_cache(
key="litellm_jwt_auth_keys_https://example.com/public-key", value=[public_jwk]
)

jwt_handler = JWTHandler()

@@ -1074,7 +1103,7 @@ async def test_end_user_jwt_auth(monkeypatch):
]

cache.set_cache(
key="litellm_jwt_auth_keys",
key="litellm_jwt_auth_keys_https://example.com/public-key",
value=keys,
)

@@ -826,7 +826,7 @@ async def test_jwt_user_api_key_auth_builder_enforce_rbac(enforce_rbac, monkeypa
]

local_cache.set_cache(
key="litellm_jwt_auth_keys",
key="litellm_jwt_auth_keys_my-fake-url",
value=keys,
)

@@ -308,7 +308,7 @@ async def test_chat_completion():
model="gpt-4",
messages=[{"role": "user", "content": "Hello!"}],
)
assert "API Key not allowed to access model." in str(e)
assert "key not allowed to access model." in str(e)


@pytest.mark.asyncio

File diff suppressed because one or more lines are too long

@@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-524b80e1a6b8bb06.js" async=""></script><script src="/ui/_next/static/chunks/117-883150efc583d711.js" async=""></script><script src="/ui/_next/static/chunks/main-app-475d6efe4080647d.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/f41c66e22715ab00.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[92222,[\"665\",\"static/chunks/3014691f-0b72c78cfebbd712.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"441\",\"static/chunks/441-79926bf2b9d89e04.js\",\"261\",\"static/chunks/261-e48c2ac6ff0b811c.js\",\"899\",\"static/chunks/899-354f59ecde307dfa.js\",\"914\",\"static/chunks/914-e17acab83d0eadb5.js\",\"250\",\"static/chunks/250-51513f2f6dabf571.js\",\"699\",\"static/chunks/699-6b82f8e7b98ca1a3.js\",\"931\",\"static/chunks/app/page-b36633214e76cfd1.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"rCxUxULLkHhl5KoPY9DHv\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/f41c66e22715ab00.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-524b80e1a6b8bb06.js" async=""></script><script src="/ui/_next/static/chunks/117-883150efc583d711.js" async=""></script><script src="/ui/_next/static/chunks/main-app-475d6efe4080647d.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/b6d997482399c7e1.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[62177,[\"665\",\"static/chunks/3014691f-0b72c78cfebbd712.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"441\",\"static/chunks/441-79926bf2b9d89e04.js\",\"261\",\"static/chunks/261-cb27c20c4f8ec4c6.js\",\"899\",\"static/chunks/899-354f59ecde307dfa.js\",\"157\",\"static/chunks/157-cf7bc8b3ae1b80ba.js\",\"250\",\"static/chunks/250-51513f2f6dabf571.js\",\"699\",\"static/chunks/699-6b82f8e7b98ca1a3.js\",\"931\",\"static/chunks/app/page-a25b75c267486fe2.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"i92Qc9kkJSCtCgV3DDmdu\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/b6d997482399c7e1.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html>

@@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"]
3:I[92222,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","441","static/chunks/441-79926bf2b9d89e04.js","261","static/chunks/261-e48c2ac6ff0b811c.js","899","static/chunks/899-354f59ecde307dfa.js","914","static/chunks/914-e17acab83d0eadb5.js","250","static/chunks/250-51513f2f6dabf571.js","699","static/chunks/699-6b82f8e7b98ca1a3.js","931","static/chunks/app/page-b36633214e76cfd1.js"],"default",1]
3:I[62177,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","441","static/chunks/441-79926bf2b9d89e04.js","261","static/chunks/261-cb27c20c4f8ec4c6.js","899","static/chunks/899-354f59ecde307dfa.js","157","static/chunks/157-cf7bc8b3ae1b80ba.js","250","static/chunks/250-51513f2f6dabf571.js","699","static/chunks/699-6b82f8e7b98ca1a3.js","931","static/chunks/app/page-a25b75c267486fe2.js"],"default",1]
4:I[4707,[],""]
5:I[36423,[],""]
0:["rCxUxULLkHhl5KoPY9DHv",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/f41c66e22715ab00.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
0:["i92Qc9kkJSCtCgV3DDmdu",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/b6d997482399c7e1.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null
File diff suppressed because one or more lines are too long

@@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"]
3:I[52829,["441","static/chunks/441-79926bf2b9d89e04.js","261","static/chunks/261-e48c2ac6ff0b811c.js","250","static/chunks/250-51513f2f6dabf571.js","699","static/chunks/699-6b82f8e7b98ca1a3.js","418","static/chunks/app/model_hub/page-6f97b95f1023b0e9.js"],"default",1]
3:I[52829,["441","static/chunks/441-79926bf2b9d89e04.js","261","static/chunks/261-cb27c20c4f8ec4c6.js","250","static/chunks/250-51513f2f6dabf571.js","699","static/chunks/699-6b82f8e7b98ca1a3.js","418","static/chunks/app/model_hub/page-6f97b95f1023b0e9.js"],"default",1]
4:I[4707,[],""]
5:I[36423,[],""]
0:["rCxUxULLkHhl5KoPY9DHv",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/f41c66e22715ab00.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
0:["i92Qc9kkJSCtCgV3DDmdu",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/b6d997482399c7e1.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null
File diff suppressed because one or more lines are too long

@@ -2,6 +2,6 @@
3:I[12011,["665","static/chunks/3014691f-0b72c78cfebbd712.js","441","static/chunks/441-79926bf2b9d89e04.js","899","static/chunks/899-354f59ecde307dfa.js","250","static/chunks/250-51513f2f6dabf571.js","461","static/chunks/app/onboarding/page-a31bc08c35f01c0a.js"],"default",1]
4:I[4707,[],""]
5:I[36423,[],""]
0:["rCxUxULLkHhl5KoPY9DHv",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/f41c66e22715ab00.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
0:["i92Qc9kkJSCtCgV3DDmdu",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/b6d997482399c7e1.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

@@ -215,6 +215,7 @@ export default function CreateKeyPage() {
userEmail={userEmail}
setProxySettings={setProxySettings}
proxySettings={proxySettings}
accessToken={accessToken}
/>
<div className="flex flex-1 overflow-auto">
<div className="mt-8">

@@ -23,7 +23,7 @@ const ProviderSpecificFields: React.FC<ProviderSpecificFieldsProps> = ({
console.log(`type of selectedProviderEnum: ${typeof selectedProviderEnum}`);
return (
<>
{selectedProviderEnum === Providers.OpenAI && (
{selectedProviderEnum === Providers.OpenAI || selectedProviderEnum === Providers.OpenAI_Text && (
<>
<Form.Item
label="API Base"

@@ -99,7 +99,8 @@ const ProviderSpecificFields: React.FC<ProviderSpecificFieldsProps> = ({

{(selectedProviderEnum === Providers.Azure ||
selectedProviderEnum === Providers.Azure_AI_Studio ||
selectedProviderEnum === Providers.OpenAI_Compatible
selectedProviderEnum === Providers.OpenAI_Compatible ||
selectedProviderEnum === Providers.OpenAI_Text_Compatible
) && (
<Form.Item
rules={[{ required: true, message: "Required" }]}

@@ -39,6 +39,9 @@ import { InfoCircleOutlined } from '@ant-design/icons';
import { Tooltip } from 'antd';
import Createuser from "./create_user_button";
import debounce from 'lodash/debounce';
import { rolesWithWriteAccess } from '../utils/roles';



const { Option } = Select;


@@ -335,9 +338,11 @@ const CreateKey: React.FC<CreateKeyProps> = ({

return (
<div>
{userRole && rolesWithWriteAccess.includes(userRole) && (
<Button className="mx-auto" onClick={() => setIsModalVisible(true)}>
+ Create New Key
</Button>
)}
<Modal
// title="Create Key"
visible={isModalVisible}

@@ -21,6 +21,7 @@ import { KeyResponse } from "./key_team_helpers/key_list";
import { Form, Input, InputNumber, message, Select } from "antd";
import { KeyEditView } from "./key_edit_view";
import { RegenerateKeyModal } from "./regenerate_key_modal";
import { rolesWithWriteAccess } from '../utils/roles';

interface KeyInfoViewProps {
keyId: string;

@@ -128,6 +129,7 @@ export default function KeyInfoView({ keyId, onClose, keyData, accessToken, user
<Title>{keyData.key_alias || "API Key"}</Title>
<Text className="text-gray-500 font-mono">{keyData.token}</Text>
</div>
{userRole && rolesWithWriteAccess.includes(userRole) && (
<div className="flex gap-2">
<Button
icon={RefreshIcon}

@@ -146,6 +148,7 @@ export default function KeyInfoView({ keyId, onClose, keyData, accessToken, user
Delete Key
</Button>
</div>
)}
</div>

{/* Add RegenerateKeyModal */}

@@ -246,7 +249,7 @@ export default function KeyInfoView({ keyId, onClose, keyData, accessToken, user
<Card>
<div className="flex justify-between items-center mb-4">
<Title>Key Settings</Title>
{!isEditing && (
{!isEditing && userRole && rolesWithWriteAccess.includes(userRole) && (
<Button variant="light" onClick={() => setIsEditing(true)}>
Edit Settings
</Button>

@@ -21,7 +21,7 @@ import {
ExperimentOutlined,
ThunderboltOutlined,
} from '@ant-design/icons';
import { old_admin_roles, v2_admin_role_names, all_admin_roles, rolesAllowedToSeeUsage } from '../utils/roles';
import { old_admin_roles, v2_admin_role_names, all_admin_roles, rolesAllowedToSeeUsage, rolesWithWriteAccess } from '../utils/roles';

const { Sider } = Layout;


@@ -45,7 +45,7 @@ interface MenuItem {
// Note: If a menu item does not have a role, it is visible to all roles.
const menuItems: MenuItem[] = [
{ key: "1", page: "api-keys", label: "Virtual Keys", icon: <KeyOutlined /> },
{ key: "3", page: "llm-playground", label: "Test Key", icon: <PlayCircleOutlined /> },
{ key: "3", page: "llm-playground", label: "Test Key", icon: <PlayCircleOutlined />, roles: rolesWithWriteAccess },
{ key: "2", page: "models", label: "Models", icon: <BlockOutlined />, roles: all_admin_roles },
{ key: "4", page: "usage", label: "Usage", icon: <BarChartOutlined /> },
{ key: "6", page: "teams", label: "Teams", icon: <TeamOutlined /> },

Some files were not shown because too many files have changed in this diff