diff --git a/.circleci/config.yml b/.circleci/config.yml
index ecae22f872..7158087445 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -49,7 +49,7 @@ jobs:
             pip install opentelemetry-api==1.25.0
             pip install opentelemetry-sdk==1.25.0
             pip install opentelemetry-exporter-otlp==1.25.0
-            pip install openai==1.54.0
+            pip install openai==1.66.1
             pip install prisma==0.11.0
             pip install "detect_secrets==1.5.0"
             pip install "httpx==0.24.1"
@@ -168,7 +168,7 @@ jobs:
             pip install opentelemetry-api==1.25.0
             pip install opentelemetry-sdk==1.25.0
             pip install opentelemetry-exporter-otlp==1.25.0
-            pip install openai==1.54.0
+            pip install openai==1.66.1
             pip install prisma==0.11.0
             pip install "detect_secrets==1.5.0"
             pip install "httpx==0.24.1"
@@ -267,7 +267,7 @@ jobs:
             pip install opentelemetry-api==1.25.0
             pip install opentelemetry-sdk==1.25.0
             pip install opentelemetry-exporter-otlp==1.25.0
-            pip install openai==1.54.0
+            pip install openai==1.66.1
             pip install prisma==0.11.0
             pip install "detect_secrets==1.5.0"
             pip install "httpx==0.24.1"
@@ -511,7 +511,7 @@ jobs:
             pip install opentelemetry-api==1.25.0
             pip install opentelemetry-sdk==1.25.0
             pip install opentelemetry-exporter-otlp==1.25.0
-            pip install openai==1.54.0
+            pip install openai==1.66.1
             pip install prisma==0.11.0
             pip install "detect_secrets==1.5.0"
             pip install "httpx==0.24.1"
@@ -678,6 +678,48 @@ jobs:
           paths:
             - llm_translation_coverage.xml
             - llm_translation_coverage
+  llm_responses_api_testing:
+    docker:
+      - image: cimg/python:3.11
+        auth:
+          username: ${DOCKERHUB_USERNAME}
+          password: ${DOCKERHUB_PASSWORD}
+    working_directory: ~/project
+
+    steps:
+      - checkout
+      - run:
+          name: Install Dependencies
+          command: |
+            python -m pip install --upgrade pip
+            python -m pip install -r requirements.txt
+            pip install "pytest==7.3.1"
+            pip install "pytest-retry==1.6.3"
+            pip install "pytest-cov==5.0.0"
+            pip install "pytest-asyncio==0.21.1"
+            pip install "respx==0.21.1"
+      # Run pytest and generate JUnit XML report
+      - run:
+          name: Run tests
+          command: |
+            pwd
+            ls
+            python -m pytest -vv tests/llm_responses_api_testing --cov=litellm --cov-report=xml -x -s -v --junitxml=test-results/junit.xml --durations=5
+          no_output_timeout: 120m
+      - run:
+          name: Rename the coverage files
+          command: |
+            mv coverage.xml llm_responses_api_coverage.xml
+            mv .coverage llm_responses_api_coverage
+
+      # Store test results
+      - store_test_results:
+          path: test-results
+      - persist_to_workspace:
+          root: .
+          paths:
+            - llm_responses_api_coverage.xml
+            - llm_responses_api_coverage
   litellm_mapped_tests:
     docker:
       - image: cimg/python:3.11
@@ -1234,7 +1276,7 @@ jobs:
             pip install "aiodynamo==23.10.1"
             pip install "asyncio==3.4.3"
             pip install "PyGithub==1.59.1"
-            pip install "openai==1.54.0 "
+            pip install "openai==1.66.1"
       - run:
           name: Install Grype
           command: |
@@ -1309,7 +1351,7 @@ jobs:
           command: |
             pwd
             ls
-            python -m pytest -s -vv tests/*.py -x --junitxml=test-results/junit.xml --durations=5 --ignore=tests/otel_tests --ignore=tests/pass_through_tests --ignore=tests/proxy_admin_ui_tests --ignore=tests/load_tests --ignore=tests/llm_translation --ignore=tests/image_gen_tests --ignore=tests/pass_through_unit_tests
+            python -m pytest -s -vv tests/*.py -x --junitxml=test-results/junit.xml --durations=5 --ignore=tests/otel_tests --ignore=tests/pass_through_tests --ignore=tests/proxy_admin_ui_tests --ignore=tests/load_tests --ignore=tests/llm_translation --ignore=tests/llm_responses_api_testing --ignore=tests/image_gen_tests --ignore=tests/pass_through_unit_tests
           no_output_timeout: 120m
 
       # Store test results
@@ -1370,7 +1412,7 @@ jobs:
             pip install "aiodynamo==23.10.1"
             pip install "asyncio==3.4.3"
             pip install "PyGithub==1.59.1"
-            pip install "openai==1.54.0 "
+            pip install "openai==1.66.1"
             # Run pytest and generate JUnit XML report
       - run:
           name: Build Docker image
@@ -1492,7 +1534,7 @@ jobs:
             pip install "aiodynamo==23.10.1"
             pip install "asyncio==3.4.3"
             pip install "PyGithub==1.59.1"
-            pip install "openai==1.54.0 "
+            pip install "openai==1.66.1"
       - run:
           name: Build Docker image
           command: docker build -t my-app:latest -f ./docker/Dockerfile.database .
@@ -1921,7 +1963,7 @@ jobs:
             pip install "pytest-asyncio==0.21.1"
             pip install "google-cloud-aiplatform==1.43.0"
             pip install aiohttp
-            pip install "openai==1.54.0 "
+            pip install "openai==1.66.1"
             pip install "assemblyai==0.37.0"
             python -m pip install --upgrade pip
             pip install "pydantic==2.7.1"
@@ -2068,7 +2110,7 @@ jobs:
             python -m venv venv
             . venv/bin/activate
             pip install coverage
-            coverage combine llm_translation_coverage logging_coverage litellm_router_coverage local_testing_coverage litellm_assistants_api_coverage auth_ui_unit_tests_coverage langfuse_coverage caching_coverage litellm_proxy_unit_tests_coverage image_gen_coverage pass_through_unit_tests_coverage batches_coverage litellm_proxy_security_tests_coverage
+            coverage combine llm_translation_coverage llm_responses_api_coverage logging_coverage litellm_router_coverage local_testing_coverage litellm_assistants_api_coverage auth_ui_unit_tests_coverage langfuse_coverage caching_coverage litellm_proxy_unit_tests_coverage image_gen_coverage pass_through_unit_tests_coverage batches_coverage litellm_proxy_security_tests_coverage
             coverage xml
       - codecov/upload:
           file: ./coverage.xml
@@ -2197,7 +2239,7 @@ jobs:
             pip install "pytest-retry==1.6.3"
             pip install "pytest-asyncio==0.21.1"
             pip install aiohttp
-            pip install "openai==1.54.0 "
+            pip install "openai==1.66.1"
             python -m pip install --upgrade pip
             pip install "pydantic==2.7.1"
             pip install "pytest==7.3.1"
@@ -2429,6 +2471,12 @@ workflows:
               only:
                 - main
                 - /litellm_.*/
+      - llm_responses_api_testing:
+          filters:
+            branches:
+              only:
+                - main
+                - /litellm_.*/
       - litellm_mapped_tests:
           filters:
             branches:
@@ -2468,6 +2516,7 @@ workflows:
       - upload-coverage:
           requires:
             - llm_translation_testing
+            - llm_responses_api_testing
             - litellm_mapped_tests
             - batches_testing
             - litellm_utils_testing
@@ -2526,6 +2575,7 @@ workflows:
             - load_testing
             - test_bad_database_url
             - llm_translation_testing
+            - llm_responses_api_testing
             - litellm_mapped_tests
             - batches_testing
             - litellm_utils_testing
diff --git a/.circleci/requirements.txt b/.circleci/requirements.txt
index 12e83a40f2..e63fb9dd9a 100644
--- a/.circleci/requirements.txt
+++ b/.circleci/requirements.txt
@@ -1,5 +1,5 @@
 # used by CI/CD testing
-openai==1.54.0 
+openai==1.66.1
 python-dotenv
 tiktoken
 importlib_metadata
diff --git a/docs/my-website/docs/anthropic_unified.md b/docs/my-website/docs/anthropic_unified.md
index 71b9203399..cf6ba798d5 100644
--- a/docs/my-website/docs/anthropic_unified.md
+++ b/docs/my-website/docs/anthropic_unified.md
@@ -1,7 +1,7 @@
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 
-# [BETA] `/v1/messages`
+# /v1/messages [BETA] 
 
 LiteLLM provides a BETA endpoint in the spec of Anthropic's `/v1/messages` endpoint. 
 
diff --git a/docs/my-website/docs/assistants.md b/docs/my-website/docs/assistants.md
index 5e68e8dded..4032c74557 100644
--- a/docs/my-website/docs/assistants.md
+++ b/docs/my-website/docs/assistants.md
@@ -1,7 +1,7 @@
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 
-# Assistants API 
+# /assistants
 
 Covers Threads, Messages, Assistants. 
 
diff --git a/docs/my-website/docs/batches.md b/docs/my-website/docs/batches.md
index 4ac9fa61e3..4918e30d1f 100644
--- a/docs/my-website/docs/batches.md
+++ b/docs/my-website/docs/batches.md
@@ -1,7 +1,7 @@
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 
-# [BETA] Batches API
+# /batches
 
 Covers Batches, Files
 
diff --git a/docs/my-website/docs/embedding/supported_embedding.md b/docs/my-website/docs/embedding/supported_embedding.md
index d0cb59b46e..06d4107372 100644
--- a/docs/my-website/docs/embedding/supported_embedding.md
+++ b/docs/my-website/docs/embedding/supported_embedding.md
@@ -1,7 +1,7 @@
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 
-# Embeddings
+# /embeddings
 
 ## Quick Start
 ```python
diff --git a/docs/my-website/docs/files_endpoints.md b/docs/my-website/docs/files_endpoints.md
index cccb35daa9..7e20982ff4 100644
--- a/docs/my-website/docs/files_endpoints.md
+++ b/docs/my-website/docs/files_endpoints.md
@@ -2,7 +2,7 @@
 import TabItem from '@theme/TabItem';
 import Tabs from '@theme/Tabs';
 
-# Files API
+# /files
 
 Files are used to upload documents that can be used with features like Assistants, Fine-tuning, and Batch API.
 
diff --git a/docs/my-website/docs/fine_tuning.md b/docs/my-website/docs/fine_tuning.md
index fd5d99a6a1..f9a9297e06 100644
--- a/docs/my-website/docs/fine_tuning.md
+++ b/docs/my-website/docs/fine_tuning.md
@@ -1,7 +1,7 @@
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 
-# [Beta] Fine-tuning API
+# /fine_tuning
 
 
 :::info
diff --git a/docs/my-website/docs/moderation.md b/docs/my-website/docs/moderation.md
index 6dd092fb52..95fe8b2856 100644
--- a/docs/my-website/docs/moderation.md
+++ b/docs/my-website/docs/moderation.md
@@ -1,7 +1,7 @@
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 
-# Moderation
+# /moderations
 
 
 ### Usage
diff --git a/docs/my-website/docs/realtime.md b/docs/my-website/docs/realtime.md
index 28697f44b9..4611c8fdcd 100644
--- a/docs/my-website/docs/realtime.md
+++ b/docs/my-website/docs/realtime.md
@@ -1,7 +1,7 @@
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 
-# Realtime Endpoints
+# /realtime
 
 Use this to loadbalance across Azure + OpenAI. 
 
diff --git a/docs/my-website/docs/rerank.md b/docs/my-website/docs/rerank.md
index cc58c374c7..1e3cfd0fa5 100644
--- a/docs/my-website/docs/rerank.md
+++ b/docs/my-website/docs/rerank.md
@@ -1,4 +1,4 @@
-# Rerank
+# /rerank
 
 :::tip
 
diff --git a/docs/my-website/docs/response_api.md b/docs/my-website/docs/response_api.md
new file mode 100644
index 0000000000..67217776d4
--- /dev/null
+++ b/docs/my-website/docs/response_api.md
@@ -0,0 +1,117 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+# /responses
+
+LiteLLM provides a BETA endpoint in the spec of [OpenAI's `/responses` API](https://platform.openai.com/docs/api-reference/responses)
+
+| Feature | Supported | Notes |
+|---------|-----------|--------|
+| Cost Tracking | ✅ | Works with all supported models |
+| Logging | ✅ | Works across all integrations |
+| End-user Tracking | ✅ | |
+| Streaming | ✅ | |
+| Fallbacks | ✅ | Works between supported models |
+| Loadbalancing | ✅ | Works between supported models |
+| Supported LiteLLM Versions | 1.63.8+ | |
+| Supported LLM providers | `openai` | |
+
+## Usage
+
+## Create a model response
+
+<Tabs>
+<TabItem value="litellm-sdk" label="LiteLLM SDK">
+
+#### Non-streaming
+```python
+import litellm
+
+# Non-streaming response
+response = litellm.responses(
+    model="gpt-4o",
+    input="Tell me a three sentence bedtime story about a unicorn.",
+    max_output_tokens=100
+)
+
+print(response)
+```
+
+#### Streaming
+```python
+import litellm
+
+# Streaming response
+response = litellm.responses(
+    model="gpt-4o",
+    input="Tell me a three sentence bedtime story about a unicorn.",
+    stream=True
+)
+
+for event in response:
+    print(event)
+```
+
+</TabItem>
+<TabItem value="proxy" label="OpenAI SDK with LiteLLM Proxy">
+
+First, add this to your litellm proxy config.yaml:
+```yaml
+model_list:
+  - model_name: gpt-4o
+    litellm_params:
+      model: openai/gpt-4
+      api_key: os.environ/OPENAI_API_KEY
+```
+
+Start your LiteLLM proxy:
+```bash
+litellm --config /path/to/config.yaml
+
+# RUNNING on http://0.0.0.0:4000
+```
+
+Then use the OpenAI SDK pointed to your proxy:
+
+#### Non-streaming
+```python
+from openai import OpenAI
+
+# Initialize client with your proxy URL
+client = OpenAI(
+    base_url="http://localhost:4000",  # Your proxy URL
+    api_key="your-api-key"             # Your proxy API key
+)
+
+# Non-streaming response
+response = client.responses.create(
+    model="gpt-4o",
+    input="Tell me a three sentence bedtime story about a unicorn."
+)
+
+print(response)
+```
+
+#### Streaming
+```python
+from openai import OpenAI
+
+# Initialize client with your proxy URL
+client = OpenAI(
+    base_url="http://localhost:4000",  # Your proxy URL
+    api_key="your-api-key"             # Your proxy API key
+)
+
+# Streaming response
+response = client.responses.create(
+    model="gpt-4o",
+    input="Tell me a three sentence bedtime story about a unicorn.",
+    stream=True
+)
+
+for event in response:
+    print(event)
+```
+
+</TabItem>
+</Tabs>
diff --git a/docs/my-website/docs/text_completion.md b/docs/my-website/docs/text_completion.md
index 8be40dfdcd..cbf2db00a0 100644
--- a/docs/my-website/docs/text_completion.md
+++ b/docs/my-website/docs/text_completion.md
@@ -1,7 +1,7 @@
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 
-# Text Completion
+# /completions
 
 ### Usage
 <Tabs>
diff --git a/docs/my-website/package-lock.json b/docs/my-website/package-lock.json
index b5392b32b4..6c07e67d91 100644
--- a/docs/my-website/package-lock.json
+++ b/docs/my-website/package-lock.json
@@ -706,12 +706,13 @@
       }
     },
     "node_modules/@babel/helpers": {
-      "version": "7.26.0",
-      "resolved": "https://registry.npmjs.org/@babel/helpers/-/helpers-7.26.0.tgz",
-      "integrity": "sha512-tbhNuIxNcVb21pInl3ZSjksLCvgdZy9KwJ8brv993QtIVKJBBkYXz4q4ZbAv31GdnC+R90np23L5FbEBlthAEw==",
+      "version": "7.26.10",
+      "resolved": "https://registry.npmjs.org/@babel/helpers/-/helpers-7.26.10.tgz",
+      "integrity": "sha512-UPYc3SauzZ3JGgj87GgZ89JVdC5dj0AoetR5Bw6wj4niittNyFh6+eOGonYvJ1ao6B8lEa3Q3klS7ADZ53bc5g==",
+      "license": "MIT",
       "dependencies": {
-        "@babel/template": "^7.25.9",
-        "@babel/types": "^7.26.0"
+        "@babel/template": "^7.26.9",
+        "@babel/types": "^7.26.10"
       },
       "engines": {
         "node": ">=6.9.0"
@@ -796,11 +797,12 @@
       }
     },
     "node_modules/@babel/parser": {
-      "version": "7.26.3",
-      "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.26.3.tgz",
-      "integrity": "sha512-WJ/CvmY8Mea8iDXo6a7RK2wbmJITT5fN3BEkRuFlxVyNx8jOKIIhmC4fSkTcPcf8JyavbBwIe6OpiCOBXt/IcA==",
+      "version": "7.26.10",
+      "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.26.10.tgz",
+      "integrity": "sha512-6aQR2zGE/QFi8JpDLjUZEPYOs7+mhKXm86VaKFiLP35JQwQb6bwUE+XbvkH0EptsYhbNBSUGaUBLKqxH1xSgsA==",
+      "license": "MIT",
       "dependencies": {
-        "@babel/types": "^7.26.3"
+        "@babel/types": "^7.26.10"
       },
       "bin": {
         "parser": "bin/babel-parser.js"
@@ -2157,9 +2159,10 @@
       }
     },
     "node_modules/@babel/runtime-corejs3": {
-      "version": "7.26.0",
-      "resolved": "https://registry.npmjs.org/@babel/runtime-corejs3/-/runtime-corejs3-7.26.0.tgz",
-      "integrity": "sha512-YXHu5lN8kJCb1LOb9PgV6pvak43X2h4HvRApcN5SdWeaItQOzfn1hgP6jasD6KWQyJDBxrVmA9o9OivlnNJK/w==",
+      "version": "7.26.10",
+      "resolved": "https://registry.npmjs.org/@babel/runtime-corejs3/-/runtime-corejs3-7.26.10.tgz",
+      "integrity": "sha512-uITFQYO68pMEYR46AHgQoyBg7KPPJDAbGn4jUTIRgCFJIp88MIBUianVOplhZDEec07bp9zIyr4Kp0FCyQzmWg==",
+      "license": "MIT",
       "dependencies": {
         "core-js-pure": "^3.30.2",
         "regenerator-runtime": "^0.14.0"
@@ -2169,13 +2172,14 @@
       }
     },
     "node_modules/@babel/template": {
-      "version": "7.25.9",
-      "resolved": "https://registry.npmjs.org/@babel/template/-/template-7.25.9.tgz",
-      "integrity": "sha512-9DGttpmPvIxBb/2uwpVo3dqJ+O6RooAFOS+lB+xDqoE2PVCE8nfoHMdZLpfCQRLwvohzXISPZcgxt80xLfsuwg==",
+      "version": "7.26.9",
+      "resolved": "https://registry.npmjs.org/@babel/template/-/template-7.26.9.tgz",
+      "integrity": "sha512-qyRplbeIpNZhmzOysF/wFMuP9sctmh2cFzRAZOn1YapxBsE1i9bJIY586R/WBLfLcmcBlM8ROBiQURnnNy+zfA==",
+      "license": "MIT",
       "dependencies": {
-        "@babel/code-frame": "^7.25.9",
-        "@babel/parser": "^7.25.9",
-        "@babel/types": "^7.25.9"
+        "@babel/code-frame": "^7.26.2",
+        "@babel/parser": "^7.26.9",
+        "@babel/types": "^7.26.9"
       },
       "engines": {
         "node": ">=6.9.0"
@@ -2199,9 +2203,10 @@
       }
     },
     "node_modules/@babel/types": {
-      "version": "7.26.3",
-      "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.26.3.tgz",
-      "integrity": "sha512-vN5p+1kl59GVKMvTHt55NzzmYVxprfJD+ql7U9NFIfKCBkYE55LYtS+WtPlaYOyzydrKI8Nezd+aZextrd+FMA==",
+      "version": "7.26.10",
+      "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.26.10.tgz",
+      "integrity": "sha512-emqcG3vHrpxUKTrxcblR36dcrcoRDvKmnL/dCL6ZsHaShW80qxCAcNhzQZrpeM765VzEos+xOi4s+r4IXzTwdQ==",
+      "license": "MIT",
       "dependencies": {
         "@babel/helper-string-parser": "^7.25.9",
         "@babel/helper-validator-identifier": "^7.25.9"
diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js
index cf4f14b202..3bdd906c21 100644
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@@ -273,7 +273,7 @@ const sidebars = {
       items: [
         {
           type: "category",
-          label: "Chat",
+          label: "/chat/completions",
           link: {
             type: "generated-index",
             title: "Chat Completions",
@@ -286,12 +286,13 @@ const sidebars = {
             "completion/usage",
           ],
         },
+        "response_api",
         "text_completion",
         "embedding/supported_embedding",
         "anthropic_unified",
         {
           type: "category",
-          label: "Image",
+          label: "/images",
           items: [
             "image_generation",
             "image_variations",
@@ -299,7 +300,7 @@ const sidebars = {
         },
         {
           type: "category",
-          label: "Audio",
+          label: "/audio",
           "items": [
             "audio_transcription",
             "text_to_speech",
diff --git a/enterprise/enterprise_hooks/aporia_ai.py b/enterprise/enterprise_hooks/aporia_ai.py
index d258f00233..2b427bea5c 100644
--- a/enterprise/enterprise_hooks/aporia_ai.py
+++ b/enterprise/enterprise_hooks/aporia_ai.py
@@ -163,7 +163,7 @@ class AporiaGuardrail(CustomGuardrail):
 
         pass
 
-    async def async_moderation_hook(  ### 👈 KEY CHANGE ###
+    async def async_moderation_hook(
         self,
         data: dict,
         user_api_key_dict: UserAPIKeyAuth,
@@ -173,6 +173,7 @@ class AporiaGuardrail(CustomGuardrail):
             "image_generation",
             "moderation",
             "audio_transcription",
+            "responses",
         ],
     ):
         from litellm.proxy.common_utils.callback_utils import (
diff --git a/enterprise/enterprise_hooks/google_text_moderation.py b/enterprise/enterprise_hooks/google_text_moderation.py
index af5ea35987..fe26a03207 100644
--- a/enterprise/enterprise_hooks/google_text_moderation.py
+++ b/enterprise/enterprise_hooks/google_text_moderation.py
@@ -94,6 +94,7 @@ class _ENTERPRISE_GoogleTextModeration(CustomLogger):
             "image_generation",
             "moderation",
             "audio_transcription",
+            "responses",
         ],
     ):
         """
diff --git a/enterprise/enterprise_hooks/llama_guard.py b/enterprise/enterprise_hooks/llama_guard.py
index 8abbc996d3..2c53fafa5b 100644
--- a/enterprise/enterprise_hooks/llama_guard.py
+++ b/enterprise/enterprise_hooks/llama_guard.py
@@ -107,6 +107,7 @@ class _ENTERPRISE_LlamaGuard(CustomLogger):
             "image_generation",
             "moderation",
             "audio_transcription",
+            "responses",
         ],
     ):
         """
diff --git a/enterprise/enterprise_hooks/llm_guard.py b/enterprise/enterprise_hooks/llm_guard.py
index 1b639b8a08..078b8e216e 100644
--- a/enterprise/enterprise_hooks/llm_guard.py
+++ b/enterprise/enterprise_hooks/llm_guard.py
@@ -126,6 +126,7 @@ class _ENTERPRISE_LLMGuard(CustomLogger):
             "image_generation",
             "moderation",
             "audio_transcription",
+            "responses",
         ],
     ):
         """
diff --git a/enterprise/enterprise_hooks/openai_moderation.py b/enterprise/enterprise_hooks/openai_moderation.py
index 47506a00c4..1db932c853 100644
--- a/enterprise/enterprise_hooks/openai_moderation.py
+++ b/enterprise/enterprise_hooks/openai_moderation.py
@@ -31,7 +31,7 @@ class _ENTERPRISE_OpenAI_Moderation(CustomLogger):
 
     #### CALL HOOKS - proxy only ####
 
-    async def async_moderation_hook(  ### 👈 KEY CHANGE ###
+    async def async_moderation_hook(
         self,
         data: dict,
         user_api_key_dict: UserAPIKeyAuth,
@@ -41,6 +41,7 @@ class _ENTERPRISE_OpenAI_Moderation(CustomLogger):
             "image_generation",
             "moderation",
             "audio_transcription",
+            "responses",
         ],
     ):
         text = ""
diff --git a/litellm/__init__.py b/litellm/__init__.py
index fd026ffb9d..6d7a91dd5b 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -8,12 +8,14 @@ import os
 from typing import Callable, List, Optional, Dict, Union, Any, Literal, get_args
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
 from litellm.caching.caching import Cache, DualCache, RedisCache, InMemoryCache
+from litellm.caching.llm_caching_handler import LLMClientCache
 from litellm.types.llms.bedrock import COHERE_EMBEDDING_INPUT_TYPES
 from litellm.types.utils import (
     ImageObject,
     BudgetConfig,
     all_litellm_params,
     all_litellm_params as _litellm_completion_params,
+    CredentialItem,
 )  # maintain backwards compatibility for root param
 from litellm._logging import (
     set_verbose,
@@ -189,15 +191,17 @@ ssl_verify: Union[str, bool] = True
 ssl_certificate: Optional[str] = None
 disable_streaming_logging: bool = False
 disable_add_transform_inline_image_block: bool = False
-in_memory_llm_clients_cache: InMemoryCache = InMemoryCache()
+in_memory_llm_clients_cache: LLMClientCache = LLMClientCache()
 safe_memory_mode: bool = False
 enable_azure_ad_token_refresh: Optional[bool] = False
 ### DEFAULT AZURE API VERSION ###
-AZURE_DEFAULT_API_VERSION = "2024-08-01-preview"  # this is updated to the latest
+AZURE_DEFAULT_API_VERSION = "2025-02-01-preview"  # this is updated to the latest
 ### DEFAULT WATSONX API VERSION ###
 WATSONX_DEFAULT_API_VERSION = "2024-03-13"
 ### COHERE EMBEDDINGS DEFAULT TYPE ###
 COHERE_DEFAULT_EMBEDDING_INPUT_TYPE: COHERE_EMBEDDING_INPUT_TYPES = "search_document"
+### CREDENTIALS ###
+credential_list: List[CredentialItem] = []
 ### GUARDRAILS ###
 llamaguard_model_name: Optional[str] = None
 openai_moderations_model_name: Optional[str] = None
@@ -922,6 +926,7 @@ from .llms.groq.chat.transformation import GroqChatConfig
 from .llms.voyage.embedding.transformation import VoyageEmbeddingConfig
 from .llms.azure_ai.chat.transformation import AzureAIStudioConfig
 from .llms.mistral.mistral_chat_transformation import MistralConfig
+from .llms.openai.responses.transformation import OpenAIResponsesAPIConfig
 from .llms.openai.chat.o_series_transformation import (
     OpenAIOSeriesConfig as OpenAIO1Config,  # maintain backwards compatibility
     OpenAIOSeriesConfig,
@@ -1011,6 +1016,7 @@ from .batches.main import *
 from .batch_completion.main import *  # type: ignore
 from .rerank_api.main import *
 from .llms.anthropic.experimental_pass_through.messages.handler import *
+from .responses.main import *
 from .realtime_api.main import _arealtime
 from .fine_tuning.main import *
 from .files.main import *
diff --git a/litellm/assistants/main.py b/litellm/assistants/main.py
index acb37b1e6f..28f4518f15 100644
--- a/litellm/assistants/main.py
+++ b/litellm/assistants/main.py
@@ -15,6 +15,7 @@ import litellm
 from litellm.types.router import GenericLiteLLMParams
 from litellm.utils import (
     exception_type,
+    get_litellm_params,
     get_llm_provider,
     get_secret,
     supports_httpx_timeout,
@@ -86,6 +87,7 @@ def get_assistants(
     optional_params = GenericLiteLLMParams(
         api_key=api_key, api_base=api_base, api_version=api_version, **kwargs
     )
+    litellm_params_dict = get_litellm_params(**kwargs)
 
     ### TIMEOUT LOGIC ###
     timeout = optional_params.timeout or kwargs.get("request_timeout", 600) or 600
@@ -169,6 +171,7 @@ def get_assistants(
             max_retries=optional_params.max_retries,
             client=client,
             aget_assistants=aget_assistants,  # type: ignore
+            litellm_params=litellm_params_dict,
         )
     else:
         raise litellm.exceptions.BadRequestError(
@@ -270,6 +273,7 @@ def create_assistants(
     optional_params = GenericLiteLLMParams(
         api_key=api_key, api_base=api_base, api_version=api_version, **kwargs
     )
+    litellm_params_dict = get_litellm_params(**kwargs)
 
     ### TIMEOUT LOGIC ###
     timeout = optional_params.timeout or kwargs.get("request_timeout", 600) or 600
@@ -371,6 +375,7 @@ def create_assistants(
             client=client,
             async_create_assistants=async_create_assistants,
             create_assistant_data=create_assistant_data,
+            litellm_params=litellm_params_dict,
         )
     else:
         raise litellm.exceptions.BadRequestError(
@@ -445,6 +450,8 @@ def delete_assistant(
         api_key=api_key, api_base=api_base, api_version=api_version, **kwargs
     )
 
+    litellm_params_dict = get_litellm_params(**kwargs)
+
     async_delete_assistants: Optional[bool] = kwargs.pop(
         "async_delete_assistants", None
     )
@@ -544,6 +551,7 @@ def delete_assistant(
             max_retries=optional_params.max_retries,
             client=client,
             async_delete_assistants=async_delete_assistants,
+            litellm_params=litellm_params_dict,
         )
     else:
         raise litellm.exceptions.BadRequestError(
@@ -639,6 +647,7 @@ def create_thread(
     """
     acreate_thread = kwargs.get("acreate_thread", None)
     optional_params = GenericLiteLLMParams(**kwargs)
+    litellm_params_dict = get_litellm_params(**kwargs)
 
     ### TIMEOUT LOGIC ###
     timeout = optional_params.timeout or kwargs.get("request_timeout", 600) or 600
@@ -731,6 +740,7 @@ def create_thread(
             max_retries=optional_params.max_retries,
             client=client,
             acreate_thread=acreate_thread,
+            litellm_params=litellm_params_dict,
         )
     else:
         raise litellm.exceptions.BadRequestError(
@@ -795,7 +805,7 @@ def get_thread(
     """Get the thread object, given a thread_id"""
     aget_thread = kwargs.pop("aget_thread", None)
     optional_params = GenericLiteLLMParams(**kwargs)
-
+    litellm_params_dict = get_litellm_params(**kwargs)
     ### TIMEOUT LOGIC ###
     timeout = optional_params.timeout or kwargs.get("request_timeout", 600) or 600
     # set timeout for 10 minutes by default
@@ -884,6 +894,7 @@ def get_thread(
             max_retries=optional_params.max_retries,
             client=client,
             aget_thread=aget_thread,
+            litellm_params=litellm_params_dict,
         )
     else:
         raise litellm.exceptions.BadRequestError(
@@ -972,6 +983,7 @@ def add_message(
     _message_data = MessageData(
         role=role, content=content, attachments=attachments, metadata=metadata
     )
+    litellm_params_dict = get_litellm_params(**kwargs)
     optional_params = GenericLiteLLMParams(**kwargs)
 
     message_data = get_optional_params_add_message(
@@ -1068,6 +1080,7 @@ def add_message(
             max_retries=optional_params.max_retries,
             client=client,
             a_add_message=a_add_message,
+            litellm_params=litellm_params_dict,
         )
     else:
         raise litellm.exceptions.BadRequestError(
@@ -1139,6 +1152,7 @@ def get_messages(
 ) -> SyncCursorPage[OpenAIMessage]:
     aget_messages = kwargs.pop("aget_messages", None)
     optional_params = GenericLiteLLMParams(**kwargs)
+    litellm_params_dict = get_litellm_params(**kwargs)
 
     ### TIMEOUT LOGIC ###
     timeout = optional_params.timeout or kwargs.get("request_timeout", 600) or 600
@@ -1225,6 +1239,7 @@ def get_messages(
             max_retries=optional_params.max_retries,
             client=client,
             aget_messages=aget_messages,
+            litellm_params=litellm_params_dict,
         )
     else:
         raise litellm.exceptions.BadRequestError(
@@ -1337,6 +1352,7 @@ def run_thread(
     """Run a given thread + assistant."""
     arun_thread = kwargs.pop("arun_thread", None)
     optional_params = GenericLiteLLMParams(**kwargs)
+    litellm_params_dict = get_litellm_params(**kwargs)
 
     ### TIMEOUT LOGIC ###
     timeout = optional_params.timeout or kwargs.get("request_timeout", 600) or 600
@@ -1437,6 +1453,7 @@ def run_thread(
             max_retries=optional_params.max_retries,
             client=client,
             arun_thread=arun_thread,
+            litellm_params=litellm_params_dict,
         )  # type: ignore
     else:
         raise litellm.exceptions.BadRequestError(
diff --git a/litellm/batches/main.py b/litellm/batches/main.py
index 2f4800043c..1ddcafce4c 100644
--- a/litellm/batches/main.py
+++ b/litellm/batches/main.py
@@ -111,6 +111,7 @@ def create_batch(
         proxy_server_request = kwargs.get("proxy_server_request", None)
         model_info = kwargs.get("model_info", None)
         _is_async = kwargs.pop("acreate_batch", False) is True
+        litellm_params = get_litellm_params(**kwargs)
         litellm_logging_obj: LiteLLMLoggingObj = kwargs.get("litellm_logging_obj", None)
         ### TIMEOUT LOGIC ###
         timeout = optional_params.timeout or kwargs.get("request_timeout", 600) or 600
@@ -217,6 +218,7 @@ def create_batch(
                 timeout=timeout,
                 max_retries=optional_params.max_retries,
                 create_batch_data=_create_batch_request,
+                litellm_params=litellm_params,
             )
         elif custom_llm_provider == "vertex_ai":
             api_base = optional_params.api_base or ""
@@ -320,15 +322,12 @@ def retrieve_batch(
     """
     try:
         optional_params = GenericLiteLLMParams(**kwargs)
-
         litellm_logging_obj: LiteLLMLoggingObj = kwargs.get("litellm_logging_obj", None)
         ### TIMEOUT LOGIC ###
         timeout = optional_params.timeout or kwargs.get("request_timeout", 600) or 600
         litellm_params = get_litellm_params(
             custom_llm_provider=custom_llm_provider,
-            litellm_call_id=kwargs.get("litellm_call_id", None),
-            litellm_trace_id=kwargs.get("litellm_trace_id"),
-            litellm_metadata=kwargs.get("litellm_metadata"),
+            **kwargs,
         )
         litellm_logging_obj.update_environment_variables(
             model=None,
@@ -424,6 +423,7 @@ def retrieve_batch(
                 timeout=timeout,
                 max_retries=optional_params.max_retries,
                 retrieve_batch_data=_retrieve_batch_request,
+                litellm_params=litellm_params,
             )
         elif custom_llm_provider == "vertex_ai":
             api_base = optional_params.api_base or ""
@@ -526,6 +526,10 @@ def list_batches(
     try:
         # set API KEY
         optional_params = GenericLiteLLMParams(**kwargs)
+        litellm_params = get_litellm_params(
+            custom_llm_provider=custom_llm_provider,
+            **kwargs,
+        )
         api_key = (
             optional_params.api_key
             or litellm.api_key  # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there
@@ -603,6 +607,7 @@ def list_batches(
                 api_version=api_version,
                 timeout=timeout,
                 max_retries=optional_params.max_retries,
+                litellm_params=litellm_params,
             )
         else:
             raise litellm.exceptions.BadRequestError(
@@ -678,6 +683,10 @@ def cancel_batch(
     """
     try:
         optional_params = GenericLiteLLMParams(**kwargs)
+        litellm_params = get_litellm_params(
+            custom_llm_provider=custom_llm_provider,
+            **kwargs,
+        )
         ### TIMEOUT LOGIC ###
         timeout = optional_params.timeout or kwargs.get("request_timeout", 600) or 600
         # set timeout for 10 minutes by default
@@ -765,6 +774,7 @@ def cancel_batch(
                 timeout=timeout,
                 max_retries=optional_params.max_retries,
                 cancel_batch_data=_cancel_batch_request,
+                litellm_params=litellm_params,
             )
         else:
             raise litellm.exceptions.BadRequestError(
diff --git a/litellm/caching/llm_caching_handler.py b/litellm/caching/llm_caching_handler.py
new file mode 100644
index 0000000000..429634b7b1
--- /dev/null
+++ b/litellm/caching/llm_caching_handler.py
@@ -0,0 +1,40 @@
+"""
+Add the event loop to the cache key, to prevent event loop closed errors.
+"""
+
+import asyncio
+
+from .in_memory_cache import InMemoryCache
+
+
+class LLMClientCache(InMemoryCache):
+
+    def update_cache_key_with_event_loop(self, key):
+        """
+        Add the event loop to the cache key, to prevent event loop closed errors.
+        If none, use the key as is.
+        """
+        try:
+            event_loop = asyncio.get_event_loop()
+            stringified_event_loop = str(id(event_loop))
+            return f"{key}-{stringified_event_loop}"
+        except Exception:  # handle no current event loop
+            return key
+
+    def set_cache(self, key, value, **kwargs):
+        key = self.update_cache_key_with_event_loop(key)
+        return super().set_cache(key, value, **kwargs)
+
+    async def async_set_cache(self, key, value, **kwargs):
+        key = self.update_cache_key_with_event_loop(key)
+        return await super().async_set_cache(key, value, **kwargs)
+
+    def get_cache(self, key, **kwargs):
+        key = self.update_cache_key_with_event_loop(key)
+
+        return super().get_cache(key, **kwargs)
+
+    async def async_get_cache(self, key, **kwargs):
+        key = self.update_cache_key_with_event_loop(key)
+
+        return await super().async_get_cache(key, **kwargs)
diff --git a/litellm/constants.py b/litellm/constants.py
index 0288c45e40..b4551a78f5 100644
--- a/litellm/constants.py
+++ b/litellm/constants.py
@@ -18,6 +18,7 @@ SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000  # Minimum number of requests
 REPEATED_STREAMING_CHUNK_LIMIT = 100  # catch if model starts looping the same chunk while streaming. Uses high default to prevent false positives.
 #### Networking settings ####
 request_timeout: float = 6000  # time in seconds
+STREAM_SSE_DONE_STRING: str = "[DONE]"
 
 LITELLM_CHAT_PROVIDERS = [
     "openai",
diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py
index b83fe09305..58600ea14f 100644
--- a/litellm/cost_calculator.py
+++ b/litellm/cost_calculator.py
@@ -44,7 +44,12 @@ from litellm.llms.vertex_ai.cost_calculator import cost_router as google_cost_ro
 from litellm.llms.vertex_ai.image_generation.cost_calculator import (
     cost_calculator as vertex_ai_image_cost_calculator,
 )
-from litellm.types.llms.openai import HttpxBinaryResponseContent
+from litellm.responses.utils import ResponseAPILoggingUtils
+from litellm.types.llms.openai import (
+    HttpxBinaryResponseContent,
+    ResponseAPIUsage,
+    ResponsesAPIResponse,
+)
 from litellm.types.rerank import RerankBilledUnits, RerankResponse
 from litellm.types.utils import (
     CallTypesLiteral,
@@ -464,6 +469,13 @@ def _get_usage_object(
     return usage_obj
 
 
+def _is_known_usage_objects(usage_obj):
+    """Returns True if the usage obj is a known Usage type"""
+    return isinstance(usage_obj, litellm.Usage) or isinstance(
+        usage_obj, ResponseAPIUsage
+    )
+
+
 def _infer_call_type(
     call_type: Optional[CallTypesLiteral], completion_response: Any
 ) -> Optional[CallTypesLiteral]:
@@ -585,8 +597,8 @@ def completion_cost(  # noqa: PLR0915
                 )
             else:
                 usage_obj = getattr(completion_response, "usage", {})
-            if isinstance(usage_obj, BaseModel) and not isinstance(
-                usage_obj, litellm.Usage
+            if isinstance(usage_obj, BaseModel) and not _is_known_usage_objects(
+                usage_obj=usage_obj
             ):
                 setattr(
                     completion_response,
@@ -599,6 +611,14 @@ def completion_cost(  # noqa: PLR0915
                 _usage = usage_obj.model_dump()
             else:
                 _usage = usage_obj
+
+            if ResponseAPILoggingUtils._is_response_api_usage(_usage):
+                _usage = (
+                    ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
+                        _usage
+                    ).model_dump()
+                )
+
             # get input/output tokens from completion_response
             prompt_tokens = _usage.get("prompt_tokens", 0)
             completion_tokens = _usage.get("completion_tokens", 0)
@@ -797,6 +817,7 @@ def response_cost_calculator(
         TextCompletionResponse,
         HttpxBinaryResponseContent,
         RerankResponse,
+        ResponsesAPIResponse,
     ],
     model: str,
     custom_llm_provider: Optional[str],
diff --git a/litellm/files/main.py b/litellm/files/main.py
index e49066e84b..db9a11ced1 100644
--- a/litellm/files/main.py
+++ b/litellm/files/main.py
@@ -25,7 +25,7 @@ from litellm.types.llms.openai import (
     HttpxBinaryResponseContent,
 )
 from litellm.types.router import *
-from litellm.utils import supports_httpx_timeout
+from litellm.utils import get_litellm_params, supports_httpx_timeout
 
 ####### ENVIRONMENT VARIABLES ###################
 openai_files_instance = OpenAIFilesAPI()
@@ -546,6 +546,7 @@ def create_file(
     try:
         _is_async = kwargs.pop("acreate_file", False) is True
         optional_params = GenericLiteLLMParams(**kwargs)
+        litellm_params_dict = get_litellm_params(**kwargs)
 
         ### TIMEOUT LOGIC ###
         timeout = optional_params.timeout or kwargs.get("request_timeout", 600) or 600
@@ -630,6 +631,7 @@ def create_file(
                 timeout=timeout,
                 max_retries=optional_params.max_retries,
                 create_file_data=_create_file_request,
+                litellm_params=litellm_params_dict,
             )
         elif custom_llm_provider == "vertex_ai":
             api_base = optional_params.api_base or ""
diff --git a/litellm/integrations/custom_logger.py b/litellm/integrations/custom_logger.py
index 457c0537bd..e115b7496d 100644
--- a/litellm/integrations/custom_logger.py
+++ b/litellm/integrations/custom_logger.py
@@ -239,6 +239,7 @@ class CustomLogger:  # https://docs.litellm.ai/docs/observability/custom_callbac
             "image_generation",
             "moderation",
             "audio_transcription",
+            "responses",
         ],
     ) -> Any:
         pass
diff --git a/litellm/litellm_core_utils/credential_accessor.py b/litellm/litellm_core_utils/credential_accessor.py
new file mode 100644
index 0000000000..d87dcc116b
--- /dev/null
+++ b/litellm/litellm_core_utils/credential_accessor.py
@@ -0,0 +1,34 @@
+"""Utils for accessing credentials."""
+
+from typing import List
+
+import litellm
+from litellm.types.utils import CredentialItem
+
+
+class CredentialAccessor:
+    @staticmethod
+    def get_credential_values(credential_name: str) -> dict:
+        """Safe accessor for credentials."""
+        if not litellm.credential_list:
+            return {}
+        for credential in litellm.credential_list:
+            if credential.credential_name == credential_name:
+                return credential.credential_values.copy()
+        return {}
+
+    @staticmethod
+    def upsert_credentials(credentials: List[CredentialItem]):
+        """Add a credential to the list of credentials."""
+
+        credential_names = [cred.credential_name for cred in litellm.credential_list]
+
+        for credential in credentials:
+            if credential.credential_name in credential_names:
+                # Find and replace the existing credential in the list
+                for i, existing_cred in enumerate(litellm.credential_list):
+                    if existing_cred.credential_name == credential.credential_name:
+                        litellm.credential_list[i] = credential
+                        break
+            else:
+                litellm.credential_list.append(credential)
diff --git a/litellm/litellm_core_utils/get_litellm_params.py b/litellm/litellm_core_utils/get_litellm_params.py
index fcf83d17a2..4f2f43f0de 100644
--- a/litellm/litellm_core_utils/get_litellm_params.py
+++ b/litellm/litellm_core_utils/get_litellm_params.py
@@ -59,6 +59,7 @@ def get_litellm_params(
     ssl_verify: Optional[bool] = None,
     merge_reasoning_content_in_choices: Optional[bool] = None,
     api_version: Optional[str] = None,
+    max_retries: Optional[int] = None,
     **kwargs,
 ) -> dict:
     litellm_params = {
@@ -101,5 +102,13 @@ def get_litellm_params(
         "ssl_verify": ssl_verify,
         "merge_reasoning_content_in_choices": merge_reasoning_content_in_choices,
         "api_version": api_version,
+        "azure_ad_token": kwargs.get("azure_ad_token"),
+        "tenant_id": kwargs.get("tenant_id"),
+        "client_id": kwargs.get("client_id"),
+        "client_secret": kwargs.get("client_secret"),
+        "azure_username": kwargs.get("azure_username"),
+        "azure_password": kwargs.get("azure_password"),
+        "max_retries": max_retries,
+        "timeout": kwargs.get("timeout"),
     }
     return litellm_params
diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py
index a3d9a57a49..18af639918 100644
--- a/litellm/litellm_core_utils/litellm_logging.py
+++ b/litellm/litellm_core_utils/litellm_logging.py
@@ -39,11 +39,14 @@ from litellm.litellm_core_utils.redact_messages import (
     redact_message_input_output_from_custom_logger,
     redact_message_input_output_from_logging,
 )
+from litellm.responses.utils import ResponseAPILoggingUtils
 from litellm.types.llms.openai import (
     AllMessageValues,
     Batch,
     FineTuningJob,
     HttpxBinaryResponseContent,
+    ResponseCompletedEvent,
+    ResponsesAPIResponse,
 )
 from litellm.types.rerank import RerankResponse
 from litellm.types.router import SPECIAL_MODEL_INFO_PARAMS
@@ -851,6 +854,8 @@ class Logging(LiteLLMLoggingBaseClass):
             RerankResponse,
             Batch,
             FineTuningJob,
+            ResponsesAPIResponse,
+            ResponseCompletedEvent,
         ],
         cache_hit: Optional[bool] = None,
     ) -> Optional[float]:
@@ -1000,7 +1005,7 @@ class Logging(LiteLLMLoggingBaseClass):
                 standard_logging_object is None
                 and result is not None
                 and self.stream is not True
-            ):  # handle streaming separately
+            ):
                 if (
                     isinstance(result, ModelResponse)
                     or isinstance(result, ModelResponseStream)
@@ -1012,6 +1017,7 @@ class Logging(LiteLLMLoggingBaseClass):
                     or isinstance(result, RerankResponse)
                     or isinstance(result, FineTuningJob)
                     or isinstance(result, LiteLLMBatch)
+                    or isinstance(result, ResponsesAPIResponse)
                 ):
                     ## HIDDEN PARAMS ##
                     hidden_params = getattr(result, "_hidden_params", {})
@@ -1111,7 +1117,7 @@ class Logging(LiteLLMLoggingBaseClass):
 
             ## BUILD COMPLETE STREAMED RESPONSE
             complete_streaming_response: Optional[
-                Union[ModelResponse, TextCompletionResponse]
+                Union[ModelResponse, TextCompletionResponse, ResponsesAPIResponse]
             ] = None
             if "complete_streaming_response" in self.model_call_details:
                 return  # break out of this.
@@ -1633,7 +1639,7 @@ class Logging(LiteLLMLoggingBaseClass):
         if "async_complete_streaming_response" in self.model_call_details:
             return  # break out of this.
         complete_streaming_response: Optional[
-            Union[ModelResponse, TextCompletionResponse]
+            Union[ModelResponse, TextCompletionResponse, ResponsesAPIResponse]
         ] = self._get_assembled_streaming_response(
             result=result,
             start_time=start_time,
@@ -2343,16 +2349,24 @@ class Logging(LiteLLMLoggingBaseClass):
 
     def _get_assembled_streaming_response(
         self,
-        result: Union[ModelResponse, TextCompletionResponse, ModelResponseStream, Any],
+        result: Union[
+            ModelResponse,
+            TextCompletionResponse,
+            ModelResponseStream,
+            ResponseCompletedEvent,
+            Any,
+        ],
         start_time: datetime.datetime,
         end_time: datetime.datetime,
         is_async: bool,
         streaming_chunks: List[Any],
-    ) -> Optional[Union[ModelResponse, TextCompletionResponse]]:
+    ) -> Optional[Union[ModelResponse, TextCompletionResponse, ResponsesAPIResponse]]:
         if isinstance(result, ModelResponse):
             return result
         elif isinstance(result, TextCompletionResponse):
             return result
+        elif isinstance(result, ResponseCompletedEvent):
+            return result.response
         elif isinstance(result, ModelResponseStream):
             complete_streaming_response: Optional[
                 Union[ModelResponse, TextCompletionResponse]
@@ -3111,6 +3125,12 @@ class StandardLoggingPayloadSetup:
         elif isinstance(usage, Usage):
             return usage
         elif isinstance(usage, dict):
+            if ResponseAPILoggingUtils._is_response_api_usage(usage):
+                return (
+                    ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
+                        usage
+                    )
+                )
             return Usage(**usage)
 
         raise ValueError(f"usage is required, got={usage} of type {type(usage)}")
diff --git a/litellm/llms/azure/assistants.py b/litellm/llms/azure/assistants.py
index 2f67b5506f..1328eb1fea 100644
--- a/litellm/llms/azure/assistants.py
+++ b/litellm/llms/azure/assistants.py
@@ -1,4 +1,4 @@
-from typing import Coroutine, Iterable, Literal, Optional, Union
+from typing import Any, Coroutine, Dict, Iterable, Literal, Optional, Union
 
 import httpx
 from openai import AsyncAzureOpenAI, AzureOpenAI
@@ -18,10 +18,10 @@ from ...types.llms.openai import (
     SyncCursorPage,
     Thread,
 )
-from ..base import BaseLLM
+from .common_utils import BaseAzureLLM
 
 
-class AzureAssistantsAPI(BaseLLM):
+class AzureAssistantsAPI(BaseAzureLLM):
     def __init__(self) -> None:
         super().__init__()
 
@@ -34,18 +34,17 @@ class AzureAssistantsAPI(BaseLLM):
         timeout: Union[float, httpx.Timeout],
         max_retries: Optional[int],
         client: Optional[AzureOpenAI] = None,
+        litellm_params: Optional[dict] = None,
     ) -> AzureOpenAI:
-        received_args = locals()
         if client is None:
-            data = {}
-            for k, v in received_args.items():
-                if k == "self" or k == "client":
-                    pass
-                elif k == "api_base" and v is not None:
-                    data["azure_endpoint"] = v
-                elif v is not None:
-                    data[k] = v
-            azure_openai_client = AzureOpenAI(**data)  # type: ignore
+            azure_client_params = self.initialize_azure_sdk_client(
+                litellm_params=litellm_params or {},
+                api_key=api_key,
+                api_base=api_base,
+                model_name="",
+                api_version=api_version,
+            )
+            azure_openai_client = AzureOpenAI(**azure_client_params)  # type: ignore
         else:
             azure_openai_client = client
 
@@ -60,18 +59,18 @@ class AzureAssistantsAPI(BaseLLM):
         timeout: Union[float, httpx.Timeout],
         max_retries: Optional[int],
         client: Optional[AsyncAzureOpenAI] = None,
+        litellm_params: Optional[dict] = None,
     ) -> AsyncAzureOpenAI:
-        received_args = locals()
         if client is None:
-            data = {}
-            for k, v in received_args.items():
-                if k == "self" or k == "client":
-                    pass
-                elif k == "api_base" and v is not None:
-                    data["azure_endpoint"] = v
-                elif v is not None:
-                    data[k] = v
-            azure_openai_client = AsyncAzureOpenAI(**data)
+            azure_client_params = self.initialize_azure_sdk_client(
+                litellm_params=litellm_params or {},
+                api_key=api_key,
+                api_base=api_base,
+                model_name="",
+                api_version=api_version,
+            )
+
+            azure_openai_client = AsyncAzureOpenAI(**azure_client_params)
             # azure_openai_client = AsyncAzureOpenAI(**data)  # type: ignore
         else:
             azure_openai_client = client
@@ -89,6 +88,7 @@ class AzureAssistantsAPI(BaseLLM):
         timeout: Union[float, httpx.Timeout],
         max_retries: Optional[int],
         client: Optional[AsyncAzureOpenAI],
+        litellm_params: Optional[dict] = None,
     ) -> AsyncCursorPage[Assistant]:
         azure_openai_client = self.async_get_azure_client(
             api_key=api_key,
@@ -98,6 +98,7 @@ class AzureAssistantsAPI(BaseLLM):
             timeout=timeout,
             max_retries=max_retries,
             client=client,
+            litellm_params=litellm_params,
         )
 
         response = await azure_openai_client.beta.assistants.list()
@@ -146,6 +147,7 @@ class AzureAssistantsAPI(BaseLLM):
         max_retries: Optional[int],
         client=None,
         aget_assistants=None,
+        litellm_params: Optional[dict] = None,
     ):
         if aget_assistants is not None and aget_assistants is True:
             return self.async_get_assistants(
@@ -156,6 +158,7 @@ class AzureAssistantsAPI(BaseLLM):
                 timeout=timeout,
                 max_retries=max_retries,
                 client=client,
+                litellm_params=litellm_params,
             )
         azure_openai_client = self.get_azure_client(
             api_key=api_key,
@@ -165,6 +168,7 @@ class AzureAssistantsAPI(BaseLLM):
             max_retries=max_retries,
             client=client,
             api_version=api_version,
+            litellm_params=litellm_params,
         )
 
         response = azure_openai_client.beta.assistants.list()
@@ -184,6 +188,7 @@ class AzureAssistantsAPI(BaseLLM):
         timeout: Union[float, httpx.Timeout],
         max_retries: Optional[int],
         client: Optional[AsyncAzureOpenAI] = None,
+        litellm_params: Optional[dict] = None,
     ) -> OpenAIMessage:
         openai_client = self.async_get_azure_client(
             api_key=api_key,
@@ -193,6 +198,7 @@ class AzureAssistantsAPI(BaseLLM):
             timeout=timeout,
             max_retries=max_retries,
             client=client,
+            litellm_params=litellm_params,
         )
 
         thread_message: OpenAIMessage = await openai_client.beta.threads.messages.create(  # type: ignore
@@ -222,6 +228,7 @@ class AzureAssistantsAPI(BaseLLM):
         max_retries: Optional[int],
         client: Optional[AsyncAzureOpenAI],
         a_add_message: Literal[True],
+        litellm_params: Optional[dict] = None,
     ) -> Coroutine[None, None, OpenAIMessage]:
         ...
 
@@ -238,6 +245,7 @@ class AzureAssistantsAPI(BaseLLM):
         max_retries: Optional[int],
         client: Optional[AzureOpenAI],
         a_add_message: Optional[Literal[False]],
+        litellm_params: Optional[dict] = None,
     ) -> OpenAIMessage:
         ...
 
@@ -255,6 +263,7 @@ class AzureAssistantsAPI(BaseLLM):
         max_retries: Optional[int],
         client=None,
         a_add_message: Optional[bool] = None,
+        litellm_params: Optional[dict] = None,
     ):
         if a_add_message is not None and a_add_message is True:
             return self.a_add_message(
@@ -267,6 +276,7 @@ class AzureAssistantsAPI(BaseLLM):
                 timeout=timeout,
                 max_retries=max_retries,
                 client=client,
+                litellm_params=litellm_params,
             )
         openai_client = self.get_azure_client(
             api_key=api_key,
@@ -300,6 +310,7 @@ class AzureAssistantsAPI(BaseLLM):
         timeout: Union[float, httpx.Timeout],
         max_retries: Optional[int],
         client: Optional[AsyncAzureOpenAI] = None,
+        litellm_params: Optional[dict] = None,
     ) -> AsyncCursorPage[OpenAIMessage]:
         openai_client = self.async_get_azure_client(
             api_key=api_key,
@@ -309,6 +320,7 @@ class AzureAssistantsAPI(BaseLLM):
             timeout=timeout,
             max_retries=max_retries,
             client=client,
+            litellm_params=litellm_params,
         )
 
         response = await openai_client.beta.threads.messages.list(thread_id=thread_id)
@@ -329,6 +341,7 @@ class AzureAssistantsAPI(BaseLLM):
         max_retries: Optional[int],
         client: Optional[AsyncAzureOpenAI],
         aget_messages: Literal[True],
+        litellm_params: Optional[dict] = None,
     ) -> Coroutine[None, None, AsyncCursorPage[OpenAIMessage]]:
         ...
 
@@ -344,6 +357,7 @@ class AzureAssistantsAPI(BaseLLM):
         max_retries: Optional[int],
         client: Optional[AzureOpenAI],
         aget_messages: Optional[Literal[False]],
+        litellm_params: Optional[dict] = None,
     ) -> SyncCursorPage[OpenAIMessage]:
         ...
 
@@ -360,6 +374,7 @@ class AzureAssistantsAPI(BaseLLM):
         max_retries: Optional[int],
         client=None,
         aget_messages=None,
+        litellm_params: Optional[dict] = None,
     ):
         if aget_messages is not None and aget_messages is True:
             return self.async_get_messages(
@@ -371,6 +386,7 @@ class AzureAssistantsAPI(BaseLLM):
                 timeout=timeout,
                 max_retries=max_retries,
                 client=client,
+                litellm_params=litellm_params,
             )
         openai_client = self.get_azure_client(
             api_key=api_key,
@@ -380,6 +396,7 @@ class AzureAssistantsAPI(BaseLLM):
             timeout=timeout,
             max_retries=max_retries,
             client=client,
+            litellm_params=litellm_params,
         )
 
         response = openai_client.beta.threads.messages.list(thread_id=thread_id)
@@ -399,6 +416,7 @@ class AzureAssistantsAPI(BaseLLM):
         max_retries: Optional[int],
         client: Optional[AsyncAzureOpenAI],
         messages: Optional[Iterable[OpenAICreateThreadParamsMessage]],
+        litellm_params: Optional[dict] = None,
     ) -> Thread:
         openai_client = self.async_get_azure_client(
             api_key=api_key,
@@ -408,6 +426,7 @@ class AzureAssistantsAPI(BaseLLM):
             timeout=timeout,
             max_retries=max_retries,
             client=client,
+            litellm_params=litellm_params,
         )
 
         data = {}
@@ -435,6 +454,7 @@ class AzureAssistantsAPI(BaseLLM):
         messages: Optional[Iterable[OpenAICreateThreadParamsMessage]],
         client: Optional[AsyncAzureOpenAI],
         acreate_thread: Literal[True],
+        litellm_params: Optional[dict] = None,
     ) -> Coroutine[None, None, Thread]:
         ...
 
@@ -451,6 +471,7 @@ class AzureAssistantsAPI(BaseLLM):
         messages: Optional[Iterable[OpenAICreateThreadParamsMessage]],
         client: Optional[AzureOpenAI],
         acreate_thread: Optional[Literal[False]],
+        litellm_params: Optional[dict] = None,
     ) -> Thread:
         ...
 
@@ -468,6 +489,7 @@ class AzureAssistantsAPI(BaseLLM):
         messages: Optional[Iterable[OpenAICreateThreadParamsMessage]],
         client=None,
         acreate_thread=None,
+        litellm_params: Optional[dict] = None,
     ):
         """
         Here's an example:
@@ -490,6 +512,7 @@ class AzureAssistantsAPI(BaseLLM):
                 max_retries=max_retries,
                 client=client,
                 messages=messages,
+                litellm_params=litellm_params,
             )
         azure_openai_client = self.get_azure_client(
             api_key=api_key,
@@ -499,6 +522,7 @@ class AzureAssistantsAPI(BaseLLM):
             timeout=timeout,
             max_retries=max_retries,
             client=client,
+            litellm_params=litellm_params,
         )
 
         data = {}
@@ -521,6 +545,7 @@ class AzureAssistantsAPI(BaseLLM):
         timeout: Union[float, httpx.Timeout],
         max_retries: Optional[int],
         client: Optional[AsyncAzureOpenAI],
+        litellm_params: Optional[dict] = None,
     ) -> Thread:
         openai_client = self.async_get_azure_client(
             api_key=api_key,
@@ -530,6 +555,7 @@ class AzureAssistantsAPI(BaseLLM):
             timeout=timeout,
             max_retries=max_retries,
             client=client,
+            litellm_params=litellm_params,
         )
 
         response = await openai_client.beta.threads.retrieve(thread_id=thread_id)
@@ -550,6 +576,7 @@ class AzureAssistantsAPI(BaseLLM):
         max_retries: Optional[int],
         client: Optional[AsyncAzureOpenAI],
         aget_thread: Literal[True],
+        litellm_params: Optional[dict] = None,
     ) -> Coroutine[None, None, Thread]:
         ...
 
@@ -565,6 +592,7 @@ class AzureAssistantsAPI(BaseLLM):
         max_retries: Optional[int],
         client: Optional[AzureOpenAI],
         aget_thread: Optional[Literal[False]],
+        litellm_params: Optional[dict] = None,
     ) -> Thread:
         ...
 
@@ -581,6 +609,7 @@ class AzureAssistantsAPI(BaseLLM):
         max_retries: Optional[int],
         client=None,
         aget_thread=None,
+        litellm_params: Optional[dict] = None,
     ):
         if aget_thread is not None and aget_thread is True:
             return self.async_get_thread(
@@ -592,6 +621,7 @@ class AzureAssistantsAPI(BaseLLM):
                 timeout=timeout,
                 max_retries=max_retries,
                 client=client,
+                litellm_params=litellm_params,
             )
         openai_client = self.get_azure_client(
             api_key=api_key,
@@ -601,6 +631,7 @@ class AzureAssistantsAPI(BaseLLM):
             timeout=timeout,
             max_retries=max_retries,
             client=client,
+            litellm_params=litellm_params,
         )
 
         response = openai_client.beta.threads.retrieve(thread_id=thread_id)
@@ -618,7 +649,7 @@ class AzureAssistantsAPI(BaseLLM):
         assistant_id: str,
         additional_instructions: Optional[str],
         instructions: Optional[str],
-        metadata: Optional[object],
+        metadata: Optional[Dict],
         model: Optional[str],
         stream: Optional[bool],
         tools: Optional[Iterable[AssistantToolParam]],
@@ -629,6 +660,7 @@ class AzureAssistantsAPI(BaseLLM):
         timeout: Union[float, httpx.Timeout],
         max_retries: Optional[int],
         client: Optional[AsyncAzureOpenAI],
+        litellm_params: Optional[dict] = None,
     ) -> Run:
         openai_client = self.async_get_azure_client(
             api_key=api_key,
@@ -638,6 +670,7 @@ class AzureAssistantsAPI(BaseLLM):
             api_version=api_version,
             azure_ad_token=azure_ad_token,
             client=client,
+            litellm_params=litellm_params,
         )
 
         response = await openai_client.beta.threads.runs.create_and_poll(  # type: ignore
@@ -645,7 +678,7 @@ class AzureAssistantsAPI(BaseLLM):
             assistant_id=assistant_id,
             additional_instructions=additional_instructions,
             instructions=instructions,
-            metadata=metadata,
+            metadata=metadata,  # type: ignore
             model=model,
             tools=tools,
         )
@@ -659,12 +692,13 @@ class AzureAssistantsAPI(BaseLLM):
         assistant_id: str,
         additional_instructions: Optional[str],
         instructions: Optional[str],
-        metadata: Optional[object],
+        metadata: Optional[Dict],
         model: Optional[str],
         tools: Optional[Iterable[AssistantToolParam]],
         event_handler: Optional[AssistantEventHandler],
+        litellm_params: Optional[dict] = None,
     ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]:
-        data = {
+        data: Dict[str, Any] = {
             "thread_id": thread_id,
             "assistant_id": assistant_id,
             "additional_instructions": additional_instructions,
@@ -684,12 +718,13 @@ class AzureAssistantsAPI(BaseLLM):
         assistant_id: str,
         additional_instructions: Optional[str],
         instructions: Optional[str],
-        metadata: Optional[object],
+        metadata: Optional[Dict],
         model: Optional[str],
         tools: Optional[Iterable[AssistantToolParam]],
         event_handler: Optional[AssistantEventHandler],
+        litellm_params: Optional[dict] = None,
     ) -> AssistantStreamManager[AssistantEventHandler]:
-        data = {
+        data: Dict[str, Any] = {
             "thread_id": thread_id,
             "assistant_id": assistant_id,
             "additional_instructions": additional_instructions,
@@ -711,7 +746,7 @@ class AzureAssistantsAPI(BaseLLM):
         assistant_id: str,
         additional_instructions: Optional[str],
         instructions: Optional[str],
-        metadata: Optional[object],
+        metadata: Optional[Dict],
         model: Optional[str],
         stream: Optional[bool],
         tools: Optional[Iterable[AssistantToolParam]],
@@ -733,7 +768,7 @@ class AzureAssistantsAPI(BaseLLM):
         assistant_id: str,
         additional_instructions: Optional[str],
         instructions: Optional[str],
-        metadata: Optional[object],
+        metadata: Optional[Dict],
         model: Optional[str],
         stream: Optional[bool],
         tools: Optional[Iterable[AssistantToolParam]],
@@ -756,7 +791,7 @@ class AzureAssistantsAPI(BaseLLM):
         assistant_id: str,
         additional_instructions: Optional[str],
         instructions: Optional[str],
-        metadata: Optional[object],
+        metadata: Optional[Dict],
         model: Optional[str],
         stream: Optional[bool],
         tools: Optional[Iterable[AssistantToolParam]],
@@ -769,6 +804,7 @@ class AzureAssistantsAPI(BaseLLM):
         client=None,
         arun_thread=None,
         event_handler: Optional[AssistantEventHandler] = None,
+        litellm_params: Optional[dict] = None,
     ):
         if arun_thread is not None and arun_thread is True:
             if stream is not None and stream is True:
@@ -780,6 +816,7 @@ class AzureAssistantsAPI(BaseLLM):
                     timeout=timeout,
                     max_retries=max_retries,
                     client=client,
+                    litellm_params=litellm_params,
                 )
                 return self.async_run_thread_stream(
                     client=azure_client,
@@ -791,13 +828,14 @@ class AzureAssistantsAPI(BaseLLM):
                     model=model,
                     tools=tools,
                     event_handler=event_handler,
+                    litellm_params=litellm_params,
                 )
             return self.arun_thread(
                 thread_id=thread_id,
                 assistant_id=assistant_id,
                 additional_instructions=additional_instructions,
                 instructions=instructions,
-                metadata=metadata,
+                metadata=metadata,  # type: ignore
                 model=model,
                 stream=stream,
                 tools=tools,
@@ -808,6 +846,7 @@ class AzureAssistantsAPI(BaseLLM):
                 timeout=timeout,
                 max_retries=max_retries,
                 client=client,
+                litellm_params=litellm_params,
             )
         openai_client = self.get_azure_client(
             api_key=api_key,
@@ -817,6 +856,7 @@ class AzureAssistantsAPI(BaseLLM):
             timeout=timeout,
             max_retries=max_retries,
             client=client,
+            litellm_params=litellm_params,
         )
 
         if stream is not None and stream is True:
@@ -830,6 +870,7 @@ class AzureAssistantsAPI(BaseLLM):
                 model=model,
                 tools=tools,
                 event_handler=event_handler,
+                litellm_params=litellm_params,
             )
 
         response = openai_client.beta.threads.runs.create_and_poll(  # type: ignore
@@ -837,7 +878,7 @@ class AzureAssistantsAPI(BaseLLM):
             assistant_id=assistant_id,
             additional_instructions=additional_instructions,
             instructions=instructions,
-            metadata=metadata,
+            metadata=metadata,  # type: ignore
             model=model,
             tools=tools,
         )
@@ -855,6 +896,7 @@ class AzureAssistantsAPI(BaseLLM):
         max_retries: Optional[int],
         client: Optional[AsyncAzureOpenAI],
         create_assistant_data: dict,
+        litellm_params: Optional[dict] = None,
     ) -> Assistant:
         azure_openai_client = self.async_get_azure_client(
             api_key=api_key,
@@ -864,6 +906,7 @@ class AzureAssistantsAPI(BaseLLM):
             timeout=timeout,
             max_retries=max_retries,
             client=client,
+            litellm_params=litellm_params,
         )
 
         response = await azure_openai_client.beta.assistants.create(
@@ -882,6 +925,7 @@ class AzureAssistantsAPI(BaseLLM):
         create_assistant_data: dict,
         client=None,
         async_create_assistants=None,
+        litellm_params: Optional[dict] = None,
     ):
         if async_create_assistants is not None and async_create_assistants is True:
             return self.async_create_assistants(
@@ -893,6 +937,7 @@ class AzureAssistantsAPI(BaseLLM):
                 max_retries=max_retries,
                 client=client,
                 create_assistant_data=create_assistant_data,
+                litellm_params=litellm_params,
             )
         azure_openai_client = self.get_azure_client(
             api_key=api_key,
@@ -902,6 +947,7 @@ class AzureAssistantsAPI(BaseLLM):
             timeout=timeout,
             max_retries=max_retries,
             client=client,
+            litellm_params=litellm_params,
         )
 
         response = azure_openai_client.beta.assistants.create(**create_assistant_data)
@@ -918,6 +964,7 @@ class AzureAssistantsAPI(BaseLLM):
         max_retries: Optional[int],
         client: Optional[AsyncAzureOpenAI],
         assistant_id: str,
+        litellm_params: Optional[dict] = None,
     ):
         azure_openai_client = self.async_get_azure_client(
             api_key=api_key,
@@ -927,6 +974,7 @@ class AzureAssistantsAPI(BaseLLM):
             timeout=timeout,
             max_retries=max_retries,
             client=client,
+            litellm_params=litellm_params,
         )
 
         response = await azure_openai_client.beta.assistants.delete(
@@ -945,6 +993,7 @@ class AzureAssistantsAPI(BaseLLM):
         assistant_id: str,
         async_delete_assistants: Optional[bool] = None,
         client=None,
+        litellm_params: Optional[dict] = None,
     ):
         if async_delete_assistants is not None and async_delete_assistants is True:
             return self.async_delete_assistant(
@@ -956,6 +1005,7 @@ class AzureAssistantsAPI(BaseLLM):
                 max_retries=max_retries,
                 client=client,
                 assistant_id=assistant_id,
+                litellm_params=litellm_params,
             )
         azure_openai_client = self.get_azure_client(
             api_key=api_key,
@@ -965,6 +1015,7 @@ class AzureAssistantsAPI(BaseLLM):
             timeout=timeout,
             max_retries=max_retries,
             client=client,
+            litellm_params=litellm_params,
         )
 
         response = azure_openai_client.beta.assistants.delete(assistant_id=assistant_id)
diff --git a/litellm/llms/azure/audio_transcriptions.py b/litellm/llms/azure/audio_transcriptions.py
index ba9ac01400..52a3d780fb 100644
--- a/litellm/llms/azure/audio_transcriptions.py
+++ b/litellm/llms/azure/audio_transcriptions.py
@@ -13,11 +13,7 @@ from litellm.utils import (
     extract_duration_from_srt_or_vtt,
 )
 
-from .azure import (
-    AzureChatCompletion,
-    get_azure_ad_token_from_oidc,
-    select_azure_base_url_or_endpoint,
-)
+from .azure import AzureChatCompletion
 
 
 class AzureAudioTranscription(AzureChatCompletion):
@@ -36,29 +32,18 @@ class AzureAudioTranscription(AzureChatCompletion):
         client=None,
         azure_ad_token: Optional[str] = None,
         atranscription: bool = False,
+        litellm_params: Optional[dict] = None,
     ) -> TranscriptionResponse:
         data = {"model": model, "file": audio_file, **optional_params}
 
         # init AzureOpenAI Client
-        azure_client_params = {
-            "api_version": api_version,
-            "azure_endpoint": api_base,
-            "azure_deployment": model,
-            "timeout": timeout,
-        }
-
-        azure_client_params = select_azure_base_url_or_endpoint(
-            azure_client_params=azure_client_params
+        azure_client_params = self.initialize_azure_sdk_client(
+            litellm_params=litellm_params or {},
+            api_key=api_key,
+            model_name=model,
+            api_version=api_version,
+            api_base=api_base,
         )
-        if api_key is not None:
-            azure_client_params["api_key"] = api_key
-        elif azure_ad_token is not None:
-            if azure_ad_token.startswith("oidc/"):
-                azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
-            azure_client_params["azure_ad_token"] = azure_ad_token
-
-        if max_retries is not None:
-            azure_client_params["max_retries"] = max_retries
 
         if atranscription is True:
             return self.async_audio_transcriptions(  # type: ignore
@@ -128,7 +113,6 @@ class AzureAudioTranscription(AzureChatCompletion):
             if client is None:
                 async_azure_client = AsyncAzureOpenAI(
                     **azure_client_params,
-                    http_client=litellm.aclient_session,
                 )
             else:
                 async_azure_client = client
diff --git a/litellm/llms/azure/azure.py b/litellm/llms/azure/azure.py
index dcd5af7b96..e0c1079a2a 100644
--- a/litellm/llms/azure/azure.py
+++ b/litellm/llms/azure/azure.py
@@ -1,6 +1,5 @@
 import asyncio
 import json
-import os
 import time
 from typing import Any, Callable, Dict, List, Literal, Optional, Union
 
@@ -8,7 +7,6 @@ import httpx  # type: ignore
 from openai import APITimeoutError, AsyncAzureOpenAI, AzureOpenAI
 
 import litellm
-from litellm.caching.caching import DualCache
 from litellm.constants import DEFAULT_MAX_RETRIES
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
 from litellm.llms.custom_httpx.http_handler import (
@@ -25,15 +23,18 @@ from litellm.types.utils import (
 from litellm.utils import (
     CustomStreamWrapper,
     convert_to_model_response_object,
-    get_secret,
     modify_url,
 )
 
 from ...types.llms.openai import HttpxBinaryResponseContent
 from ..base import BaseLLM
-from .common_utils import AzureOpenAIError, process_azure_headers
-
-azure_ad_cache = DualCache()
+from .common_utils import (
+    AzureOpenAIError,
+    BaseAzureLLM,
+    get_azure_ad_token_from_oidc,
+    process_azure_headers,
+    select_azure_base_url_or_endpoint,
+)
 
 
 class AzureOpenAIAssistantsAPIConfig:
@@ -98,93 +99,6 @@ class AzureOpenAIAssistantsAPIConfig:
         return optional_params
 
 
-def select_azure_base_url_or_endpoint(azure_client_params: dict):
-    azure_endpoint = azure_client_params.get("azure_endpoint", None)
-    if azure_endpoint is not None:
-        # see : https://github.com/openai/openai-python/blob/3d61ed42aba652b547029095a7eb269ad4e1e957/src/openai/lib/azure.py#L192
-        if "/openai/deployments" in azure_endpoint:
-            # this is base_url, not an azure_endpoint
-            azure_client_params["base_url"] = azure_endpoint
-            azure_client_params.pop("azure_endpoint")
-
-    return azure_client_params
-
-
-def get_azure_ad_token_from_oidc(azure_ad_token: str):
-    azure_client_id = os.getenv("AZURE_CLIENT_ID", None)
-    azure_tenant_id = os.getenv("AZURE_TENANT_ID", None)
-    azure_authority_host = os.getenv(
-        "AZURE_AUTHORITY_HOST", "https://login.microsoftonline.com"
-    )
-
-    if azure_client_id is None or azure_tenant_id is None:
-        raise AzureOpenAIError(
-            status_code=422,
-            message="AZURE_CLIENT_ID and AZURE_TENANT_ID must be set",
-        )
-
-    oidc_token = get_secret(azure_ad_token)
-
-    if oidc_token is None:
-        raise AzureOpenAIError(
-            status_code=401,
-            message="OIDC token could not be retrieved from secret manager.",
-        )
-
-    azure_ad_token_cache_key = json.dumps(
-        {
-            "azure_client_id": azure_client_id,
-            "azure_tenant_id": azure_tenant_id,
-            "azure_authority_host": azure_authority_host,
-            "oidc_token": oidc_token,
-        }
-    )
-
-    azure_ad_token_access_token = azure_ad_cache.get_cache(azure_ad_token_cache_key)
-    if azure_ad_token_access_token is not None:
-        return azure_ad_token_access_token
-
-    client = litellm.module_level_client
-    req_token = client.post(
-        f"{azure_authority_host}/{azure_tenant_id}/oauth2/v2.0/token",
-        data={
-            "client_id": azure_client_id,
-            "grant_type": "client_credentials",
-            "scope": "https://cognitiveservices.azure.com/.default",
-            "client_assertion_type": "urn:ietf:params:oauth:client-assertion-type:jwt-bearer",
-            "client_assertion": oidc_token,
-        },
-    )
-
-    if req_token.status_code != 200:
-        raise AzureOpenAIError(
-            status_code=req_token.status_code,
-            message=req_token.text,
-        )
-
-    azure_ad_token_json = req_token.json()
-    azure_ad_token_access_token = azure_ad_token_json.get("access_token", None)
-    azure_ad_token_expires_in = azure_ad_token_json.get("expires_in", None)
-
-    if azure_ad_token_access_token is None:
-        raise AzureOpenAIError(
-            status_code=422, message="Azure AD Token access_token not returned"
-        )
-
-    if azure_ad_token_expires_in is None:
-        raise AzureOpenAIError(
-            status_code=422, message="Azure AD Token expires_in not returned"
-        )
-
-    azure_ad_cache.set_cache(
-        key=azure_ad_token_cache_key,
-        value=azure_ad_token_access_token,
-        ttl=azure_ad_token_expires_in,
-    )
-
-    return azure_ad_token_access_token
-
-
 def _check_dynamic_azure_params(
     azure_client_params: dict,
     azure_client: Optional[Union[AzureOpenAI, AsyncAzureOpenAI]],
@@ -206,7 +120,7 @@ def _check_dynamic_azure_params(
     return False
 
 
-class AzureChatCompletion(BaseLLM):
+class AzureChatCompletion(BaseAzureLLM, BaseLLM):
     def __init__(self) -> None:
         super().__init__()
 
@@ -238,27 +152,16 @@ class AzureChatCompletion(BaseLLM):
         timeout: Union[float, httpx.Timeout],
         client: Optional[Any],
         client_type: Literal["sync", "async"],
+        litellm_params: Optional[dict] = None,
     ):
         # init AzureOpenAI Client
-        azure_client_params: Dict[str, Any] = {
-            "api_version": api_version,
-            "azure_endpoint": api_base,
-            "azure_deployment": model,
-            "http_client": litellm.client_session,
-            "max_retries": max_retries,
-            "timeout": timeout,
-        }
-        azure_client_params = select_azure_base_url_or_endpoint(
-            azure_client_params=azure_client_params
+        azure_client_params: Dict[str, Any] = self.initialize_azure_sdk_client(
+            litellm_params=litellm_params or {},
+            api_key=api_key,
+            model_name=model,
+            api_version=api_version,
+            api_base=api_base,
         )
-        if api_key is not None:
-            azure_client_params["api_key"] = api_key
-        elif azure_ad_token is not None:
-            if azure_ad_token.startswith("oidc/"):
-                azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
-            azure_client_params["azure_ad_token"] = azure_ad_token
-        elif azure_ad_token_provider is not None:
-            azure_client_params["azure_ad_token_provider"] = azure_ad_token_provider
         if client is None:
             if client_type == "sync":
                 azure_client = AzureOpenAI(**azure_client_params)  # type: ignore
@@ -357,6 +260,13 @@ class AzureChatCompletion(BaseLLM):
                 max_retries = DEFAULT_MAX_RETRIES
             json_mode: Optional[bool] = optional_params.pop("json_mode", False)
 
+            azure_client_params = self.initialize_azure_sdk_client(
+                litellm_params=litellm_params or {},
+                api_key=api_key,
+                api_base=api_base,
+                model_name=model,
+                api_version=api_version,
+            )
             ### CHECK IF CLOUDFLARE AI GATEWAY ###
             ### if so - set the model as part of the base url
             if "gateway.ai.cloudflare.com" in api_base:
@@ -417,6 +327,7 @@ class AzureChatCompletion(BaseLLM):
                         timeout=timeout,
                         client=client,
                         max_retries=max_retries,
+                        azure_client_params=azure_client_params,
                     )
                 else:
                     return self.acompletion(
@@ -434,6 +345,7 @@ class AzureChatCompletion(BaseLLM):
                         logging_obj=logging_obj,
                         max_retries=max_retries,
                         convert_tool_call_to_json_mode=json_mode,
+                        azure_client_params=azure_client_params,
                     )
             elif "stream" in optional_params and optional_params["stream"] is True:
                 return self.streaming(
@@ -470,28 +382,6 @@ class AzureChatCompletion(BaseLLM):
                         status_code=422, message="max retries must be an int"
                     )
                 # init AzureOpenAI Client
-                azure_client_params = {
-                    "api_version": api_version,
-                    "azure_endpoint": api_base,
-                    "azure_deployment": model,
-                    "http_client": litellm.client_session,
-                    "max_retries": max_retries,
-                    "timeout": timeout,
-                }
-                azure_client_params = select_azure_base_url_or_endpoint(
-                    azure_client_params=azure_client_params
-                )
-                if api_key is not None:
-                    azure_client_params["api_key"] = api_key
-                elif azure_ad_token is not None:
-                    if azure_ad_token.startswith("oidc/"):
-                        azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
-                    azure_client_params["azure_ad_token"] = azure_ad_token
-                elif azure_ad_token_provider is not None:
-                    azure_client_params["azure_ad_token_provider"] = (
-                        azure_ad_token_provider
-                    )
-
                 if (
                     client is None
                     or not isinstance(client, AzureOpenAI)
@@ -566,30 +456,10 @@ class AzureChatCompletion(BaseLLM):
         azure_ad_token_provider: Optional[Callable] = None,
         convert_tool_call_to_json_mode: Optional[bool] = None,
         client=None,  # this is the AsyncAzureOpenAI
+        azure_client_params: dict = {},
     ):
         response = None
         try:
-            # init AzureOpenAI Client
-            azure_client_params = {
-                "api_version": api_version,
-                "azure_endpoint": api_base,
-                "azure_deployment": model,
-                "http_client": litellm.aclient_session,
-                "max_retries": max_retries,
-                "timeout": timeout,
-            }
-            azure_client_params = select_azure_base_url_or_endpoint(
-                azure_client_params=azure_client_params
-            )
-            if api_key is not None:
-                azure_client_params["api_key"] = api_key
-            elif azure_ad_token is not None:
-                if azure_ad_token.startswith("oidc/"):
-                    azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
-                azure_client_params["azure_ad_token"] = azure_ad_token
-            elif azure_ad_token_provider is not None:
-                azure_client_params["azure_ad_token_provider"] = azure_ad_token_provider
-
             # setting Azure client
             if client is None or dynamic_params:
                 azure_client = AsyncAzureOpenAI(**azure_client_params)
@@ -747,28 +617,9 @@ class AzureChatCompletion(BaseLLM):
         azure_ad_token: Optional[str] = None,
         azure_ad_token_provider: Optional[Callable] = None,
         client=None,
+        azure_client_params: dict = {},
     ):
         try:
-            # init AzureOpenAI Client
-            azure_client_params = {
-                "api_version": api_version,
-                "azure_endpoint": api_base,
-                "azure_deployment": model,
-                "http_client": litellm.aclient_session,
-                "max_retries": max_retries,
-                "timeout": timeout,
-            }
-            azure_client_params = select_azure_base_url_or_endpoint(
-                azure_client_params=azure_client_params
-            )
-            if api_key is not None:
-                azure_client_params["api_key"] = api_key
-            elif azure_ad_token is not None:
-                if azure_ad_token.startswith("oidc/"):
-                    azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
-                azure_client_params["azure_ad_token"] = azure_ad_token
-            elif azure_ad_token_provider is not None:
-                azure_client_params["azure_ad_token_provider"] = azure_ad_token_provider
             if client is None or dynamic_params:
                 azure_client = AsyncAzureOpenAI(**azure_client_params)
             else:
@@ -833,6 +684,7 @@ class AzureChatCompletion(BaseLLM):
     ):
         response = None
         try:
+
             if client is None:
                 openai_aclient = AsyncAzureOpenAI(**azure_client_params)
             else:
@@ -884,6 +736,7 @@ class AzureChatCompletion(BaseLLM):
         client=None,
         aembedding=None,
         headers: Optional[dict] = None,
+        litellm_params: Optional[dict] = None,
     ) -> EmbeddingResponse:
         if headers:
             optional_params["extra_headers"] = headers
@@ -899,29 +752,14 @@ class AzureChatCompletion(BaseLLM):
                 )
 
             # init AzureOpenAI Client
-            azure_client_params = {
-                "api_version": api_version,
-                "azure_endpoint": api_base,
-                "azure_deployment": model,
-                "max_retries": max_retries,
-                "timeout": timeout,
-            }
-            azure_client_params = select_azure_base_url_or_endpoint(
-                azure_client_params=azure_client_params
-            )
-            if aembedding:
-                azure_client_params["http_client"] = litellm.aclient_session
-            else:
-                azure_client_params["http_client"] = litellm.client_session
-            if api_key is not None:
-                azure_client_params["api_key"] = api_key
-            elif azure_ad_token is not None:
-                if azure_ad_token.startswith("oidc/"):
-                    azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
-                azure_client_params["azure_ad_token"] = azure_ad_token
-            elif azure_ad_token_provider is not None:
-                azure_client_params["azure_ad_token_provider"] = azure_ad_token_provider
 
+            azure_client_params = self.initialize_azure_sdk_client(
+                litellm_params=litellm_params or {},
+                api_key=api_key,
+                model_name=model,
+                api_version=api_version,
+                api_base=api_base,
+            )
             ## LOGGING
             logging_obj.pre_call(
                 input=input,
@@ -1281,6 +1119,7 @@ class AzureChatCompletion(BaseLLM):
         azure_ad_token_provider: Optional[Callable] = None,
         client=None,
         aimg_generation=None,
+        litellm_params: Optional[dict] = None,
     ) -> ImageResponse:
         try:
             if model and len(model) > 0:
@@ -1305,25 +1144,13 @@ class AzureChatCompletion(BaseLLM):
                 )
 
             # init AzureOpenAI Client
-            azure_client_params: Dict[str, Any] = {
-                "api_version": api_version,
-                "azure_endpoint": api_base,
-                "azure_deployment": model,
-                "max_retries": max_retries,
-                "timeout": timeout,
-            }
-            azure_client_params = select_azure_base_url_or_endpoint(
-                azure_client_params=azure_client_params
+            azure_client_params: Dict[str, Any] = self.initialize_azure_sdk_client(
+                litellm_params=litellm_params or {},
+                api_key=api_key,
+                model_name=model or "",
+                api_version=api_version,
+                api_base=api_base,
             )
-            if api_key is not None:
-                azure_client_params["api_key"] = api_key
-            elif azure_ad_token is not None:
-                if azure_ad_token.startswith("oidc/"):
-                    azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
-                azure_client_params["azure_ad_token"] = azure_ad_token
-            elif azure_ad_token_provider is not None:
-                azure_client_params["azure_ad_token_provider"] = azure_ad_token_provider
-
             if aimg_generation is True:
                 return self.aimage_generation(data=data, input=input, logging_obj=logging_obj, model_response=model_response, api_key=api_key, client=client, azure_client_params=azure_client_params, timeout=timeout, headers=headers)  # type: ignore
 
@@ -1386,6 +1213,7 @@ class AzureChatCompletion(BaseLLM):
         azure_ad_token_provider: Optional[Callable] = None,
         aspeech: Optional[bool] = None,
         client=None,
+        litellm_params: Optional[dict] = None,
     ) -> HttpxBinaryResponseContent:
 
         max_retries = optional_params.pop("max_retries", 2)
@@ -1404,6 +1232,7 @@ class AzureChatCompletion(BaseLLM):
                 max_retries=max_retries,
                 timeout=timeout,
                 client=client,
+                litellm_params=litellm_params,
             )  # type: ignore
 
         azure_client: AzureOpenAI = self._get_sync_azure_client(
@@ -1417,6 +1246,7 @@ class AzureChatCompletion(BaseLLM):
             timeout=timeout,
             client=client,
             client_type="sync",
+            litellm_params=litellm_params,
         )  # type: ignore
 
         response = azure_client.audio.speech.create(
@@ -1441,6 +1271,7 @@ class AzureChatCompletion(BaseLLM):
         max_retries: int,
         timeout: Union[float, httpx.Timeout],
         client=None,
+        litellm_params: Optional[dict] = None,
     ) -> HttpxBinaryResponseContent:
 
         azure_client: AsyncAzureOpenAI = self._get_sync_azure_client(
@@ -1454,6 +1285,7 @@ class AzureChatCompletion(BaseLLM):
             timeout=timeout,
             client=client,
             client_type="async",
+            litellm_params=litellm_params,
         )  # type: ignore
 
         azure_response = await azure_client.audio.speech.create(
diff --git a/litellm/llms/azure/batches/handler.py b/litellm/llms/azure/batches/handler.py
index d36ae648ab..1b93c526d5 100644
--- a/litellm/llms/azure/batches/handler.py
+++ b/litellm/llms/azure/batches/handler.py
@@ -6,7 +6,6 @@ from typing import Any, Coroutine, Optional, Union, cast
 
 import httpx
 
-import litellm
 from litellm.llms.azure.azure import AsyncAzureOpenAI, AzureOpenAI
 from litellm.types.llms.openai import (
     Batch,
@@ -16,8 +15,10 @@ from litellm.types.llms.openai import (
 )
 from litellm.types.utils import LiteLLMBatch
 
+from ..common_utils import BaseAzureLLM
 
-class AzureBatchesAPI:
+
+class AzureBatchesAPI(BaseAzureLLM):
     """
     Azure methods to support for batches
     - create_batch()
@@ -29,38 +30,6 @@ class AzureBatchesAPI:
     def __init__(self) -> None:
         super().__init__()
 
-    def get_azure_openai_client(
-        self,
-        api_key: Optional[str],
-        api_base: Optional[str],
-        timeout: Union[float, httpx.Timeout],
-        max_retries: Optional[int],
-        api_version: Optional[str] = None,
-        client: Optional[Union[AzureOpenAI, AsyncAzureOpenAI]] = None,
-        _is_async: bool = False,
-    ) -> Optional[Union[AzureOpenAI, AsyncAzureOpenAI]]:
-        received_args = locals()
-        openai_client: Optional[Union[AzureOpenAI, AsyncAzureOpenAI]] = None
-        if client is None:
-            data = {}
-            for k, v in received_args.items():
-                if k == "self" or k == "client" or k == "_is_async":
-                    pass
-                elif k == "api_base" and v is not None:
-                    data["azure_endpoint"] = v
-                elif v is not None:
-                    data[k] = v
-            if "api_version" not in data:
-                data["api_version"] = litellm.AZURE_DEFAULT_API_VERSION
-            if _is_async is True:
-                openai_client = AsyncAzureOpenAI(**data)
-            else:
-                openai_client = AzureOpenAI(**data)  # type: ignore
-        else:
-            openai_client = client
-
-        return openai_client
-
     async def acreate_batch(
         self,
         create_batch_data: CreateBatchRequest,
@@ -79,16 +48,16 @@ class AzureBatchesAPI:
         timeout: Union[float, httpx.Timeout],
         max_retries: Optional[int],
         client: Optional[Union[AzureOpenAI, AsyncAzureOpenAI]] = None,
+        litellm_params: Optional[dict] = None,
     ) -> Union[LiteLLMBatch, Coroutine[Any, Any, LiteLLMBatch]]:
         azure_client: Optional[Union[AzureOpenAI, AsyncAzureOpenAI]] = (
             self.get_azure_openai_client(
                 api_key=api_key,
                 api_base=api_base,
-                timeout=timeout,
                 api_version=api_version,
-                max_retries=max_retries,
                 client=client,
                 _is_async=_is_async,
+                litellm_params=litellm_params or {},
             )
         )
         if azure_client is None:
@@ -125,16 +94,16 @@ class AzureBatchesAPI:
         timeout: Union[float, httpx.Timeout],
         max_retries: Optional[int],
         client: Optional[AzureOpenAI] = None,
+        litellm_params: Optional[dict] = None,
     ):
         azure_client: Optional[Union[AzureOpenAI, AsyncAzureOpenAI]] = (
             self.get_azure_openai_client(
                 api_key=api_key,
                 api_base=api_base,
                 api_version=api_version,
-                timeout=timeout,
-                max_retries=max_retries,
                 client=client,
                 _is_async=_is_async,
+                litellm_params=litellm_params or {},
             )
         )
         if azure_client is None:
@@ -173,16 +142,16 @@ class AzureBatchesAPI:
         timeout: Union[float, httpx.Timeout],
         max_retries: Optional[int],
         client: Optional[AzureOpenAI] = None,
+        litellm_params: Optional[dict] = None,
     ):
         azure_client: Optional[Union[AzureOpenAI, AsyncAzureOpenAI]] = (
             self.get_azure_openai_client(
                 api_key=api_key,
                 api_base=api_base,
                 api_version=api_version,
-                timeout=timeout,
-                max_retries=max_retries,
                 client=client,
                 _is_async=_is_async,
+                litellm_params=litellm_params or {},
             )
         )
         if azure_client is None:
@@ -212,16 +181,16 @@ class AzureBatchesAPI:
         after: Optional[str] = None,
         limit: Optional[int] = None,
         client: Optional[AzureOpenAI] = None,
+        litellm_params: Optional[dict] = None,
     ):
         azure_client: Optional[Union[AzureOpenAI, AsyncAzureOpenAI]] = (
             self.get_azure_openai_client(
                 api_key=api_key,
                 api_base=api_base,
-                timeout=timeout,
-                max_retries=max_retries,
                 api_version=api_version,
                 client=client,
                 _is_async=_is_async,
+                litellm_params=litellm_params or {},
             )
         )
         if azure_client is None:
diff --git a/litellm/llms/azure/chat/o_series_handler.py b/litellm/llms/azure/chat/o_series_handler.py
index a2042b3e2a..2f3e9e6399 100644
--- a/litellm/llms/azure/chat/o_series_handler.py
+++ b/litellm/llms/azure/chat/o_series_handler.py
@@ -4,50 +4,69 @@ Handler file for calls to Azure OpenAI's o1/o3 family of models
 Written separately to handle faking streaming for o1 and o3 models.
 """
 
-from typing import Optional, Union
+from typing import Any, Callable, Optional, Union
 
 import httpx
-from openai import AsyncAzureOpenAI, AsyncOpenAI, AzureOpenAI, OpenAI
+
+from litellm.types.utils import ModelResponse
 
 from ...openai.openai import OpenAIChatCompletion
-from ..common_utils import get_azure_openai_client
+from ..common_utils import BaseAzureLLM
 
 
-class AzureOpenAIO1ChatCompletion(OpenAIChatCompletion):
-    def _get_openai_client(
+class AzureOpenAIO1ChatCompletion(BaseAzureLLM, OpenAIChatCompletion):
+    def completion(
         self,
-        is_async: bool,
+        model_response: ModelResponse,
+        timeout: Union[float, httpx.Timeout],
+        optional_params: dict,
+        litellm_params: dict,
+        logging_obj: Any,
+        model: Optional[str] = None,
+        messages: Optional[list] = None,
+        print_verbose: Optional[Callable] = None,
         api_key: Optional[str] = None,
         api_base: Optional[str] = None,
         api_version: Optional[str] = None,
-        timeout: Union[float, httpx.Timeout] = httpx.Timeout(None),
-        max_retries: Optional[int] = 2,
+        dynamic_params: Optional[bool] = None,
+        azure_ad_token: Optional[str] = None,
+        acompletion: bool = False,
+        logger_fn=None,
+        headers: Optional[dict] = None,
+        custom_prompt_dict: dict = {},
+        client=None,
         organization: Optional[str] = None,
-        client: Optional[
-            Union[OpenAI, AsyncOpenAI, AzureOpenAI, AsyncAzureOpenAI]
-        ] = None,
-    ) -> Optional[
-        Union[
-            OpenAI,
-            AsyncOpenAI,
-            AzureOpenAI,
-            AsyncAzureOpenAI,
-        ]
-    ]:
-
-        # Override to use Azure-specific client initialization
-        if not isinstance(client, AzureOpenAI) and not isinstance(
-            client, AsyncAzureOpenAI
-        ):
-            client = None
-
-        return get_azure_openai_client(
+        custom_llm_provider: Optional[str] = None,
+        drop_params: Optional[bool] = None,
+    ):
+        client = self.get_azure_openai_client(
+            litellm_params=litellm_params,
             api_key=api_key,
             api_base=api_base,
-            timeout=timeout,
-            max_retries=max_retries,
-            organization=organization,
             api_version=api_version,
             client=client,
-            _is_async=is_async,
+            _is_async=acompletion,
+        )
+        return super().completion(
+            model_response=model_response,
+            timeout=timeout,
+            optional_params=optional_params,
+            litellm_params=litellm_params,
+            logging_obj=logging_obj,
+            model=model,
+            messages=messages,
+            print_verbose=print_verbose,
+            api_key=api_key,
+            api_base=api_base,
+            api_version=api_version,
+            dynamic_params=dynamic_params,
+            azure_ad_token=azure_ad_token,
+            acompletion=acompletion,
+            logger_fn=logger_fn,
+            headers=headers,
+            custom_prompt_dict=custom_prompt_dict,
+            client=client,
+            organization=organization,
+            custom_llm_provider=custom_llm_provider,
+            drop_params=drop_params,
         )
diff --git a/litellm/llms/azure/common_utils.py b/litellm/llms/azure/common_utils.py
index 43f3480ed6..909fcd88a5 100644
--- a/litellm/llms/azure/common_utils.py
+++ b/litellm/llms/azure/common_utils.py
@@ -1,3 +1,5 @@
+import json
+import os
 from typing import Callable, Optional, Union
 
 import httpx
@@ -5,9 +7,15 @@ from openai import AsyncAzureOpenAI, AzureOpenAI
 
 import litellm
 from litellm._logging import verbose_logger
+from litellm.caching.caching import DualCache
 from litellm.llms.base_llm.chat.transformation import BaseLLMException
+from litellm.secret_managers.get_azure_ad_token_provider import (
+    get_azure_ad_token_provider,
+)
 from litellm.secret_managers.main import get_secret_str
 
+azure_ad_cache = DualCache()
+
 
 class AzureOpenAIError(BaseLLMException):
     def __init__(
@@ -29,39 +37,6 @@ class AzureOpenAIError(BaseLLMException):
         )
 
 
-def get_azure_openai_client(
-    api_key: Optional[str],
-    api_base: Optional[str],
-    timeout: Union[float, httpx.Timeout],
-    max_retries: Optional[int],
-    api_version: Optional[str] = None,
-    organization: Optional[str] = None,
-    client: Optional[Union[AzureOpenAI, AsyncAzureOpenAI]] = None,
-    _is_async: bool = False,
-) -> Optional[Union[AzureOpenAI, AsyncAzureOpenAI]]:
-    received_args = locals()
-    openai_client: Optional[Union[AzureOpenAI, AsyncAzureOpenAI]] = None
-    if client is None:
-        data = {}
-        for k, v in received_args.items():
-            if k == "self" or k == "client" or k == "_is_async":
-                pass
-            elif k == "api_base" and v is not None:
-                data["azure_endpoint"] = v
-            elif v is not None:
-                data[k] = v
-        if "api_version" not in data:
-            data["api_version"] = litellm.AZURE_DEFAULT_API_VERSION
-        if _is_async is True:
-            openai_client = AsyncAzureOpenAI(**data)
-        else:
-            openai_client = AzureOpenAI(**data)  # type: ignore
-    else:
-        openai_client = client
-
-    return openai_client
-
-
 def process_azure_headers(headers: Union[httpx.Headers, dict]) -> dict:
     openai_headers = {}
     if "x-ratelimit-limit-requests" in headers:
@@ -180,3 +155,199 @@ def get_azure_ad_token_from_username_password(
     verbose_logger.debug("token_provider %s", token_provider)
 
     return token_provider
+
+
+def get_azure_ad_token_from_oidc(azure_ad_token: str):
+    azure_client_id = os.getenv("AZURE_CLIENT_ID", None)
+    azure_tenant_id = os.getenv("AZURE_TENANT_ID", None)
+    azure_authority_host = os.getenv(
+        "AZURE_AUTHORITY_HOST", "https://login.microsoftonline.com"
+    )
+
+    if azure_client_id is None or azure_tenant_id is None:
+        raise AzureOpenAIError(
+            status_code=422,
+            message="AZURE_CLIENT_ID and AZURE_TENANT_ID must be set",
+        )
+
+    oidc_token = get_secret_str(azure_ad_token)
+
+    if oidc_token is None:
+        raise AzureOpenAIError(
+            status_code=401,
+            message="OIDC token could not be retrieved from secret manager.",
+        )
+
+    azure_ad_token_cache_key = json.dumps(
+        {
+            "azure_client_id": azure_client_id,
+            "azure_tenant_id": azure_tenant_id,
+            "azure_authority_host": azure_authority_host,
+            "oidc_token": oidc_token,
+        }
+    )
+
+    azure_ad_token_access_token = azure_ad_cache.get_cache(azure_ad_token_cache_key)
+    if azure_ad_token_access_token is not None:
+        return azure_ad_token_access_token
+
+    client = litellm.module_level_client
+    req_token = client.post(
+        f"{azure_authority_host}/{azure_tenant_id}/oauth2/v2.0/token",
+        data={
+            "client_id": azure_client_id,
+            "grant_type": "client_credentials",
+            "scope": "https://cognitiveservices.azure.com/.default",
+            "client_assertion_type": "urn:ietf:params:oauth:client-assertion-type:jwt-bearer",
+            "client_assertion": oidc_token,
+        },
+    )
+
+    if req_token.status_code != 200:
+        raise AzureOpenAIError(
+            status_code=req_token.status_code,
+            message=req_token.text,
+        )
+
+    azure_ad_token_json = req_token.json()
+    azure_ad_token_access_token = azure_ad_token_json.get("access_token", None)
+    azure_ad_token_expires_in = azure_ad_token_json.get("expires_in", None)
+
+    if azure_ad_token_access_token is None:
+        raise AzureOpenAIError(
+            status_code=422, message="Azure AD Token access_token not returned"
+        )
+
+    if azure_ad_token_expires_in is None:
+        raise AzureOpenAIError(
+            status_code=422, message="Azure AD Token expires_in not returned"
+        )
+
+    azure_ad_cache.set_cache(
+        key=azure_ad_token_cache_key,
+        value=azure_ad_token_access_token,
+        ttl=azure_ad_token_expires_in,
+    )
+
+    return azure_ad_token_access_token
+
+
+def select_azure_base_url_or_endpoint(azure_client_params: dict):
+    azure_endpoint = azure_client_params.get("azure_endpoint", None)
+    if azure_endpoint is not None:
+        # see : https://github.com/openai/openai-python/blob/3d61ed42aba652b547029095a7eb269ad4e1e957/src/openai/lib/azure.py#L192
+        if "/openai/deployments" in azure_endpoint:
+            # this is base_url, not an azure_endpoint
+            azure_client_params["base_url"] = azure_endpoint
+            azure_client_params.pop("azure_endpoint")
+
+    return azure_client_params
+
+
+class BaseAzureLLM:
+    def get_azure_openai_client(
+        self,
+        litellm_params: dict,
+        api_key: Optional[str],
+        api_base: Optional[str],
+        api_version: Optional[str] = None,
+        client: Optional[Union[AzureOpenAI, AsyncAzureOpenAI]] = None,
+        _is_async: bool = False,
+    ) -> Optional[Union[AzureOpenAI, AsyncAzureOpenAI]]:
+        openai_client: Optional[Union[AzureOpenAI, AsyncAzureOpenAI]] = None
+        if client is None:
+            azure_client_params = self.initialize_azure_sdk_client(
+                litellm_params=litellm_params,
+                api_key=api_key,
+                api_base=api_base,
+                model_name="",
+                api_version=api_version,
+            )
+            if _is_async is True:
+                openai_client = AsyncAzureOpenAI(**azure_client_params)
+            else:
+                openai_client = AzureOpenAI(**azure_client_params)  # type: ignore
+        else:
+            openai_client = client
+
+        return openai_client
+
+    def initialize_azure_sdk_client(
+        self,
+        litellm_params: dict,
+        api_key: Optional[str],
+        api_base: Optional[str],
+        model_name: str,
+        api_version: Optional[str],
+    ) -> dict:
+
+        azure_ad_token_provider: Optional[Callable[[], str]] = None
+        # If we have api_key, then we have higher priority
+        azure_ad_token = litellm_params.get("azure_ad_token")
+        tenant_id = litellm_params.get("tenant_id")
+        client_id = litellm_params.get("client_id")
+        client_secret = litellm_params.get("client_secret")
+        azure_username = litellm_params.get("azure_username")
+        azure_password = litellm_params.get("azure_password")
+        max_retries = litellm_params.get("max_retries")
+        timeout = litellm_params.get("timeout")
+        if not api_key and tenant_id and client_id and client_secret:
+            verbose_logger.debug("Using Azure AD Token Provider for Azure Auth")
+            azure_ad_token_provider = get_azure_ad_token_from_entrata_id(
+                tenant_id=tenant_id,
+                client_id=client_id,
+                client_secret=client_secret,
+            )
+        if azure_username and azure_password and client_id:
+            azure_ad_token_provider = get_azure_ad_token_from_username_password(
+                azure_username=azure_username,
+                azure_password=azure_password,
+                client_id=client_id,
+            )
+
+        if azure_ad_token is not None and azure_ad_token.startswith("oidc/"):
+            azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
+        elif (
+            not api_key
+            and azure_ad_token_provider is None
+            and litellm.enable_azure_ad_token_refresh is True
+        ):
+            try:
+                azure_ad_token_provider = get_azure_ad_token_provider()
+            except ValueError:
+                verbose_logger.debug("Azure AD Token Provider could not be used.")
+        if api_version is None:
+            api_version = os.getenv(
+                "AZURE_API_VERSION", litellm.AZURE_DEFAULT_API_VERSION
+            )
+
+        _api_key = api_key
+        if _api_key is not None and isinstance(_api_key, str):
+            # only show first 5 chars of api_key
+            _api_key = _api_key[:8] + "*" * 15
+        verbose_logger.debug(
+            f"Initializing Azure OpenAI Client for {model_name}, Api Base: {str(api_base)}, Api Key:{_api_key}"
+        )
+        azure_client_params = {
+            "api_key": api_key,
+            "azure_endpoint": api_base,
+            "api_version": api_version,
+            "azure_ad_token": azure_ad_token,
+            "azure_ad_token_provider": azure_ad_token_provider,
+            "http_client": litellm.client_session,
+        }
+        if max_retries is not None:
+            azure_client_params["max_retries"] = max_retries
+        if timeout is not None:
+            azure_client_params["timeout"] = timeout
+
+        if azure_ad_token_provider is not None:
+            azure_client_params["azure_ad_token_provider"] = azure_ad_token_provider
+        # this decides if we should set azure_endpoint or base_url on Azure OpenAI Client
+        # required to support GPT-4 vision enhancements, since base_url needs to be set on Azure OpenAI Client
+
+        azure_client_params = select_azure_base_url_or_endpoint(
+            azure_client_params=azure_client_params
+        )
+
+        return azure_client_params
diff --git a/litellm/llms/azure/completion/handler.py b/litellm/llms/azure/completion/handler.py
index fafa5665bb..4ec5c435da 100644
--- a/litellm/llms/azure/completion/handler.py
+++ b/litellm/llms/azure/completion/handler.py
@@ -6,9 +6,8 @@ import litellm
 from litellm.litellm_core_utils.prompt_templates.factory import prompt_factory
 from litellm.utils import CustomStreamWrapper, ModelResponse, TextCompletionResponse
 
-from ...base import BaseLLM
 from ...openai.completion.transformation import OpenAITextCompletionConfig
-from ..common_utils import AzureOpenAIError
+from ..common_utils import AzureOpenAIError, BaseAzureLLM
 
 openai_text_completion_config = OpenAITextCompletionConfig()
 
@@ -25,7 +24,7 @@ def select_azure_base_url_or_endpoint(azure_client_params: dict):
     return azure_client_params
 
 
-class AzureTextCompletion(BaseLLM):
+class AzureTextCompletion(BaseAzureLLM):
     def __init__(self) -> None:
         super().__init__()
 
@@ -60,7 +59,6 @@ class AzureTextCompletion(BaseLLM):
         headers: Optional[dict] = None,
         client=None,
     ):
-        super().completion()
         try:
             if model is None or messages is None:
                 raise AzureOpenAIError(
@@ -72,6 +70,14 @@ class AzureTextCompletion(BaseLLM):
                 messages=messages, model=model, custom_llm_provider="azure_text"
             )
 
+            azure_client_params = self.initialize_azure_sdk_client(
+                litellm_params=litellm_params or {},
+                api_key=api_key,
+                model_name=model,
+                api_version=api_version,
+                api_base=api_base,
+            )
+
             ### CHECK IF CLOUDFLARE AI GATEWAY ###
             ### if so - set the model as part of the base url
             if "gateway.ai.cloudflare.com" in api_base:
@@ -118,6 +124,7 @@ class AzureTextCompletion(BaseLLM):
                         azure_ad_token=azure_ad_token,
                         timeout=timeout,
                         client=client,
+                        azure_client_params=azure_client_params,
                     )
                 else:
                     return self.acompletion(
@@ -132,6 +139,7 @@ class AzureTextCompletion(BaseLLM):
                         client=client,
                         logging_obj=logging_obj,
                         max_retries=max_retries,
+                        azure_client_params=azure_client_params,
                     )
             elif "stream" in optional_params and optional_params["stream"] is True:
                 return self.streaming(
@@ -144,6 +152,7 @@ class AzureTextCompletion(BaseLLM):
                     azure_ad_token=azure_ad_token,
                     timeout=timeout,
                     client=client,
+                    azure_client_params=azure_client_params,
                 )
             else:
                 ## LOGGING
@@ -165,22 +174,6 @@ class AzureTextCompletion(BaseLLM):
                         status_code=422, message="max retries must be an int"
                     )
                 # init AzureOpenAI Client
-                azure_client_params = {
-                    "api_version": api_version,
-                    "azure_endpoint": api_base,
-                    "azure_deployment": model,
-                    "http_client": litellm.client_session,
-                    "max_retries": max_retries,
-                    "timeout": timeout,
-                    "azure_ad_token_provider": azure_ad_token_provider,
-                }
-                azure_client_params = select_azure_base_url_or_endpoint(
-                    azure_client_params=azure_client_params
-                )
-                if api_key is not None:
-                    azure_client_params["api_key"] = api_key
-                elif azure_ad_token is not None:
-                    azure_client_params["azure_ad_token"] = azure_ad_token
                 if client is None:
                     azure_client = AzureOpenAI(**azure_client_params)
                 else:
@@ -240,26 +233,11 @@ class AzureTextCompletion(BaseLLM):
         max_retries: int,
         azure_ad_token: Optional[str] = None,
         client=None,  # this is the AsyncAzureOpenAI
+        azure_client_params: dict = {},
     ):
         response = None
         try:
             # init AzureOpenAI Client
-            azure_client_params = {
-                "api_version": api_version,
-                "azure_endpoint": api_base,
-                "azure_deployment": model,
-                "http_client": litellm.client_session,
-                "max_retries": max_retries,
-                "timeout": timeout,
-            }
-            azure_client_params = select_azure_base_url_or_endpoint(
-                azure_client_params=azure_client_params
-            )
-            if api_key is not None:
-                azure_client_params["api_key"] = api_key
-            elif azure_ad_token is not None:
-                azure_client_params["azure_ad_token"] = azure_ad_token
-
             # setting Azure client
             if client is None:
                 azure_client = AsyncAzureOpenAI(**azure_client_params)
@@ -312,6 +290,7 @@ class AzureTextCompletion(BaseLLM):
         timeout: Any,
         azure_ad_token: Optional[str] = None,
         client=None,
+        azure_client_params: dict = {},
     ):
         max_retries = data.pop("max_retries", 2)
         if not isinstance(max_retries, int):
@@ -319,21 +298,6 @@ class AzureTextCompletion(BaseLLM):
                 status_code=422, message="max retries must be an int"
             )
         # init AzureOpenAI Client
-        azure_client_params = {
-            "api_version": api_version,
-            "azure_endpoint": api_base,
-            "azure_deployment": model,
-            "http_client": litellm.client_session,
-            "max_retries": max_retries,
-            "timeout": timeout,
-        }
-        azure_client_params = select_azure_base_url_or_endpoint(
-            azure_client_params=azure_client_params
-        )
-        if api_key is not None:
-            azure_client_params["api_key"] = api_key
-        elif azure_ad_token is not None:
-            azure_client_params["azure_ad_token"] = azure_ad_token
         if client is None:
             azure_client = AzureOpenAI(**azure_client_params)
         else:
@@ -375,24 +339,10 @@ class AzureTextCompletion(BaseLLM):
         timeout: Any,
         azure_ad_token: Optional[str] = None,
         client=None,
+        azure_client_params: dict = {},
     ):
         try:
             # init AzureOpenAI Client
-            azure_client_params = {
-                "api_version": api_version,
-                "azure_endpoint": api_base,
-                "azure_deployment": model,
-                "http_client": litellm.client_session,
-                "max_retries": data.pop("max_retries", 2),
-                "timeout": timeout,
-            }
-            azure_client_params = select_azure_base_url_or_endpoint(
-                azure_client_params=azure_client_params
-            )
-            if api_key is not None:
-                azure_client_params["api_key"] = api_key
-            elif azure_ad_token is not None:
-                azure_client_params["azure_ad_token"] = azure_ad_token
             if client is None:
                 azure_client = AsyncAzureOpenAI(**azure_client_params)
             else:
diff --git a/litellm/llms/azure/files/handler.py b/litellm/llms/azure/files/handler.py
index f442af855e..d45ac9a315 100644
--- a/litellm/llms/azure/files/handler.py
+++ b/litellm/llms/azure/files/handler.py
@@ -5,13 +5,12 @@ from openai import AsyncAzureOpenAI, AzureOpenAI
 from openai.types.file_deleted import FileDeleted
 
 from litellm._logging import verbose_logger
-from litellm.llms.base import BaseLLM
 from litellm.types.llms.openai import *
 
-from ..common_utils import get_azure_openai_client
+from ..common_utils import BaseAzureLLM
 
 
-class AzureOpenAIFilesAPI(BaseLLM):
+class AzureOpenAIFilesAPI(BaseAzureLLM):
     """
     AzureOpenAI methods to support for batches
     - create_file()
@@ -45,14 +44,15 @@ class AzureOpenAIFilesAPI(BaseLLM):
         timeout: Union[float, httpx.Timeout],
         max_retries: Optional[int],
         client: Optional[Union[AzureOpenAI, AsyncAzureOpenAI]] = None,
+        litellm_params: Optional[dict] = None,
     ) -> Union[FileObject, Coroutine[Any, Any, FileObject]]:
+
         openai_client: Optional[Union[AzureOpenAI, AsyncAzureOpenAI]] = (
-            get_azure_openai_client(
+            self.get_azure_openai_client(
+                litellm_params=litellm_params or {},
                 api_key=api_key,
                 api_base=api_base,
                 api_version=api_version,
-                timeout=timeout,
-                max_retries=max_retries,
                 client=client,
                 _is_async=_is_async,
             )
@@ -91,17 +91,16 @@ class AzureOpenAIFilesAPI(BaseLLM):
         max_retries: Optional[int],
         api_version: Optional[str] = None,
         client: Optional[Union[AzureOpenAI, AsyncAzureOpenAI]] = None,
+        litellm_params: Optional[dict] = None,
     ) -> Union[
         HttpxBinaryResponseContent, Coroutine[Any, Any, HttpxBinaryResponseContent]
     ]:
         openai_client: Optional[Union[AzureOpenAI, AsyncAzureOpenAI]] = (
-            get_azure_openai_client(
+            self.get_azure_openai_client(
+                litellm_params=litellm_params or {},
                 api_key=api_key,
                 api_base=api_base,
-                timeout=timeout,
                 api_version=api_version,
-                max_retries=max_retries,
-                organization=None,
                 client=client,
                 _is_async=_is_async,
             )
@@ -144,14 +143,13 @@ class AzureOpenAIFilesAPI(BaseLLM):
         max_retries: Optional[int],
         api_version: Optional[str] = None,
         client: Optional[Union[AzureOpenAI, AsyncAzureOpenAI]] = None,
+        litellm_params: Optional[dict] = None,
     ):
         openai_client: Optional[Union[AzureOpenAI, AsyncAzureOpenAI]] = (
-            get_azure_openai_client(
+            self.get_azure_openai_client(
+                litellm_params=litellm_params or {},
                 api_key=api_key,
                 api_base=api_base,
-                timeout=timeout,
-                max_retries=max_retries,
-                organization=None,
                 api_version=api_version,
                 client=client,
                 _is_async=_is_async,
@@ -197,14 +195,13 @@ class AzureOpenAIFilesAPI(BaseLLM):
         organization: Optional[str] = None,
         api_version: Optional[str] = None,
         client: Optional[Union[AzureOpenAI, AsyncAzureOpenAI]] = None,
+        litellm_params: Optional[dict] = None,
     ):
         openai_client: Optional[Union[AzureOpenAI, AsyncAzureOpenAI]] = (
-            get_azure_openai_client(
+            self.get_azure_openai_client(
+                litellm_params=litellm_params or {},
                 api_key=api_key,
                 api_base=api_base,
-                timeout=timeout,
-                max_retries=max_retries,
-                organization=organization,
                 api_version=api_version,
                 client=client,
                 _is_async=_is_async,
@@ -252,14 +249,13 @@ class AzureOpenAIFilesAPI(BaseLLM):
         purpose: Optional[str] = None,
         api_version: Optional[str] = None,
         client: Optional[Union[AzureOpenAI, AsyncAzureOpenAI]] = None,
+        litellm_params: Optional[dict] = None,
     ):
         openai_client: Optional[Union[AzureOpenAI, AsyncAzureOpenAI]] = (
-            get_azure_openai_client(
+            self.get_azure_openai_client(
+                litellm_params=litellm_params or {},
                 api_key=api_key,
                 api_base=api_base,
-                timeout=timeout,
-                max_retries=max_retries,
-                organization=None,  # openai param
                 api_version=api_version,
                 client=client,
                 _is_async=_is_async,
diff --git a/litellm/llms/azure/fine_tuning/handler.py b/litellm/llms/azure/fine_tuning/handler.py
index c34b181eff..3d7cc336fb 100644
--- a/litellm/llms/azure/fine_tuning/handler.py
+++ b/litellm/llms/azure/fine_tuning/handler.py
@@ -3,11 +3,11 @@ from typing import Optional, Union
 import httpx
 from openai import AsyncAzureOpenAI, AsyncOpenAI, AzureOpenAI, OpenAI
 
-from litellm.llms.azure.files.handler import get_azure_openai_client
+from litellm.llms.azure.common_utils import BaseAzureLLM
 from litellm.llms.openai.fine_tuning.handler import OpenAIFineTuningAPI
 
 
-class AzureOpenAIFineTuningAPI(OpenAIFineTuningAPI):
+class AzureOpenAIFineTuningAPI(OpenAIFineTuningAPI, BaseAzureLLM):
     """
     AzureOpenAI methods to support fine tuning, inherits from OpenAIFineTuningAPI.
     """
@@ -24,6 +24,7 @@ class AzureOpenAIFineTuningAPI(OpenAIFineTuningAPI):
         ] = None,
         _is_async: bool = False,
         api_version: Optional[str] = None,
+        litellm_params: Optional[dict] = None,
     ) -> Optional[
         Union[
             OpenAI,
@@ -36,12 +37,10 @@ class AzureOpenAIFineTuningAPI(OpenAIFineTuningAPI):
         if isinstance(client, OpenAI) or isinstance(client, AsyncOpenAI):
             client = None
 
-        return get_azure_openai_client(
+        return self.get_azure_openai_client(
+            litellm_params=litellm_params or {},
             api_key=api_key,
             api_base=api_base,
-            timeout=timeout,
-            max_retries=max_retries,
-            organization=organization,
             api_version=api_version,
             client=client,
             _is_async=_is_async,
diff --git a/litellm/llms/base_llm/responses/transformation.py b/litellm/llms/base_llm/responses/transformation.py
new file mode 100644
index 0000000000..c41d63842b
--- /dev/null
+++ b/litellm/llms/base_llm/responses/transformation.py
@@ -0,0 +1,133 @@
+import types
+from abc import ABC, abstractmethod
+from typing import TYPE_CHECKING, Any, Dict, Optional, Union
+
+import httpx
+
+from litellm.types.llms.openai import (
+    ResponseInputParam,
+    ResponsesAPIOptionalRequestParams,
+    ResponsesAPIRequestParams,
+    ResponsesAPIResponse,
+    ResponsesAPIStreamingResponse,
+)
+from litellm.types.router import GenericLiteLLMParams
+
+if TYPE_CHECKING:
+    from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj
+
+    from ..chat.transformation import BaseLLMException as _BaseLLMException
+
+    LiteLLMLoggingObj = _LiteLLMLoggingObj
+    BaseLLMException = _BaseLLMException
+else:
+    LiteLLMLoggingObj = Any
+    BaseLLMException = Any
+
+
+class BaseResponsesAPIConfig(ABC):
+    def __init__(self):
+        pass
+
+    @classmethod
+    def get_config(cls):
+        return {
+            k: v
+            for k, v in cls.__dict__.items()
+            if not k.startswith("__")
+            and not k.startswith("_abc")
+            and not isinstance(
+                v,
+                (
+                    types.FunctionType,
+                    types.BuiltinFunctionType,
+                    classmethod,
+                    staticmethod,
+                ),
+            )
+            and v is not None
+        }
+
+    @abstractmethod
+    def get_supported_openai_params(self, model: str) -> list:
+        pass
+
+    @abstractmethod
+    def map_openai_params(
+        self,
+        response_api_optional_params: ResponsesAPIOptionalRequestParams,
+        model: str,
+        drop_params: bool,
+    ) -> Dict:
+
+        pass
+
+    @abstractmethod
+    def validate_environment(
+        self,
+        headers: dict,
+        model: str,
+        api_key: Optional[str] = None,
+    ) -> dict:
+        return {}
+
+    @abstractmethod
+    def get_complete_url(
+        self,
+        api_base: Optional[str],
+        model: str,
+        stream: Optional[bool] = None,
+    ) -> str:
+        """
+        OPTIONAL
+
+        Get the complete url for the request
+
+        Some providers need `model` in `api_base`
+        """
+        if api_base is None:
+            raise ValueError("api_base is required")
+        return api_base
+
+    @abstractmethod
+    def transform_responses_api_request(
+        self,
+        model: str,
+        input: Union[str, ResponseInputParam],
+        response_api_optional_request_params: Dict,
+        litellm_params: GenericLiteLLMParams,
+        headers: dict,
+    ) -> ResponsesAPIRequestParams:
+        pass
+
+    @abstractmethod
+    def transform_response_api_response(
+        self,
+        model: str,
+        raw_response: httpx.Response,
+        logging_obj: LiteLLMLoggingObj,
+    ) -> ResponsesAPIResponse:
+        pass
+
+    @abstractmethod
+    def transform_streaming_response(
+        self,
+        model: str,
+        parsed_chunk: dict,
+        logging_obj: LiteLLMLoggingObj,
+    ) -> ResponsesAPIStreamingResponse:
+        """
+        Transform a parsed streaming response chunk into a ResponsesAPIStreamingResponse
+        """
+        pass
+
+    def get_error_class(
+        self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
+    ) -> BaseLLMException:
+        from ..chat.transformation import BaseLLMException
+
+        raise BaseLLMException(
+            status_code=status_code,
+            message=error_message,
+            headers=headers,
+        )
diff --git a/litellm/llms/custom_httpx/llm_http_handler.py b/litellm/llms/custom_httpx/llm_http_handler.py
index df8b18b81f..01fe36acda 100644
--- a/litellm/llms/custom_httpx/llm_http_handler.py
+++ b/litellm/llms/custom_httpx/llm_http_handler.py
@@ -1,6 +1,6 @@
 import io
 import json
-from typing import TYPE_CHECKING, Any, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any, Coroutine, Dict, Optional, Tuple, Union
 
 import httpx  # type: ignore
 
@@ -11,13 +11,21 @@ import litellm.types.utils
 from litellm.llms.base_llm.chat.transformation import BaseConfig
 from litellm.llms.base_llm.embedding.transformation import BaseEmbeddingConfig
 from litellm.llms.base_llm.rerank.transformation import BaseRerankConfig
+from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
 from litellm.llms.custom_httpx.http_handler import (
     AsyncHTTPHandler,
     HTTPHandler,
     _get_httpx_client,
     get_async_httpx_client,
 )
+from litellm.responses.streaming_iterator import (
+    BaseResponsesAPIStreamingIterator,
+    ResponsesAPIStreamingIterator,
+    SyncResponsesAPIStreamingIterator,
+)
+from litellm.types.llms.openai import ResponseInputParam, ResponsesAPIResponse
 from litellm.types.rerank import OptionalRerankParams, RerankResponse
+from litellm.types.router import GenericLiteLLMParams
 from litellm.types.utils import EmbeddingResponse, FileTypes, TranscriptionResponse
 from litellm.utils import CustomStreamWrapper, ModelResponse, ProviderConfigManager
 
@@ -956,8 +964,235 @@ class BaseLLMHTTPHandler:
             return returned_response
         return model_response
 
+    def response_api_handler(
+        self,
+        model: str,
+        input: Union[str, ResponseInputParam],
+        responses_api_provider_config: BaseResponsesAPIConfig,
+        response_api_optional_request_params: Dict,
+        custom_llm_provider: str,
+        litellm_params: GenericLiteLLMParams,
+        logging_obj: LiteLLMLoggingObj,
+        extra_headers: Optional[Dict[str, Any]] = None,
+        extra_body: Optional[Dict[str, Any]] = None,
+        timeout: Optional[Union[float, httpx.Timeout]] = None,
+        client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
+        _is_async: bool = False,
+    ) -> Union[
+        ResponsesAPIResponse,
+        BaseResponsesAPIStreamingIterator,
+        Coroutine[
+            Any, Any, Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]
+        ],
+    ]:
+        """
+        Handles responses API requests.
+        When _is_async=True, returns a coroutine instead of making the call directly.
+        """
+        if _is_async:
+            # Return the async coroutine if called with _is_async=True
+            return self.async_response_api_handler(
+                model=model,
+                input=input,
+                responses_api_provider_config=responses_api_provider_config,
+                response_api_optional_request_params=response_api_optional_request_params,
+                custom_llm_provider=custom_llm_provider,
+                litellm_params=litellm_params,
+                logging_obj=logging_obj,
+                extra_headers=extra_headers,
+                extra_body=extra_body,
+                timeout=timeout,
+                client=client if isinstance(client, AsyncHTTPHandler) else None,
+            )
+
+        if client is None or not isinstance(client, HTTPHandler):
+            sync_httpx_client = _get_httpx_client(
+                params={"ssl_verify": litellm_params.get("ssl_verify", None)}
+            )
+        else:
+            sync_httpx_client = client
+
+        headers = responses_api_provider_config.validate_environment(
+            api_key=litellm_params.api_key,
+            headers=response_api_optional_request_params.get("extra_headers", {}) or {},
+            model=model,
+        )
+
+        if extra_headers:
+            headers.update(extra_headers)
+
+        api_base = responses_api_provider_config.get_complete_url(
+            api_base=litellm_params.api_base,
+            model=model,
+        )
+
+        data = responses_api_provider_config.transform_responses_api_request(
+            model=model,
+            input=input,
+            response_api_optional_request_params=response_api_optional_request_params,
+            litellm_params=litellm_params,
+            headers=headers,
+        )
+
+        ## LOGGING
+        logging_obj.pre_call(
+            input=input,
+            api_key="",
+            additional_args={
+                "complete_input_dict": data,
+                "api_base": api_base,
+                "headers": headers,
+            },
+        )
+
+        # Check if streaming is requested
+        stream = response_api_optional_request_params.get("stream", False)
+
+        try:
+            if stream:
+                # For streaming, use stream=True in the request
+                response = sync_httpx_client.post(
+                    url=api_base,
+                    headers=headers,
+                    data=json.dumps(data),
+                    timeout=timeout
+                    or response_api_optional_request_params.get("timeout"),
+                    stream=True,
+                )
+
+                return SyncResponsesAPIStreamingIterator(
+                    response=response,
+                    model=model,
+                    logging_obj=logging_obj,
+                    responses_api_provider_config=responses_api_provider_config,
+                )
+            else:
+                # For non-streaming requests
+                response = sync_httpx_client.post(
+                    url=api_base,
+                    headers=headers,
+                    data=json.dumps(data),
+                    timeout=timeout
+                    or response_api_optional_request_params.get("timeout"),
+                )
+        except Exception as e:
+            raise self._handle_error(
+                e=e,
+                provider_config=responses_api_provider_config,
+            )
+
+        return responses_api_provider_config.transform_response_api_response(
+            model=model,
+            raw_response=response,
+            logging_obj=logging_obj,
+        )
+
+    async def async_response_api_handler(
+        self,
+        model: str,
+        input: Union[str, ResponseInputParam],
+        responses_api_provider_config: BaseResponsesAPIConfig,
+        response_api_optional_request_params: Dict,
+        custom_llm_provider: str,
+        litellm_params: GenericLiteLLMParams,
+        logging_obj: LiteLLMLoggingObj,
+        extra_headers: Optional[Dict[str, Any]] = None,
+        extra_body: Optional[Dict[str, Any]] = None,
+        timeout: Optional[Union[float, httpx.Timeout]] = None,
+        client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
+    ) -> Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]:
+        """
+        Async version of the responses API handler.
+        Uses async HTTP client to make requests.
+        """
+        if client is None or not isinstance(client, AsyncHTTPHandler):
+            async_httpx_client = get_async_httpx_client(
+                llm_provider=litellm.LlmProviders(custom_llm_provider),
+                params={"ssl_verify": litellm_params.get("ssl_verify", None)},
+            )
+        else:
+            async_httpx_client = client
+
+        headers = responses_api_provider_config.validate_environment(
+            api_key=litellm_params.api_key,
+            headers=response_api_optional_request_params.get("extra_headers", {}) or {},
+            model=model,
+        )
+
+        if extra_headers:
+            headers.update(extra_headers)
+
+        api_base = responses_api_provider_config.get_complete_url(
+            api_base=litellm_params.api_base,
+            model=model,
+        )
+
+        data = responses_api_provider_config.transform_responses_api_request(
+            model=model,
+            input=input,
+            response_api_optional_request_params=response_api_optional_request_params,
+            litellm_params=litellm_params,
+            headers=headers,
+        )
+
+        ## LOGGING
+        logging_obj.pre_call(
+            input=input,
+            api_key="",
+            additional_args={
+                "complete_input_dict": data,
+                "api_base": api_base,
+                "headers": headers,
+            },
+        )
+
+        # Check if streaming is requested
+        stream = response_api_optional_request_params.get("stream", False)
+
+        try:
+            if stream:
+                # For streaming, we need to use stream=True in the request
+                response = await async_httpx_client.post(
+                    url=api_base,
+                    headers=headers,
+                    data=json.dumps(data),
+                    timeout=timeout
+                    or response_api_optional_request_params.get("timeout"),
+                    stream=True,
+                )
+
+                # Return the streaming iterator
+                return ResponsesAPIStreamingIterator(
+                    response=response,
+                    model=model,
+                    logging_obj=logging_obj,
+                    responses_api_provider_config=responses_api_provider_config,
+                )
+            else:
+                # For non-streaming, proceed as before
+                response = await async_httpx_client.post(
+                    url=api_base,
+                    headers=headers,
+                    data=json.dumps(data),
+                    timeout=timeout
+                    or response_api_optional_request_params.get("timeout"),
+                )
+        except Exception as e:
+            raise self._handle_error(
+                e=e,
+                provider_config=responses_api_provider_config,
+            )
+
+        return responses_api_provider_config.transform_response_api_response(
+            model=model,
+            raw_response=response,
+            logging_obj=logging_obj,
+        )
+
     def _handle_error(
-        self, e: Exception, provider_config: Union[BaseConfig, BaseRerankConfig]
+        self,
+        e: Exception,
+        provider_config: Union[BaseConfig, BaseRerankConfig, BaseResponsesAPIConfig],
     ):
         status_code = getattr(e, "status_code", 500)
         error_headers = getattr(e, "headers", None)
diff --git a/litellm/llms/openai/fine_tuning/handler.py b/litellm/llms/openai/fine_tuning/handler.py
index b7eab8e5fd..97b237c757 100644
--- a/litellm/llms/openai/fine_tuning/handler.py
+++ b/litellm/llms/openai/fine_tuning/handler.py
@@ -27,6 +27,7 @@ class OpenAIFineTuningAPI:
         ] = None,
         _is_async: bool = False,
         api_version: Optional[str] = None,
+        litellm_params: Optional[dict] = None,
     ) -> Optional[
         Union[
             OpenAI,
diff --git a/litellm/llms/openai/openai.py b/litellm/llms/openai/openai.py
index 7935c46293..880a043d08 100644
--- a/litellm/llms/openai/openai.py
+++ b/litellm/llms/openai/openai.py
@@ -2650,7 +2650,7 @@ class OpenAIAssistantsAPI(BaseLLM):
         assistant_id: str,
         additional_instructions: Optional[str],
         instructions: Optional[str],
-        metadata: Optional[object],
+        metadata: Optional[Dict],
         model: Optional[str],
         stream: Optional[bool],
         tools: Optional[Iterable[AssistantToolParam]],
@@ -2689,12 +2689,12 @@ class OpenAIAssistantsAPI(BaseLLM):
         assistant_id: str,
         additional_instructions: Optional[str],
         instructions: Optional[str],
-        metadata: Optional[object],
+        metadata: Optional[Dict],
         model: Optional[str],
         tools: Optional[Iterable[AssistantToolParam]],
         event_handler: Optional[AssistantEventHandler],
     ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]:
-        data = {
+        data: Dict[str, Any] = {
             "thread_id": thread_id,
             "assistant_id": assistant_id,
             "additional_instructions": additional_instructions,
@@ -2714,12 +2714,12 @@ class OpenAIAssistantsAPI(BaseLLM):
         assistant_id: str,
         additional_instructions: Optional[str],
         instructions: Optional[str],
-        metadata: Optional[object],
+        metadata: Optional[Dict],
         model: Optional[str],
         tools: Optional[Iterable[AssistantToolParam]],
         event_handler: Optional[AssistantEventHandler],
     ) -> AssistantStreamManager[AssistantEventHandler]:
-        data = {
+        data: Dict[str, Any] = {
             "thread_id": thread_id,
             "assistant_id": assistant_id,
             "additional_instructions": additional_instructions,
@@ -2741,7 +2741,7 @@ class OpenAIAssistantsAPI(BaseLLM):
         assistant_id: str,
         additional_instructions: Optional[str],
         instructions: Optional[str],
-        metadata: Optional[object],
+        metadata: Optional[Dict],
         model: Optional[str],
         stream: Optional[bool],
         tools: Optional[Iterable[AssistantToolParam]],
@@ -2763,7 +2763,7 @@ class OpenAIAssistantsAPI(BaseLLM):
         assistant_id: str,
         additional_instructions: Optional[str],
         instructions: Optional[str],
-        metadata: Optional[object],
+        metadata: Optional[Dict],
         model: Optional[str],
         stream: Optional[bool],
         tools: Optional[Iterable[AssistantToolParam]],
@@ -2786,7 +2786,7 @@ class OpenAIAssistantsAPI(BaseLLM):
         assistant_id: str,
         additional_instructions: Optional[str],
         instructions: Optional[str],
-        metadata: Optional[object],
+        metadata: Optional[Dict],
         model: Optional[str],
         stream: Optional[bool],
         tools: Optional[Iterable[AssistantToolParam]],
diff --git a/litellm/llms/openai/responses/transformation.py b/litellm/llms/openai/responses/transformation.py
new file mode 100644
index 0000000000..ce4052dc19
--- /dev/null
+++ b/litellm/llms/openai/responses/transformation.py
@@ -0,0 +1,190 @@
+from typing import TYPE_CHECKING, Any, Dict, Optional, Union, cast
+
+import httpx
+
+import litellm
+from litellm._logging import verbose_logger
+from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
+from litellm.secret_managers.main import get_secret_str
+from litellm.types.llms.openai import *
+from litellm.types.router import GenericLiteLLMParams
+
+from ..common_utils import OpenAIError
+
+if TYPE_CHECKING:
+    from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj
+
+    LiteLLMLoggingObj = _LiteLLMLoggingObj
+else:
+    LiteLLMLoggingObj = Any
+
+
+class OpenAIResponsesAPIConfig(BaseResponsesAPIConfig):
+    def get_supported_openai_params(self, model: str) -> list:
+        """
+        All OpenAI Responses API params are supported
+        """
+        return [
+            "input",
+            "model",
+            "include",
+            "instructions",
+            "max_output_tokens",
+            "metadata",
+            "parallel_tool_calls",
+            "previous_response_id",
+            "reasoning",
+            "store",
+            "stream",
+            "temperature",
+            "text",
+            "tool_choice",
+            "tools",
+            "top_p",
+            "truncation",
+            "user",
+            "extra_headers",
+            "extra_query",
+            "extra_body",
+            "timeout",
+        ]
+
+    def map_openai_params(
+        self,
+        response_api_optional_params: ResponsesAPIOptionalRequestParams,
+        model: str,
+        drop_params: bool,
+    ) -> Dict:
+        """No mapping applied since inputs are in OpenAI spec already"""
+        return dict(response_api_optional_params)
+
+    def transform_responses_api_request(
+        self,
+        model: str,
+        input: Union[str, ResponseInputParam],
+        response_api_optional_request_params: Dict,
+        litellm_params: GenericLiteLLMParams,
+        headers: dict,
+    ) -> ResponsesAPIRequestParams:
+        """No transform applied since inputs are in OpenAI spec already"""
+        return ResponsesAPIRequestParams(
+            model=model, input=input, **response_api_optional_request_params
+        )
+
+    def transform_response_api_response(
+        self,
+        model: str,
+        raw_response: httpx.Response,
+        logging_obj: LiteLLMLoggingObj,
+    ) -> ResponsesAPIResponse:
+        """No transform applied since outputs are in OpenAI spec already"""
+        try:
+            raw_response_json = raw_response.json()
+        except Exception:
+            raise OpenAIError(
+                message=raw_response.text, status_code=raw_response.status_code
+            )
+        return ResponsesAPIResponse(**raw_response_json)
+
+    def validate_environment(
+        self,
+        headers: dict,
+        model: str,
+        api_key: Optional[str] = None,
+    ) -> dict:
+        api_key = (
+            api_key
+            or litellm.api_key
+            or litellm.openai_key
+            or get_secret_str("OPENAI_API_KEY")
+        )
+        headers.update(
+            {
+                "Authorization": f"Bearer {api_key}",
+            }
+        )
+        return headers
+
+    def get_complete_url(
+        self,
+        api_base: Optional[str],
+        model: str,
+        stream: Optional[bool] = None,
+    ) -> str:
+        """
+        Get the endpoint for OpenAI responses API
+        """
+        api_base = (
+            api_base
+            or litellm.api_base
+            or get_secret_str("OPENAI_API_BASE")
+            or "https://api.openai.com/v1"
+        )
+
+        # Remove trailing slashes
+        api_base = api_base.rstrip("/")
+
+        return f"{api_base}/responses"
+
+    def transform_streaming_response(
+        self,
+        model: str,
+        parsed_chunk: dict,
+        logging_obj: LiteLLMLoggingObj,
+    ) -> ResponsesAPIStreamingResponse:
+        """
+        Transform a parsed streaming response chunk into a ResponsesAPIStreamingResponse
+        """
+        # Convert the dictionary to a properly typed ResponsesAPIStreamingResponse
+        verbose_logger.debug("Raw OpenAI Chunk=%s", parsed_chunk)
+        event_type = str(parsed_chunk.get("type"))
+        event_pydantic_model = OpenAIResponsesAPIConfig.get_event_model_class(
+            event_type=event_type
+        )
+        return event_pydantic_model(**parsed_chunk)
+
+    @staticmethod
+    def get_event_model_class(event_type: str) -> Any:
+        """
+        Returns the appropriate event model class based on the event type.
+
+        Args:
+            event_type (str): The type of event from the response chunk
+
+        Returns:
+            Any: The corresponding event model class
+
+        Raises:
+            ValueError: If the event type is unknown
+        """
+        event_models = {
+            ResponsesAPIStreamEvents.RESPONSE_CREATED: ResponseCreatedEvent,
+            ResponsesAPIStreamEvents.RESPONSE_IN_PROGRESS: ResponseInProgressEvent,
+            ResponsesAPIStreamEvents.RESPONSE_COMPLETED: ResponseCompletedEvent,
+            ResponsesAPIStreamEvents.RESPONSE_FAILED: ResponseFailedEvent,
+            ResponsesAPIStreamEvents.RESPONSE_INCOMPLETE: ResponseIncompleteEvent,
+            ResponsesAPIStreamEvents.OUTPUT_ITEM_ADDED: OutputItemAddedEvent,
+            ResponsesAPIStreamEvents.OUTPUT_ITEM_DONE: OutputItemDoneEvent,
+            ResponsesAPIStreamEvents.CONTENT_PART_ADDED: ContentPartAddedEvent,
+            ResponsesAPIStreamEvents.CONTENT_PART_DONE: ContentPartDoneEvent,
+            ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA: OutputTextDeltaEvent,
+            ResponsesAPIStreamEvents.OUTPUT_TEXT_ANNOTATION_ADDED: OutputTextAnnotationAddedEvent,
+            ResponsesAPIStreamEvents.OUTPUT_TEXT_DONE: OutputTextDoneEvent,
+            ResponsesAPIStreamEvents.REFUSAL_DELTA: RefusalDeltaEvent,
+            ResponsesAPIStreamEvents.REFUSAL_DONE: RefusalDoneEvent,
+            ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DELTA: FunctionCallArgumentsDeltaEvent,
+            ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DONE: FunctionCallArgumentsDoneEvent,
+            ResponsesAPIStreamEvents.FILE_SEARCH_CALL_IN_PROGRESS: FileSearchCallInProgressEvent,
+            ResponsesAPIStreamEvents.FILE_SEARCH_CALL_SEARCHING: FileSearchCallSearchingEvent,
+            ResponsesAPIStreamEvents.FILE_SEARCH_CALL_COMPLETED: FileSearchCallCompletedEvent,
+            ResponsesAPIStreamEvents.WEB_SEARCH_CALL_IN_PROGRESS: WebSearchCallInProgressEvent,
+            ResponsesAPIStreamEvents.WEB_SEARCH_CALL_SEARCHING: WebSearchCallSearchingEvent,
+            ResponsesAPIStreamEvents.WEB_SEARCH_CALL_COMPLETED: WebSearchCallCompletedEvent,
+            ResponsesAPIStreamEvents.ERROR: ErrorEvent,
+        }
+
+        model_class = event_models.get(cast(ResponsesAPIStreamEvents, event_type))
+        if not model_class:
+            raise ValueError(f"Unknown event type: {event_type}")
+
+        return model_class
diff --git a/litellm/main.py b/litellm/main.py
index a6a1a7c7d1..84ad92dfe0 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -1163,6 +1163,14 @@ def completion(  # type: ignore # noqa: PLR0915
                 "merge_reasoning_content_in_choices", None
             ),
             api_version=api_version,
+            azure_ad_token=kwargs.get("azure_ad_token"),
+            tenant_id=kwargs.get("tenant_id"),
+            client_id=kwargs.get("client_id"),
+            client_secret=kwargs.get("client_secret"),
+            azure_username=kwargs.get("azure_username"),
+            azure_password=kwargs.get("azure_password"),
+            max_retries=max_retries,
+            timeout=timeout,
         )
         logging.update_environment_variables(
             model=model,
@@ -3351,6 +3359,7 @@ def embedding(  # noqa: PLR0915
                 }
             }
         )
+
     litellm_params_dict = get_litellm_params(**kwargs)
 
     logging: Logging = litellm_logging_obj  # type: ignore
@@ -3412,6 +3421,7 @@ def embedding(  # noqa: PLR0915
                 aembedding=aembedding,
                 max_retries=max_retries,
                 headers=headers or extra_headers,
+                litellm_params=litellm_params_dict,
             )
         elif (
             model in litellm.open_ai_embedding_models
@@ -4515,6 +4525,8 @@ def image_generation(  # noqa: PLR0915
             **non_default_params,
         )
 
+        litellm_params_dict = get_litellm_params(**kwargs)
+
         logging: Logging = litellm_logging_obj
         logging.update_environment_variables(
             model=model,
@@ -4585,6 +4597,7 @@ def image_generation(  # noqa: PLR0915
                 aimg_generation=aimg_generation,
                 client=client,
                 headers=headers,
+                litellm_params=litellm_params_dict,
             )
         elif (
             custom_llm_provider == "openai"
@@ -4980,6 +4993,7 @@ def transcription(
         custom_llm_provider=custom_llm_provider,
         drop_params=drop_params,
     )
+    litellm_params_dict = get_litellm_params(**kwargs)
 
     litellm_logging_obj.update_environment_variables(
         model=model,
@@ -5033,6 +5047,7 @@ def transcription(
             api_version=api_version,
             azure_ad_token=azure_ad_token,
             max_retries=max_retries,
+            litellm_params=litellm_params_dict,
         )
     elif (
         custom_llm_provider == "openai"
@@ -5135,7 +5150,7 @@ async def aspeech(*args, **kwargs) -> HttpxBinaryResponseContent:
 
 
 @client
-def speech(
+def speech(  # noqa: PLR0915
     model: str,
     input: str,
     voice: Optional[Union[str, dict]] = None,
@@ -5176,7 +5191,7 @@ def speech(
 
     if max_retries is None:
         max_retries = litellm.num_retries or openai.DEFAULT_MAX_RETRIES
-
+    litellm_params_dict = get_litellm_params(**kwargs)
     logging_obj = kwargs.get("litellm_logging_obj", None)
     logging_obj.update_environment_variables(
         model=model,
@@ -5293,6 +5308,7 @@ def speech(
             timeout=timeout,
             client=client,  # pass AsyncOpenAI, OpenAI client
             aspeech=aspeech,
+            litellm_params=litellm_params_dict,
         )
     elif custom_llm_provider == "vertex_ai" or custom_llm_provider == "vertex_ai_beta":
 
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 96e6c64376..9436e8af0f 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -3,15 +3,15 @@ model_list:
     litellm_params:
       model: bedrock/amazon.nova-canvas-v1:0
       aws_region_name: "us-east-1"
-  - model_name: gpt-4o-mini-3
-    litellm_params:
-      model: azure/gpt-4o-mini-3
+      litellm_credential_name: "azure"
+
+credential_list:
+  - credential_name: azure
+    credential_values:
       api_key: os.environ/AZURE_API_KEY
       api_base: os.environ/AZURE_API_BASE
-    model_info:
-      base_model: azure/eu.gpt-4o-mini-2
-  - model_name: gpt-4o-mini-2
-    litellm_params:
-      model: azure/gpt-4o-mini-2
-      api_key: os.environ/AZURE_API_KEY
-      api_base: os.environ/AZURE_API_BASE
\ No newline at end of file
+    credential_info:
+      description: "Azure API Key and Base URL"
+      type: "azure"
+      required: true
+      default: "azure"
\ No newline at end of file
diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py
index 15013407ea..a9fe6517ea 100644
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@@ -2299,6 +2299,7 @@ class SpecialHeaders(enum.Enum):
     azure_authorization = "API-Key"
     anthropic_authorization = "x-api-key"
     google_ai_studio_authorization = "x-goog-api-key"
+    azure_apim_authorization = "Ocp-Apim-Subscription-Key"
 
 
 class LitellmDataForBackendLLMCall(TypedDict, total=False):
diff --git a/litellm/proxy/anthropic_endpoints/endpoints.py b/litellm/proxy/anthropic_endpoints/endpoints.py
index a3956ef274..78078b93f8 100644
--- a/litellm/proxy/anthropic_endpoints/endpoints.py
+++ b/litellm/proxy/anthropic_endpoints/endpoints.py
@@ -14,6 +14,7 @@ import litellm
 from litellm._logging import verbose_proxy_logger
 from litellm.proxy._types import *
 from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
+from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing
 from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
 from litellm.proxy.litellm_pre_call_utils import add_litellm_data_to_request
 from litellm.proxy.utils import ProxyLogging
@@ -89,7 +90,6 @@ async def anthropic_response(  # noqa: PLR0915
     """
     from litellm.proxy.proxy_server import (
         general_settings,
-        get_custom_headers,
         llm_router,
         proxy_config,
         proxy_logging_obj,
@@ -205,7 +205,7 @@ async def anthropic_response(  # noqa: PLR0915
         verbose_proxy_logger.debug("final response: %s", response)
 
         fastapi_response.headers.update(
-            get_custom_headers(
+            ProxyBaseLLMRequestProcessing.get_custom_headers(
                 user_api_key_dict=user_api_key_dict,
                 model_id=model_id,
                 cache_key=cache_key,
diff --git a/litellm/proxy/auth/user_api_key_auth.py b/litellm/proxy/auth/user_api_key_auth.py
index 7ce097e0d7..7e293b758d 100644
--- a/litellm/proxy/auth/user_api_key_auth.py
+++ b/litellm/proxy/auth/user_api_key_auth.py
@@ -77,6 +77,11 @@ google_ai_studio_api_key_header = APIKeyHeader(
     auto_error=False,
     description="If google ai studio client used.",
 )
+azure_apim_header = APIKeyHeader(
+    name=SpecialHeaders.azure_apim_authorization.value,
+    auto_error=False,
+    description="The default name of the subscription key header of Azure",
+)
 
 
 def _get_bearer_token(
@@ -301,6 +306,7 @@ async def _user_api_key_auth_builder(  # noqa: PLR0915
     azure_api_key_header: str,
     anthropic_api_key_header: Optional[str],
     google_ai_studio_api_key_header: Optional[str],
+    azure_apim_header: Optional[str],
     request_data: dict,
 ) -> UserAPIKeyAuth:
 
@@ -344,6 +350,8 @@ async def _user_api_key_auth_builder(  # noqa: PLR0915
             api_key = anthropic_api_key_header
         elif isinstance(google_ai_studio_api_key_header, str):
             api_key = google_ai_studio_api_key_header
+        elif isinstance(azure_apim_header, str):
+            api_key = azure_apim_header
         elif pass_through_endpoints is not None:
             for endpoint in pass_through_endpoints:
                 if endpoint.get("path", "") == route:
@@ -1165,6 +1173,7 @@ async def user_api_key_auth(
     google_ai_studio_api_key_header: Optional[str] = fastapi.Security(
         google_ai_studio_api_key_header
     ),
+    azure_apim_header: Optional[str] = fastapi.Security(azure_apim_header),
 ) -> UserAPIKeyAuth:
     """
     Parent function to authenticate user api key / jwt token.
@@ -1178,6 +1187,7 @@ async def user_api_key_auth(
         azure_api_key_header=azure_api_key_header,
         anthropic_api_key_header=anthropic_api_key_header,
         google_ai_studio_api_key_header=google_ai_studio_api_key_header,
+        azure_apim_header=azure_apim_header,
         request_data=request_data,
     )
 
diff --git a/litellm/proxy/batches_endpoints/endpoints.py b/litellm/proxy/batches_endpoints/endpoints.py
index e00112b8d8..6b7651d48f 100644
--- a/litellm/proxy/batches_endpoints/endpoints.py
+++ b/litellm/proxy/batches_endpoints/endpoints.py
@@ -18,6 +18,7 @@ from litellm.batches.main import (
 )
 from litellm.proxy._types import *
 from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
+from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing
 from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
 from litellm.proxy.common_utils.openai_endpoint_utils import (
     get_custom_llm_provider_from_request_body,
@@ -69,7 +70,6 @@ async def create_batch(
     from litellm.proxy.proxy_server import (
         add_litellm_data_to_request,
         general_settings,
-        get_custom_headers,
         llm_router,
         proxy_config,
         proxy_logging_obj,
@@ -137,7 +137,7 @@ async def create_batch(
         api_base = hidden_params.get("api_base", None) or ""
 
         fastapi_response.headers.update(
-            get_custom_headers(
+            ProxyBaseLLMRequestProcessing.get_custom_headers(
                 user_api_key_dict=user_api_key_dict,
                 model_id=model_id,
                 cache_key=cache_key,
@@ -201,7 +201,6 @@ async def retrieve_batch(
     from litellm.proxy.proxy_server import (
         add_litellm_data_to_request,
         general_settings,
-        get_custom_headers,
         llm_router,
         proxy_config,
         proxy_logging_obj,
@@ -266,7 +265,7 @@ async def retrieve_batch(
         api_base = hidden_params.get("api_base", None) or ""
 
         fastapi_response.headers.update(
-            get_custom_headers(
+            ProxyBaseLLMRequestProcessing.get_custom_headers(
                 user_api_key_dict=user_api_key_dict,
                 model_id=model_id,
                 cache_key=cache_key,
@@ -326,11 +325,7 @@ async def list_batches(
 
     ```
     """
-    from litellm.proxy.proxy_server import (
-        get_custom_headers,
-        proxy_logging_obj,
-        version,
-    )
+    from litellm.proxy.proxy_server import proxy_logging_obj, version
 
     verbose_proxy_logger.debug("GET /v1/batches after={} limit={}".format(after, limit))
     try:
@@ -352,7 +347,7 @@ async def list_batches(
         api_base = hidden_params.get("api_base", None) or ""
 
         fastapi_response.headers.update(
-            get_custom_headers(
+            ProxyBaseLLMRequestProcessing.get_custom_headers(
                 user_api_key_dict=user_api_key_dict,
                 model_id=model_id,
                 cache_key=cache_key,
@@ -417,7 +412,6 @@ async def cancel_batch(
     from litellm.proxy.proxy_server import (
         add_litellm_data_to_request,
         general_settings,
-        get_custom_headers,
         proxy_config,
         proxy_logging_obj,
         version,
@@ -463,7 +457,7 @@ async def cancel_batch(
         api_base = hidden_params.get("api_base", None) or ""
 
         fastapi_response.headers.update(
-            get_custom_headers(
+            ProxyBaseLLMRequestProcessing.get_custom_headers(
                 user_api_key_dict=user_api_key_dict,
                 model_id=model_id,
                 cache_key=cache_key,
diff --git a/litellm/proxy/common_request_processing.py b/litellm/proxy/common_request_processing.py
new file mode 100644
index 0000000000..7f131efb04
--- /dev/null
+++ b/litellm/proxy/common_request_processing.py
@@ -0,0 +1,356 @@
+import asyncio
+import json
+import uuid
+from datetime import datetime
+from typing import TYPE_CHECKING, Any, Callable, Literal, Optional, Union
+
+import httpx
+from fastapi import HTTPException, Request, status
+from fastapi.responses import Response, StreamingResponse
+
+import litellm
+from litellm._logging import verbose_proxy_logger
+from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.proxy._types import ProxyException, UserAPIKeyAuth
+from litellm.proxy.auth.auth_utils import check_response_size_is_safe
+from litellm.proxy.common_utils.callback_utils import (
+    get_logging_caching_headers,
+    get_remaining_tokens_and_requests_from_request_data,
+)
+from litellm.proxy.route_llm_request import route_request
+from litellm.proxy.utils import ProxyLogging
+from litellm.router import Router
+
+if TYPE_CHECKING:
+    from litellm.proxy.proxy_server import ProxyConfig as _ProxyConfig
+
+    ProxyConfig = _ProxyConfig
+else:
+    ProxyConfig = Any
+from litellm.proxy.litellm_pre_call_utils import add_litellm_data_to_request
+
+
+class ProxyBaseLLMRequestProcessing:
+    def __init__(self, data: dict):
+        self.data = data
+
+    @staticmethod
+    def get_custom_headers(
+        *,
+        user_api_key_dict: UserAPIKeyAuth,
+        call_id: Optional[str] = None,
+        model_id: Optional[str] = None,
+        cache_key: Optional[str] = None,
+        api_base: Optional[str] = None,
+        version: Optional[str] = None,
+        model_region: Optional[str] = None,
+        response_cost: Optional[Union[float, str]] = None,
+        hidden_params: Optional[dict] = None,
+        fastest_response_batch_completion: Optional[bool] = None,
+        request_data: Optional[dict] = {},
+        timeout: Optional[Union[float, int, httpx.Timeout]] = None,
+        **kwargs,
+    ) -> dict:
+        exclude_values = {"", None, "None"}
+        hidden_params = hidden_params or {}
+        headers = {
+            "x-litellm-call-id": call_id,
+            "x-litellm-model-id": model_id,
+            "x-litellm-cache-key": cache_key,
+            "x-litellm-model-api-base": api_base,
+            "x-litellm-version": version,
+            "x-litellm-model-region": model_region,
+            "x-litellm-response-cost": str(response_cost),
+            "x-litellm-key-tpm-limit": str(user_api_key_dict.tpm_limit),
+            "x-litellm-key-rpm-limit": str(user_api_key_dict.rpm_limit),
+            "x-litellm-key-max-budget": str(user_api_key_dict.max_budget),
+            "x-litellm-key-spend": str(user_api_key_dict.spend),
+            "x-litellm-response-duration-ms": str(
+                hidden_params.get("_response_ms", None)
+            ),
+            "x-litellm-overhead-duration-ms": str(
+                hidden_params.get("litellm_overhead_time_ms", None)
+            ),
+            "x-litellm-fastest_response_batch_completion": (
+                str(fastest_response_batch_completion)
+                if fastest_response_batch_completion is not None
+                else None
+            ),
+            "x-litellm-timeout": str(timeout) if timeout is not None else None,
+            **{k: str(v) for k, v in kwargs.items()},
+        }
+        if request_data:
+            remaining_tokens_header = (
+                get_remaining_tokens_and_requests_from_request_data(request_data)
+            )
+            headers.update(remaining_tokens_header)
+
+            logging_caching_headers = get_logging_caching_headers(request_data)
+            if logging_caching_headers:
+                headers.update(logging_caching_headers)
+
+        try:
+            return {
+                key: str(value)
+                for key, value in headers.items()
+                if value not in exclude_values
+            }
+        except Exception as e:
+            verbose_proxy_logger.error(f"Error setting custom headers: {e}")
+            return {}
+
+    async def base_process_llm_request(
+        self,
+        request: Request,
+        fastapi_response: Response,
+        user_api_key_dict: UserAPIKeyAuth,
+        route_type: Literal["acompletion", "aresponses"],
+        proxy_logging_obj: ProxyLogging,
+        general_settings: dict,
+        proxy_config: ProxyConfig,
+        select_data_generator: Callable,
+        llm_router: Optional[Router] = None,
+        model: Optional[str] = None,
+        user_model: Optional[str] = None,
+        user_temperature: Optional[float] = None,
+        user_request_timeout: Optional[float] = None,
+        user_max_tokens: Optional[int] = None,
+        user_api_base: Optional[str] = None,
+        version: Optional[str] = None,
+    ) -> Any:
+        """
+        Common request processing logic for both chat completions and responses API endpoints
+        """
+        verbose_proxy_logger.debug(
+            "Request received by LiteLLM:\n{}".format(json.dumps(self.data, indent=4)),
+        )
+
+        self.data = await add_litellm_data_to_request(
+            data=self.data,
+            request=request,
+            general_settings=general_settings,
+            user_api_key_dict=user_api_key_dict,
+            version=version,
+            proxy_config=proxy_config,
+        )
+
+        self.data["model"] = (
+            general_settings.get("completion_model", None)  # server default
+            or user_model  # model name passed via cli args
+            or model  # for azure deployments
+            or self.data.get("model", None)  # default passed in http request
+        )
+
+        # override with user settings, these are params passed via cli
+        if user_temperature:
+            self.data["temperature"] = user_temperature
+        if user_request_timeout:
+            self.data["request_timeout"] = user_request_timeout
+        if user_max_tokens:
+            self.data["max_tokens"] = user_max_tokens
+        if user_api_base:
+            self.data["api_base"] = user_api_base
+
+        ### MODEL ALIAS MAPPING ###
+        # check if model name in model alias map
+        # get the actual model name
+        if (
+            isinstance(self.data["model"], str)
+            and self.data["model"] in litellm.model_alias_map
+        ):
+            self.data["model"] = litellm.model_alias_map[self.data["model"]]
+
+        ### CALL HOOKS ### - modify/reject incoming data before calling the model
+        self.data = await proxy_logging_obj.pre_call_hook(  # type: ignore
+            user_api_key_dict=user_api_key_dict, data=self.data, call_type="completion"
+        )
+
+        ## LOGGING OBJECT ## - initialize logging object for logging success/failure events for call
+        ## IMPORTANT Note: - initialize this before running pre-call checks. Ensures we log rejected requests to langfuse.
+        self.data["litellm_call_id"] = request.headers.get(
+            "x-litellm-call-id", str(uuid.uuid4())
+        )
+        logging_obj, self.data = litellm.utils.function_setup(
+            original_function=route_type,
+            rules_obj=litellm.utils.Rules(),
+            start_time=datetime.now(),
+            **self.data,
+        )
+
+        self.data["litellm_logging_obj"] = logging_obj
+
+        tasks = []
+        tasks.append(
+            proxy_logging_obj.during_call_hook(
+                data=self.data,
+                user_api_key_dict=user_api_key_dict,
+                call_type=ProxyBaseLLMRequestProcessing._get_pre_call_type(
+                    route_type=route_type
+                ),
+            )
+        )
+
+        ### ROUTE THE REQUEST ###
+        # Do not change this - it should be a constant time fetch - ALWAYS
+        llm_call = await route_request(
+            data=self.data,
+            route_type=route_type,
+            llm_router=llm_router,
+            user_model=user_model,
+        )
+        tasks.append(llm_call)
+
+        # wait for call to end
+        llm_responses = asyncio.gather(
+            *tasks
+        )  # run the moderation check in parallel to the actual llm api call
+
+        responses = await llm_responses
+
+        response = responses[1]
+
+        hidden_params = getattr(response, "_hidden_params", {}) or {}
+        model_id = hidden_params.get("model_id", None) or ""
+        cache_key = hidden_params.get("cache_key", None) or ""
+        api_base = hidden_params.get("api_base", None) or ""
+        response_cost = hidden_params.get("response_cost", None) or ""
+        fastest_response_batch_completion = hidden_params.get(
+            "fastest_response_batch_completion", None
+        )
+        additional_headers: dict = hidden_params.get("additional_headers", {}) or {}
+
+        # Post Call Processing
+        if llm_router is not None:
+            self.data["deployment"] = llm_router.get_deployment(model_id=model_id)
+        asyncio.create_task(
+            proxy_logging_obj.update_request_status(
+                litellm_call_id=self.data.get("litellm_call_id", ""), status="success"
+            )
+        )
+        if (
+            "stream" in self.data and self.data["stream"] is True
+        ):  # use generate_responses to stream responses
+            custom_headers = ProxyBaseLLMRequestProcessing.get_custom_headers(
+                user_api_key_dict=user_api_key_dict,
+                call_id=logging_obj.litellm_call_id,
+                model_id=model_id,
+                cache_key=cache_key,
+                api_base=api_base,
+                version=version,
+                response_cost=response_cost,
+                model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
+                fastest_response_batch_completion=fastest_response_batch_completion,
+                request_data=self.data,
+                hidden_params=hidden_params,
+                **additional_headers,
+            )
+            selected_data_generator = select_data_generator(
+                response=response,
+                user_api_key_dict=user_api_key_dict,
+                request_data=self.data,
+            )
+            return StreamingResponse(
+                selected_data_generator,
+                media_type="text/event-stream",
+                headers=custom_headers,
+            )
+
+        ### CALL HOOKS ### - modify outgoing data
+        response = await proxy_logging_obj.post_call_success_hook(
+            data=self.data, user_api_key_dict=user_api_key_dict, response=response
+        )
+
+        hidden_params = (
+            getattr(response, "_hidden_params", {}) or {}
+        )  # get any updated response headers
+        additional_headers = hidden_params.get("additional_headers", {}) or {}
+
+        fastapi_response.headers.update(
+            ProxyBaseLLMRequestProcessing.get_custom_headers(
+                user_api_key_dict=user_api_key_dict,
+                call_id=logging_obj.litellm_call_id,
+                model_id=model_id,
+                cache_key=cache_key,
+                api_base=api_base,
+                version=version,
+                response_cost=response_cost,
+                model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
+                fastest_response_batch_completion=fastest_response_batch_completion,
+                request_data=self.data,
+                hidden_params=hidden_params,
+                **additional_headers,
+            )
+        )
+        await check_response_size_is_safe(response=response)
+
+        return response
+
+    async def _handle_llm_api_exception(
+        self,
+        e: Exception,
+        user_api_key_dict: UserAPIKeyAuth,
+        proxy_logging_obj: ProxyLogging,
+        version: Optional[str] = None,
+    ):
+        """Raises ProxyException (OpenAI API compatible) if an exception is raised"""
+        verbose_proxy_logger.exception(
+            f"litellm.proxy.proxy_server._handle_llm_api_exception(): Exception occured - {str(e)}"
+        )
+        await proxy_logging_obj.post_call_failure_hook(
+            user_api_key_dict=user_api_key_dict,
+            original_exception=e,
+            request_data=self.data,
+        )
+        litellm_debug_info = getattr(e, "litellm_debug_info", "")
+        verbose_proxy_logger.debug(
+            "\033[1;31mAn error occurred: %s %s\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`",
+            e,
+            litellm_debug_info,
+        )
+
+        timeout = getattr(
+            e, "timeout", None
+        )  # returns the timeout set by the wrapper. Used for testing if model-specific timeout are set correctly
+        _litellm_logging_obj: Optional[LiteLLMLoggingObj] = self.data.get(
+            "litellm_logging_obj", None
+        )
+        custom_headers = ProxyBaseLLMRequestProcessing.get_custom_headers(
+            user_api_key_dict=user_api_key_dict,
+            call_id=(
+                _litellm_logging_obj.litellm_call_id if _litellm_logging_obj else None
+            ),
+            version=version,
+            response_cost=0,
+            model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
+            request_data=self.data,
+            timeout=timeout,
+        )
+        headers = getattr(e, "headers", {}) or {}
+        headers.update(custom_headers)
+
+        if isinstance(e, HTTPException):
+            raise ProxyException(
+                message=getattr(e, "detail", str(e)),
+                type=getattr(e, "type", "None"),
+                param=getattr(e, "param", "None"),
+                code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
+                headers=headers,
+            )
+        error_msg = f"{str(e)}"
+        raise ProxyException(
+            message=getattr(e, "message", error_msg),
+            type=getattr(e, "type", "None"),
+            param=getattr(e, "param", "None"),
+            openai_code=getattr(e, "code", None),
+            code=getattr(e, "status_code", 500),
+            headers=headers,
+        )
+
+    @staticmethod
+    def _get_pre_call_type(
+        route_type: Literal["acompletion", "aresponses"]
+    ) -> Literal["completion", "responses"]:
+        if route_type == "acompletion":
+            return "completion"
+        elif route_type == "aresponses":
+            return "responses"
diff --git a/litellm/proxy/credential_endpoints/endpoints.py b/litellm/proxy/credential_endpoints/endpoints.py
new file mode 100644
index 0000000000..84b35e3c52
--- /dev/null
+++ b/litellm/proxy/credential_endpoints/endpoints.py
@@ -0,0 +1,200 @@
+"""
+CRUD endpoints for storing reusable credentials.
+"""
+
+from fastapi import APIRouter, Depends, HTTPException, Request, Response
+
+import litellm
+from litellm._logging import verbose_proxy_logger
+from litellm.litellm_core_utils.credential_accessor import CredentialAccessor
+from litellm.proxy._types import CommonProxyErrors, UserAPIKeyAuth
+from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
+from litellm.proxy.common_utils.encrypt_decrypt_utils import encrypt_value_helper
+from litellm.proxy.utils import handle_exception_on_proxy, jsonify_object
+from litellm.types.utils import CredentialItem
+
+router = APIRouter()
+
+
+class CredentialHelperUtils:
+    @staticmethod
+    def encrypt_credential_values(credential: CredentialItem) -> CredentialItem:
+        """Encrypt values in credential.credential_values and add to DB"""
+        encrypted_credential_values = {}
+        for key, value in credential.credential_values.items():
+            encrypted_credential_values[key] = encrypt_value_helper(value)
+        credential.credential_values = encrypted_credential_values
+        return credential
+
+
+@router.post(
+    "/credentials",
+    dependencies=[Depends(user_api_key_auth)],
+    tags=["credential management"],
+)
+async def create_credential(
+    request: Request,
+    fastapi_response: Response,
+    credential: CredentialItem,
+    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
+):
+    """
+    [BETA] endpoint. This might change unexpectedly.
+    Stores credential in DB.
+    Reloads credentials in memory.
+    """
+    from litellm.proxy.proxy_server import prisma_client
+
+    try:
+        if prisma_client is None:
+            raise HTTPException(
+                status_code=500,
+                detail={"error": CommonProxyErrors.db_not_connected_error.value},
+            )
+
+        encrypted_credential = CredentialHelperUtils.encrypt_credential_values(
+            credential
+        )
+        credentials_dict = encrypted_credential.model_dump()
+        credentials_dict_jsonified = jsonify_object(credentials_dict)
+        await prisma_client.db.litellm_credentialstable.create(
+            data={
+                **credentials_dict_jsonified,
+                "created_by": user_api_key_dict.user_id,
+                "updated_by": user_api_key_dict.user_id,
+            }
+        )
+
+        ## ADD TO LITELLM ##
+        CredentialAccessor.upsert_credentials([credential])
+
+        return {"success": True, "message": "Credential created successfully"}
+    except Exception as e:
+        verbose_proxy_logger.exception(e)
+        raise handle_exception_on_proxy(e)
+
+
+@router.get(
+    "/credentials",
+    dependencies=[Depends(user_api_key_auth)],
+    tags=["credential management"],
+)
+async def get_credentials(
+    request: Request,
+    fastapi_response: Response,
+    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
+):
+    """
+    [BETA] endpoint. This might change unexpectedly.
+    """
+    try:
+        masked_credentials = [
+            {
+                "credential_name": credential.credential_name,
+                "credential_info": credential.credential_info,
+            }
+            for credential in litellm.credential_list
+        ]
+        return {"success": True, "credentials": masked_credentials}
+    except Exception as e:
+        return handle_exception_on_proxy(e)
+
+
+@router.get(
+    "/credentials/{credential_name}",
+    dependencies=[Depends(user_api_key_auth)],
+    tags=["credential management"],
+)
+async def get_credential(
+    request: Request,
+    fastapi_response: Response,
+    credential_name: str,
+    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
+):
+    """
+    [BETA] endpoint. This might change unexpectedly.
+    """
+    try:
+        for credential in litellm.credential_list:
+            if credential.credential_name == credential_name:
+                masked_credential = {
+                    "credential_name": credential.credential_name,
+                    "credential_values": credential.credential_values,
+                }
+                return {"success": True, "credential": masked_credential}
+        return {"success": False, "message": "Credential not found"}
+    except Exception as e:
+        return handle_exception_on_proxy(e)
+
+
+@router.delete(
+    "/credentials/{credential_name}",
+    dependencies=[Depends(user_api_key_auth)],
+    tags=["credential management"],
+)
+async def delete_credential(
+    request: Request,
+    fastapi_response: Response,
+    credential_name: str,
+    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
+):
+    """
+    [BETA] endpoint. This might change unexpectedly.
+    """
+    from litellm.proxy.proxy_server import prisma_client
+
+    try:
+        if prisma_client is None:
+            raise HTTPException(
+                status_code=500,
+                detail={"error": CommonProxyErrors.db_not_connected_error.value},
+            )
+        await prisma_client.db.litellm_credentialstable.delete(
+            where={"credential_name": credential_name}
+        )
+
+        ## DELETE FROM LITELLM ##
+        litellm.credential_list = [
+            cred
+            for cred in litellm.credential_list
+            if cred.credential_name != credential_name
+        ]
+        return {"success": True, "message": "Credential deleted successfully"}
+    except Exception as e:
+        return handle_exception_on_proxy(e)
+
+
+@router.put(
+    "/credentials/{credential_name}",
+    dependencies=[Depends(user_api_key_auth)],
+    tags=["credential management"],
+)
+async def update_credential(
+    request: Request,
+    fastapi_response: Response,
+    credential_name: str,
+    credential: CredentialItem,
+    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
+):
+    """
+    [BETA] endpoint. This might change unexpectedly.
+    """
+    from litellm.proxy.proxy_server import prisma_client
+
+    try:
+        if prisma_client is None:
+            raise HTTPException(
+                status_code=500,
+                detail={"error": CommonProxyErrors.db_not_connected_error.value},
+            )
+        credential_object_jsonified = jsonify_object(credential.model_dump())
+        await prisma_client.db.litellm_credentialstable.update(
+            where={"credential_name": credential_name},
+            data={
+                **credential_object_jsonified,
+                "updated_by": user_api_key_dict.user_id,
+            },
+        )
+        return {"success": True, "message": "Credential updated successfully"}
+    except Exception as e:
+        return handle_exception_on_proxy(e)
diff --git a/litellm/proxy/example_config_yaml/custom_callbacks1.py b/litellm/proxy/example_config_yaml/custom_callbacks1.py
index 9211111270..2cc644a184 100644
--- a/litellm/proxy/example_config_yaml/custom_callbacks1.py
+++ b/litellm/proxy/example_config_yaml/custom_callbacks1.py
@@ -61,6 +61,7 @@ class MyCustomHandler(
             "image_generation",
             "moderation",
             "audio_transcription",
+            "responses",
         ],
     ):
         pass
diff --git a/litellm/proxy/example_config_yaml/custom_guardrail.py b/litellm/proxy/example_config_yaml/custom_guardrail.py
index abd5b672cb..5a5c784410 100644
--- a/litellm/proxy/example_config_yaml/custom_guardrail.py
+++ b/litellm/proxy/example_config_yaml/custom_guardrail.py
@@ -66,6 +66,7 @@ class myCustomGuardrail(CustomGuardrail):
             "image_generation",
             "moderation",
             "audio_transcription",
+            "responses",
         ],
     ):
         """
diff --git a/litellm/proxy/fine_tuning_endpoints/endpoints.py b/litellm/proxy/fine_tuning_endpoints/endpoints.py
index 63b0546bfa..d4c4250b37 100644
--- a/litellm/proxy/fine_tuning_endpoints/endpoints.py
+++ b/litellm/proxy/fine_tuning_endpoints/endpoints.py
@@ -15,6 +15,7 @@ import litellm
 from litellm._logging import verbose_proxy_logger
 from litellm.proxy._types import *
 from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
+from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing
 from litellm.proxy.utils import handle_exception_on_proxy
 
 router = APIRouter()
@@ -97,7 +98,6 @@ async def create_fine_tuning_job(
     from litellm.proxy.proxy_server import (
         add_litellm_data_to_request,
         general_settings,
-        get_custom_headers,
         premium_user,
         proxy_config,
         proxy_logging_obj,
@@ -151,7 +151,7 @@ async def create_fine_tuning_job(
         api_base = hidden_params.get("api_base", None) or ""
 
         fastapi_response.headers.update(
-            get_custom_headers(
+            ProxyBaseLLMRequestProcessing.get_custom_headers(
                 user_api_key_dict=user_api_key_dict,
                 model_id=model_id,
                 cache_key=cache_key,
@@ -205,7 +205,6 @@ async def retrieve_fine_tuning_job(
     from litellm.proxy.proxy_server import (
         add_litellm_data_to_request,
         general_settings,
-        get_custom_headers,
         premium_user,
         proxy_config,
         proxy_logging_obj,
@@ -248,7 +247,7 @@ async def retrieve_fine_tuning_job(
         api_base = hidden_params.get("api_base", None) or ""
 
         fastapi_response.headers.update(
-            get_custom_headers(
+            ProxyBaseLLMRequestProcessing.get_custom_headers(
                 user_api_key_dict=user_api_key_dict,
                 model_id=model_id,
                 cache_key=cache_key,
@@ -305,7 +304,6 @@ async def list_fine_tuning_jobs(
     from litellm.proxy.proxy_server import (
         add_litellm_data_to_request,
         general_settings,
-        get_custom_headers,
         premium_user,
         proxy_config,
         proxy_logging_obj,
@@ -349,7 +347,7 @@ async def list_fine_tuning_jobs(
         api_base = hidden_params.get("api_base", None) or ""
 
         fastapi_response.headers.update(
-            get_custom_headers(
+            ProxyBaseLLMRequestProcessing.get_custom_headers(
                 user_api_key_dict=user_api_key_dict,
                 model_id=model_id,
                 cache_key=cache_key,
@@ -404,7 +402,6 @@ async def cancel_fine_tuning_job(
     from litellm.proxy.proxy_server import (
         add_litellm_data_to_request,
         general_settings,
-        get_custom_headers,
         premium_user,
         proxy_config,
         proxy_logging_obj,
@@ -451,7 +448,7 @@ async def cancel_fine_tuning_job(
         api_base = hidden_params.get("api_base", None) or ""
 
         fastapi_response.headers.update(
-            get_custom_headers(
+            ProxyBaseLLMRequestProcessing.get_custom_headers(
                 user_api_key_dict=user_api_key_dict,
                 model_id=model_id,
                 cache_key=cache_key,
diff --git a/litellm/proxy/guardrails/guardrail_hooks/aim.py b/litellm/proxy/guardrails/guardrail_hooks/aim.py
index 91d19e277c..cdc5f00963 100644
--- a/litellm/proxy/guardrails/guardrail_hooks/aim.py
+++ b/litellm/proxy/guardrails/guardrail_hooks/aim.py
@@ -25,8 +25,12 @@ class AimGuardrailMissingSecrets(Exception):
 
 
 class AimGuardrail(CustomGuardrail):
-    def __init__(self, api_key: Optional[str] = None, api_base: Optional[str] = None, **kwargs):
-        self.async_handler = get_async_httpx_client(llm_provider=httpxSpecialProvider.GuardrailCallback)
+    def __init__(
+        self, api_key: Optional[str] = None, api_base: Optional[str] = None, **kwargs
+    ):
+        self.async_handler = get_async_httpx_client(
+            llm_provider=httpxSpecialProvider.GuardrailCallback
+        )
         self.api_key = api_key or os.environ.get("AIM_API_KEY")
         if not self.api_key:
             msg = (
@@ -34,7 +38,9 @@ class AimGuardrail(CustomGuardrail):
                 "pass it as a parameter to the guardrail in the config file"
             )
             raise AimGuardrailMissingSecrets(msg)
-        self.api_base = api_base or os.environ.get("AIM_API_BASE") or "https://api.aim.security"
+        self.api_base = (
+            api_base or os.environ.get("AIM_API_BASE") or "https://api.aim.security"
+        )
         super().__init__(**kwargs)
 
     async def async_pre_call_hook(
@@ -68,6 +74,7 @@ class AimGuardrail(CustomGuardrail):
             "image_generation",
             "moderation",
             "audio_transcription",
+            "responses",
         ],
     ) -> Union[Exception, str, dict, None]:
         verbose_proxy_logger.debug("Inside AIM Moderation Hook")
@@ -77,9 +84,10 @@ class AimGuardrail(CustomGuardrail):
 
     async def call_aim_guardrail(self, data: dict, hook: str) -> None:
         user_email = data.get("metadata", {}).get("headers", {}).get("x-aim-user-email")
-        headers = {"Authorization": f"Bearer {self.api_key}", "x-aim-litellm-hook": hook} | (
-            {"x-aim-user-email": user_email} if user_email else {}
-        )
+        headers = {
+            "Authorization": f"Bearer {self.api_key}",
+            "x-aim-litellm-hook": hook,
+        } | ({"x-aim-user-email": user_email} if user_email else {})
         response = await self.async_handler.post(
             f"{self.api_base}/detect/openai",
             headers=headers,
diff --git a/litellm/proxy/guardrails/guardrail_hooks/aporia_ai.py b/litellm/proxy/guardrails/guardrail_hooks/aporia_ai.py
index 4e37b4eb84..3c39b90b0a 100644
--- a/litellm/proxy/guardrails/guardrail_hooks/aporia_ai.py
+++ b/litellm/proxy/guardrails/guardrail_hooks/aporia_ai.py
@@ -178,7 +178,7 @@ class AporiaGuardrail(CustomGuardrail):
         pass
 
     @log_guardrail_information
-    async def async_moderation_hook(  ### 👈 KEY CHANGE ###
+    async def async_moderation_hook(
         self,
         data: dict,
         user_api_key_dict: UserAPIKeyAuth,
@@ -188,6 +188,7 @@ class AporiaGuardrail(CustomGuardrail):
             "image_generation",
             "moderation",
             "audio_transcription",
+            "responses",
         ],
     ):
         from litellm.proxy.common_utils.callback_utils import (
diff --git a/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py b/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py
index 53ab08999e..7686fba7cf 100644
--- a/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py
+++ b/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py
@@ -240,7 +240,7 @@ class BedrockGuardrail(CustomGuardrail, BaseAWSLLM):
             )
 
     @log_guardrail_information
-    async def async_moderation_hook(  ### 👈 KEY CHANGE ###
+    async def async_moderation_hook(
         self,
         data: dict,
         user_api_key_dict: UserAPIKeyAuth,
@@ -250,6 +250,7 @@ class BedrockGuardrail(CustomGuardrail, BaseAWSLLM):
             "image_generation",
             "moderation",
             "audio_transcription",
+            "responses",
         ],
     ):
         from litellm.proxy.common_utils.callback_utils import (
diff --git a/litellm/proxy/guardrails/guardrail_hooks/custom_guardrail.py b/litellm/proxy/guardrails/guardrail_hooks/custom_guardrail.py
index a45343b37d..87860477f0 100644
--- a/litellm/proxy/guardrails/guardrail_hooks/custom_guardrail.py
+++ b/litellm/proxy/guardrails/guardrail_hooks/custom_guardrail.py
@@ -70,6 +70,7 @@ class myCustomGuardrail(CustomGuardrail):
             "image_generation",
             "moderation",
             "audio_transcription",
+            "responses",
         ],
     ):
         """
diff --git a/litellm/proxy/guardrails/guardrail_hooks/lakera_ai.py b/litellm/proxy/guardrails/guardrail_hooks/lakera_ai.py
index f55b78b0a9..5d3b8be334 100644
--- a/litellm/proxy/guardrails/guardrail_hooks/lakera_ai.py
+++ b/litellm/proxy/guardrails/guardrail_hooks/lakera_ai.py
@@ -134,6 +134,7 @@ class lakeraAI_Moderation(CustomGuardrail):
             "audio_transcription",
             "pass_through_endpoint",
             "rerank",
+            "responses",
         ],
     ):
         if (
@@ -335,7 +336,7 @@ class lakeraAI_Moderation(CustomGuardrail):
         )
 
     @log_guardrail_information
-    async def async_moderation_hook(  ### 👈 KEY CHANGE ###
+    async def async_moderation_hook(
         self,
         data: dict,
         user_api_key_dict: UserAPIKeyAuth,
@@ -345,6 +346,7 @@ class lakeraAI_Moderation(CustomGuardrail):
             "image_generation",
             "moderation",
             "audio_transcription",
+            "responses",
         ],
     ):
         if self.event_hook is None:
diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py
index 693e44ac77..ece5ecf4b7 100644
--- a/litellm/proxy/litellm_pre_call_utils.py
+++ b/litellm/proxy/litellm_pre_call_utils.py
@@ -62,10 +62,18 @@ def _get_metadata_variable_name(request: Request) -> str:
     """
     if RouteChecks._is_assistants_api_request(request):
         return "litellm_metadata"
-    if "batches" in request.url.path:
-        return "litellm_metadata"
-    if "/v1/messages" in request.url.path:
-        # anthropic API has a field called metadata
+
+    LITELLM_METADATA_ROUTES = [
+        "batches",
+        "/v1/messages",
+        "responses",
+    ]
+    if any(
+        [
+            litellm_metadata_route in request.url.path
+            for litellm_metadata_route in LITELLM_METADATA_ROUTES
+        ]
+    ):
         return "litellm_metadata"
     else:
         return "metadata"
diff --git a/litellm/proxy/openai_files_endpoints/files_endpoints.py b/litellm/proxy/openai_files_endpoints/files_endpoints.py
index d45a410be3..ffbca91c69 100644
--- a/litellm/proxy/openai_files_endpoints/files_endpoints.py
+++ b/litellm/proxy/openai_files_endpoints/files_endpoints.py
@@ -27,6 +27,7 @@ from litellm import CreateFileRequest, get_secret_str
 from litellm._logging import verbose_proxy_logger
 from litellm.proxy._types import *
 from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
+from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing
 from litellm.proxy.common_utils.openai_endpoint_utils import (
     get_custom_llm_provider_from_request_body,
 )
@@ -145,7 +146,6 @@ async def create_file(
     from litellm.proxy.proxy_server import (
         add_litellm_data_to_request,
         general_settings,
-        get_custom_headers,
         llm_router,
         proxy_config,
         proxy_logging_obj,
@@ -234,7 +234,7 @@ async def create_file(
         api_base = hidden_params.get("api_base", None) or ""
 
         fastapi_response.headers.update(
-            get_custom_headers(
+            ProxyBaseLLMRequestProcessing.get_custom_headers(
                 user_api_key_dict=user_api_key_dict,
                 model_id=model_id,
                 cache_key=cache_key,
@@ -309,7 +309,6 @@ async def get_file_content(
     from litellm.proxy.proxy_server import (
         add_litellm_data_to_request,
         general_settings,
-        get_custom_headers,
         proxy_config,
         proxy_logging_obj,
         version,
@@ -351,7 +350,7 @@ async def get_file_content(
         api_base = hidden_params.get("api_base", None) or ""
 
         fastapi_response.headers.update(
-            get_custom_headers(
+            ProxyBaseLLMRequestProcessing.get_custom_headers(
                 user_api_key_dict=user_api_key_dict,
                 model_id=model_id,
                 cache_key=cache_key,
@@ -437,7 +436,6 @@ async def get_file(
     from litellm.proxy.proxy_server import (
         add_litellm_data_to_request,
         general_settings,
-        get_custom_headers,
         proxy_config,
         proxy_logging_obj,
         version,
@@ -477,7 +475,7 @@ async def get_file(
         api_base = hidden_params.get("api_base", None) or ""
 
         fastapi_response.headers.update(
-            get_custom_headers(
+            ProxyBaseLLMRequestProcessing.get_custom_headers(
                 user_api_key_dict=user_api_key_dict,
                 model_id=model_id,
                 cache_key=cache_key,
@@ -554,7 +552,6 @@ async def delete_file(
     from litellm.proxy.proxy_server import (
         add_litellm_data_to_request,
         general_settings,
-        get_custom_headers,
         proxy_config,
         proxy_logging_obj,
         version,
@@ -595,7 +592,7 @@ async def delete_file(
         api_base = hidden_params.get("api_base", None) or ""
 
         fastapi_response.headers.update(
-            get_custom_headers(
+            ProxyBaseLLMRequestProcessing.get_custom_headers(
                 user_api_key_dict=user_api_key_dict,
                 model_id=model_id,
                 cache_key=cache_key,
@@ -671,7 +668,6 @@ async def list_files(
     from litellm.proxy.proxy_server import (
         add_litellm_data_to_request,
         general_settings,
-        get_custom_headers,
         proxy_config,
         proxy_logging_obj,
         version,
@@ -712,7 +708,7 @@ async def list_files(
         api_base = hidden_params.get("api_base", None) or ""
 
         fastapi_response.headers.update(
-            get_custom_headers(
+            ProxyBaseLLMRequestProcessing.get_custom_headers(
                 user_api_key_dict=user_api_key_dict,
                 model_id=model_id,
                 cache_key=cache_key,
diff --git a/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py b/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py
index db11cb5b6e..b13d614678 100644
--- a/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py
+++ b/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py
@@ -3,8 +3,8 @@ import asyncio
 import json
 from base64 import b64encode
 from datetime import datetime
-from typing import List, Optional
-from urllib.parse import urlparse
+from typing import Dict, List, Optional, Union
+from urllib.parse import parse_qs, urlencode, urlparse
 
 import httpx
 from fastapi import APIRouter, Depends, HTTPException, Request, Response, status
@@ -23,6 +23,7 @@ from litellm.proxy._types import (
     UserAPIKeyAuth,
 )
 from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
+from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing
 from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
 from litellm.secret_managers.main import get_secret_str
 from litellm.types.llms.custom_http import httpxSpecialProvider
@@ -106,7 +107,6 @@ async def chat_completion_pass_through_endpoint(  # noqa: PLR0915
     from litellm.proxy.proxy_server import (
         add_litellm_data_to_request,
         general_settings,
-        get_custom_headers,
         llm_router,
         proxy_config,
         proxy_logging_obj,
@@ -231,7 +231,7 @@ async def chat_completion_pass_through_endpoint(  # noqa: PLR0915
         verbose_proxy_logger.debug("final response: %s", response)
 
         fastapi_response.headers.update(
-            get_custom_headers(
+            ProxyBaseLLMRequestProcessing.get_custom_headers(
                 user_api_key_dict=user_api_key_dict,
                 model_id=model_id,
                 cache_key=cache_key,
@@ -307,6 +307,21 @@ class HttpPassThroughEndpointHelpers:
             return EndpointType.ANTHROPIC
         return EndpointType.GENERIC
 
+    @staticmethod
+    def get_merged_query_parameters(
+        existing_url: httpx.URL, request_query_params: Dict[str, Union[str, list]]
+    ) -> Dict[str, Union[str, List[str]]]:
+        # Get the existing query params from the target URL
+        existing_query_string = existing_url.query.decode("utf-8")
+        existing_query_params = parse_qs(existing_query_string)
+
+        # parse_qs returns a dict where each value is a list, so let's flatten it
+        updated_existing_query_params = {
+            k: v[0] if len(v) == 1 else v for k, v in existing_query_params.items()
+        }
+        # Merge the query params, giving priority to the existing ones
+        return {**request_query_params, **updated_existing_query_params}
+
     @staticmethod
     async def _make_non_streaming_http_request(
         request: Request,
@@ -346,6 +361,7 @@ async def pass_through_request(  # noqa: PLR0915
     user_api_key_dict: UserAPIKeyAuth,
     custom_body: Optional[dict] = None,
     forward_headers: Optional[bool] = False,
+    merge_query_params: Optional[bool] = False,
     query_params: Optional[dict] = None,
     stream: Optional[bool] = None,
 ):
@@ -361,6 +377,18 @@ async def pass_through_request(  # noqa: PLR0915
             request=request, headers=headers, forward_headers=forward_headers
         )
 
+        if merge_query_params:
+
+            # Create a new URL with the merged query params
+            url = url.copy_with(
+                query=urlencode(
+                    HttpPassThroughEndpointHelpers.get_merged_query_parameters(
+                        existing_url=url,
+                        request_query_params=dict(request.query_params),
+                    )
+                ).encode("ascii")
+            )
+
         endpoint_type: EndpointType = HttpPassThroughEndpointHelpers.get_endpoint_type(
             str(url)
         )
@@ -657,6 +685,7 @@ def create_pass_through_route(
     target: str,
     custom_headers: Optional[dict] = None,
     _forward_headers: Optional[bool] = False,
+    _merge_query_params: Optional[bool] = False,
     dependencies: Optional[List] = None,
 ):
     # check if target is an adapter.py or a url
@@ -703,6 +732,7 @@ def create_pass_through_route(
                 custom_headers=custom_headers or {},
                 user_api_key_dict=user_api_key_dict,
                 forward_headers=_forward_headers,
+                merge_query_params=_merge_query_params,
                 query_params=query_params,
                 stream=stream,
                 custom_body=custom_body,
@@ -732,6 +762,7 @@ async def initialize_pass_through_endpoints(pass_through_endpoints: list):
             custom_headers=_custom_headers
         )
         _forward_headers = endpoint.get("forward_headers", None)
+        _merge_query_params = endpoint.get("merge_query_params", None)
         _auth = endpoint.get("auth", None)
         _dependencies = None
         if _auth is not None and str(_auth).lower() == "true":
@@ -753,7 +784,12 @@ async def initialize_pass_through_endpoints(pass_through_endpoints: list):
         app.add_api_route(  # type: ignore
             path=_path,
             endpoint=create_pass_through_route(  # type: ignore
-                _path, _target, _custom_headers, _forward_headers, _dependencies
+                _path,
+                _target,
+                _custom_headers,
+                _forward_headers,
+                _merge_query_params,
+                _dependencies,
             ),
             methods=["GET", "POST", "PUT", "DELETE", "PATCH"],
             dependencies=_dependencies,
diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index b64bd84aad..c5add9ee09 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -1,10 +1,6 @@
 model_list:
-  - model_name: thinking-us.anthropic.claude-3-7-sonnet-20250219-v1:0
+  - model_name: gpt-4o
     litellm_params:
-      model: bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0
-      thinking: {"type": "enabled", "budget_tokens": 1024}
-      max_tokens: 1080
-      merge_reasoning_content_in_choices: true
-
+      model: gpt-4o
 
 
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index de1baad96f..cfd722def6 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -114,6 +114,7 @@ from litellm.litellm_core_utils.core_helpers import (
     _get_parent_otel_span_from_kwargs,
     get_litellm_metadata_from_kwargs,
 )
+from litellm.litellm_core_utils.credential_accessor import CredentialAccessor
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
 from litellm.proxy._types import *
@@ -138,12 +139,9 @@ from litellm.proxy.batches_endpoints.endpoints import router as batches_router
 
 ## Import All Misc routes here ##
 from litellm.proxy.caching_routes import router as caching_router
+from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing
 from litellm.proxy.common_utils.admin_ui_utils import html_form
-from litellm.proxy.common_utils.callback_utils import (
-    get_logging_caching_headers,
-    get_remaining_tokens_and_requests_from_request_data,
-    initialize_callbacks_on_proxy,
-)
+from litellm.proxy.common_utils.callback_utils import initialize_callbacks_on_proxy
 from litellm.proxy.common_utils.debug_utils import init_verbose_loggers
 from litellm.proxy.common_utils.debug_utils import router as debugging_endpoints_router
 from litellm.proxy.common_utils.encrypt_decrypt_utils import (
@@ -164,6 +162,7 @@ from litellm.proxy.common_utils.openai_endpoint_utils import (
 from litellm.proxy.common_utils.proxy_state import ProxyState
 from litellm.proxy.common_utils.reset_budget_job import ResetBudgetJob
 from litellm.proxy.common_utils.swagger_utils import ERROR_RESPONSES
+from litellm.proxy.credential_endpoints.endpoints import router as credential_router
 from litellm.proxy.fine_tuning_endpoints.endpoints import router as fine_tuning_router
 from litellm.proxy.fine_tuning_endpoints.endpoints import set_fine_tuning_config
 from litellm.proxy.guardrails.guardrail_endpoints import router as guardrails_router
@@ -234,6 +233,7 @@ from litellm.proxy.pass_through_endpoints.pass_through_endpoints import (
     router as pass_through_router,
 )
 from litellm.proxy.rerank_endpoints.endpoints import router as rerank_router
+from litellm.proxy.response_api_endpoints.endpoints import router as response_router
 from litellm.proxy.route_llm_request import route_request
 from litellm.proxy.spend_tracking.spend_management_endpoints import (
     router as spend_management_router,
@@ -287,7 +287,7 @@ from litellm.types.llms.openai import HttpxBinaryResponseContent
 from litellm.types.router import DeploymentTypedDict
 from litellm.types.router import ModelInfo as RouterModelInfo
 from litellm.types.router import RouterGeneralSettings, updateDeployment
-from litellm.types.utils import CustomHuggingfaceTokenizer
+from litellm.types.utils import CredentialItem, CustomHuggingfaceTokenizer
 from litellm.types.utils import ModelInfo as ModelMapInfo
 from litellm.types.utils import RawRequestTypedDict, StandardLoggingPayload
 from litellm.utils import _add_custom_logger_callback_to_specific_event
@@ -781,69 +781,6 @@ db_writer_client: Optional[AsyncHTTPHandler] = None
 ### logger ###
 
 
-def get_custom_headers(
-    *,
-    user_api_key_dict: UserAPIKeyAuth,
-    call_id: Optional[str] = None,
-    model_id: Optional[str] = None,
-    cache_key: Optional[str] = None,
-    api_base: Optional[str] = None,
-    version: Optional[str] = None,
-    model_region: Optional[str] = None,
-    response_cost: Optional[Union[float, str]] = None,
-    hidden_params: Optional[dict] = None,
-    fastest_response_batch_completion: Optional[bool] = None,
-    request_data: Optional[dict] = {},
-    timeout: Optional[Union[float, int, httpx.Timeout]] = None,
-    **kwargs,
-) -> dict:
-    exclude_values = {"", None, "None"}
-    hidden_params = hidden_params or {}
-    headers = {
-        "x-litellm-call-id": call_id,
-        "x-litellm-model-id": model_id,
-        "x-litellm-cache-key": cache_key,
-        "x-litellm-model-api-base": api_base,
-        "x-litellm-version": version,
-        "x-litellm-model-region": model_region,
-        "x-litellm-response-cost": str(response_cost),
-        "x-litellm-key-tpm-limit": str(user_api_key_dict.tpm_limit),
-        "x-litellm-key-rpm-limit": str(user_api_key_dict.rpm_limit),
-        "x-litellm-key-max-budget": str(user_api_key_dict.max_budget),
-        "x-litellm-key-spend": str(user_api_key_dict.spend),
-        "x-litellm-response-duration-ms": str(hidden_params.get("_response_ms", None)),
-        "x-litellm-overhead-duration-ms": str(
-            hidden_params.get("litellm_overhead_time_ms", None)
-        ),
-        "x-litellm-fastest_response_batch_completion": (
-            str(fastest_response_batch_completion)
-            if fastest_response_batch_completion is not None
-            else None
-        ),
-        "x-litellm-timeout": str(timeout) if timeout is not None else None,
-        **{k: str(v) for k, v in kwargs.items()},
-    }
-    if request_data:
-        remaining_tokens_header = get_remaining_tokens_and_requests_from_request_data(
-            request_data
-        )
-        headers.update(remaining_tokens_header)
-
-        logging_caching_headers = get_logging_caching_headers(request_data)
-        if logging_caching_headers:
-            headers.update(logging_caching_headers)
-
-    try:
-        return {
-            key: str(value)
-            for key, value in headers.items()
-            if value not in exclude_values
-        }
-    except Exception as e:
-        verbose_proxy_logger.error(f"Error setting custom headers: {e}")
-        return {}
-
-
 async def check_request_disconnection(request: Request, llm_api_call_task):
     """
     Asynchronously checks if the request is disconnected at regular intervals.
@@ -1723,6 +1660,16 @@ class ProxyConfig:
             )
             return {}
 
+    def load_credential_list(self, config: dict) -> List[CredentialItem]:
+        """
+        Load the credential list from the database
+        """
+        credential_list_dict = config.get("credential_list")
+        credential_list = []
+        if credential_list_dict:
+            credential_list = [CredentialItem(**cred) for cred in credential_list_dict]
+        return credential_list
+
     async def load_config(  # noqa: PLR0915
         self, router: Optional[litellm.Router], config_file_path: str
     ):
@@ -2186,6 +2133,10 @@ class ProxyConfig:
             init_guardrails_v2(
                 all_guardrails=guardrails_v2, config_file_path=config_file_path
             )
+
+        ## CREDENTIALS
+        credential_list_dict = self.load_credential_list(config=config)
+        litellm.credential_list = credential_list_dict
         return router, router.get_model_list(), general_settings
 
     def _load_alerting_settings(self, general_settings: dict):
@@ -2832,6 +2783,60 @@ class ProxyConfig:
                 )
             )
 
+    def decrypt_credentials(self, credential: Union[dict, BaseModel]) -> CredentialItem:
+        if isinstance(credential, dict):
+            credential_object = CredentialItem(**credential)
+        elif isinstance(credential, BaseModel):
+            credential_object = CredentialItem(**credential.model_dump())
+
+        decrypted_credential_values = {}
+        for k, v in credential_object.credential_values.items():
+            decrypted_credential_values[k] = decrypt_value_helper(v) or v
+
+        credential_object.credential_values = decrypted_credential_values
+        return credential_object
+
+    async def delete_credentials(self, db_credentials: List[CredentialItem]):
+        """
+        Create all-up list of db credentials + local credentials
+        Compare to the litellm.credential_list
+        Delete any from litellm.credential_list that are not in the all-up list
+        """
+        ## CONFIG credentials ##
+        config = await self.get_config(config_file_path=user_config_file_path)
+        credential_list = self.load_credential_list(config=config)
+
+        ## COMBINED LIST ##
+        combined_list = db_credentials + credential_list
+
+        ## DELETE ##
+        idx_to_delete = []
+        for idx, credential in enumerate(litellm.credential_list):
+            if credential.credential_name not in [
+                cred.credential_name for cred in combined_list
+            ]:
+                idx_to_delete.append(idx)
+        for idx in sorted(idx_to_delete, reverse=True):
+            litellm.credential_list.pop(idx)
+
+    async def get_credentials(self, prisma_client: PrismaClient):
+        try:
+            credentials = await prisma_client.db.litellm_credentialstable.find_many()
+            credentials = [self.decrypt_credentials(cred) for cred in credentials]
+            await self.delete_credentials(
+                credentials
+            )  # delete credentials that are not in the all-up list
+            CredentialAccessor.upsert_credentials(
+                credentials
+            )  # upsert credentials that are in the all-up list
+        except Exception as e:
+            verbose_proxy_logger.exception(
+                "litellm.proxy_server.py::get_credentials() - Error getting credentials from DB - {}".format(
+                    str(e)
+                )
+            )
+            return []
+
 
 proxy_config = ProxyConfig()
 
@@ -3253,6 +3258,14 @@ class ProxyStartupEvent:
                 prisma_client=prisma_client, proxy_logging_obj=proxy_logging_obj
             )
 
+        ### GET STORED CREDENTIALS ###
+        scheduler.add_job(
+            proxy_config.get_credentials,
+            "interval",
+            seconds=10,
+            args=[prisma_client],
+        )
+        await proxy_config.get_credentials(prisma_client=prisma_client)
         if (
             proxy_logging_obj is not None
             and proxy_logging_obj.slack_alerting_instance.alerting is not None
@@ -3475,169 +3488,28 @@ async def chat_completion(  # noqa: PLR0915
 
     """
     global general_settings, user_debug, proxy_logging_obj, llm_model_list
-
-    data = {}
+    global user_temperature, user_request_timeout, user_max_tokens, user_api_base
+    data = await _read_request_body(request=request)
+    base_llm_response_processor = ProxyBaseLLMRequestProcessing(data=data)
     try:
-        data = await _read_request_body(request=request)
-        verbose_proxy_logger.debug(
-            "Request received by LiteLLM:\n{}".format(json.dumps(data, indent=4)),
-        )
-
-        data = await add_litellm_data_to_request(
-            data=data,
+        return await base_llm_response_processor.base_process_llm_request(
             request=request,
-            general_settings=general_settings,
+            fastapi_response=fastapi_response,
             user_api_key_dict=user_api_key_dict,
-            version=version,
-            proxy_config=proxy_config,
-        )
-
-        data["model"] = (
-            general_settings.get("completion_model", None)  # server default
-            or user_model  # model name passed via cli args
-            or model  # for azure deployments
-            or data.get("model", None)  # default passed in http request
-        )
-
-        global user_temperature, user_request_timeout, user_max_tokens, user_api_base
-        # override with user settings, these are params passed via cli
-        if user_temperature:
-            data["temperature"] = user_temperature
-        if user_request_timeout:
-            data["request_timeout"] = user_request_timeout
-        if user_max_tokens:
-            data["max_tokens"] = user_max_tokens
-        if user_api_base:
-            data["api_base"] = user_api_base
-
-        ### MODEL ALIAS MAPPING ###
-        # check if model name in model alias map
-        # get the actual model name
-        if isinstance(data["model"], str) and data["model"] in litellm.model_alias_map:
-            data["model"] = litellm.model_alias_map[data["model"]]
-
-        ### CALL HOOKS ### - modify/reject incoming data before calling the model
-        data = await proxy_logging_obj.pre_call_hook(  # type: ignore
-            user_api_key_dict=user_api_key_dict, data=data, call_type="completion"
-        )
-
-        ## LOGGING OBJECT ## - initialize logging object for logging success/failure events for call
-        ## IMPORTANT Note: - initialize this before running pre-call checks. Ensures we log rejected requests to langfuse.
-        data["litellm_call_id"] = request.headers.get(
-            "x-litellm-call-id", str(uuid.uuid4())
-        )
-        logging_obj, data = litellm.utils.function_setup(
-            original_function="acompletion",
-            rules_obj=litellm.utils.Rules(),
-            start_time=datetime.now(),
-            **data,
-        )
-
-        data["litellm_logging_obj"] = logging_obj
-
-        tasks = []
-        tasks.append(
-            proxy_logging_obj.during_call_hook(
-                data=data,
-                user_api_key_dict=user_api_key_dict,
-                call_type="completion",
-            )
-        )
-
-        ### ROUTE THE REQUEST ###
-        # Do not change this - it should be a constant time fetch - ALWAYS
-        llm_call = await route_request(
-            data=data,
             route_type="acompletion",
+            proxy_logging_obj=proxy_logging_obj,
             llm_router=llm_router,
+            general_settings=general_settings,
+            proxy_config=proxy_config,
+            select_data_generator=select_data_generator,
+            model=model,
             user_model=user_model,
+            user_temperature=user_temperature,
+            user_request_timeout=user_request_timeout,
+            user_max_tokens=user_max_tokens,
+            user_api_base=user_api_base,
+            version=version,
         )
-        tasks.append(llm_call)
-
-        # wait for call to end
-        llm_responses = asyncio.gather(
-            *tasks
-        )  # run the moderation check in parallel to the actual llm api call
-
-        responses = await llm_responses
-
-        response = responses[1]
-
-        hidden_params = getattr(response, "_hidden_params", {}) or {}
-        model_id = hidden_params.get("model_id", None) or ""
-        cache_key = hidden_params.get("cache_key", None) or ""
-        api_base = hidden_params.get("api_base", None) or ""
-        response_cost = hidden_params.get("response_cost", None) or ""
-        fastest_response_batch_completion = hidden_params.get(
-            "fastest_response_batch_completion", None
-        )
-        additional_headers: dict = hidden_params.get("additional_headers", {}) or {}
-
-        # Post Call Processing
-        if llm_router is not None:
-            data["deployment"] = llm_router.get_deployment(model_id=model_id)
-        asyncio.create_task(
-            proxy_logging_obj.update_request_status(
-                litellm_call_id=data.get("litellm_call_id", ""), status="success"
-            )
-        )
-        if (
-            "stream" in data and data["stream"] is True
-        ):  # use generate_responses to stream responses
-            custom_headers = get_custom_headers(
-                user_api_key_dict=user_api_key_dict,
-                call_id=logging_obj.litellm_call_id,
-                model_id=model_id,
-                cache_key=cache_key,
-                api_base=api_base,
-                version=version,
-                response_cost=response_cost,
-                model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
-                fastest_response_batch_completion=fastest_response_batch_completion,
-                request_data=data,
-                hidden_params=hidden_params,
-                **additional_headers,
-            )
-            selected_data_generator = select_data_generator(
-                response=response,
-                user_api_key_dict=user_api_key_dict,
-                request_data=data,
-            )
-            return StreamingResponse(
-                selected_data_generator,
-                media_type="text/event-stream",
-                headers=custom_headers,
-            )
-
-        ### CALL HOOKS ### - modify outgoing data
-        response = await proxy_logging_obj.post_call_success_hook(
-            data=data, user_api_key_dict=user_api_key_dict, response=response
-        )
-
-        hidden_params = (
-            getattr(response, "_hidden_params", {}) or {}
-        )  # get any updated response headers
-        additional_headers = hidden_params.get("additional_headers", {}) or {}
-
-        fastapi_response.headers.update(
-            get_custom_headers(
-                user_api_key_dict=user_api_key_dict,
-                call_id=logging_obj.litellm_call_id,
-                model_id=model_id,
-                cache_key=cache_key,
-                api_base=api_base,
-                version=version,
-                response_cost=response_cost,
-                model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
-                fastest_response_batch_completion=fastest_response_batch_completion,
-                request_data=data,
-                hidden_params=hidden_params,
-                **additional_headers,
-            )
-        )
-        await check_response_size_is_safe(response=response)
-
-        return response
     except RejectedRequestError as e:
         _data = e.request_data
         await proxy_logging_obj.post_call_failure_hook(
@@ -3672,55 +3544,10 @@ async def chat_completion(  # noqa: PLR0915
         _chat_response.usage = _usage  # type: ignore
         return _chat_response
     except Exception as e:
-        verbose_proxy_logger.exception(
-            f"litellm.proxy.proxy_server.chat_completion(): Exception occured - {str(e)}"
-        )
-        await proxy_logging_obj.post_call_failure_hook(
-            user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
-        )
-        litellm_debug_info = getattr(e, "litellm_debug_info", "")
-        verbose_proxy_logger.debug(
-            "\033[1;31mAn error occurred: %s %s\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`",
-            e,
-            litellm_debug_info,
-        )
-
-        timeout = getattr(
-            e, "timeout", None
-        )  # returns the timeout set by the wrapper. Used for testing if model-specific timeout are set correctly
-        _litellm_logging_obj: Optional[LiteLLMLoggingObj] = data.get(
-            "litellm_logging_obj", None
-        )
-        custom_headers = get_custom_headers(
+        raise await base_llm_response_processor._handle_llm_api_exception(
+            e=e,
             user_api_key_dict=user_api_key_dict,
-            call_id=(
-                _litellm_logging_obj.litellm_call_id if _litellm_logging_obj else None
-            ),
-            version=version,
-            response_cost=0,
-            model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
-            request_data=data,
-            timeout=timeout,
-        )
-        headers = getattr(e, "headers", {}) or {}
-        headers.update(custom_headers)
-
-        if isinstance(e, HTTPException):
-            raise ProxyException(
-                message=getattr(e, "detail", str(e)),
-                type=getattr(e, "type", "None"),
-                param=getattr(e, "param", "None"),
-                code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
-                headers=headers,
-            )
-        error_msg = f"{str(e)}"
-        raise ProxyException(
-            message=getattr(e, "message", error_msg),
-            type=getattr(e, "type", "None"),
-            param=getattr(e, "param", "None"),
-            openai_code=getattr(e, "code", None),
-            code=getattr(e, "status_code", 500),
-            headers=headers,
+            proxy_logging_obj=proxy_logging_obj,
         )
 
 
@@ -3837,7 +3664,7 @@ async def completion(  # noqa: PLR0915
         if (
             "stream" in data and data["stream"] is True
         ):  # use generate_responses to stream responses
-            custom_headers = get_custom_headers(
+            custom_headers = ProxyBaseLLMRequestProcessing.get_custom_headers(
                 user_api_key_dict=user_api_key_dict,
                 call_id=litellm_call_id,
                 model_id=model_id,
@@ -3865,7 +3692,7 @@ async def completion(  # noqa: PLR0915
         )
 
         fastapi_response.headers.update(
-            get_custom_headers(
+            ProxyBaseLLMRequestProcessing.get_custom_headers(
                 user_api_key_dict=user_api_key_dict,
                 call_id=litellm_call_id,
                 model_id=model_id,
@@ -4096,7 +3923,7 @@ async def embeddings(  # noqa: PLR0915
         additional_headers: dict = hidden_params.get("additional_headers", {}) or {}
 
         fastapi_response.headers.update(
-            get_custom_headers(
+            ProxyBaseLLMRequestProcessing.get_custom_headers(
                 user_api_key_dict=user_api_key_dict,
                 model_id=model_id,
                 cache_key=cache_key,
@@ -4224,7 +4051,7 @@ async def image_generation(
         litellm_call_id = hidden_params.get("litellm_call_id", None) or ""
 
         fastapi_response.headers.update(
-            get_custom_headers(
+            ProxyBaseLLMRequestProcessing.get_custom_headers(
                 user_api_key_dict=user_api_key_dict,
                 model_id=model_id,
                 cache_key=cache_key,
@@ -4345,7 +4172,7 @@ async def audio_speech(
             async for chunk in _generator:
                 yield chunk
 
-        custom_headers = get_custom_headers(
+        custom_headers = ProxyBaseLLMRequestProcessing.get_custom_headers(
             user_api_key_dict=user_api_key_dict,
             model_id=model_id,
             cache_key=cache_key,
@@ -4486,7 +4313,7 @@ async def audio_transcriptions(
         additional_headers: dict = hidden_params.get("additional_headers", {}) or {}
 
         fastapi_response.headers.update(
-            get_custom_headers(
+            ProxyBaseLLMRequestProcessing.get_custom_headers(
                 user_api_key_dict=user_api_key_dict,
                 model_id=model_id,
                 cache_key=cache_key,
@@ -4638,7 +4465,7 @@ async def get_assistants(
         api_base = hidden_params.get("api_base", None) or ""
 
         fastapi_response.headers.update(
-            get_custom_headers(
+            ProxyBaseLLMRequestProcessing.get_custom_headers(
                 user_api_key_dict=user_api_key_dict,
                 model_id=model_id,
                 cache_key=cache_key,
@@ -4737,7 +4564,7 @@ async def create_assistant(
         api_base = hidden_params.get("api_base", None) or ""
 
         fastapi_response.headers.update(
-            get_custom_headers(
+            ProxyBaseLLMRequestProcessing.get_custom_headers(
                 user_api_key_dict=user_api_key_dict,
                 model_id=model_id,
                 cache_key=cache_key,
@@ -4834,7 +4661,7 @@ async def delete_assistant(
         api_base = hidden_params.get("api_base", None) or ""
 
         fastapi_response.headers.update(
-            get_custom_headers(
+            ProxyBaseLLMRequestProcessing.get_custom_headers(
                 user_api_key_dict=user_api_key_dict,
                 model_id=model_id,
                 cache_key=cache_key,
@@ -4931,7 +4758,7 @@ async def create_threads(
         api_base = hidden_params.get("api_base", None) or ""
 
         fastapi_response.headers.update(
-            get_custom_headers(
+            ProxyBaseLLMRequestProcessing.get_custom_headers(
                 user_api_key_dict=user_api_key_dict,
                 model_id=model_id,
                 cache_key=cache_key,
@@ -5027,7 +4854,7 @@ async def get_thread(
         api_base = hidden_params.get("api_base", None) or ""
 
         fastapi_response.headers.update(
-            get_custom_headers(
+            ProxyBaseLLMRequestProcessing.get_custom_headers(
                 user_api_key_dict=user_api_key_dict,
                 model_id=model_id,
                 cache_key=cache_key,
@@ -5126,7 +4953,7 @@ async def add_messages(
         api_base = hidden_params.get("api_base", None) or ""
 
         fastapi_response.headers.update(
-            get_custom_headers(
+            ProxyBaseLLMRequestProcessing.get_custom_headers(
                 user_api_key_dict=user_api_key_dict,
                 model_id=model_id,
                 cache_key=cache_key,
@@ -5221,7 +5048,7 @@ async def get_messages(
         api_base = hidden_params.get("api_base", None) or ""
 
         fastapi_response.headers.update(
-            get_custom_headers(
+            ProxyBaseLLMRequestProcessing.get_custom_headers(
                 user_api_key_dict=user_api_key_dict,
                 model_id=model_id,
                 cache_key=cache_key,
@@ -5330,7 +5157,7 @@ async def run_thread(
         api_base = hidden_params.get("api_base", None) or ""
 
         fastapi_response.headers.update(
-            get_custom_headers(
+            ProxyBaseLLMRequestProcessing.get_custom_headers(
                 user_api_key_dict=user_api_key_dict,
                 model_id=model_id,
                 cache_key=cache_key,
@@ -5453,7 +5280,7 @@ async def moderations(
         api_base = hidden_params.get("api_base", None) or ""
 
         fastapi_response.headers.update(
-            get_custom_headers(
+            ProxyBaseLLMRequestProcessing.get_custom_headers(
                 user_api_key_dict=user_api_key_dict,
                 model_id=model_id,
                 cache_key=cache_key,
@@ -8597,9 +8424,11 @@ async def get_routes():
 
 
 app.include_router(router)
+app.include_router(response_router)
 app.include_router(batches_router)
 app.include_router(rerank_router)
 app.include_router(fine_tuning_router)
+app.include_router(credential_router)
 app.include_router(vertex_router)
 app.include_router(llm_passthrough_router)
 app.include_router(anthropic_router)
diff --git a/litellm/proxy/rerank_endpoints/endpoints.py b/litellm/proxy/rerank_endpoints/endpoints.py
index 5599ced640..ba9046b3c2 100644
--- a/litellm/proxy/rerank_endpoints/endpoints.py
+++ b/litellm/proxy/rerank_endpoints/endpoints.py
@@ -7,10 +7,12 @@ from fastapi.responses import ORJSONResponse
 from litellm._logging import verbose_proxy_logger
 from litellm.proxy._types import *
 from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
+from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing
 
 router = APIRouter()
 import asyncio
 
+
 @router.post(
     "/v2/rerank",
     dependencies=[Depends(user_api_key_auth)],
@@ -37,7 +39,6 @@ async def rerank(
     from litellm.proxy.proxy_server import (
         add_litellm_data_to_request,
         general_settings,
-        get_custom_headers,
         llm_router,
         proxy_config,
         proxy_logging_obj,
@@ -89,7 +90,7 @@ async def rerank(
         api_base = hidden_params.get("api_base", None) or ""
         additional_headers = hidden_params.get("additional_headers", None) or {}
         fastapi_response.headers.update(
-            get_custom_headers(
+            ProxyBaseLLMRequestProcessing.get_custom_headers(
                 user_api_key_dict=user_api_key_dict,
                 model_id=model_id,
                 cache_key=cache_key,
diff --git a/litellm/proxy/response_api_endpoints/endpoints.py b/litellm/proxy/response_api_endpoints/endpoints.py
new file mode 100644
index 0000000000..8649276b0e
--- /dev/null
+++ b/litellm/proxy/response_api_endpoints/endpoints.py
@@ -0,0 +1,80 @@
+from fastapi import APIRouter, Depends, Request, Response
+
+from litellm.proxy._types import *
+from litellm.proxy.auth.user_api_key_auth import UserAPIKeyAuth, user_api_key_auth
+from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing
+
+router = APIRouter()
+
+
+@router.post(
+    "/v1/responses",
+    dependencies=[Depends(user_api_key_auth)],
+    tags=["responses"],
+)
+@router.post(
+    "/responses",
+    dependencies=[Depends(user_api_key_auth)],
+    tags=["responses"],
+)
+async def responses_api(
+    request: Request,
+    fastapi_response: Response,
+    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
+):
+    """
+    Follows the OpenAI Responses API spec: https://platform.openai.com/docs/api-reference/responses
+
+    ```bash
+    curl -X POST http://localhost:4000/v1/responses \
+    -H "Content-Type: application/json" \
+    -H "Authorization: Bearer sk-1234" \
+    -d '{
+        "model": "gpt-4o",
+        "input": "Tell me about AI"
+    }'
+    ```
+    """
+    from litellm.proxy.proxy_server import (
+        _read_request_body,
+        general_settings,
+        llm_router,
+        proxy_config,
+        proxy_logging_obj,
+        select_data_generator,
+        user_api_base,
+        user_max_tokens,
+        user_model,
+        user_request_timeout,
+        user_temperature,
+        version,
+    )
+
+    data = await _read_request_body(request=request)
+    processor = ProxyBaseLLMRequestProcessing(data=data)
+    try:
+        return await processor.base_process_llm_request(
+            request=request,
+            fastapi_response=fastapi_response,
+            user_api_key_dict=user_api_key_dict,
+            route_type="aresponses",
+            proxy_logging_obj=proxy_logging_obj,
+            llm_router=llm_router,
+            general_settings=general_settings,
+            proxy_config=proxy_config,
+            select_data_generator=select_data_generator,
+            model=None,
+            user_model=user_model,
+            user_temperature=user_temperature,
+            user_request_timeout=user_request_timeout,
+            user_max_tokens=user_max_tokens,
+            user_api_base=user_api_base,
+            version=version,
+        )
+    except Exception as e:
+        raise await processor._handle_llm_api_exception(
+            e=e,
+            user_api_key_dict=user_api_key_dict,
+            proxy_logging_obj=proxy_logging_obj,
+            version=version,
+        )
diff --git a/litellm/proxy/route_llm_request.py b/litellm/proxy/route_llm_request.py
index 6683a18b9a..ac9332b219 100644
--- a/litellm/proxy/route_llm_request.py
+++ b/litellm/proxy/route_llm_request.py
@@ -21,6 +21,7 @@ ROUTE_ENDPOINT_MAPPING = {
     "atranscription": "/audio/transcriptions",
     "amoderation": "/moderations",
     "arerank": "/rerank",
+    "aresponses": "/responses",
 }
 
 
@@ -45,6 +46,7 @@ async def route_request(
         "atranscription",
         "amoderation",
         "arerank",
+        "aresponses",
         "_arealtime",  # private function for realtime API
     ],
 ):
diff --git a/litellm/proxy/schema.prisma b/litellm/proxy/schema.prisma
index fedbb271da..e453e74b46 100644
--- a/litellm/proxy/schema.prisma
+++ b/litellm/proxy/schema.prisma
@@ -29,6 +29,18 @@ model LiteLLM_BudgetTable {
   organization_membership LiteLLM_OrganizationMembership[] // budgets of Users within a Organization 
 }
 
+// Models on proxy
+model LiteLLM_CredentialsTable {
+  credential_id String @id @default(uuid())
+  credential_name String @unique
+  credential_values Json
+  credential_info Json? 
+  created_at    DateTime               @default(now()) @map("created_at")
+  created_by String
+  updated_at    DateTime               @default(now()) @updatedAt @map("updated_at")
+  updated_by String
+}
+
 // Models on proxy
 model LiteLLM_ProxyModelTable {
   model_id String @id @default(uuid())
diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index 525c4e684f..08afcf23c1 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -537,6 +537,7 @@ class ProxyLogging:
         user_api_key_dict: UserAPIKeyAuth,
         call_type: Literal[
             "completion",
+            "responses",
             "embeddings",
             "image_generation",
             "moderation",
diff --git a/litellm/responses/main.py b/litellm/responses/main.py
new file mode 100644
index 0000000000..ce70292e96
--- /dev/null
+++ b/litellm/responses/main.py
@@ -0,0 +1,217 @@
+import asyncio
+import contextvars
+from functools import partial
+from typing import Any, Dict, Iterable, List, Literal, Optional, Union
+
+import httpx
+
+import litellm
+from litellm.constants import request_timeout
+from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
+from litellm.llms.custom_httpx.llm_http_handler import BaseLLMHTTPHandler
+from litellm.responses.utils import ResponsesAPIRequestUtils
+from litellm.types.llms.openai import (
+    Reasoning,
+    ResponseIncludable,
+    ResponseInputParam,
+    ResponsesAPIOptionalRequestParams,
+    ResponsesAPIResponse,
+    ResponseTextConfigParam,
+    ToolChoice,
+    ToolParam,
+)
+from litellm.types.router import GenericLiteLLMParams
+from litellm.utils import ProviderConfigManager, client
+
+from .streaming_iterator import BaseResponsesAPIStreamingIterator
+
+####### ENVIRONMENT VARIABLES ###################
+# Initialize any necessary instances or variables here
+base_llm_http_handler = BaseLLMHTTPHandler()
+#################################################
+
+
+@client
+async def aresponses(
+    input: Union[str, ResponseInputParam],
+    model: str,
+    include: Optional[List[ResponseIncludable]] = None,
+    instructions: Optional[str] = None,
+    max_output_tokens: Optional[int] = None,
+    metadata: Optional[Dict[str, Any]] = None,
+    parallel_tool_calls: Optional[bool] = None,
+    previous_response_id: Optional[str] = None,
+    reasoning: Optional[Reasoning] = None,
+    store: Optional[bool] = None,
+    stream: Optional[bool] = None,
+    temperature: Optional[float] = None,
+    text: Optional[ResponseTextConfigParam] = None,
+    tool_choice: Optional[ToolChoice] = None,
+    tools: Optional[Iterable[ToolParam]] = None,
+    top_p: Optional[float] = None,
+    truncation: Optional[Literal["auto", "disabled"]] = None,
+    user: Optional[str] = None,
+    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+    # The extra values given here take precedence over values defined on the client or passed to this method.
+    extra_headers: Optional[Dict[str, Any]] = None,
+    extra_query: Optional[Dict[str, Any]] = None,
+    extra_body: Optional[Dict[str, Any]] = None,
+    timeout: Optional[Union[float, httpx.Timeout]] = None,
+    **kwargs,
+) -> Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]:
+    """
+    Async: Handles responses API requests by reusing the synchronous function
+    """
+    try:
+        loop = asyncio.get_event_loop()
+        kwargs["aresponses"] = True
+
+        func = partial(
+            responses,
+            input=input,
+            model=model,
+            include=include,
+            instructions=instructions,
+            max_output_tokens=max_output_tokens,
+            metadata=metadata,
+            parallel_tool_calls=parallel_tool_calls,
+            previous_response_id=previous_response_id,
+            reasoning=reasoning,
+            store=store,
+            stream=stream,
+            temperature=temperature,
+            text=text,
+            tool_choice=tool_choice,
+            tools=tools,
+            top_p=top_p,
+            truncation=truncation,
+            user=user,
+            extra_headers=extra_headers,
+            extra_query=extra_query,
+            extra_body=extra_body,
+            timeout=timeout,
+            **kwargs,
+        )
+
+        ctx = contextvars.copy_context()
+        func_with_context = partial(ctx.run, func)
+        init_response = await loop.run_in_executor(None, func_with_context)
+
+        if asyncio.iscoroutine(init_response):
+            response = await init_response
+        else:
+            response = init_response
+        return response
+    except Exception as e:
+        raise e
+
+
+@client
+def responses(
+    input: Union[str, ResponseInputParam],
+    model: str,
+    include: Optional[List[ResponseIncludable]] = None,
+    instructions: Optional[str] = None,
+    max_output_tokens: Optional[int] = None,
+    metadata: Optional[Dict[str, Any]] = None,
+    parallel_tool_calls: Optional[bool] = None,
+    previous_response_id: Optional[str] = None,
+    reasoning: Optional[Reasoning] = None,
+    store: Optional[bool] = None,
+    stream: Optional[bool] = None,
+    temperature: Optional[float] = None,
+    text: Optional[ResponseTextConfigParam] = None,
+    tool_choice: Optional[ToolChoice] = None,
+    tools: Optional[Iterable[ToolParam]] = None,
+    top_p: Optional[float] = None,
+    truncation: Optional[Literal["auto", "disabled"]] = None,
+    user: Optional[str] = None,
+    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+    # The extra values given here take precedence over values defined on the client or passed to this method.
+    extra_headers: Optional[Dict[str, Any]] = None,
+    extra_query: Optional[Dict[str, Any]] = None,
+    extra_body: Optional[Dict[str, Any]] = None,
+    timeout: Optional[Union[float, httpx.Timeout]] = None,
+    **kwargs,
+):
+    """
+    Synchronous version of the Responses API.
+    Uses the synchronous HTTP handler to make requests.
+    """
+    litellm_logging_obj: LiteLLMLoggingObj = kwargs.get("litellm_logging_obj")  # type: ignore
+    litellm_call_id: Optional[str] = kwargs.get("litellm_call_id", None)
+    _is_async = kwargs.pop("aresponses", False) is True
+
+    # get llm provider logic
+    litellm_params = GenericLiteLLMParams(**kwargs)
+    model, custom_llm_provider, dynamic_api_key, dynamic_api_base = (
+        litellm.get_llm_provider(
+            model=model,
+            custom_llm_provider=kwargs.get("custom_llm_provider", None),
+            api_base=litellm_params.api_base,
+            api_key=litellm_params.api_key,
+        )
+    )
+
+    # get provider config
+    responses_api_provider_config: Optional[BaseResponsesAPIConfig] = (
+        ProviderConfigManager.get_provider_responses_api_config(
+            model=model,
+            provider=litellm.LlmProviders(custom_llm_provider),
+        )
+    )
+
+    if responses_api_provider_config is None:
+        raise litellm.BadRequestError(
+            model=model,
+            llm_provider=custom_llm_provider,
+            message=f"Responses API not available for custom_llm_provider={custom_llm_provider}, model: {model}",
+        )
+
+    # Get all parameters using locals() and combine with kwargs
+    local_vars = locals()
+    local_vars.update(kwargs)
+    # Get ResponsesAPIOptionalRequestParams with only valid parameters
+    response_api_optional_params: ResponsesAPIOptionalRequestParams = (
+        ResponsesAPIRequestUtils.get_requested_response_api_optional_param(local_vars)
+    )
+
+    # Get optional parameters for the responses API
+    responses_api_request_params: Dict = (
+        ResponsesAPIRequestUtils.get_optional_params_responses_api(
+            model=model,
+            responses_api_provider_config=responses_api_provider_config,
+            response_api_optional_params=response_api_optional_params,
+        )
+    )
+
+    # Pre Call logging
+    litellm_logging_obj.update_environment_variables(
+        model=model,
+        user=user,
+        optional_params=dict(responses_api_request_params),
+        litellm_params={
+            "litellm_call_id": litellm_call_id,
+            **responses_api_request_params,
+        },
+        custom_llm_provider=custom_llm_provider,
+    )
+
+    # Call the handler with _is_async flag instead of directly calling the async handler
+    response = base_llm_http_handler.response_api_handler(
+        model=model,
+        input=input,
+        responses_api_provider_config=responses_api_provider_config,
+        response_api_optional_request_params=responses_api_request_params,
+        custom_llm_provider=custom_llm_provider,
+        litellm_params=litellm_params,
+        logging_obj=litellm_logging_obj,
+        extra_headers=extra_headers,
+        extra_body=extra_body,
+        timeout=timeout or request_timeout,
+        _is_async=_is_async,
+        client=kwargs.get("client"),
+    )
+
+    return response
diff --git a/litellm/responses/streaming_iterator.py b/litellm/responses/streaming_iterator.py
new file mode 100644
index 0000000000..c016e71e7e
--- /dev/null
+++ b/litellm/responses/streaming_iterator.py
@@ -0,0 +1,209 @@
+import asyncio
+import json
+from datetime import datetime
+from typing import Optional
+
+import httpx
+
+from litellm.constants import STREAM_SSE_DONE_STRING
+from litellm.litellm_core_utils.asyncify import run_async_function
+from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.litellm_core_utils.thread_pool_executor import executor
+from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
+from litellm.types.llms.openai import (
+    ResponsesAPIStreamEvents,
+    ResponsesAPIStreamingResponse,
+)
+from litellm.utils import CustomStreamWrapper
+
+
+class BaseResponsesAPIStreamingIterator:
+    """
+    Base class for streaming iterators that process responses from the Responses API.
+
+    This class contains shared logic for both synchronous and asynchronous iterators.
+    """
+
+    def __init__(
+        self,
+        response: httpx.Response,
+        model: str,
+        responses_api_provider_config: BaseResponsesAPIConfig,
+        logging_obj: LiteLLMLoggingObj,
+    ):
+        self.response = response
+        self.model = model
+        self.logging_obj = logging_obj
+        self.finished = False
+        self.responses_api_provider_config = responses_api_provider_config
+        self.completed_response: Optional[ResponsesAPIStreamingResponse] = None
+        self.start_time = datetime.now()
+
+    def _process_chunk(self, chunk):
+        """Process a single chunk of data from the stream"""
+        if not chunk:
+            return None
+
+        # Handle SSE format (data: {...})
+        chunk = CustomStreamWrapper._strip_sse_data_from_chunk(chunk)
+        if chunk is None:
+            return None
+
+        # Handle "[DONE]" marker
+        if chunk == STREAM_SSE_DONE_STRING:
+            self.finished = True
+            return None
+
+        try:
+            # Parse the JSON chunk
+            parsed_chunk = json.loads(chunk)
+
+            # Format as ResponsesAPIStreamingResponse
+            if isinstance(parsed_chunk, dict):
+                openai_responses_api_chunk = (
+                    self.responses_api_provider_config.transform_streaming_response(
+                        model=self.model,
+                        parsed_chunk=parsed_chunk,
+                        logging_obj=self.logging_obj,
+                    )
+                )
+                # Store the completed response
+                if (
+                    openai_responses_api_chunk
+                    and openai_responses_api_chunk.type
+                    == ResponsesAPIStreamEvents.RESPONSE_COMPLETED
+                ):
+                    self.completed_response = openai_responses_api_chunk
+                    self._handle_logging_completed_response()
+
+                return openai_responses_api_chunk
+
+            return None
+        except json.JSONDecodeError:
+            # If we can't parse the chunk, continue
+            return None
+
+    def _handle_logging_completed_response(self):
+        """Base implementation - should be overridden by subclasses"""
+        pass
+
+
+class ResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator):
+    """
+    Async iterator for processing streaming responses from the Responses API.
+    """
+
+    def __init__(
+        self,
+        response: httpx.Response,
+        model: str,
+        responses_api_provider_config: BaseResponsesAPIConfig,
+        logging_obj: LiteLLMLoggingObj,
+    ):
+        super().__init__(response, model, responses_api_provider_config, logging_obj)
+        self.stream_iterator = response.aiter_lines()
+
+    def __aiter__(self):
+        return self
+
+    async def __anext__(self) -> ResponsesAPIStreamingResponse:
+        try:
+            while True:
+                # Get the next chunk from the stream
+                try:
+                    chunk = await self.stream_iterator.__anext__()
+                except StopAsyncIteration:
+                    self.finished = True
+                    raise StopAsyncIteration
+
+                result = self._process_chunk(chunk)
+
+                if self.finished:
+                    raise StopAsyncIteration
+                elif result is not None:
+                    return result
+                # If result is None, continue the loop to get the next chunk
+
+        except httpx.HTTPError as e:
+            # Handle HTTP errors
+            self.finished = True
+            raise e
+
+    def _handle_logging_completed_response(self):
+        """Handle logging for completed responses in async context"""
+        asyncio.create_task(
+            self.logging_obj.async_success_handler(
+                result=self.completed_response,
+                start_time=self.start_time,
+                end_time=datetime.now(),
+                cache_hit=None,
+            )
+        )
+
+        executor.submit(
+            self.logging_obj.success_handler,
+            result=self.completed_response,
+            cache_hit=None,
+            start_time=self.start_time,
+            end_time=datetime.now(),
+        )
+
+
+class SyncResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator):
+    """
+    Synchronous iterator for processing streaming responses from the Responses API.
+    """
+
+    def __init__(
+        self,
+        response: httpx.Response,
+        model: str,
+        responses_api_provider_config: BaseResponsesAPIConfig,
+        logging_obj: LiteLLMLoggingObj,
+    ):
+        super().__init__(response, model, responses_api_provider_config, logging_obj)
+        self.stream_iterator = response.iter_lines()
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        try:
+            while True:
+                # Get the next chunk from the stream
+                try:
+                    chunk = next(self.stream_iterator)
+                except StopIteration:
+                    self.finished = True
+                    raise StopIteration
+
+                result = self._process_chunk(chunk)
+
+                if self.finished:
+                    raise StopIteration
+                elif result is not None:
+                    return result
+                # If result is None, continue the loop to get the next chunk
+
+        except httpx.HTTPError as e:
+            # Handle HTTP errors
+            self.finished = True
+            raise e
+
+    def _handle_logging_completed_response(self):
+        """Handle logging for completed responses in sync context"""
+        run_async_function(
+            async_function=self.logging_obj.async_success_handler,
+            result=self.completed_response,
+            start_time=self.start_time,
+            end_time=datetime.now(),
+            cache_hit=None,
+        )
+
+        executor.submit(
+            self.logging_obj.success_handler,
+            result=self.completed_response,
+            cache_hit=None,
+            start_time=self.start_time,
+            end_time=datetime.now(),
+        )
diff --git a/litellm/responses/utils.py b/litellm/responses/utils.py
new file mode 100644
index 0000000000..49d850ec6a
--- /dev/null
+++ b/litellm/responses/utils.py
@@ -0,0 +1,97 @@
+from typing import Any, Dict, cast, get_type_hints
+
+import litellm
+from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
+from litellm.types.llms.openai import (
+    ResponseAPIUsage,
+    ResponsesAPIOptionalRequestParams,
+)
+from litellm.types.utils import Usage
+
+
+class ResponsesAPIRequestUtils:
+    """Helper utils for constructing ResponseAPI requests"""
+
+    @staticmethod
+    def get_optional_params_responses_api(
+        model: str,
+        responses_api_provider_config: BaseResponsesAPIConfig,
+        response_api_optional_params: ResponsesAPIOptionalRequestParams,
+    ) -> Dict:
+        """
+        Get optional parameters for the responses API.
+
+        Args:
+            params: Dictionary of all parameters
+            model: The model name
+            responses_api_provider_config: The provider configuration for responses API
+
+        Returns:
+            A dictionary of supported parameters for the responses API
+        """
+        # Remove None values and internal parameters
+
+        # Get supported parameters for the model
+        supported_params = responses_api_provider_config.get_supported_openai_params(
+            model
+        )
+
+        # Check for unsupported parameters
+        unsupported_params = [
+            param
+            for param in response_api_optional_params
+            if param not in supported_params
+        ]
+
+        if unsupported_params:
+            raise litellm.UnsupportedParamsError(
+                model=model,
+                message=f"The following parameters are not supported for model {model}: {', '.join(unsupported_params)}",
+            )
+
+        # Map parameters to provider-specific format
+        mapped_params = responses_api_provider_config.map_openai_params(
+            response_api_optional_params=response_api_optional_params,
+            model=model,
+            drop_params=litellm.drop_params,
+        )
+
+        return mapped_params
+
+    @staticmethod
+    def get_requested_response_api_optional_param(
+        params: Dict[str, Any]
+    ) -> ResponsesAPIOptionalRequestParams:
+        """
+        Filter parameters to only include those defined in ResponsesAPIOptionalRequestParams.
+
+        Args:
+            params: Dictionary of parameters to filter
+
+        Returns:
+            ResponsesAPIOptionalRequestParams instance with only the valid parameters
+        """
+        valid_keys = get_type_hints(ResponsesAPIOptionalRequestParams).keys()
+        filtered_params = {k: v for k, v in params.items() if k in valid_keys}
+        return cast(ResponsesAPIOptionalRequestParams, filtered_params)
+
+
+class ResponseAPILoggingUtils:
+    @staticmethod
+    def _is_response_api_usage(usage: dict) -> bool:
+        """returns True if usage is from OpenAI Response API"""
+        if "input_tokens" in usage and "output_tokens" in usage:
+            return True
+        return False
+
+    @staticmethod
+    def _transform_response_api_usage_to_chat_usage(usage: dict) -> Usage:
+        """Tranforms the ResponseAPIUsage object to a Usage object"""
+        response_api_usage: ResponseAPIUsage = ResponseAPIUsage(**usage)
+        prompt_tokens: int = response_api_usage.input_tokens or 0
+        completion_tokens: int = response_api_usage.output_tokens or 0
+        return Usage(
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+            total_tokens=prompt_tokens + completion_tokens,
+        )
diff --git a/litellm/router.py b/litellm/router.py
index aba9e16104..f7f361354b 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -71,7 +71,7 @@ from litellm.router_utils.batch_utils import (
     _get_router_metadata_variable_name,
     replace_model_in_jsonl,
 )
-from litellm.router_utils.client_initalization_utils import InitalizeOpenAISDKClient
+from litellm.router_utils.client_initalization_utils import InitalizeCachedClient
 from litellm.router_utils.clientside_credential_handler import (
     get_dynamic_litellm_params,
     is_clientside_credential,
@@ -581,13 +581,7 @@ class Router:
             self._initialize_alerting()
 
         self.initialize_assistants_endpoint()
-
-        self.amoderation = self.factory_function(
-            litellm.amoderation, call_type="moderation"
-        )
-        self.aanthropic_messages = self.factory_function(
-            litellm.anthropic_messages, call_type="anthropic_messages"
-        )
+        self.initialize_router_endpoints()
 
     def discard(self):
         """
@@ -653,6 +647,18 @@ class Router:
         self.aget_messages = self.factory_function(litellm.aget_messages)
         self.arun_thread = self.factory_function(litellm.arun_thread)
 
+    def initialize_router_endpoints(self):
+        self.amoderation = self.factory_function(
+            litellm.amoderation, call_type="moderation"
+        )
+        self.aanthropic_messages = self.factory_function(
+            litellm.anthropic_messages, call_type="anthropic_messages"
+        )
+        self.aresponses = self.factory_function(
+            litellm.aresponses, call_type="aresponses"
+        )
+        self.responses = self.factory_function(litellm.responses, call_type="responses")
+
     def routing_strategy_init(
         self, routing_strategy: Union[RoutingStrategy, str], routing_strategy_args: dict
     ):
@@ -955,6 +961,7 @@ class Router:
                 specific_deployment=kwargs.pop("specific_deployment", None),
                 request_kwargs=kwargs,
             )
+
             _timeout_debug_deployment_dict = deployment
             end_time = time.time()
             _duration = end_time - start_time
@@ -1079,17 +1086,22 @@ class Router:
         kwargs.setdefault("litellm_trace_id", str(uuid.uuid4()))
         kwargs.setdefault("metadata", {}).update({"model_group": model})
 
-    def _update_kwargs_with_default_litellm_params(self, kwargs: dict) -> None:
+    def _update_kwargs_with_default_litellm_params(
+        self, kwargs: dict, metadata_variable_name: Optional[str] = "metadata"
+    ) -> None:
         """
         Adds default litellm params to kwargs, if set.
         """
+        self.default_litellm_params[metadata_variable_name] = (
+            self.default_litellm_params.pop("metadata", {})
+        )
         for k, v in self.default_litellm_params.items():
             if (
                 k not in kwargs and v is not None
             ):  # prioritize model-specific params > default router params
                 kwargs[k] = v
-            elif k == "metadata":
-                kwargs[k].update(v)
+            elif k == metadata_variable_name:
+                kwargs[metadata_variable_name].update(v)
 
     def _handle_clientside_credential(
         self, deployment: dict, kwargs: dict
@@ -1120,7 +1132,12 @@ class Router:
         )  # add new deployment to router
         return deployment_pydantic_obj
 
-    def _update_kwargs_with_deployment(self, deployment: dict, kwargs: dict) -> None:
+    def _update_kwargs_with_deployment(
+        self,
+        deployment: dict,
+        kwargs: dict,
+        function_name: Optional[str] = None,
+    ) -> None:
         """
         2 jobs:
         - Adds selected deployment, model_info and api_base to kwargs["metadata"] (used for logging)
@@ -1137,7 +1154,10 @@ class Router:
             deployment_model_name = deployment_pydantic_obj.litellm_params.model
             deployment_api_base = deployment_pydantic_obj.litellm_params.api_base
 
-        kwargs.setdefault("metadata", {}).update(
+        metadata_variable_name = _get_router_metadata_variable_name(
+            function_name=function_name,
+        )
+        kwargs.setdefault(metadata_variable_name, {}).update(
             {
                 "deployment": deployment_model_name,
                 "model_info": model_info,
@@ -1150,7 +1170,9 @@ class Router:
             kwargs=kwargs, data=deployment["litellm_params"]
         )
 
-        self._update_kwargs_with_default_litellm_params(kwargs=kwargs)
+        self._update_kwargs_with_default_litellm_params(
+            kwargs=kwargs, metadata_variable_name=metadata_variable_name
+        )
 
     def _get_async_openai_model_client(self, deployment: dict, kwargs: dict):
         """
@@ -2395,22 +2417,18 @@ class Router:
                 messages=kwargs.get("messages", None),
                 specific_deployment=kwargs.pop("specific_deployment", None),
             )
-            self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)
+            self._update_kwargs_with_deployment(
+                deployment=deployment, kwargs=kwargs, function_name="generic_api_call"
+            )
 
             data = deployment["litellm_params"].copy()
             model_name = data["model"]
-
-            model_client = self._get_async_openai_model_client(
-                deployment=deployment,
-                kwargs=kwargs,
-            )
             self.total_calls[model_name] += 1
 
             response = original_function(
                 **{
                     **data,
                     "caching": self.cache_responses,
-                    "client": model_client,
                     **kwargs,
                 }
             )
@@ -2452,6 +2470,61 @@ class Router:
                 self.fail_calls[model] += 1
             raise e
 
+    def _generic_api_call_with_fallbacks(
+        self, model: str, original_function: Callable, **kwargs
+    ):
+        """
+        Make a generic LLM API call through the router, this allows you to use retries/fallbacks with litellm router
+        Args:
+            model: The model to use
+            original_function: The handler function to call (e.g., litellm.completion)
+            **kwargs: Additional arguments to pass to the handler function
+        Returns:
+            The response from the handler function
+        """
+        handler_name = original_function.__name__
+        try:
+            verbose_router_logger.debug(
+                f"Inside _generic_api_call() - handler: {handler_name}, model: {model}; kwargs: {kwargs}"
+            )
+            deployment = self.get_available_deployment(
+                model=model,
+                messages=kwargs.get("messages", None),
+                specific_deployment=kwargs.pop("specific_deployment", None),
+            )
+            self._update_kwargs_with_deployment(
+                deployment=deployment, kwargs=kwargs, function_name="generic_api_call"
+            )
+
+            data = deployment["litellm_params"].copy()
+            model_name = data["model"]
+
+            self.total_calls[model_name] += 1
+
+            # Perform pre-call checks for routing strategy
+            self.routing_strategy_pre_call_checks(deployment=deployment)
+
+            response = original_function(
+                **{
+                    **data,
+                    "caching": self.cache_responses,
+                    **kwargs,
+                }
+            )
+
+            self.success_calls[model_name] += 1
+            verbose_router_logger.info(
+                f"{handler_name}(model={model_name})\033[32m 200 OK\033[0m"
+            )
+            return response
+        except Exception as e:
+            verbose_router_logger.info(
+                f"{handler_name}(model={model})\033[31m Exception {str(e)}\033[0m"
+            )
+            if model is not None:
+                self.fail_calls[model] += 1
+            raise e
+
     def embedding(
         self,
         model: str,
@@ -2973,14 +3046,42 @@ class Router:
         self,
         original_function: Callable,
         call_type: Literal[
-            "assistants", "moderation", "anthropic_messages"
+            "assistants",
+            "moderation",
+            "anthropic_messages",
+            "aresponses",
+            "responses",
         ] = "assistants",
     ):
-        async def new_function(
+        """
+        Creates appropriate wrapper functions for different API call types.
+
+        Returns:
+            - A synchronous function for synchronous call types
+            - An asynchronous function for asynchronous call types
+        """
+        # Handle synchronous call types
+        if call_type == "responses":
+
+            def sync_wrapper(
+                custom_llm_provider: Optional[
+                    Literal["openai", "azure", "anthropic"]
+                ] = None,
+                client: Optional[Any] = None,
+                **kwargs,
+            ):
+                return self._generic_api_call_with_fallbacks(
+                    original_function=original_function, **kwargs
+                )
+
+            return sync_wrapper
+
+        # Handle asynchronous call types
+        async def async_wrapper(
             custom_llm_provider: Optional[
                 Literal["openai", "azure", "anthropic"]
             ] = None,
-            client: Optional["AsyncOpenAI"] = None,
+            client: Optional[Any] = None,
             **kwargs,
         ):
             if call_type == "assistants":
@@ -2991,18 +3092,16 @@ class Router:
                     **kwargs,
                 )
             elif call_type == "moderation":
-
-                return await self._pass_through_moderation_endpoint_factory(  # type: ignore
-                    original_function=original_function,
-                    **kwargs,
+                return await self._pass_through_moderation_endpoint_factory(
+                    original_function=original_function, **kwargs
                 )
-            elif call_type == "anthropic_messages":
-                return await self._ageneric_api_call_with_fallbacks(  # type: ignore
+            elif call_type in ("anthropic_messages", "aresponses"):
+                return await self._ageneric_api_call_with_fallbacks(
                     original_function=original_function,
                     **kwargs,
                 )
 
-        return new_function
+        return async_wrapper
 
     async def _pass_through_assistants_endpoint_factory(
         self,
@@ -4373,10 +4472,10 @@ class Router:
         if custom_llm_provider not in litellm.provider_list:
             raise Exception(f"Unsupported provider - {custom_llm_provider}")
 
-        # init OpenAI, Azure clients
-        InitalizeOpenAISDKClient.set_client(
-            litellm_router_instance=self, model=deployment.to_json(exclude_none=True)
-        )
+        # # init OpenAI, Azure clients
+        # InitalizeOpenAISDKClient.set_client(
+        #     litellm_router_instance=self, model=deployment.to_json(exclude_none=True)
+        # )
 
         self._initialize_deployment_for_pass_through(
             deployment=deployment,
@@ -5345,6 +5444,13 @@ class Router:
             client = self.cache.get_cache(
                 key=cache_key, local_only=True, parent_otel_span=parent_otel_span
             )
+            if client is None:
+                InitalizeCachedClient.set_max_parallel_requests_client(
+                    litellm_router_instance=self, model=deployment
+                )
+                client = self.cache.get_cache(
+                    key=cache_key, local_only=True, parent_otel_span=parent_otel_span
+                )
             return client
         elif client_type == "async":
             if kwargs.get("stream") is True:
@@ -5352,36 +5458,12 @@ class Router:
                 client = self.cache.get_cache(
                     key=cache_key, local_only=True, parent_otel_span=parent_otel_span
                 )
-                if client is None:
-                    """
-                    Re-initialize the client
-                    """
-                    InitalizeOpenAISDKClient.set_client(
-                        litellm_router_instance=self, model=deployment
-                    )
-                    client = self.cache.get_cache(
-                        key=cache_key,
-                        local_only=True,
-                        parent_otel_span=parent_otel_span,
-                    )
                 return client
             else:
                 cache_key = f"{model_id}_async_client"
                 client = self.cache.get_cache(
                     key=cache_key, local_only=True, parent_otel_span=parent_otel_span
                 )
-                if client is None:
-                    """
-                    Re-initialize the client
-                    """
-                    InitalizeOpenAISDKClient.set_client(
-                        litellm_router_instance=self, model=deployment
-                    )
-                    client = self.cache.get_cache(
-                        key=cache_key,
-                        local_only=True,
-                        parent_otel_span=parent_otel_span,
-                    )
                 return client
         else:
             if kwargs.get("stream") is True:
@@ -5389,32 +5471,12 @@ class Router:
                 client = self.cache.get_cache(
                     key=cache_key, parent_otel_span=parent_otel_span
                 )
-                if client is None:
-                    """
-                    Re-initialize the client
-                    """
-                    InitalizeOpenAISDKClient.set_client(
-                        litellm_router_instance=self, model=deployment
-                    )
-                    client = self.cache.get_cache(
-                        key=cache_key, parent_otel_span=parent_otel_span
-                    )
                 return client
             else:
                 cache_key = f"{model_id}_client"
                 client = self.cache.get_cache(
                     key=cache_key, parent_otel_span=parent_otel_span
                 )
-                if client is None:
-                    """
-                    Re-initialize the client
-                    """
-                    InitalizeOpenAISDKClient.set_client(
-                        litellm_router_instance=self, model=deployment
-                    )
-                    client = self.cache.get_cache(
-                        key=cache_key, parent_otel_span=parent_otel_span
-                    )
                 return client
 
     def _pre_call_checks(  # noqa: PLR0915
diff --git a/litellm/router_utils/batch_utils.py b/litellm/router_utils/batch_utils.py
index 51cc164d30..a41bae254c 100644
--- a/litellm/router_utils/batch_utils.py
+++ b/litellm/router_utils/batch_utils.py
@@ -56,7 +56,8 @@ def _get_router_metadata_variable_name(function_name) -> str:
 
     For ALL other endpoints we call this "metadata
     """
-    if "batch" in function_name:
+    ROUTER_METHODS_USING_LITELLM_METADATA = set(["batch", "generic_api_call"])
+    if function_name in ROUTER_METHODS_USING_LITELLM_METADATA:
         return "litellm_metadata"
     else:
         return "metadata"
diff --git a/litellm/router_utils/client_initalization_utils.py b/litellm/router_utils/client_initalization_utils.py
index 7956d8c72e..e24d237853 100644
--- a/litellm/router_utils/client_initalization_utils.py
+++ b/litellm/router_utils/client_initalization_utils.py
@@ -1,21 +1,6 @@
 import asyncio
-import os
-from typing import TYPE_CHECKING, Any, Callable, Optional
+from typing import TYPE_CHECKING, Any
 
-import httpx
-import openai
-
-import litellm
-from litellm import get_secret, get_secret_str
-from litellm._logging import verbose_router_logger
-from litellm.llms.azure.azure import get_azure_ad_token_from_oidc
-from litellm.llms.azure.common_utils import (
-    get_azure_ad_token_from_entrata_id,
-    get_azure_ad_token_from_username_password,
-)
-from litellm.secret_managers.get_azure_ad_token_provider import (
-    get_azure_ad_token_provider,
-)
 from litellm.utils import calculate_max_parallel_requests
 
 if TYPE_CHECKING:
@@ -26,46 +11,13 @@ else:
     LitellmRouter = Any
 
 
-class InitalizeOpenAISDKClient:
+class InitalizeCachedClient:
     @staticmethod
-    def should_initialize_sync_client(
-        litellm_router_instance: LitellmRouter,
-    ) -> bool:
-        """
-        Returns if Sync OpenAI, Azure Clients should be initialized.
-
-        Do not init sync clients when router.router_general_settings.async_only_mode is True
-
-        """
-        if litellm_router_instance is None:
-            return False
-
-        if litellm_router_instance.router_general_settings is not None:
-            if (
-                hasattr(litellm_router_instance, "router_general_settings")
-                and hasattr(
-                    litellm_router_instance.router_general_settings, "async_only_mode"
-                )
-                and litellm_router_instance.router_general_settings.async_only_mode
-                is True
-            ):
-                return False
-
-        return True
-
-    @staticmethod
-    def set_client(  # noqa: PLR0915
+    def set_max_parallel_requests_client(
         litellm_router_instance: LitellmRouter, model: dict
     ):
-        """
-        - Initializes Azure/OpenAI clients. Stores them in cache, b/c of this - https://github.com/BerriAI/litellm/issues/1278
-        - Initializes Semaphore for client w/ rpm. Stores them in cache. b/c of this - https://github.com/BerriAI/litellm/issues/2994
-        """
-        client_ttl = litellm_router_instance.client_ttl
         litellm_params = model.get("litellm_params", {})
-        model_name = litellm_params.get("model")
         model_id = model["model_info"]["id"]
-        # ### IF RPM SET - initialize a semaphore ###
         rpm = litellm_params.get("rpm", None)
         tpm = litellm_params.get("tpm", None)
         max_parallel_requests = litellm_params.get("max_parallel_requests", None)
@@ -83,480 +35,3 @@ class InitalizeOpenAISDKClient:
                 value=semaphore,
                 local_only=True,
             )
-
-        ####  for OpenAI / Azure we need to initalize the Client for High Traffic ########
-        custom_llm_provider = litellm_params.get("custom_llm_provider")
-        custom_llm_provider = custom_llm_provider or model_name.split("/", 1)[0] or ""
-        default_api_base = None
-        default_api_key = None
-        if custom_llm_provider in litellm.openai_compatible_providers:
-            _, custom_llm_provider, api_key, api_base = litellm.get_llm_provider(
-                model=model_name
-            )
-            default_api_base = api_base
-            default_api_key = api_key
-
-        if (
-            model_name in litellm.open_ai_chat_completion_models
-            or custom_llm_provider in litellm.openai_compatible_providers
-            or custom_llm_provider == "azure"
-            or custom_llm_provider == "azure_text"
-            or custom_llm_provider == "custom_openai"
-            or custom_llm_provider == "openai"
-            or custom_llm_provider == "text-completion-openai"
-            or "ft:gpt-3.5-turbo" in model_name
-            or model_name in litellm.open_ai_embedding_models
-        ):
-            is_azure_ai_studio_model: bool = False
-            if custom_llm_provider == "azure":
-                if litellm.utils._is_non_openai_azure_model(model_name):
-                    is_azure_ai_studio_model = True
-                    custom_llm_provider = "openai"
-                    # remove azure prefx from model_name
-                    model_name = model_name.replace("azure/", "")
-            # glorified / complicated reading of configs
-            # user can pass vars directly or they can pas os.environ/AZURE_API_KEY, in which case we will read the env
-            # we do this here because we init clients for Azure, OpenAI and we need to set the right key
-            api_key = litellm_params.get("api_key") or default_api_key
-            if (
-                api_key
-                and isinstance(api_key, str)
-                and api_key.startswith("os.environ/")
-            ):
-                api_key_env_name = api_key.replace("os.environ/", "")
-                api_key = get_secret_str(api_key_env_name)
-                litellm_params["api_key"] = api_key
-
-            api_base = litellm_params.get("api_base")
-            base_url: Optional[str] = litellm_params.get("base_url")
-            api_base = (
-                api_base or base_url or default_api_base
-            )  # allow users to pass in `api_base` or `base_url` for azure
-            if api_base and api_base.startswith("os.environ/"):
-                api_base_env_name = api_base.replace("os.environ/", "")
-                api_base = get_secret_str(api_base_env_name)
-                litellm_params["api_base"] = api_base
-
-            ## AZURE AI STUDIO MISTRAL CHECK ##
-            """
-            Make sure api base ends in /v1/
-
-            if not, add it - https://github.com/BerriAI/litellm/issues/2279
-            """
-            if (
-                is_azure_ai_studio_model is True
-                and api_base is not None
-                and isinstance(api_base, str)
-                and not api_base.endswith("/v1/")
-            ):
-                # check if it ends with a trailing slash
-                if api_base.endswith("/"):
-                    api_base += "v1/"
-                elif api_base.endswith("/v1"):
-                    api_base += "/"
-                else:
-                    api_base += "/v1/"
-
-            api_version = litellm_params.get("api_version")
-            if api_version and api_version.startswith("os.environ/"):
-                api_version_env_name = api_version.replace("os.environ/", "")
-                api_version = get_secret_str(api_version_env_name)
-                litellm_params["api_version"] = api_version
-
-            timeout: Optional[float] = (
-                litellm_params.pop("timeout", None) or litellm.request_timeout
-            )
-            if isinstance(timeout, str) and timeout.startswith("os.environ/"):
-                timeout_env_name = timeout.replace("os.environ/", "")
-                timeout = get_secret(timeout_env_name)  # type: ignore
-                litellm_params["timeout"] = timeout
-
-            stream_timeout: Optional[float] = litellm_params.pop(
-                "stream_timeout", timeout
-            )  # if no stream_timeout is set, default to timeout
-            if isinstance(stream_timeout, str) and stream_timeout.startswith(
-                "os.environ/"
-            ):
-                stream_timeout_env_name = stream_timeout.replace("os.environ/", "")
-                stream_timeout = get_secret(stream_timeout_env_name)  # type: ignore
-                litellm_params["stream_timeout"] = stream_timeout
-
-            max_retries: Optional[int] = litellm_params.pop(
-                "max_retries", 0
-            )  # router handles retry logic
-            if isinstance(max_retries, str) and max_retries.startswith("os.environ/"):
-                max_retries_env_name = max_retries.replace("os.environ/", "")
-                max_retries = get_secret(max_retries_env_name)  # type: ignore
-                litellm_params["max_retries"] = max_retries
-
-            organization = litellm_params.get("organization", None)
-            if isinstance(organization, str) and organization.startswith("os.environ/"):
-                organization_env_name = organization.replace("os.environ/", "")
-                organization = get_secret_str(organization_env_name)
-                litellm_params["organization"] = organization
-            azure_ad_token_provider: Optional[Callable[[], str]] = None
-            # If we have api_key, then we have higher priority
-            if not api_key and litellm_params.get("tenant_id"):
-                verbose_router_logger.debug(
-                    "Using Azure AD Token Provider for Azure Auth"
-                )
-                azure_ad_token_provider = get_azure_ad_token_from_entrata_id(
-                    tenant_id=litellm_params.get("tenant_id"),
-                    client_id=litellm_params.get("client_id"),
-                    client_secret=litellm_params.get("client_secret"),
-                )
-            if litellm_params.get("azure_username") and litellm_params.get(
-                "azure_password"
-            ):
-                azure_ad_token_provider = get_azure_ad_token_from_username_password(
-                    azure_username=litellm_params.get("azure_username"),
-                    azure_password=litellm_params.get("azure_password"),
-                    client_id=litellm_params.get("client_id"),
-                )
-
-            if custom_llm_provider == "azure" or custom_llm_provider == "azure_text":
-                if api_base is None or not isinstance(api_base, str):
-                    filtered_litellm_params = {
-                        k: v
-                        for k, v in model["litellm_params"].items()
-                        if k != "api_key"
-                    }
-                    _filtered_model = {
-                        "model_name": model["model_name"],
-                        "litellm_params": filtered_litellm_params,
-                    }
-                    raise ValueError(
-                        f"api_base is required for Azure OpenAI. Set it on your config. Model - {_filtered_model}"
-                    )
-                azure_ad_token = litellm_params.get("azure_ad_token")
-                if azure_ad_token is not None:
-                    if azure_ad_token.startswith("oidc/"):
-                        azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
-                elif (
-                     not api_key and azure_ad_token_provider is None
-                    and litellm.enable_azure_ad_token_refresh is True
-                ):
-                    try:
-                        azure_ad_token_provider = get_azure_ad_token_provider()
-                    except ValueError:
-                        verbose_router_logger.debug(
-                            "Azure AD Token Provider could not be used."
-                        )
-                if api_version is None:
-                    api_version = os.getenv(
-                        "AZURE_API_VERSION", litellm.AZURE_DEFAULT_API_VERSION
-                    )
-
-                if "gateway.ai.cloudflare.com" in api_base:
-                    if not api_base.endswith("/"):
-                        api_base += "/"
-                    azure_model = model_name.replace("azure/", "")
-                    api_base += f"{azure_model}"
-                    cache_key = f"{model_id}_async_client"
-                    _client = openai.AsyncAzureOpenAI(
-                        api_key=api_key,
-                        azure_ad_token=azure_ad_token,
-                        azure_ad_token_provider=azure_ad_token_provider,
-                        base_url=api_base,
-                        api_version=api_version,
-                        timeout=timeout,  # type: ignore
-                        max_retries=max_retries,  # type: ignore
-                        http_client=httpx.AsyncClient(
-                            limits=httpx.Limits(
-                                max_connections=1000, max_keepalive_connections=100
-                            ),
-                            verify=litellm.ssl_verify,
-                        ),  # type: ignore
-                    )
-                    litellm_router_instance.cache.set_cache(
-                        key=cache_key,
-                        value=_client,
-                        ttl=client_ttl,
-                        local_only=True,
-                    )  # cache for 1 hr
-
-                    if InitalizeOpenAISDKClient.should_initialize_sync_client(
-                        litellm_router_instance=litellm_router_instance
-                    ):
-                        cache_key = f"{model_id}_client"
-                        _client = openai.AzureOpenAI(  # type: ignore
-                            api_key=api_key,
-                            azure_ad_token=azure_ad_token,
-                            azure_ad_token_provider=azure_ad_token_provider,
-                            base_url=api_base,
-                            api_version=api_version,
-                            timeout=timeout,  # type: ignore
-                            max_retries=max_retries,  # type: ignore
-                            http_client=httpx.Client(
-                                limits=httpx.Limits(
-                                    max_connections=1000, max_keepalive_connections=100
-                                ),
-                                verify=litellm.ssl_verify,
-                            ),  # type: ignore
-                        )
-                        litellm_router_instance.cache.set_cache(
-                            key=cache_key,
-                            value=_client,
-                            ttl=client_ttl,
-                            local_only=True,
-                        )  # cache for 1 hr
-                    # streaming clients can have diff timeouts
-                    cache_key = f"{model_id}_stream_async_client"
-                    _client = openai.AsyncAzureOpenAI(  # type: ignore
-                        api_key=api_key,
-                        azure_ad_token=azure_ad_token,
-                        azure_ad_token_provider=azure_ad_token_provider,
-                        base_url=api_base,
-                        api_version=api_version,
-                        timeout=stream_timeout,  # type: ignore
-                        max_retries=max_retries,  # type: ignore
-                        http_client=httpx.AsyncClient(
-                            limits=httpx.Limits(
-                                max_connections=1000, max_keepalive_connections=100
-                            ),
-                            verify=litellm.ssl_verify,
-                        ),  # type: ignore
-                    )
-                    litellm_router_instance.cache.set_cache(
-                        key=cache_key,
-                        value=_client,
-                        ttl=client_ttl,
-                        local_only=True,
-                    )  # cache for 1 hr
-
-                    if InitalizeOpenAISDKClient.should_initialize_sync_client(
-                        litellm_router_instance=litellm_router_instance
-                    ):
-                        cache_key = f"{model_id}_stream_client"
-                        _client = openai.AzureOpenAI(  # type: ignore
-                            api_key=api_key,
-                            azure_ad_token=azure_ad_token,
-                            azure_ad_token_provider=azure_ad_token_provider,
-                            base_url=api_base,
-                            api_version=api_version,
-                            timeout=stream_timeout,  # type: ignore
-                            max_retries=max_retries,  # type: ignore
-                            http_client=httpx.Client(
-                                limits=httpx.Limits(
-                                    max_connections=1000, max_keepalive_connections=100
-                                ),
-                                verify=litellm.ssl_verify,
-                            ),  # type: ignore
-                        )
-                        litellm_router_instance.cache.set_cache(
-                            key=cache_key,
-                            value=_client,
-                            ttl=client_ttl,
-                            local_only=True,
-                        )  # cache for 1 hr
-                else:
-                    _api_key = api_key
-                    if _api_key is not None and isinstance(_api_key, str):
-                        # only show first 5 chars of api_key
-                        _api_key = _api_key[:8] + "*" * 15
-                    verbose_router_logger.debug(
-                        f"Initializing Azure OpenAI Client for {model_name}, Api Base: {str(api_base)}, Api Key:{_api_key}"
-                    )
-                    azure_client_params = {
-                        "api_key": api_key,
-                        "azure_endpoint": api_base,
-                        "api_version": api_version,
-                        "azure_ad_token": azure_ad_token,
-                        "azure_ad_token_provider": azure_ad_token_provider,
-                    }
-
-                    if azure_ad_token_provider is not None:
-                        azure_client_params["azure_ad_token_provider"] = (
-                            azure_ad_token_provider
-                        )
-                    from litellm.llms.azure.azure import (
-                        select_azure_base_url_or_endpoint,
-                    )
-
-                    # this decides if we should set azure_endpoint or base_url on Azure OpenAI Client
-                    # required to support GPT-4 vision enhancements, since base_url needs to be set on Azure OpenAI Client
-                    azure_client_params = select_azure_base_url_or_endpoint(
-                        azure_client_params
-                    )
-
-                    cache_key = f"{model_id}_async_client"
-                    _client = openai.AsyncAzureOpenAI(  # type: ignore
-                        **azure_client_params,
-                        timeout=timeout,  # type: ignore
-                        max_retries=max_retries,  # type: ignore
-                        http_client=httpx.AsyncClient(
-                            limits=httpx.Limits(
-                                max_connections=1000, max_keepalive_connections=100
-                            ),
-                            verify=litellm.ssl_verify,
-                        ),  # type: ignore
-                    )
-                    litellm_router_instance.cache.set_cache(
-                        key=cache_key,
-                        value=_client,
-                        ttl=client_ttl,
-                        local_only=True,
-                    )  # cache for 1 hr
-                    if InitalizeOpenAISDKClient.should_initialize_sync_client(
-                        litellm_router_instance=litellm_router_instance
-                    ):
-                        cache_key = f"{model_id}_client"
-                        _client = openai.AzureOpenAI(  # type: ignore
-                            **azure_client_params,
-                            timeout=timeout,  # type: ignore
-                            max_retries=max_retries,  # type: ignore
-                            http_client=httpx.Client(
-                                limits=httpx.Limits(
-                                    max_connections=1000, max_keepalive_connections=100
-                                ),
-                                verify=litellm.ssl_verify,
-                            ),  # type: ignore
-                        )
-                        litellm_router_instance.cache.set_cache(
-                            key=cache_key,
-                            value=_client,
-                            ttl=client_ttl,
-                            local_only=True,
-                        )  # cache for 1 hr
-
-                    # streaming clients should have diff timeouts
-                    cache_key = f"{model_id}_stream_async_client"
-                    _client = openai.AsyncAzureOpenAI(  # type: ignore
-                        **azure_client_params,
-                        timeout=stream_timeout,  # type: ignore
-                        max_retries=max_retries,  # type: ignore
-                        http_client=httpx.AsyncClient(
-                            limits=httpx.Limits(
-                                max_connections=1000, max_keepalive_connections=100
-                            ),
-                            verify=litellm.ssl_verify,
-                        ),
-                    )
-                    litellm_router_instance.cache.set_cache(
-                        key=cache_key,
-                        value=_client,
-                        ttl=client_ttl,
-                        local_only=True,
-                    )  # cache for 1 hr
-
-                    if InitalizeOpenAISDKClient.should_initialize_sync_client(
-                        litellm_router_instance=litellm_router_instance
-                    ):
-                        cache_key = f"{model_id}_stream_client"
-                        _client = openai.AzureOpenAI(  # type: ignore
-                            **azure_client_params,
-                            timeout=stream_timeout,  # type: ignore
-                            max_retries=max_retries,  # type: ignore
-                            http_client=httpx.Client(
-                                limits=httpx.Limits(
-                                    max_connections=1000, max_keepalive_connections=100
-                                ),
-                                verify=litellm.ssl_verify,
-                            ),
-                        )
-                        litellm_router_instance.cache.set_cache(
-                            key=cache_key,
-                            value=_client,
-                            ttl=client_ttl,
-                            local_only=True,
-                        )  # cache for 1 hr
-
-            else:
-                _api_key = api_key  # type: ignore
-                if _api_key is not None and isinstance(_api_key, str):
-                    # only show first 5 chars of api_key
-                    _api_key = _api_key[:8] + "*" * 15
-                verbose_router_logger.debug(
-                    f"Initializing OpenAI Client for {model_name}, Api Base:{str(api_base)}, Api Key:{_api_key}"
-                )
-                cache_key = f"{model_id}_async_client"
-                _client = openai.AsyncOpenAI(  # type: ignore
-                    api_key=api_key,
-                    base_url=api_base,
-                    timeout=timeout,  # type: ignore
-                    max_retries=max_retries,  # type: ignore
-                    organization=organization,
-                    http_client=httpx.AsyncClient(
-                        limits=httpx.Limits(
-                            max_connections=1000, max_keepalive_connections=100
-                        ),
-                        verify=litellm.ssl_verify,
-                    ),  # type: ignore
-                )
-                litellm_router_instance.cache.set_cache(
-                    key=cache_key,
-                    value=_client,
-                    ttl=client_ttl,
-                    local_only=True,
-                )  # cache for 1 hr
-
-                if InitalizeOpenAISDKClient.should_initialize_sync_client(
-                    litellm_router_instance=litellm_router_instance
-                ):
-                    cache_key = f"{model_id}_client"
-                    _client = openai.OpenAI(  # type: ignore
-                        api_key=api_key,
-                        base_url=api_base,
-                        timeout=timeout,  # type: ignore
-                        max_retries=max_retries,  # type: ignore
-                        organization=organization,
-                        http_client=httpx.Client(
-                            limits=httpx.Limits(
-                                max_connections=1000, max_keepalive_connections=100
-                            ),
-                            verify=litellm.ssl_verify,
-                        ),  # type: ignore
-                    )
-                    litellm_router_instance.cache.set_cache(
-                        key=cache_key,
-                        value=_client,
-                        ttl=client_ttl,
-                        local_only=True,
-                    )  # cache for 1 hr
-
-                # streaming clients should have diff timeouts
-                cache_key = f"{model_id}_stream_async_client"
-                _client = openai.AsyncOpenAI(  # type: ignore
-                    api_key=api_key,
-                    base_url=api_base,
-                    timeout=stream_timeout,  # type: ignore
-                    max_retries=max_retries,  # type: ignore
-                    organization=organization,
-                    http_client=httpx.AsyncClient(
-                        limits=httpx.Limits(
-                            max_connections=1000, max_keepalive_connections=100
-                        ),
-                        verify=litellm.ssl_verify,
-                    ),  # type: ignore
-                )
-                litellm_router_instance.cache.set_cache(
-                    key=cache_key,
-                    value=_client,
-                    ttl=client_ttl,
-                    local_only=True,
-                )  # cache for 1 hr
-
-                if InitalizeOpenAISDKClient.should_initialize_sync_client(
-                    litellm_router_instance=litellm_router_instance
-                ):
-                    # streaming clients should have diff timeouts
-                    cache_key = f"{model_id}_stream_client"
-                    _client = openai.OpenAI(  # type: ignore
-                        api_key=api_key,
-                        base_url=api_base,
-                        timeout=stream_timeout,  # type: ignore
-                        max_retries=max_retries,  # type: ignore
-                        organization=organization,
-                        http_client=httpx.Client(
-                            limits=httpx.Limits(
-                                max_connections=1000, max_keepalive_connections=100
-                            ),
-                            verify=litellm.ssl_verify,
-                        ),  # type: ignore
-                    )
-                    litellm_router_instance.cache.set_cache(
-                        key=cache_key,
-                        value=_client,
-                        ttl=client_ttl,
-                        local_only=True,
-                    )  # cache for 1 hr
diff --git a/litellm/types/llms/openai.py b/litellm/types/llms/openai.py
index ee01774435..df746036da 100644
--- a/litellm/types/llms/openai.py
+++ b/litellm/types/llms/openai.py
@@ -1,6 +1,8 @@
+from enum import Enum
 from os import PathLike
 from typing import IO, Any, Iterable, List, Literal, Mapping, Optional, Tuple, Union
 
+import httpx
 from openai._legacy_response import (
     HttpxBinaryResponseContent as _HttpxBinaryResponseContent,
 )
@@ -31,8 +33,24 @@ from openai.types.chat.chat_completion_prediction_content_param import (
 )
 from openai.types.embedding import Embedding as OpenAIEmbedding
 from openai.types.fine_tuning.fine_tuning_job import FineTuningJob
-from pydantic import BaseModel, Field
-from typing_extensions import Dict, Required, TypedDict, override
+from openai.types.responses.response import (
+    IncompleteDetails,
+    Response,
+    ResponseOutputItem,
+    ResponseTextConfig,
+    Tool,
+    ToolChoice,
+)
+from openai.types.responses.response_create_params import (
+    Reasoning,
+    ResponseIncludable,
+    ResponseInputParam,
+    ResponseTextConfigParam,
+    ToolChoice,
+    ToolParam,
+)
+from pydantic import BaseModel, Discriminator, Field, PrivateAttr
+from typing_extensions import Annotated, Dict, Required, TypedDict, override
 
 FileContent = Union[IO[bytes], bytes, PathLike]
 
@@ -684,3 +702,326 @@ OpenAIAudioTranscriptionOptionalParams = Literal[
 
 
 OpenAIImageVariationOptionalParams = Literal["n", "size", "response_format", "user"]
+
+
+class ResponsesAPIOptionalRequestParams(TypedDict, total=False):
+    """TypedDict for Optional parameters supported by the responses API."""
+
+    include: Optional[List[ResponseIncludable]]
+    instructions: Optional[str]
+    max_output_tokens: Optional[int]
+    metadata: Optional[Dict[str, Any]]
+    parallel_tool_calls: Optional[bool]
+    previous_response_id: Optional[str]
+    reasoning: Optional[Reasoning]
+    store: Optional[bool]
+    stream: Optional[bool]
+    temperature: Optional[float]
+    text: Optional[ResponseTextConfigParam]
+    tool_choice: Optional[ToolChoice]
+    tools: Optional[Iterable[ToolParam]]
+    top_p: Optional[float]
+    truncation: Optional[Literal["auto", "disabled"]]
+    user: Optional[str]
+
+
+class ResponsesAPIRequestParams(ResponsesAPIOptionalRequestParams, total=False):
+    """TypedDict for request parameters supported by the responses API."""
+
+    input: Union[str, ResponseInputParam]
+    model: str
+
+
+class BaseLiteLLMOpenAIResponseObject(BaseModel):
+    def __getitem__(self, key):
+        return self.__dict__[key]
+
+    def get(self, key, default=None):
+        return self.__dict__.get(key, default)
+
+    def __contains__(self, key):
+        return key in self.__dict__
+
+    def items(self):
+        return self.__dict__.items()
+
+
+class OutputTokensDetails(BaseLiteLLMOpenAIResponseObject):
+    reasoning_tokens: int
+
+    model_config = {"extra": "allow"}
+
+
+class ResponseAPIUsage(BaseLiteLLMOpenAIResponseObject):
+    input_tokens: int
+    """The number of input tokens."""
+
+    output_tokens: int
+    """The number of output tokens."""
+
+    output_tokens_details: Optional[OutputTokensDetails]
+    """A detailed breakdown of the output tokens."""
+
+    total_tokens: int
+    """The total number of tokens used."""
+
+    model_config = {"extra": "allow"}
+
+
+class ResponsesAPIResponse(BaseLiteLLMOpenAIResponseObject):
+    id: str
+    created_at: float
+    error: Optional[dict]
+    incomplete_details: Optional[IncompleteDetails]
+    instructions: Optional[str]
+    metadata: Optional[Dict]
+    model: Optional[str]
+    object: Optional[str]
+    output: List[ResponseOutputItem]
+    parallel_tool_calls: bool
+    temperature: Optional[float]
+    tool_choice: ToolChoice
+    tools: List[Tool]
+    top_p: Optional[float]
+    max_output_tokens: Optional[int]
+    previous_response_id: Optional[str]
+    reasoning: Optional[Reasoning]
+    status: Optional[str]
+    text: Optional[ResponseTextConfig]
+    truncation: Optional[Literal["auto", "disabled"]]
+    usage: Optional[ResponseAPIUsage]
+    user: Optional[str]
+    # Define private attributes using PrivateAttr
+    _hidden_params: dict = PrivateAttr(default_factory=dict)
+
+
+class ResponsesAPIStreamEvents(str, Enum):
+    """
+    Enum representing all supported OpenAI stream event types for the Responses API.
+
+    Inherits from str to allow direct string comparison and usage as dictionary keys.
+    """
+
+    # Response lifecycle events
+    RESPONSE_CREATED = "response.created"
+    RESPONSE_IN_PROGRESS = "response.in_progress"
+    RESPONSE_COMPLETED = "response.completed"
+    RESPONSE_FAILED = "response.failed"
+    RESPONSE_INCOMPLETE = "response.incomplete"
+
+    # Output item events
+    OUTPUT_ITEM_ADDED = "response.output_item.added"
+    OUTPUT_ITEM_DONE = "response.output_item.done"
+
+    # Content part events
+    CONTENT_PART_ADDED = "response.content_part.added"
+    CONTENT_PART_DONE = "response.content_part.done"
+
+    # Output text events
+    OUTPUT_TEXT_DELTA = "response.output_text.delta"
+    OUTPUT_TEXT_ANNOTATION_ADDED = "response.output_text.annotation.added"
+    OUTPUT_TEXT_DONE = "response.output_text.done"
+
+    # Refusal events
+    REFUSAL_DELTA = "response.refusal.delta"
+    REFUSAL_DONE = "response.refusal.done"
+
+    # Function call events
+    FUNCTION_CALL_ARGUMENTS_DELTA = "response.function_call_arguments.delta"
+    FUNCTION_CALL_ARGUMENTS_DONE = "response.function_call_arguments.done"
+
+    # File search events
+    FILE_SEARCH_CALL_IN_PROGRESS = "response.file_search_call.in_progress"
+    FILE_SEARCH_CALL_SEARCHING = "response.file_search_call.searching"
+    FILE_SEARCH_CALL_COMPLETED = "response.file_search_call.completed"
+
+    # Web search events
+    WEB_SEARCH_CALL_IN_PROGRESS = "response.web_search_call.in_progress"
+    WEB_SEARCH_CALL_SEARCHING = "response.web_search_call.searching"
+    WEB_SEARCH_CALL_COMPLETED = "response.web_search_call.completed"
+
+    # Error event
+    ERROR = "error"
+
+
+class ResponseCreatedEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.RESPONSE_CREATED]
+    response: ResponsesAPIResponse
+
+
+class ResponseInProgressEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.RESPONSE_IN_PROGRESS]
+    response: ResponsesAPIResponse
+
+
+class ResponseCompletedEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.RESPONSE_COMPLETED]
+    response: ResponsesAPIResponse
+    _hidden_params: dict = PrivateAttr(default_factory=dict)
+
+
+class ResponseFailedEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.RESPONSE_FAILED]
+    response: ResponsesAPIResponse
+
+
+class ResponseIncompleteEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.RESPONSE_INCOMPLETE]
+    response: ResponsesAPIResponse
+
+
+class OutputItemAddedEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.OUTPUT_ITEM_ADDED]
+    output_index: int
+    item: dict
+
+
+class OutputItemDoneEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.OUTPUT_ITEM_DONE]
+    output_index: int
+    item: dict
+
+
+class ContentPartAddedEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.CONTENT_PART_ADDED]
+    item_id: str
+    output_index: int
+    content_index: int
+    part: dict
+
+
+class ContentPartDoneEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.CONTENT_PART_DONE]
+    item_id: str
+    output_index: int
+    content_index: int
+    part: dict
+
+
+class OutputTextDeltaEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA]
+    item_id: str
+    output_index: int
+    content_index: int
+    delta: str
+
+
+class OutputTextAnnotationAddedEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.OUTPUT_TEXT_ANNOTATION_ADDED]
+    item_id: str
+    output_index: int
+    content_index: int
+    annotation_index: int
+    annotation: dict
+
+
+class OutputTextDoneEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.OUTPUT_TEXT_DONE]
+    item_id: str
+    output_index: int
+    content_index: int
+    text: str
+
+
+class RefusalDeltaEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.REFUSAL_DELTA]
+    item_id: str
+    output_index: int
+    content_index: int
+    delta: str
+
+
+class RefusalDoneEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.REFUSAL_DONE]
+    item_id: str
+    output_index: int
+    content_index: int
+    refusal: str
+
+
+class FunctionCallArgumentsDeltaEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DELTA]
+    item_id: str
+    output_index: int
+    delta: str
+
+
+class FunctionCallArgumentsDoneEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DONE]
+    item_id: str
+    output_index: int
+    arguments: str
+
+
+class FileSearchCallInProgressEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.FILE_SEARCH_CALL_IN_PROGRESS]
+    output_index: int
+    item_id: str
+
+
+class FileSearchCallSearchingEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.FILE_SEARCH_CALL_SEARCHING]
+    output_index: int
+    item_id: str
+
+
+class FileSearchCallCompletedEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.FILE_SEARCH_CALL_COMPLETED]
+    output_index: int
+    item_id: str
+
+
+class WebSearchCallInProgressEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.WEB_SEARCH_CALL_IN_PROGRESS]
+    output_index: int
+    item_id: str
+
+
+class WebSearchCallSearchingEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.WEB_SEARCH_CALL_SEARCHING]
+    output_index: int
+    item_id: str
+
+
+class WebSearchCallCompletedEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.WEB_SEARCH_CALL_COMPLETED]
+    output_index: int
+    item_id: str
+
+
+class ErrorEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.ERROR]
+    code: Optional[str]
+    message: str
+    param: Optional[str]
+
+
+# Union type for all possible streaming responses
+ResponsesAPIStreamingResponse = Annotated[
+    Union[
+        ResponseCreatedEvent,
+        ResponseInProgressEvent,
+        ResponseCompletedEvent,
+        ResponseFailedEvent,
+        ResponseIncompleteEvent,
+        OutputItemAddedEvent,
+        OutputItemDoneEvent,
+        ContentPartAddedEvent,
+        ContentPartDoneEvent,
+        OutputTextDeltaEvent,
+        OutputTextAnnotationAddedEvent,
+        OutputTextDoneEvent,
+        RefusalDeltaEvent,
+        RefusalDoneEvent,
+        FunctionCallArgumentsDeltaEvent,
+        FunctionCallArgumentsDoneEvent,
+        FileSearchCallInProgressEvent,
+        FileSearchCallSearchingEvent,
+        FileSearchCallCompletedEvent,
+        WebSearchCallInProgressEvent,
+        WebSearchCallSearchingEvent,
+        WebSearchCallCompletedEvent,
+        ErrorEvent,
+    ],
+    Discriminator("type"),
+]
diff --git a/litellm/types/utils.py b/litellm/types/utils.py
index 4af88100fa..db315e2696 100644
--- a/litellm/types/utils.py
+++ b/litellm/types/utils.py
@@ -191,6 +191,44 @@ class CallTypes(Enum):
     retrieve_batch = "retrieve_batch"
     pass_through = "pass_through_endpoint"
     anthropic_messages = "anthropic_messages"
+    get_assistants = "get_assistants"
+    aget_assistants = "aget_assistants"
+    create_assistants = "create_assistants"
+    acreate_assistants = "acreate_assistants"
+    delete_assistant = "delete_assistant"
+    adelete_assistant = "adelete_assistant"
+    acreate_thread = "acreate_thread"
+    create_thread = "create_thread"
+    aget_thread = "aget_thread"
+    get_thread = "get_thread"
+    a_add_message = "a_add_message"
+    add_message = "add_message"
+    aget_messages = "aget_messages"
+    get_messages = "get_messages"
+    arun_thread = "arun_thread"
+    run_thread = "run_thread"
+    arun_thread_stream = "arun_thread_stream"
+    run_thread_stream = "run_thread_stream"
+    afile_retrieve = "afile_retrieve"
+    file_retrieve = "file_retrieve"
+    afile_delete = "afile_delete"
+    file_delete = "file_delete"
+    afile_list = "afile_list"
+    file_list = "file_list"
+    acreate_file = "acreate_file"
+    create_file = "create_file"
+    afile_content = "afile_content"
+    file_content = "file_content"
+    create_fine_tuning_job = "create_fine_tuning_job"
+    acreate_fine_tuning_job = "acreate_fine_tuning_job"
+    acancel_fine_tuning_job = "acancel_fine_tuning_job"
+    cancel_fine_tuning_job = "cancel_fine_tuning_job"
+    alist_fine_tuning_jobs = "alist_fine_tuning_jobs"
+    list_fine_tuning_jobs = "list_fine_tuning_jobs"
+    aretrieve_fine_tuning_job = "aretrieve_fine_tuning_job"
+    retrieve_fine_tuning_job = "retrieve_fine_tuning_job"
+    responses = "responses"
+    aresponses = "aresponses"
 
 
 CallTypesLiteral = Literal[
@@ -1815,6 +1853,7 @@ all_litellm_params = [
     "budget_duration",
     "use_in_pass_through",
     "merge_reasoning_content_in_choices",
+    "litellm_credential_name",
 ] + list(StandardCallbackDynamicParams.__annotations__.keys())
 
 
@@ -2011,3 +2050,9 @@ class RawRequestTypedDict(TypedDict, total=False):
     raw_request_body: Optional[dict]
     raw_request_headers: Optional[dict]
     error: Optional[str]
+
+
+class CredentialItem(BaseModel):
+    credential_name: str
+    credential_values: dict
+    credential_info: dict
diff --git a/litellm/utils.py b/litellm/utils.py
index 2f1cac743c..db1a3c7f30 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -66,6 +66,7 @@ from litellm.litellm_core_utils.core_helpers import (
     map_finish_reason,
     process_response_headers,
 )
+from litellm.litellm_core_utils.credential_accessor import CredentialAccessor
 from litellm.litellm_core_utils.default_encoding import encoding
 from litellm.litellm_core_utils.exception_mapping_utils import (
     _get_response_headers,
@@ -141,6 +142,7 @@ from litellm.types.utils import (
     ChatCompletionMessageToolCall,
     Choices,
     CostPerToken,
+    CredentialItem,
     CustomHuggingfaceTokenizer,
     Delta,
     Embedding,
@@ -209,6 +211,7 @@ from litellm.llms.base_llm.image_variations.transformation import (
     BaseImageVariationConfig,
 )
 from litellm.llms.base_llm.rerank.transformation import BaseRerankConfig
+from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
 
 from ._logging import _is_debugging_on, verbose_logger
 from .caching.caching import (
@@ -455,6 +458,18 @@ def get_applied_guardrails(kwargs: Dict[str, Any]) -> List[str]:
     return applied_guardrails
 
 
+def load_credentials_from_list(kwargs: dict):
+    """
+    Updates kwargs with the credentials if credential_name in kwarg
+    """
+    credential_name = kwargs.get("litellm_credential_name")
+    if credential_name and litellm.credential_list:
+        credential_accessor = CredentialAccessor.get_credential_values(credential_name)
+        for key, value in credential_accessor.items():
+            if key not in kwargs:
+                kwargs[key] = value
+
+
 def get_dynamic_callbacks(
     dynamic_callbacks: Optional[List[Union[str, Callable, CustomLogger]]]
 ) -> List:
@@ -715,6 +730,11 @@ def function_setup(  # noqa: PLR0915
             call_type == CallTypes.aspeech.value or call_type == CallTypes.speech.value
         ):
             messages = kwargs.get("input", "speech")
+        elif (
+            call_type == CallTypes.aresponses.value
+            or call_type == CallTypes.responses.value
+        ):
+            messages = args[0] if len(args) > 0 else kwargs["input"]
         else:
             messages = "default-message-value"
         stream = True if "stream" in kwargs and kwargs["stream"] is True else False
@@ -983,6 +1003,8 @@ def client(original_function):  # noqa: PLR0915
                 logging_obj, kwargs = function_setup(
                     original_function.__name__, rules_obj, start_time, *args, **kwargs
                 )
+            ## LOAD CREDENTIALS
+            load_credentials_from_list(kwargs)
             kwargs["litellm_logging_obj"] = logging_obj
             _llm_caching_handler: LLMCachingHandler = LLMCachingHandler(
                 original_function=original_function,
@@ -1239,6 +1261,8 @@ def client(original_function):  # noqa: PLR0915
                     original_function.__name__, rules_obj, start_time, *args, **kwargs
                 )
             kwargs["litellm_logging_obj"] = logging_obj
+            ## LOAD CREDENTIALS
+            load_credentials_from_list(kwargs)
             logging_obj._llm_caching_handler = _llm_caching_handler
             # [OPTIONAL] CHECK BUDGET
             if litellm.max_budget:
@@ -5104,7 +5128,7 @@ def prompt_token_calculator(model, messages):
         from anthropic import AI_PROMPT, HUMAN_PROMPT, Anthropic
 
         anthropic_obj = Anthropic()
-        num_tokens = anthropic_obj.count_tokens(text)
+        num_tokens = anthropic_obj.count_tokens(text)  # type: ignore
     else:
         num_tokens = len(encoding.encode(text))
     return num_tokens
@@ -6276,6 +6300,15 @@ class ProviderConfigManager:
             return litellm.DeepgramAudioTranscriptionConfig()
         return None
 
+    @staticmethod
+    def get_provider_responses_api_config(
+        model: str,
+        provider: LlmProviders,
+    ) -> Optional[BaseResponsesAPIConfig]:
+        if litellm.LlmProviders.OPENAI == provider:
+            return litellm.OpenAIResponsesAPIConfig()
+        return None
+
     @staticmethod
     def get_provider_text_completion_config(
         model: str,
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index 70f8623597..4824ceec46 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -2294,6 +2294,7 @@
         "output_cost_per_token": 0.0,
         "litellm_provider": "azure_ai",
         "mode": "embedding",
+        "supports_embedding_image_input": true,
         "source":"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/cohere.cohere-embed-v3-english-offer?tab=PlansAndPrice"
     },
     "azure_ai/Cohere-embed-v3-multilingual": {
@@ -2304,6 +2305,7 @@
         "output_cost_per_token": 0.0,
         "litellm_provider": "azure_ai",
         "mode": "embedding",
+        "supports_embedding_image_input": true,
         "source":"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/cohere.cohere-embed-v3-english-offer?tab=PlansAndPrice"
     },
     "babbage-002": {
@@ -4508,7 +4510,7 @@
     "gemini/gemini-2.0-flash-thinking-exp": {
         "max_tokens": 8192,
         "max_input_tokens": 1048576,
-        "max_output_tokens": 8192,
+        "max_output_tokens": 65536,
         "max_images_per_prompt": 3000,
         "max_videos_per_prompt": 10,
         "max_video_length": 1,
@@ -4541,6 +4543,98 @@
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
         "supports_tool_choice": true
     },
+    "gemini/gemini-2.0-flash-thinking-exp-01-21": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 65536,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_image": 0,
+        "input_cost_per_video_per_second": 0,
+        "input_cost_per_audio_per_second": 0,
+        "input_cost_per_token": 0,
+        "input_cost_per_character": 0, 
+        "input_cost_per_token_above_128k_tokens": 0, 
+        "input_cost_per_character_above_128k_tokens": 0, 
+        "input_cost_per_image_above_128k_tokens": 0,
+        "input_cost_per_video_per_second_above_128k_tokens": 0,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0,
+        "output_cost_per_token": 0,
+        "output_cost_per_character": 0,
+        "output_cost_per_token_above_128k_tokens": 0,
+        "output_cost_per_character_above_128k_tokens": 0,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "supports_audio_output": true,
+        "tpm": 4000000,
+        "rpm": 10,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
+        "supports_tool_choice": true
+    },
+    "gemini/gemma-3-27b-it": {
+        "max_tokens": 8192,
+        "max_input_tokens": 131072,
+        "max_output_tokens": 8192,
+        "input_cost_per_image": 0,
+        "input_cost_per_video_per_second": 0,
+        "input_cost_per_audio_per_second": 0,
+        "input_cost_per_token": 0,
+        "input_cost_per_character": 0, 
+        "input_cost_per_token_above_128k_tokens": 0, 
+        "input_cost_per_character_above_128k_tokens": 0, 
+        "input_cost_per_image_above_128k_tokens": 0,
+        "input_cost_per_video_per_second_above_128k_tokens": 0,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0,
+        "output_cost_per_token": 0,
+        "output_cost_per_character": 0,
+        "output_cost_per_token_above_128k_tokens": 0,
+        "output_cost_per_character_above_128k_tokens": 0,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "supports_audio_output": false,
+        "source": "https://aistudio.google.com",
+        "supports_tool_choice": true
+    },
+    "gemini/learnIm-1.5-pro-experimental": {
+        "max_tokens": 8192,
+        "max_input_tokens": 32767,
+        "max_output_tokens": 8192,
+        "input_cost_per_image": 0,
+        "input_cost_per_video_per_second": 0,
+        "input_cost_per_audio_per_second": 0,
+        "input_cost_per_token": 0,
+        "input_cost_per_character": 0, 
+        "input_cost_per_token_above_128k_tokens": 0, 
+        "input_cost_per_character_above_128k_tokens": 0, 
+        "input_cost_per_image_above_128k_tokens": 0,
+        "input_cost_per_video_per_second_above_128k_tokens": 0,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0,
+        "output_cost_per_token": 0,
+        "output_cost_per_character": 0,
+        "output_cost_per_token_above_128k_tokens": 0,
+        "output_cost_per_character_above_128k_tokens": 0,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "supports_audio_output": false,
+        "source": "https://aistudio.google.com",
+        "supports_tool_choice": true
+    },
     "vertex_ai/claude-3-sonnet": {
         "max_tokens": 4096,
         "max_input_tokens": 200000,
@@ -5708,6 +5802,7 @@
         "input_cost_per_token": 0.00000010,
         "output_cost_per_token": 0.00000,
         "litellm_provider": "cohere",
+        "supports_embedding_image_input": true,
         "mode": "embedding"
     },
     "embed-english-v2.0": {
@@ -7889,8 +7984,9 @@
         "max_input_tokens": 512, 
         "input_cost_per_token": 0.0000001,
         "output_cost_per_token": 0.000000,
-        "litellm_provider": "bedrock",
-        "mode": "embedding"
+        "litellm_provider": "bedrock",                
+        "mode": "embedding",
+        "supports_embedding_image_input": true
     },
     "cohere.embed-multilingual-v3": {
         "max_tokens": 512, 
@@ -7898,7 +7994,8 @@
         "input_cost_per_token": 0.0000001,
         "output_cost_per_token": 0.000000,
         "litellm_provider": "bedrock",
-        "mode": "embedding"
+        "mode": "embedding",
+        "supports_embedding_image_input": true
     },
     "us.deepseek.r1-v1:0": {
         "max_tokens": 4096, 
diff --git a/poetry.lock b/poetry.lock
index d6b72e015e..772036eb3a 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 2.0.0 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
 
 [[package]]
 name = "aiohappyeyeballs"
@@ -6,7 +6,6 @@ version = "2.4.4"
 description = "Happy Eyeballs for asyncio"
 optional = false
 python-versions = ">=3.8"
-groups = ["main"]
 files = [
     {file = "aiohappyeyeballs-2.4.4-py3-none-any.whl", hash = "sha256:a980909d50efcd44795c4afeca523296716d50cd756ddca6af8c65b996e27de8"},
     {file = "aiohappyeyeballs-2.4.4.tar.gz", hash = "sha256:5fdd7d87889c63183afc18ce9271f9b0a7d32c2303e394468dd45d514a757745"},
@@ -18,7 +17,6 @@ version = "3.10.11"
 description = "Async http client/server framework (asyncio)"
 optional = false
 python-versions = ">=3.8"
-groups = ["main"]
 files = [
     {file = "aiohttp-3.10.11-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5077b1a5f40ffa3ba1f40d537d3bec4383988ee51fbba6b74aa8fb1bc466599e"},
     {file = "aiohttp-3.10.11-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8d6a14a4d93b5b3c2891fca94fa9d41b2322a68194422bef0dd5ec1e57d7d298"},
@@ -131,7 +129,6 @@ version = "1.3.1"
 description = "aiosignal: a list of registered asynchronous callbacks"
 optional = false
 python-versions = ">=3.7"
-groups = ["main"]
 files = [
     {file = "aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17"},
     {file = "aiosignal-1.3.1.tar.gz", hash = "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc"},
@@ -146,7 +143,6 @@ version = "0.7.0"
 description = "Reusable constraint types to use with typing.Annotated"
 optional = false
 python-versions = ">=3.8"
-groups = ["main"]
 files = [
     {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"},
     {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"},
@@ -161,7 +157,6 @@ version = "4.5.2"
 description = "High level compatibility layer for multiple asynchronous event loop implementations"
 optional = false
 python-versions = ">=3.8"
-groups = ["main"]
 files = [
     {file = "anyio-4.5.2-py3-none-any.whl", hash = "sha256:c011ee36bc1e8ba40e5a81cb9df91925c218fe9b778554e0b56a21e1b5d4716f"},
     {file = "anyio-4.5.2.tar.gz", hash = "sha256:23009af4ed04ce05991845451e11ef02fc7c5ed29179ac9a420e5ad0ac7ddc5b"},
@@ -184,8 +179,6 @@ version = "3.11.0"
 description = "In-process task scheduler with Cron-like capabilities"
 optional = true
 python-versions = ">=3.8"
-groups = ["main"]
-markers = "extra == \"proxy\""
 files = [
     {file = "APScheduler-3.11.0-py3-none-any.whl", hash = "sha256:fc134ca32e50f5eadcc4938e3a4545ab19131435e851abb40b34d63d5141c6da"},
     {file = "apscheduler-3.11.0.tar.gz", hash = "sha256:4c622d250b0955a65d5d0eb91c33e6d43fd879834bf541e0a18661ae60460133"},
@@ -214,8 +207,6 @@ version = "5.0.1"
 description = "Timeout context manager for asyncio programs"
 optional = false
 python-versions = ">=3.8"
-groups = ["main"]
-markers = "python_full_version < \"3.11.3\" and extra == \"proxy\" or python_version < \"3.11\""
 files = [
     {file = "async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c"},
     {file = "async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"},
@@ -223,21 +214,20 @@ files = [
 
 [[package]]
 name = "attrs"
-version = "25.1.0"
+version = "25.2.0"
 description = "Classes Without Boilerplate"
 optional = false
 python-versions = ">=3.8"
-groups = ["main"]
 files = [
-    {file = "attrs-25.1.0-py3-none-any.whl", hash = "sha256:c75a69e28a550a7e93789579c22aa26b0f5b83b75dc4e08fe092980051e1090a"},
-    {file = "attrs-25.1.0.tar.gz", hash = "sha256:1c97078a80c814273a76b2a298a932eb681c87415c11dee0a6921de7f1b02c3e"},
+    {file = "attrs-25.2.0-py3-none-any.whl", hash = "sha256:611344ff0a5fed735d86d7784610c84f8126b95e549bcad9ff61b4242f2d386b"},
+    {file = "attrs-25.2.0.tar.gz", hash = "sha256:18a06db706db43ac232cce80443fcd9f2500702059ecf53489e3c5a3f417acaf"},
 ]
 
 [package.extras]
 benchmark = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
 cov = ["cloudpickle", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
 dev = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pre-commit-uv", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
-docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier (<24.7)"]
+docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier"]
 tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
 tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"]
 
@@ -247,8 +237,6 @@ version = "1.32.0"
 description = "Microsoft Azure Core Library for Python"
 optional = true
 python-versions = ">=3.8"
-groups = ["main"]
-markers = "extra == \"extra-proxy\""
 files = [
     {file = "azure_core-1.32.0-py3-none-any.whl", hash = "sha256:eac191a0efb23bfa83fddf321b27b122b4ec847befa3091fa736a5c32c50d7b4"},
     {file = "azure_core-1.32.0.tar.gz", hash = "sha256:22b3c35d6b2dae14990f6c1be2912bf23ffe50b220e708a28ab1bb92b1c730e5"},
@@ -264,15 +252,13 @@ aio = ["aiohttp (>=3.0)"]
 
 [[package]]
 name = "azure-identity"
-version = "1.19.0"
+version = "1.21.0"
 description = "Microsoft Azure Identity Library for Python"
 optional = true
 python-versions = ">=3.8"
-groups = ["main"]
-markers = "extra == \"extra-proxy\""
 files = [
-    {file = "azure_identity-1.19.0-py3-none-any.whl", hash = "sha256:e3f6558c181692d7509f09de10cca527c7dce426776454fb97df512a46527e81"},
-    {file = "azure_identity-1.19.0.tar.gz", hash = "sha256:500144dc18197d7019b81501165d4fa92225f03778f17d7ca8a2a180129a9c83"},
+    {file = "azure_identity-1.21.0-py3-none-any.whl", hash = "sha256:258ea6325537352440f71b35c3dffe9d240eae4a5126c1b7ce5efd5766bd9fd9"},
+    {file = "azure_identity-1.21.0.tar.gz", hash = "sha256:ea22ce6e6b0f429bc1b8d9212d5b9f9877bd4c82f1724bfa910760612c07a9a6"},
 ]
 
 [package.dependencies]
@@ -288,8 +274,6 @@ version = "4.9.0"
 description = "Microsoft Azure Key Vault Secrets Client Library for Python"
 optional = true
 python-versions = ">=3.8"
-groups = ["main"]
-markers = "extra == \"extra-proxy\""
 files = [
     {file = "azure_keyvault_secrets-4.9.0-py3-none-any.whl", hash = "sha256:33c7e2aca2cc2092cebc8c6e96eca36a5cc30c767e16ea429c5fa21270e9fba6"},
     {file = "azure_keyvault_secrets-4.9.0.tar.gz", hash = "sha256:2a03bb2ffd9a0d6c8ad1c330d9d0310113985a9de06607ece378fd72a5889fe1"},
@@ -306,8 +290,6 @@ version = "2.2.1"
 description = "Function decoration for backoff and retry"
 optional = true
 python-versions = ">=3.7,<4.0"
-groups = ["main"]
-markers = "extra == \"proxy\""
 files = [
     {file = "backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8"},
     {file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"},
@@ -319,8 +301,6 @@ version = "0.2.1"
 description = "Backport of the standard library zoneinfo module"
 optional = true
 python-versions = ">=3.6"
-groups = ["main"]
-markers = "extra == \"proxy\" and python_version < \"3.9\""
 files = [
     {file = "backports.zoneinfo-0.2.1-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:da6013fd84a690242c310d77ddb8441a559e9cb3d3d59ebac9aca1a57b2e18bc"},
     {file = "backports.zoneinfo-0.2.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:89a48c0d158a3cc3f654da4c2de1ceba85263fafb861b98b59040a5086259722"},
@@ -349,7 +329,6 @@ version = "23.12.1"
 description = "The uncompromising code formatter."
 optional = false
 python-versions = ">=3.8"
-groups = ["dev"]
 files = [
     {file = "black-23.12.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e0aaf6041986767a5e0ce663c7a2f0e9eaf21e6ff87a5f95cbf3675bfd4c41d2"},
     {file = "black-23.12.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c88b3711d12905b74206227109272673edce0cb29f27e1385f33b0163c414bba"},
@@ -392,15 +371,13 @@ uvloop = ["uvloop (>=0.15.2)"]
 
 [[package]]
 name = "cachetools"
-version = "5.5.1"
+version = "5.5.2"
 description = "Extensible memoizing collections and decorators"
 optional = true
 python-versions = ">=3.7"
-groups = ["main"]
-markers = "extra == \"extra-proxy\""
 files = [
-    {file = "cachetools-5.5.1-py3-none-any.whl", hash = "sha256:b76651fdc3b24ead3c648bbdeeb940c1b04d365b38b4af66788f9ec4a81d42bb"},
-    {file = "cachetools-5.5.1.tar.gz", hash = "sha256:70f238fbba50383ef62e55c6aff6d9673175fe59f7c6782c7a0b9e38f4a9df95"},
+    {file = "cachetools-5.5.2-py3-none-any.whl", hash = "sha256:d26a22bcc62eb95c3beabd9f1ee5e820d3d2704fe2967cbe350e20c8ffcd3f0a"},
+    {file = "cachetools-5.5.2.tar.gz", hash = "sha256:1a661caa9175d26759571b2e19580f9d6393969e5dfca11fdb1f947a23e640d4"},
 ]
 
 [[package]]
@@ -409,7 +386,6 @@ version = "2025.1.31"
 description = "Python package for providing Mozilla's CA Bundle."
 optional = false
 python-versions = ">=3.6"
-groups = ["main"]
 files = [
     {file = "certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe"},
     {file = "certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651"},
@@ -421,8 +397,6 @@ version = "1.17.1"
 description = "Foreign Function Interface for Python calling C code."
 optional = true
 python-versions = ">=3.8"
-groups = ["main"]
-markers = "extra == \"proxy\" or extra == \"extra-proxy\" and platform_python_implementation != \"PyPy\""
 files = [
     {file = "cffi-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14"},
     {file = "cffi-1.17.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67"},
@@ -502,7 +476,6 @@ version = "3.4.1"
 description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
 optional = false
 python-versions = ">=3.7"
-groups = ["main"]
 files = [
     {file = "charset_normalizer-3.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de"},
     {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176"},
@@ -604,7 +577,6 @@ version = "8.1.8"
 description = "Composable command line interface toolkit"
 optional = false
 python-versions = ">=3.7"
-groups = ["main", "dev"]
 files = [
     {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"},
     {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"},
@@ -619,12 +591,10 @@ version = "0.4.6"
 description = "Cross-platform colored terminal text."
 optional = false
 python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
-groups = ["main", "dev"]
 files = [
     {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
     {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
 ]
-markers = {main = "platform_system == \"Windows\"", dev = "platform_system == \"Windows\" or sys_platform == \"win32\""}
 
 [[package]]
 name = "cryptography"
@@ -632,8 +602,6 @@ version = "43.0.3"
 description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers."
 optional = true
 python-versions = ">=3.7"
-groups = ["main"]
-markers = "extra == \"proxy\" or extra == \"extra-proxy\""
 files = [
     {file = "cryptography-43.0.3-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:bf7a1932ac4176486eab36a19ed4c0492da5d97123f1406cf15e41b05e787d2e"},
     {file = "cryptography-43.0.3-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:63efa177ff54aec6e1c0aefaa1a241232dcd37413835a9b674b6e3f0ae2bfd3e"},
@@ -683,7 +651,6 @@ version = "1.9.0"
 description = "Distro - an OS platform information API"
 optional = false
 python-versions = ">=3.6"
-groups = ["main"]
 files = [
     {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"},
     {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"},
@@ -695,8 +662,6 @@ version = "2.6.1"
 description = "DNS toolkit"
 optional = true
 python-versions = ">=3.8"
-groups = ["main"]
-markers = "extra == \"proxy\""
 files = [
     {file = "dnspython-2.6.1-py3-none-any.whl", hash = "sha256:5ef3b9680161f6fa89daf8ad451b5f1a33b18ae8a1c6778cdf4b43f08c0a6e50"},
     {file = "dnspython-2.6.1.tar.gz", hash = "sha256:e8f0f9c23a7b7cb99ded64e6c3a6f3e701d78f50c55e002b839dea7225cff7cc"},
@@ -717,8 +682,6 @@ version = "2.2.0"
 description = "A robust email address syntax and deliverability validation library."
 optional = true
 python-versions = ">=3.8"
-groups = ["main"]
-markers = "extra == \"proxy\""
 files = [
     {file = "email_validator-2.2.0-py3-none-any.whl", hash = "sha256:561977c2d73ce3611850a06fa56b414621e0c8faa9d66f2611407d87465da631"},
     {file = "email_validator-2.2.0.tar.gz", hash = "sha256:cb690f344c617a714f22e66ae771445a1ceb46821152df8e165c5f9a364582b7"},
@@ -734,8 +697,6 @@ version = "1.2.2"
 description = "Backport of PEP 654 (exception groups)"
 optional = false
 python-versions = ">=3.7"
-groups = ["main", "dev"]
-markers = "python_version < \"3.11\""
 files = [
     {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"},
     {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"},
@@ -746,20 +707,18 @@ test = ["pytest (>=6)"]
 
 [[package]]
 name = "fastapi"
-version = "0.115.8"
+version = "0.115.11"
 description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production"
 optional = true
 python-versions = ">=3.8"
-groups = ["main"]
-markers = "extra == \"proxy\""
 files = [
-    {file = "fastapi-0.115.8-py3-none-any.whl", hash = "sha256:753a96dd7e036b34eeef8babdfcfe3f28ff79648f86551eb36bfc1b0bf4a8cbf"},
-    {file = "fastapi-0.115.8.tar.gz", hash = "sha256:0ce9111231720190473e222cdf0f07f7206ad7e53ea02beb1d2dc36e2f0741e9"},
+    {file = "fastapi-0.115.11-py3-none-any.whl", hash = "sha256:32e1541b7b74602e4ef4a0260ecaf3aadf9d4f19590bba3e1bf2ac4666aa2c64"},
+    {file = "fastapi-0.115.11.tar.gz", hash = "sha256:cc81f03f688678b92600a65a5e618b93592c65005db37157147204d8924bf94f"},
 ]
 
 [package.dependencies]
 pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0 || >2.0.0,<2.0.1 || >2.0.1,<2.1.0 || >2.1.0,<3.0.0"
-starlette = ">=0.40.0,<0.46.0"
+starlette = ">=0.40.0,<0.47.0"
 typing-extensions = ">=4.8.0"
 
 [package.extras]
@@ -772,8 +731,6 @@ version = "0.16.0"
 description = "FastAPI plugin to enable SSO to most common providers (such as Facebook login, Google login and login via Microsoft Office 365 Account)"
 optional = true
 python-versions = "<4.0,>=3.8"
-groups = ["main"]
-markers = "extra == \"proxy\""
 files = [
     {file = "fastapi_sso-0.16.0-py3-none-any.whl", hash = "sha256:3a66a942474ef9756d3a9d8b945d55bd9faf99781facdb9b87a40b73d6d6b0c3"},
     {file = "fastapi_sso-0.16.0.tar.gz", hash = "sha256:f3941f986347566b7d3747c710cf474a907f581bfb6697ff3bb3e44eb76b438c"},
@@ -792,7 +749,6 @@ version = "3.16.1"
 description = "A platform independent file lock."
 optional = false
 python-versions = ">=3.8"
-groups = ["main"]
 files = [
     {file = "filelock-3.16.1-py3-none-any.whl", hash = "sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0"},
     {file = "filelock-3.16.1.tar.gz", hash = "sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435"},
@@ -809,7 +765,6 @@ version = "6.1.0"
 description = "the modular source code checker: pep8 pyflakes and co"
 optional = false
 python-versions = ">=3.8.1"
-groups = ["dev"]
 files = [
     {file = "flake8-6.1.0-py2.py3-none-any.whl", hash = "sha256:ffdfce58ea94c6580c77888a86506937f9a1a227dfcd15f245d694ae20a6b6e5"},
     {file = "flake8-6.1.0.tar.gz", hash = "sha256:d5b3857f07c030bdb5bf41c7f53799571d75c4491748a3adcd47de929e34cd23"},
@@ -826,7 +781,6 @@ version = "1.5.0"
 description = "A list-like structure which implements collections.abc.MutableSequence"
 optional = false
 python-versions = ">=3.8"
-groups = ["main"]
 files = [
     {file = "frozenlist-1.5.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5b6a66c18b5b9dd261ca98dffcb826a525334b2f29e7caa54e182255c5f6a65a"},
     {file = "frozenlist-1.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d1b3eb7b05ea246510b43a7e53ed1653e55c2121019a97e60cad7efb881a97bb"},
@@ -924,14 +878,13 @@ files = [
 
 [[package]]
 name = "fsspec"
-version = "2025.2.0"
+version = "2025.3.0"
 description = "File-system specification"
 optional = false
 python-versions = ">=3.8"
-groups = ["main"]
 files = [
-    {file = "fsspec-2025.2.0-py3-none-any.whl", hash = "sha256:9de2ad9ce1f85e1931858535bc882543171d197001a0a5eb2ddc04f1781ab95b"},
-    {file = "fsspec-2025.2.0.tar.gz", hash = "sha256:1c24b16eaa0a1798afa0337aa0db9b256718ab2a89c425371f5628d22c3b6afd"},
+    {file = "fsspec-2025.3.0-py3-none-any.whl", hash = "sha256:efb87af3efa9103f94ca91a7f8cb7a4df91af9f74fc106c9c7ea0efd7277c1b3"},
+    {file = "fsspec-2025.3.0.tar.gz", hash = "sha256:a935fd1ea872591f2b5148907d103488fc523295e6c64b835cfad8c3eca44972"},
 ]
 
 [package.extras]
@@ -964,20 +917,18 @@ tqdm = ["tqdm"]
 
 [[package]]
 name = "google-api-core"
-version = "2.24.1"
+version = "2.24.2"
 description = "Google API client core library"
 optional = true
 python-versions = ">=3.7"
-groups = ["main"]
-markers = "extra == \"extra-proxy\""
 files = [
-    {file = "google_api_core-2.24.1-py3-none-any.whl", hash = "sha256:bc78d608f5a5bf853b80bd70a795f703294de656c096c0968320830a4bc280f1"},
-    {file = "google_api_core-2.24.1.tar.gz", hash = "sha256:f8b36f5456ab0dd99a1b693a40a31d1e7757beea380ad1b38faaf8941eae9d8a"},
+    {file = "google_api_core-2.24.2-py3-none-any.whl", hash = "sha256:810a63ac95f3c441b7c0e43d344e372887f62ce9071ba972eacf32672e072de9"},
+    {file = "google_api_core-2.24.2.tar.gz", hash = "sha256:81718493daf06d96d6bc76a91c23874dbf2fac0adbbf542831b805ee6e974696"},
 ]
 
 [package.dependencies]
-google-auth = ">=2.14.1,<3.0.dev0"
-googleapis-common-protos = ">=1.56.2,<2.0.dev0"
+google-auth = ">=2.14.1,<3.0.0"
+googleapis-common-protos = ">=1.56.2,<2.0.0"
 grpcio = [
     {version = ">=1.49.1,<2.0dev", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""},
     {version = ">=1.33.2,<2.0dev", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""},
@@ -987,11 +938,11 @@ grpcio-status = [
     {version = ">=1.33.2,<2.0.dev0", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""},
 ]
 proto-plus = [
-    {version = ">=1.25.0,<2.0.0dev", markers = "python_version >= \"3.13\""},
-    {version = ">=1.22.3,<2.0.0dev", markers = "python_version < \"3.13\""},
+    {version = ">=1.25.0,<2.0.0", markers = "python_version >= \"3.13\""},
+    {version = ">=1.22.3,<2.0.0", markers = "python_version < \"3.13\""},
 ]
-protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0.dev0"
-requests = ">=2.18.0,<3.0.0.dev0"
+protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<7.0.0"
+requests = ">=2.18.0,<3.0.0"
 
 [package.extras]
 async-rest = ["google-auth[aiohttp] (>=2.35.0,<3.0.dev0)"]
@@ -1005,8 +956,6 @@ version = "2.38.0"
 description = "Google Authentication Library"
 optional = true
 python-versions = ">=3.7"
-groups = ["main"]
-markers = "extra == \"extra-proxy\""
 files = [
     {file = "google_auth-2.38.0-py2.py3-none-any.whl", hash = "sha256:e7dae6694313f434a2727bf2906f27ad259bae090d7aa896590d86feec3d9d4a"},
     {file = "google_auth-2.38.0.tar.gz", hash = "sha256:8285113607d3b80a3f1543b75962447ba8a09fe85783432a784fdeef6ac094c4"},
@@ -1031,8 +980,6 @@ version = "2.24.2"
 description = "Google Cloud Kms API client library"
 optional = true
 python-versions = ">=3.7"
-groups = ["main"]
-markers = "extra == \"extra-proxy\""
 files = [
     {file = "google_cloud_kms-2.24.2-py2.py3-none-any.whl", hash = "sha256:368209b035dfac691a467c1cf50986d8b1b26cac1166bdfbaa25d738df91ff7b"},
     {file = "google_cloud_kms-2.24.2.tar.gz", hash = "sha256:e9e18bbfafd1a4035c76c03fb5ff03f4f57f596d08e1a9ede7e69ec0151b27a1"},
@@ -1047,15 +994,13 @@ protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4
 
 [[package]]
 name = "googleapis-common-protos"
-version = "1.66.0"
+version = "1.69.1"
 description = "Common protobufs used in Google APIs"
 optional = true
 python-versions = ">=3.7"
-groups = ["main"]
-markers = "extra == \"extra-proxy\""
 files = [
-    {file = "googleapis_common_protos-1.66.0-py2.py3-none-any.whl", hash = "sha256:d7abcd75fabb2e0ec9f74466401f6c119a0b498e27370e9be4c94cb7e382b8ed"},
-    {file = "googleapis_common_protos-1.66.0.tar.gz", hash = "sha256:c3e7b33d15fdca5374cc0a7346dd92ffa847425cc4ea941d970f13680052ec8c"},
+    {file = "googleapis_common_protos-1.69.1-py2.py3-none-any.whl", hash = "sha256:4077f27a6900d5946ee5a369fab9c8ded4c0ef1c6e880458ea2f70c14f7b70d5"},
+    {file = "googleapis_common_protos-1.69.1.tar.gz", hash = "sha256:e20d2d8dda87da6fe7340afbbdf4f0bcb4c8fae7e6cadf55926c31f946b0b9b1"},
 ]
 
 [package.dependencies]
@@ -1067,15 +1012,13 @@ grpc = ["grpcio (>=1.44.0,<2.0.0.dev0)"]
 
 [[package]]
 name = "grpc-google-iam-v1"
-version = "0.14.0"
+version = "0.14.1"
 description = "IAM API client library"
 optional = true
 python-versions = ">=3.7"
-groups = ["main"]
-markers = "extra == \"extra-proxy\""
 files = [
-    {file = "grpc_google_iam_v1-0.14.0-py2.py3-none-any.whl", hash = "sha256:fb4a084b30099ba3ab07d61d620a0d4429570b13ff53bd37bac75235f98b7da4"},
-    {file = "grpc_google_iam_v1-0.14.0.tar.gz", hash = "sha256:c66e07aa642e39bb37950f9e7f491f70dad150ac9801263b42b2814307c2df99"},
+    {file = "grpc_google_iam_v1-0.14.1-py2.py3-none-any.whl", hash = "sha256:b4eca35b2231dd76066ebf1728f3cd30d51034db946827ef63ef138da14eea16"},
+    {file = "grpc_google_iam_v1-0.14.1.tar.gz", hash = "sha256:14149f37af0e5779fa8a22a8ae588663269e8a479d9c2e69a5056e589bf8a891"},
 ]
 
 [package.dependencies]
@@ -1089,8 +1032,6 @@ version = "1.70.0"
 description = "HTTP/2-based RPC framework"
 optional = true
 python-versions = ">=3.8"
-groups = ["main"]
-markers = "extra == \"extra-proxy\""
 files = [
     {file = "grpcio-1.70.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:95469d1977429f45fe7df441f586521361e235982a0b39e33841549143ae2851"},
     {file = "grpcio-1.70.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:ed9718f17fbdb472e33b869c77a16d0b55e166b100ec57b016dc7de9c8d236bf"},
@@ -1152,14 +1093,75 @@ files = [
 [package.extras]
 protobuf = ["grpcio-tools (>=1.70.0)"]
 
+[[package]]
+name = "grpcio"
+version = "1.71.0"
+description = "HTTP/2-based RPC framework"
+optional = true
+python-versions = ">=3.9"
+files = [
+    {file = "grpcio-1.71.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:c200cb6f2393468142eb50ab19613229dcc7829b5ccee8b658a36005f6669fdd"},
+    {file = "grpcio-1.71.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:b2266862c5ad664a380fbbcdbdb8289d71464c42a8c29053820ee78ba0119e5d"},
+    {file = "grpcio-1.71.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:0ab8b2864396663a5b0b0d6d79495657ae85fa37dcb6498a2669d067c65c11ea"},
+    {file = "grpcio-1.71.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c30f393f9d5ff00a71bb56de4aa75b8fe91b161aeb61d39528db6b768d7eac69"},
+    {file = "grpcio-1.71.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f250ff44843d9a0615e350c77f890082102a0318d66a99540f54769c8766ab73"},
+    {file = "grpcio-1.71.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e6d8de076528f7c43a2f576bc311799f89d795aa6c9b637377cc2b1616473804"},
+    {file = "grpcio-1.71.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:9b91879d6da1605811ebc60d21ab6a7e4bae6c35f6b63a061d61eb818c8168f6"},
+    {file = "grpcio-1.71.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f71574afdf944e6652203cd1badcda195b2a27d9c83e6d88dc1ce3cfb73b31a5"},
+    {file = "grpcio-1.71.0-cp310-cp310-win32.whl", hash = "sha256:8997d6785e93308f277884ee6899ba63baafa0dfb4729748200fcc537858a509"},
+    {file = "grpcio-1.71.0-cp310-cp310-win_amd64.whl", hash = "sha256:7d6ac9481d9d0d129224f6d5934d5832c4b1cddb96b59e7eba8416868909786a"},
+    {file = "grpcio-1.71.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:d6aa986318c36508dc1d5001a3ff169a15b99b9f96ef5e98e13522c506b37eef"},
+    {file = "grpcio-1.71.0-cp311-cp311-macosx_10_14_universal2.whl", hash = "sha256:d2c170247315f2d7e5798a22358e982ad6eeb68fa20cf7a820bb74c11f0736e7"},
+    {file = "grpcio-1.71.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:e6f83a583ed0a5b08c5bc7a3fe860bb3c2eac1f03f1f63e0bc2091325605d2b7"},
+    {file = "grpcio-1.71.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4be74ddeeb92cc87190e0e376dbc8fc7736dbb6d3d454f2fa1f5be1dee26b9d7"},
+    {file = "grpcio-1.71.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4dd0dfbe4d5eb1fcfec9490ca13f82b089a309dc3678e2edabc144051270a66e"},
+    {file = "grpcio-1.71.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a2242d6950dc892afdf9e951ed7ff89473aaf744b7d5727ad56bdaace363722b"},
+    {file = "grpcio-1.71.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:0fa05ee31a20456b13ae49ad2e5d585265f71dd19fbd9ef983c28f926d45d0a7"},
+    {file = "grpcio-1.71.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3d081e859fb1ebe176de33fc3adb26c7d46b8812f906042705346b314bde32c3"},
+    {file = "grpcio-1.71.0-cp311-cp311-win32.whl", hash = "sha256:d6de81c9c00c8a23047136b11794b3584cdc1460ed7cbc10eada50614baa1444"},
+    {file = "grpcio-1.71.0-cp311-cp311-win_amd64.whl", hash = "sha256:24e867651fc67717b6f896d5f0cac0ec863a8b5fb7d6441c2ab428f52c651c6b"},
+    {file = "grpcio-1.71.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:0ff35c8d807c1c7531d3002be03221ff9ae15712b53ab46e2a0b4bb271f38537"},
+    {file = "grpcio-1.71.0-cp312-cp312-macosx_10_14_universal2.whl", hash = "sha256:b78a99cd1ece4be92ab7c07765a0b038194ded2e0a26fd654591ee136088d8d7"},
+    {file = "grpcio-1.71.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:dc1a1231ed23caac1de9f943d031f1bc38d0f69d2a3b243ea0d664fc1fbd7fec"},
+    {file = "grpcio-1.71.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e6beeea5566092c5e3c4896c6d1d307fb46b1d4bdf3e70c8340b190a69198594"},
+    {file = "grpcio-1.71.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5170929109450a2c031cfe87d6716f2fae39695ad5335d9106ae88cc32dc84c"},
+    {file = "grpcio-1.71.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:5b08d03ace7aca7b2fadd4baf291139b4a5f058805a8327bfe9aece7253b6d67"},
+    {file = "grpcio-1.71.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:f903017db76bf9cc2b2d8bdd37bf04b505bbccad6be8a81e1542206875d0e9db"},
+    {file = "grpcio-1.71.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:469f42a0b410883185eab4689060a20488a1a0a00f8bbb3cbc1061197b4c5a79"},
+    {file = "grpcio-1.71.0-cp312-cp312-win32.whl", hash = "sha256:ad9f30838550695b5eb302add33f21f7301b882937460dd24f24b3cc5a95067a"},
+    {file = "grpcio-1.71.0-cp312-cp312-win_amd64.whl", hash = "sha256:652350609332de6dac4ece254e5d7e1ff834e203d6afb769601f286886f6f3a8"},
+    {file = "grpcio-1.71.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:cebc1b34ba40a312ab480ccdb396ff3c529377a2fce72c45a741f7215bfe8379"},
+    {file = "grpcio-1.71.0-cp313-cp313-macosx_10_14_universal2.whl", hash = "sha256:85da336e3649a3d2171e82f696b5cad2c6231fdd5bad52616476235681bee5b3"},
+    {file = "grpcio-1.71.0-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:f9a412f55bb6e8f3bb000e020dbc1e709627dcb3a56f6431fa7076b4c1aab0db"},
+    {file = "grpcio-1.71.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:47be9584729534660416f6d2a3108aaeac1122f6b5bdbf9fd823e11fe6fbaa29"},
+    {file = "grpcio-1.71.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c9c80ac6091c916db81131d50926a93ab162a7e97e4428ffc186b6e80d6dda4"},
+    {file = "grpcio-1.71.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:789d5e2a3a15419374b7b45cd680b1e83bbc1e52b9086e49308e2c0b5bbae6e3"},
+    {file = "grpcio-1.71.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:1be857615e26a86d7363e8a163fade914595c81fec962b3d514a4b1e8760467b"},
+    {file = "grpcio-1.71.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:a76d39b5fafd79ed604c4be0a869ec3581a172a707e2a8d7a4858cb05a5a7637"},
+    {file = "grpcio-1.71.0-cp313-cp313-win32.whl", hash = "sha256:74258dce215cb1995083daa17b379a1a5a87d275387b7ffe137f1d5131e2cfbb"},
+    {file = "grpcio-1.71.0-cp313-cp313-win_amd64.whl", hash = "sha256:22c3bc8d488c039a199f7a003a38cb7635db6656fa96437a8accde8322ce2366"},
+    {file = "grpcio-1.71.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:c6a0a28450c16809f94e0b5bfe52cabff63e7e4b97b44123ebf77f448534d07d"},
+    {file = "grpcio-1.71.0-cp39-cp39-macosx_10_14_universal2.whl", hash = "sha256:a371e6b6a5379d3692cc4ea1cb92754d2a47bdddeee755d3203d1f84ae08e03e"},
+    {file = "grpcio-1.71.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:39983a9245d37394fd59de71e88c4b295eb510a3555e0a847d9965088cdbd033"},
+    {file = "grpcio-1.71.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9182e0063112e55e74ee7584769ec5a0b4f18252c35787f48738627e23a62b97"},
+    {file = "grpcio-1.71.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:693bc706c031aeb848849b9d1c6b63ae6bcc64057984bb91a542332b75aa4c3d"},
+    {file = "grpcio-1.71.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:20e8f653abd5ec606be69540f57289274c9ca503ed38388481e98fa396ed0b41"},
+    {file = "grpcio-1.71.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:8700a2a57771cc43ea295296330daaddc0d93c088f0a35cc969292b6db959bf3"},
+    {file = "grpcio-1.71.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d35a95f05a8a2cbe8e02be137740138b3b2ea5f80bd004444e4f9a1ffc511e32"},
+    {file = "grpcio-1.71.0-cp39-cp39-win32.whl", hash = "sha256:f9c30c464cb2ddfbc2ddf9400287701270fdc0f14be5f08a1e3939f1e749b455"},
+    {file = "grpcio-1.71.0-cp39-cp39-win_amd64.whl", hash = "sha256:63e41b91032f298b3e973b3fa4093cbbc620c875e2da7b93e249d4728b54559a"},
+    {file = "grpcio-1.71.0.tar.gz", hash = "sha256:2b85f7820475ad3edec209d3d89a7909ada16caab05d3f2e08a7e8ae3200a55c"},
+]
+
+[package.extras]
+protobuf = ["grpcio-tools (>=1.71.0)"]
+
 [[package]]
 name = "grpcio-status"
 version = "1.70.0"
 description = "Status proto mapping for gRPC"
 optional = true
 python-versions = ">=3.8"
-groups = ["main"]
-markers = "extra == \"extra-proxy\""
 files = [
     {file = "grpcio_status-1.70.0-py3-none-any.whl", hash = "sha256:fc5a2ae2b9b1c1969cc49f3262676e6854aa2398ec69cb5bd6c47cd501904a85"},
     {file = "grpcio_status-1.70.0.tar.gz", hash = "sha256:0e7b42816512433b18b9d764285ff029bde059e9d41f8fe10a60631bd8348101"},
@@ -1170,14 +1172,28 @@ googleapis-common-protos = ">=1.5.5"
 grpcio = ">=1.70.0"
 protobuf = ">=5.26.1,<6.0dev"
 
+[[package]]
+name = "grpcio-status"
+version = "1.71.0"
+description = "Status proto mapping for gRPC"
+optional = true
+python-versions = ">=3.9"
+files = [
+    {file = "grpcio_status-1.71.0-py3-none-any.whl", hash = "sha256:843934ef8c09e3e858952887467f8256aac3910c55f077a359a65b2b3cde3e68"},
+    {file = "grpcio_status-1.71.0.tar.gz", hash = "sha256:11405fed67b68f406b3f3c7c5ae5104a79d2d309666d10d61b152e91d28fb968"},
+]
+
+[package.dependencies]
+googleapis-common-protos = ">=1.5.5"
+grpcio = ">=1.71.0"
+protobuf = ">=5.26.1,<6.0dev"
+
 [[package]]
 name = "gunicorn"
 version = "22.0.0"
 description = "WSGI HTTP Server for UNIX"
 optional = true
 python-versions = ">=3.7"
-groups = ["main"]
-markers = "extra == \"proxy\""
 files = [
     {file = "gunicorn-22.0.0-py3-none-any.whl", hash = "sha256:350679f91b24062c86e386e198a15438d53a7a8207235a78ba1b53df4c4378d9"},
     {file = "gunicorn-22.0.0.tar.gz", hash = "sha256:4a0b436239ff76fb33f11c07a16482c521a7e09c1ce3cc293c2330afe01bec63"},
@@ -1199,7 +1215,6 @@ version = "0.14.0"
 description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
 optional = false
 python-versions = ">=3.7"
-groups = ["main"]
 files = [
     {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"},
     {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"},
@@ -1211,7 +1226,6 @@ version = "1.0.7"
 description = "A minimal low-level HTTP client."
 optional = false
 python-versions = ">=3.8"
-groups = ["main"]
 files = [
     {file = "httpcore-1.0.7-py3-none-any.whl", hash = "sha256:a3fff8f43dc260d5bd363d9f9cf1830fa3a458b332856f34282de498ed420edd"},
     {file = "httpcore-1.0.7.tar.gz", hash = "sha256:8551cb62a169ec7162ac7be8d4817d561f60e08eaa485234898414bb5a8a0b4c"},
@@ -1229,14 +1243,13 @@ trio = ["trio (>=0.22.0,<1.0)"]
 
 [[package]]
 name = "httpx"
-version = "0.27.2"
+version = "0.28.1"
 description = "The next generation HTTP client."
 optional = false
 python-versions = ">=3.8"
-groups = ["main"]
 files = [
-    {file = "httpx-0.27.2-py3-none-any.whl", hash = "sha256:7bb2708e112d8fdd7829cd4243970f0c223274051cb35ee80c03301ee29a3df0"},
-    {file = "httpx-0.27.2.tar.gz", hash = "sha256:f7c2be1d2f3c3c3160d441802406b206c2b76f5947b11115e6df10c6c65e66c2"},
+    {file = "httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad"},
+    {file = "httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc"},
 ]
 
 [package.dependencies]
@@ -1244,7 +1257,6 @@ anyio = "*"
 certifi = "*"
 httpcore = "==1.*"
 idna = "*"
-sniffio = "*"
 
 [package.extras]
 brotli = ["brotli", "brotlicffi"]
@@ -1255,14 +1267,13 @@ zstd = ["zstandard (>=0.18.0)"]
 
 [[package]]
 name = "huggingface-hub"
-version = "0.28.1"
+version = "0.29.3"
 description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
 optional = false
 python-versions = ">=3.8.0"
-groups = ["main"]
 files = [
-    {file = "huggingface_hub-0.28.1-py3-none-any.whl", hash = "sha256:aa6b9a3ffdae939b72c464dbb0d7f99f56e649b55c3d52406f49e0a5a620c0a7"},
-    {file = "huggingface_hub-0.28.1.tar.gz", hash = "sha256:893471090c98e3b6efbdfdacafe4052b20b84d59866fb6f54c33d9af18c303ae"},
+    {file = "huggingface_hub-0.29.3-py3-none-any.whl", hash = "sha256:0b25710932ac649c08cdbefa6c6ccb8e88eef82927cacdb048efb726429453aa"},
+    {file = "huggingface_hub-0.29.3.tar.gz", hash = "sha256:64519a25716e0ba382ba2d3fb3ca082e7c7eb4a2fc634d200e8380006e0760e5"},
 ]
 
 [package.dependencies]
@@ -1294,7 +1305,6 @@ version = "3.10"
 description = "Internationalized Domain Names in Applications (IDNA)"
 optional = false
 python-versions = ">=3.6"
-groups = ["main"]
 files = [
     {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"},
     {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"},
@@ -1309,7 +1319,6 @@ version = "8.5.0"
 description = "Read metadata from Python packages"
 optional = false
 python-versions = ">=3.8"
-groups = ["main"]
 files = [
     {file = "importlib_metadata-8.5.0-py3-none-any.whl", hash = "sha256:45e54197d28b7a7f1559e60b95e7c567032b602131fbd588f1497f47880aa68b"},
     {file = "importlib_metadata-8.5.0.tar.gz", hash = "sha256:71522656f0abace1d072b9e5481a48f07c138e00f079c38c8f883823f9c26bd7"},
@@ -1333,8 +1342,6 @@ version = "6.4.5"
 description = "Read resources from Python packages"
 optional = false
 python-versions = ">=3.8"
-groups = ["main"]
-markers = "python_version < \"3.9\""
 files = [
     {file = "importlib_resources-6.4.5-py3-none-any.whl", hash = "sha256:ac29d5f956f01d5e4bb63102a5a19957f1b9175e45649977264a1416783bb717"},
     {file = "importlib_resources-6.4.5.tar.gz", hash = "sha256:980862a1d16c9e147a59603677fa2aa5fd82b87f223b6cb870695bcfce830065"},
@@ -1357,7 +1364,6 @@ version = "2.0.0"
 description = "brain-dead simple config-ini parsing"
 optional = false
 python-versions = ">=3.7"
-groups = ["dev"]
 files = [
     {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"},
     {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"},
@@ -1369,8 +1375,6 @@ version = "0.7.2"
 description = "An ISO 8601 date/time/duration parser and formatter"
 optional = true
 python-versions = ">=3.7"
-groups = ["main"]
-markers = "extra == \"extra-proxy\""
 files = [
     {file = "isodate-0.7.2-py3-none-any.whl", hash = "sha256:28009937d8031054830160fce6d409ed342816b543597cece116d966c6d99e15"},
     {file = "isodate-0.7.2.tar.gz", hash = "sha256:4cd1aa0f43ca76f4a6c6c0292a85f40b35ec2e43e315b59f06e6d32171a953e6"},
@@ -1378,14 +1382,13 @@ files = [
 
 [[package]]
 name = "jinja2"
-version = "3.1.5"
+version = "3.1.6"
 description = "A very fast and expressive template engine."
 optional = false
 python-versions = ">=3.7"
-groups = ["main"]
 files = [
-    {file = "jinja2-3.1.5-py3-none-any.whl", hash = "sha256:aba0f4dc9ed8013c424088f68a5c226f7d6097ed89b246d7749c2ec4175c6adb"},
-    {file = "jinja2-3.1.5.tar.gz", hash = "sha256:8fefff8dc3034e27bb80d67c671eb8a9bc424c0ef4c0826edbff304cceff43bb"},
+    {file = "jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67"},
+    {file = "jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d"},
 ]
 
 [package.dependencies]
@@ -1396,88 +1399,87 @@ i18n = ["Babel (>=2.7)"]
 
 [[package]]
 name = "jiter"
-version = "0.8.2"
+version = "0.9.0"
 description = "Fast iterable JSON parser."
 optional = false
 python-versions = ">=3.8"
-groups = ["main"]
 files = [
-    {file = "jiter-0.8.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:ca8577f6a413abe29b079bc30f907894d7eb07a865c4df69475e868d73e71c7b"},
-    {file = "jiter-0.8.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b25bd626bde7fb51534190c7e3cb97cee89ee76b76d7585580e22f34f5e3f393"},
-    {file = "jiter-0.8.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5c826a221851a8dc028eb6d7d6429ba03184fa3c7e83ae01cd6d3bd1d4bd17d"},
-    {file = "jiter-0.8.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d35c864c2dff13dfd79fb070fc4fc6235d7b9b359efe340e1261deb21b9fcb66"},
-    {file = "jiter-0.8.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f557c55bc2b7676e74d39d19bcb8775ca295c7a028246175d6a8b431e70835e5"},
-    {file = "jiter-0.8.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:580ccf358539153db147e40751a0b41688a5ceb275e6f3e93d91c9467f42b2e3"},
-    {file = "jiter-0.8.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:af102d3372e917cffce49b521e4c32c497515119dc7bd8a75665e90a718bbf08"},
-    {file = "jiter-0.8.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cadcc978f82397d515bb2683fc0d50103acff2a180552654bb92d6045dec2c49"},
-    {file = "jiter-0.8.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:ba5bdf56969cad2019d4e8ffd3f879b5fdc792624129741d3d83fc832fef8c7d"},
-    {file = "jiter-0.8.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:3b94a33a241bee9e34b8481cdcaa3d5c2116f575e0226e421bed3f7a6ea71cff"},
-    {file = "jiter-0.8.2-cp310-cp310-win32.whl", hash = "sha256:6e5337bf454abddd91bd048ce0dca5134056fc99ca0205258766db35d0a2ea43"},
-    {file = "jiter-0.8.2-cp310-cp310-win_amd64.whl", hash = "sha256:4a9220497ca0cb1fe94e3f334f65b9b5102a0b8147646118f020d8ce1de70105"},
-    {file = "jiter-0.8.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:2dd61c5afc88a4fda7d8b2cf03ae5947c6ac7516d32b7a15bf4b49569a5c076b"},
-    {file = "jiter-0.8.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a6c710d657c8d1d2adbbb5c0b0c6bfcec28fd35bd6b5f016395f9ac43e878a15"},
-    {file = "jiter-0.8.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9584de0cd306072635fe4b89742bf26feae858a0683b399ad0c2509011b9dc0"},
-    {file = "jiter-0.8.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5a90a923338531b7970abb063cfc087eebae6ef8ec8139762007188f6bc69a9f"},
-    {file = "jiter-0.8.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d21974d246ed0181558087cd9f76e84e8321091ebfb3a93d4c341479a736f099"},
-    {file = "jiter-0.8.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:32475a42b2ea7b344069dc1e81445cfc00b9d0e3ca837f0523072432332e9f74"},
-    {file = "jiter-0.8.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b9931fd36ee513c26b5bf08c940b0ac875de175341cbdd4fa3be109f0492586"},
-    {file = "jiter-0.8.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ce0820f4a3a59ddced7fce696d86a096d5cc48d32a4183483a17671a61edfddc"},
-    {file = "jiter-0.8.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:8ffc86ae5e3e6a93765d49d1ab47b6075a9c978a2b3b80f0f32628f39caa0c88"},
-    {file = "jiter-0.8.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5127dc1abd809431172bc3fbe8168d6b90556a30bb10acd5ded41c3cfd6f43b6"},
-    {file = "jiter-0.8.2-cp311-cp311-win32.whl", hash = "sha256:66227a2c7b575720c1871c8800d3a0122bb8ee94edb43a5685aa9aceb2782d44"},
-    {file = "jiter-0.8.2-cp311-cp311-win_amd64.whl", hash = "sha256:cde031d8413842a1e7501e9129b8e676e62a657f8ec8166e18a70d94d4682855"},
-    {file = "jiter-0.8.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:e6ec2be506e7d6f9527dae9ff4b7f54e68ea44a0ef6b098256ddf895218a2f8f"},
-    {file = "jiter-0.8.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:76e324da7b5da060287c54f2fabd3db5f76468006c811831f051942bf68c9d44"},
-    {file = "jiter-0.8.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:180a8aea058f7535d1c84183c0362c710f4750bef66630c05f40c93c2b152a0f"},
-    {file = "jiter-0.8.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:025337859077b41548bdcbabe38698bcd93cfe10b06ff66617a48ff92c9aec60"},
-    {file = "jiter-0.8.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ecff0dc14f409599bbcafa7e470c00b80f17abc14d1405d38ab02e4b42e55b57"},
-    {file = "jiter-0.8.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ffd9fee7d0775ebaba131f7ca2e2d83839a62ad65e8e02fe2bd8fc975cedeb9e"},
-    {file = "jiter-0.8.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14601dcac4889e0a1c75ccf6a0e4baf70dbc75041e51bcf8d0e9274519df6887"},
-    {file = "jiter-0.8.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:92249669925bc1c54fcd2ec73f70f2c1d6a817928480ee1c65af5f6b81cdf12d"},
-    {file = "jiter-0.8.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e725edd0929fa79f8349ab4ec7f81c714df51dc4e991539a578e5018fa4a7152"},
-    {file = "jiter-0.8.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:bf55846c7b7a680eebaf9c3c48d630e1bf51bdf76c68a5f654b8524335b0ad29"},
-    {file = "jiter-0.8.2-cp312-cp312-win32.whl", hash = "sha256:7efe4853ecd3d6110301665a5178b9856be7e2a9485f49d91aa4d737ad2ae49e"},
-    {file = "jiter-0.8.2-cp312-cp312-win_amd64.whl", hash = "sha256:83c0efd80b29695058d0fd2fa8a556490dbce9804eac3e281f373bbc99045f6c"},
-    {file = "jiter-0.8.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:ca1f08b8e43dc3bd0594c992fb1fd2f7ce87f7bf0d44358198d6da8034afdf84"},
-    {file = "jiter-0.8.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5672a86d55416ccd214c778efccf3266b84f87b89063b582167d803246354be4"},
-    {file = "jiter-0.8.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58dc9bc9767a1101f4e5e22db1b652161a225874d66f0e5cb8e2c7d1c438b587"},
-    {file = "jiter-0.8.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:37b2998606d6dadbb5ccda959a33d6a5e853252d921fec1792fc902351bb4e2c"},
-    {file = "jiter-0.8.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4ab9a87f3784eb0e098f84a32670cfe4a79cb6512fd8f42ae3d0709f06405d18"},
-    {file = "jiter-0.8.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:79aec8172b9e3c6d05fd4b219d5de1ac616bd8da934107325a6c0d0e866a21b6"},
-    {file = "jiter-0.8.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:711e408732d4e9a0208008e5892c2966b485c783cd2d9a681f3eb147cf36c7ef"},
-    {file = "jiter-0.8.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:653cf462db4e8c41995e33d865965e79641ef45369d8a11f54cd30888b7e6ff1"},
-    {file = "jiter-0.8.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:9c63eaef32b7bebac8ebebf4dabebdbc6769a09c127294db6babee38e9f405b9"},
-    {file = "jiter-0.8.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:eb21aaa9a200d0a80dacc7a81038d2e476ffe473ffdd9c91eb745d623561de05"},
-    {file = "jiter-0.8.2-cp313-cp313-win32.whl", hash = "sha256:789361ed945d8d42850f919342a8665d2dc79e7e44ca1c97cc786966a21f627a"},
-    {file = "jiter-0.8.2-cp313-cp313-win_amd64.whl", hash = "sha256:ab7f43235d71e03b941c1630f4b6e3055d46b6cb8728a17663eaac9d8e83a865"},
-    {file = "jiter-0.8.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b426f72cd77da3fec300ed3bc990895e2dd6b49e3bfe6c438592a3ba660e41ca"},
-    {file = "jiter-0.8.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2dd880785088ff2ad21ffee205e58a8c1ddabc63612444ae41e5e4b321b39c0"},
-    {file = "jiter-0.8.2-cp313-cp313t-win_amd64.whl", hash = "sha256:3ac9f578c46f22405ff7f8b1f5848fb753cc4b8377fbec8470a7dc3997ca7566"},
-    {file = "jiter-0.8.2-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:9e1fa156ee9454642adb7e7234a383884452532bc9d53d5af2d18d98ada1d79c"},
-    {file = "jiter-0.8.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:0cf5dfa9956d96ff2efb0f8e9c7d055904012c952539a774305aaaf3abdf3d6c"},
-    {file = "jiter-0.8.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e52bf98c7e727dd44f7c4acb980cb988448faeafed8433c867888268899b298b"},
-    {file = "jiter-0.8.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a2ecaa3c23e7a7cf86d00eda3390c232f4d533cd9ddea4b04f5d0644faf642c5"},
-    {file = "jiter-0.8.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:08d4c92bf480e19fc3f2717c9ce2aa31dceaa9163839a311424b6862252c943e"},
-    {file = "jiter-0.8.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:99d9a1eded738299ba8e106c6779ce5c3893cffa0e32e4485d680588adae6db8"},
-    {file = "jiter-0.8.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d20be8b7f606df096e08b0b1b4a3c6f0515e8dac296881fe7461dfa0fb5ec817"},
-    {file = "jiter-0.8.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d33f94615fcaf872f7fd8cd98ac3b429e435c77619777e8a449d9d27e01134d1"},
-    {file = "jiter-0.8.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:317b25e98a35ffec5c67efe56a4e9970852632c810d35b34ecdd70cc0e47b3b6"},
-    {file = "jiter-0.8.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fc9043259ee430ecd71d178fccabd8c332a3bf1e81e50cae43cc2b28d19e4cb7"},
-    {file = "jiter-0.8.2-cp38-cp38-win32.whl", hash = "sha256:fc5adda618205bd4678b146612ce44c3cbfdee9697951f2c0ffdef1f26d72b63"},
-    {file = "jiter-0.8.2-cp38-cp38-win_amd64.whl", hash = "sha256:cd646c827b4f85ef4a78e4e58f4f5854fae0caf3db91b59f0d73731448a970c6"},
-    {file = "jiter-0.8.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:e41e75344acef3fc59ba4765df29f107f309ca9e8eace5baacabd9217e52a5ee"},
-    {file = "jiter-0.8.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7f22b16b35d5c1df9dfd58843ab2cd25e6bf15191f5a236bed177afade507bfc"},
-    {file = "jiter-0.8.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f7200b8f7619d36aa51c803fd52020a2dfbea36ffec1b5e22cab11fd34d95a6d"},
-    {file = "jiter-0.8.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:70bf4c43652cc294040dbb62256c83c8718370c8b93dd93d934b9a7bf6c4f53c"},
-    {file = "jiter-0.8.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f9d471356dc16f84ed48768b8ee79f29514295c7295cb41e1133ec0b2b8d637d"},
-    {file = "jiter-0.8.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:859e8eb3507894093d01929e12e267f83b1d5f6221099d3ec976f0c995cb6bd9"},
-    {file = "jiter-0.8.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eaa58399c01db555346647a907b4ef6d4f584b123943be6ed5588c3f2359c9f4"},
-    {file = "jiter-0.8.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8f2d5ed877f089862f4c7aacf3a542627c1496f972a34d0474ce85ee7d939c27"},
-    {file = "jiter-0.8.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:03c9df035d4f8d647f8c210ddc2ae0728387275340668fb30d2421e17d9a0841"},
-    {file = "jiter-0.8.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8bd2a824d08d8977bb2794ea2682f898ad3d8837932e3a74937e93d62ecbb637"},
-    {file = "jiter-0.8.2-cp39-cp39-win32.whl", hash = "sha256:ca29b6371ebc40e496995c94b988a101b9fbbed48a51190a4461fcb0a68b4a36"},
-    {file = "jiter-0.8.2-cp39-cp39-win_amd64.whl", hash = "sha256:1c0dfbd1be3cbefc7510102370d86e35d1d53e5a93d48519688b1bf0f761160a"},
-    {file = "jiter-0.8.2.tar.gz", hash = "sha256:cd73d3e740666d0e639f678adb176fad25c1bcbdae88d8d7b857e1783bb4212d"},
+    {file = "jiter-0.9.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:816ec9b60fdfd1fec87da1d7ed46c66c44ffec37ab2ef7de5b147b2fce3fd5ad"},
+    {file = "jiter-0.9.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9b1d3086f8a3ee0194ecf2008cf81286a5c3e540d977fa038ff23576c023c0ea"},
+    {file = "jiter-0.9.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1339f839b91ae30b37c409bf16ccd3dc453e8b8c3ed4bd1d6a567193651a4a51"},
+    {file = "jiter-0.9.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ffba79584b3b670fefae66ceb3a28822365d25b7bf811e030609a3d5b876f538"},
+    {file = "jiter-0.9.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5cfc7d0a8e899089d11f065e289cb5b2daf3d82fbe028f49b20d7b809193958d"},
+    {file = "jiter-0.9.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e00a1a2bbfaaf237e13c3d1592356eab3e9015d7efd59359ac8b51eb56390a12"},
+    {file = "jiter-0.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d1d9870561eb26b11448854dce0ff27a9a27cb616b632468cafc938de25e9e51"},
+    {file = "jiter-0.9.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9872aeff3f21e437651df378cb75aeb7043e5297261222b6441a620218b58708"},
+    {file = "jiter-0.9.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:1fd19112d1049bdd47f17bfbb44a2c0001061312dcf0e72765bfa8abd4aa30e5"},
+    {file = "jiter-0.9.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6ef5da104664e526836070e4a23b5f68dec1cc673b60bf1edb1bfbe8a55d0678"},
+    {file = "jiter-0.9.0-cp310-cp310-win32.whl", hash = "sha256:cb12e6d65ebbefe5518de819f3eda53b73187b7089040b2d17f5b39001ff31c4"},
+    {file = "jiter-0.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:c43ca669493626d8672be3b645dbb406ef25af3f4b6384cfd306da7eb2e70322"},
+    {file = "jiter-0.9.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:6c4d99c71508912a7e556d631768dcdef43648a93660670986916b297f1c54af"},
+    {file = "jiter-0.9.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8f60fb8ce7df529812bf6c625635a19d27f30806885139e367af93f6e734ef58"},
+    {file = "jiter-0.9.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:51c4e1a4f8ea84d98b7b98912aa4290ac3d1eabfde8e3c34541fae30e9d1f08b"},
+    {file = "jiter-0.9.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5f4c677c424dc76684fea3e7285a7a2a7493424bea89ac441045e6a1fb1d7b3b"},
+    {file = "jiter-0.9.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2221176dfec87f3470b21e6abca056e6b04ce9bff72315cb0b243ca9e835a4b5"},
+    {file = "jiter-0.9.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3c7adb66f899ffa25e3c92bfcb593391ee1947dbdd6a9a970e0d7e713237d572"},
+    {file = "jiter-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c98d27330fdfb77913c1097a7aab07f38ff2259048949f499c9901700789ac15"},
+    {file = "jiter-0.9.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:eda3f8cc74df66892b1d06b5d41a71670c22d95a1ca2cbab73654745ce9d0419"},
+    {file = "jiter-0.9.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:dd5ab5ddc11418dce28343123644a100f487eaccf1de27a459ab36d6cca31043"},
+    {file = "jiter-0.9.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:42f8a68a69f047b310319ef8e2f52fdb2e7976fb3313ef27df495cf77bcad965"},
+    {file = "jiter-0.9.0-cp311-cp311-win32.whl", hash = "sha256:a25519efb78a42254d59326ee417d6f5161b06f5da827d94cf521fed961b1ff2"},
+    {file = "jiter-0.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:923b54afdd697dfd00d368b7ccad008cccfeb1efb4e621f32860c75e9f25edbd"},
+    {file = "jiter-0.9.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:7b46249cfd6c48da28f89eb0be3f52d6fdb40ab88e2c66804f546674e539ec11"},
+    {file = "jiter-0.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:609cf3c78852f1189894383cf0b0b977665f54cb38788e3e6b941fa6d982c00e"},
+    {file = "jiter-0.9.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d726a3890a54561e55a9c5faea1f7655eda7f105bd165067575ace6e65f80bb2"},
+    {file = "jiter-0.9.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2e89dc075c1fef8fa9be219e249f14040270dbc507df4215c324a1839522ea75"},
+    {file = "jiter-0.9.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:04e8ffa3c353b1bc4134f96f167a2082494351e42888dfcf06e944f2729cbe1d"},
+    {file = "jiter-0.9.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:203f28a72a05ae0e129b3ed1f75f56bc419d5f91dfacd057519a8bd137b00c42"},
+    {file = "jiter-0.9.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fca1a02ad60ec30bb230f65bc01f611c8608b02d269f998bc29cca8619a919dc"},
+    {file = "jiter-0.9.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:237e5cee4d5d2659aaf91bbf8ec45052cc217d9446070699441a91b386ae27dc"},
+    {file = "jiter-0.9.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:528b6b71745e7326eed73c53d4aa57e2a522242320b6f7d65b9c5af83cf49b6e"},
+    {file = "jiter-0.9.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:9f48e86b57bc711eb5acdfd12b6cb580a59cc9a993f6e7dcb6d8b50522dcd50d"},
+    {file = "jiter-0.9.0-cp312-cp312-win32.whl", hash = "sha256:699edfde481e191d81f9cf6d2211debbfe4bd92f06410e7637dffb8dd5dfde06"},
+    {file = "jiter-0.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:099500d07b43f61d8bd780466d429c45a7b25411b334c60ca875fa775f68ccb0"},
+    {file = "jiter-0.9.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:2764891d3f3e8b18dce2cff24949153ee30c9239da7c00f032511091ba688ff7"},
+    {file = "jiter-0.9.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:387b22fbfd7a62418d5212b4638026d01723761c75c1c8232a8b8c37c2f1003b"},
+    {file = "jiter-0.9.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40d8da8629ccae3606c61d9184970423655fb4e33d03330bcdfe52d234d32f69"},
+    {file = "jiter-0.9.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a1be73d8982bdc278b7b9377426a4b44ceb5c7952073dd7488e4ae96b88e1103"},
+    {file = "jiter-0.9.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2228eaaaa111ec54b9e89f7481bffb3972e9059301a878d085b2b449fbbde635"},
+    {file = "jiter-0.9.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:11509bfecbc319459647d4ac3fd391d26fdf530dad00c13c4dadabf5b81f01a4"},
+    {file = "jiter-0.9.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3f22238da568be8bbd8e0650e12feeb2cfea15eda4f9fc271d3b362a4fa0604d"},
+    {file = "jiter-0.9.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:17f5d55eb856597607562257c8e36c42bc87f16bef52ef7129b7da11afc779f3"},
+    {file = "jiter-0.9.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:6a99bed9fbb02f5bed416d137944419a69aa4c423e44189bc49718859ea83bc5"},
+    {file = "jiter-0.9.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:e057adb0cd1bd39606100be0eafe742de2de88c79df632955b9ab53a086b3c8d"},
+    {file = "jiter-0.9.0-cp313-cp313-win32.whl", hash = "sha256:f7e6850991f3940f62d387ccfa54d1a92bd4bb9f89690b53aea36b4364bcab53"},
+    {file = "jiter-0.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:c8ae3bf27cd1ac5e6e8b7a27487bf3ab5f82318211ec2e1346a5b058756361f7"},
+    {file = "jiter-0.9.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f0b2827fb88dda2cbecbbc3e596ef08d69bda06c6f57930aec8e79505dc17001"},
+    {file = "jiter-0.9.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:062b756ceb1d40b0b28f326cba26cfd575a4918415b036464a52f08632731e5a"},
+    {file = "jiter-0.9.0-cp313-cp313t-win_amd64.whl", hash = "sha256:6f7838bc467ab7e8ef9f387bd6de195c43bad82a569c1699cb822f6609dd4cdf"},
+    {file = "jiter-0.9.0-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:4a2d16360d0642cd68236f931b85fe50288834c383492e4279d9f1792e309571"},
+    {file = "jiter-0.9.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e84ed1c9c9ec10bbb8c37f450077cbe3c0d4e8c2b19f0a49a60ac7ace73c7452"},
+    {file = "jiter-0.9.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9f3c848209ccd1bfa344a1240763975ca917de753c7875c77ec3034f4151d06c"},
+    {file = "jiter-0.9.0-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7825f46e50646bee937e0f849d14ef3a417910966136f59cd1eb848b8b5bb3e4"},
+    {file = "jiter-0.9.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d82a811928b26d1a6311a886b2566f68ccf2b23cf3bfed042e18686f1f22c2d7"},
+    {file = "jiter-0.9.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0c058ecb51763a67f019ae423b1cbe3fa90f7ee6280c31a1baa6ccc0c0e2d06e"},
+    {file = "jiter-0.9.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9897115ad716c48f0120c1f0c4efae348ec47037319a6c63b2d7838bb53aaef4"},
+    {file = "jiter-0.9.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:351f4c90a24c4fb8c87c6a73af2944c440494ed2bea2094feecacb75c50398ae"},
+    {file = "jiter-0.9.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:d45807b0f236c485e1e525e2ce3a854807dfe28ccf0d013dd4a563395e28008a"},
+    {file = "jiter-0.9.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:1537a890724ba00fdba21787010ac6f24dad47f763410e9e1093277913592784"},
+    {file = "jiter-0.9.0-cp38-cp38-win32.whl", hash = "sha256:e3630ec20cbeaddd4b65513fa3857e1b7c4190d4481ef07fb63d0fad59033321"},
+    {file = "jiter-0.9.0-cp38-cp38-win_amd64.whl", hash = "sha256:2685f44bf80e95f8910553bf2d33b9c87bf25fceae6e9f0c1355f75d2922b0ee"},
+    {file = "jiter-0.9.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:9ef340fae98065071ccd5805fe81c99c8f80484e820e40043689cf97fb66b3e2"},
+    {file = "jiter-0.9.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:efb767d92c63b2cd9ec9f24feeb48f49574a713870ec87e9ba0c2c6e9329c3e2"},
+    {file = "jiter-0.9.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:113f30f87fb1f412510c6d7ed13e91422cfd329436364a690c34c8b8bd880c42"},
+    {file = "jiter-0.9.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8793b6df019b988526f5a633fdc7456ea75e4a79bd8396a3373c371fc59f5c9b"},
+    {file = "jiter-0.9.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7a9aaa5102dba4e079bb728076fadd5a2dca94c05c04ce68004cfd96f128ea34"},
+    {file = "jiter-0.9.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d838650f6ebaf4ccadfb04522463e74a4c378d7e667e0eb1865cfe3990bfac49"},
+    {file = "jiter-0.9.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0194f813efdf4b8865ad5f5c5f50f8566df7d770a82c51ef593d09e0b347020"},
+    {file = "jiter-0.9.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a7954a401d0a8a0b8bc669199db78af435aae1e3569187c2939c477c53cb6a0a"},
+    {file = "jiter-0.9.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4feafe787eb8a8d98168ab15637ca2577f6ddf77ac6c8c66242c2d028aa5420e"},
+    {file = "jiter-0.9.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:27cd1f2e8bb377f31d3190b34e4328d280325ad7ef55c6ac9abde72f79e84d2e"},
+    {file = "jiter-0.9.0-cp39-cp39-win32.whl", hash = "sha256:161d461dcbe658cf0bd0aa375b30a968b087cdddc624fc585f3867c63c6eca95"},
+    {file = "jiter-0.9.0-cp39-cp39-win_amd64.whl", hash = "sha256:e8b36d8a16a61993be33e75126ad3d8aa29cf450b09576f3c427d27647fcb4aa"},
+    {file = "jiter-0.9.0.tar.gz", hash = "sha256:aadba0964deb424daa24492abc3d229c60c4a31bfee205aedbf1acc7639d7893"},
 ]
 
 [[package]]
@@ -1486,7 +1488,6 @@ version = "4.23.0"
 description = "An implementation of JSON Schema validation for Python"
 optional = false
 python-versions = ">=3.8"
-groups = ["main"]
 files = [
     {file = "jsonschema-4.23.0-py3-none-any.whl", hash = "sha256:fbadb6f8b144a8f8cf9f0b89ba94501d143e50411a1278633f56a7acf7fd5566"},
     {file = "jsonschema-4.23.0.tar.gz", hash = "sha256:d71497fef26351a33265337fa77ffeb82423f3ea21283cd9467bb03999266bc4"},
@@ -1510,7 +1511,6 @@ version = "2023.12.1"
 description = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry"
 optional = false
 python-versions = ">=3.8"
-groups = ["main"]
 files = [
     {file = "jsonschema_specifications-2023.12.1-py3-none-any.whl", hash = "sha256:87e4fdf3a94858b8a2ba2778d9ba57d8a9cafca7c7489c46ba0d30a8bc6a9c3c"},
     {file = "jsonschema_specifications-2023.12.1.tar.gz", hash = "sha256:48a76787b3e70f5ed53f1160d2b81f586e4ca6d1548c5de7085d1682674764cc"},
@@ -1526,7 +1526,6 @@ version = "2.1.5"
 description = "Safely add untrusted strings to HTML/XML markup."
 optional = false
 python-versions = ">=3.7"
-groups = ["main"]
 files = [
     {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc"},
     {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5"},
@@ -1596,7 +1595,6 @@ version = "0.7.0"
 description = "McCabe checker, plugin for flake8"
 optional = false
 python-versions = ">=3.6"
-groups = ["dev"]
 files = [
     {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"},
     {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"},
@@ -1604,19 +1602,17 @@ files = [
 
 [[package]]
 name = "msal"
-version = "1.31.1"
+version = "1.32.0"
 description = "The Microsoft Authentication Library (MSAL) for Python library enables your app to access the Microsoft Cloud by supporting authentication of users with Microsoft Azure Active Directory accounts (AAD) and Microsoft Accounts (MSA) using industry standard OAuth2 and OpenID Connect."
 optional = true
 python-versions = ">=3.7"
-groups = ["main"]
-markers = "extra == \"extra-proxy\""
 files = [
-    {file = "msal-1.31.1-py3-none-any.whl", hash = "sha256:29d9882de247e96db01386496d59f29035e5e841bcac892e6d7bf4390bf6bd17"},
-    {file = "msal-1.31.1.tar.gz", hash = "sha256:11b5e6a3f802ffd3a72107203e20c4eac6ef53401961b880af2835b723d80578"},
+    {file = "msal-1.32.0-py3-none-any.whl", hash = "sha256:9dbac5384a10bbbf4dae5c7ea0d707d14e087b92c5aa4954b3feaa2d1aa0bcb7"},
+    {file = "msal-1.32.0.tar.gz", hash = "sha256:5445fe3af1da6be484991a7ab32eaa82461dc2347de105b76af92c610c3335c2"},
 ]
 
 [package.dependencies]
-cryptography = ">=2.5,<46"
+cryptography = ">=2.5,<47"
 PyJWT = {version = ">=1.0.0,<3", extras = ["crypto"]}
 requests = ">=2.0.0,<3"
 
@@ -1629,8 +1625,6 @@ version = "1.2.0"
 description = "Microsoft Authentication Library extensions (MSAL EX) provides a persistence API that can save your data on disk, encrypted on Windows, macOS and Linux. Concurrent data access will be coordinated by a file lock mechanism."
 optional = true
 python-versions = ">=3.7"
-groups = ["main"]
-markers = "extra == \"extra-proxy\""
 files = [
     {file = "msal_extensions-1.2.0-py3-none-any.whl", hash = "sha256:cf5ba83a2113fa6dc011a254a72f1c223c88d7dfad74cc30617c4679a417704d"},
     {file = "msal_extensions-1.2.0.tar.gz", hash = "sha256:6f41b320bfd2933d631a215c91ca0dd3e67d84bd1a2f50ce917d5874ec646bef"},
@@ -1646,7 +1640,6 @@ version = "6.1.0"
 description = "multidict implementation"
 optional = false
 python-versions = ">=3.8"
-groups = ["main"]
 files = [
     {file = "multidict-6.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3380252550e372e8511d49481bd836264c009adb826b23fefcc5dd3c69692f60"},
     {file = "multidict-6.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:99f826cbf970077383d7de805c0681799491cb939c25450b9b5b3ced03ca99f1"},
@@ -1751,7 +1744,6 @@ version = "1.14.1"
 description = "Optional static typing for Python"
 optional = false
 python-versions = ">=3.8"
-groups = ["dev"]
 files = [
     {file = "mypy-1.14.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:52686e37cf13d559f668aa398dd7ddf1f92c5d613e4f8cb262be2fb4fedb0fcb"},
     {file = "mypy-1.14.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1fb545ca340537d4b45d3eecdb3def05e913299ca72c290326be19b3804b39c0"},
@@ -1811,7 +1803,6 @@ version = "1.0.0"
 description = "Type system extensions for programs checked with the mypy type checker."
 optional = false
 python-versions = ">=3.5"
-groups = ["dev"]
 files = [
     {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"},
     {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"},
@@ -1823,8 +1814,6 @@ version = "1.9.1"
 description = "Node.js virtual environment builder"
 optional = true
 python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
-groups = ["main"]
-markers = "extra == \"extra-proxy\""
 files = [
     {file = "nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9"},
     {file = "nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f"},
@@ -1836,8 +1825,6 @@ version = "3.2.2"
 description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic"
 optional = true
 python-versions = ">=3.6"
-groups = ["main"]
-markers = "extra == \"proxy\""
 files = [
     {file = "oauthlib-3.2.2-py3-none-any.whl", hash = "sha256:8139f29aac13e25d502680e9e19963e83f16838d48a0d71c287fe40e7067fbca"},
     {file = "oauthlib-3.2.2.tar.gz", hash = "sha256:9859c40929662bec5d64f34d01c99e093149682a3f38915dc0655d5a633dd918"},
@@ -1850,14 +1837,13 @@ signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"]
 
 [[package]]
 name = "openai"
-version = "1.61.0"
+version = "1.66.3"
 description = "The official Python library for the openai API"
 optional = false
 python-versions = ">=3.8"
-groups = ["main"]
 files = [
-    {file = "openai-1.61.0-py3-none-any.whl", hash = "sha256:e8c512c0743accbdbe77f3429a1490d862f8352045de8dc81969301eb4a4f666"},
-    {file = "openai-1.61.0.tar.gz", hash = "sha256:216f325a24ed8578e929b0f1b3fb2052165f3b04b0461818adaa51aa29c71f8a"},
+    {file = "openai-1.66.3-py3-none-any.whl", hash = "sha256:a427c920f727711877ab17c11b95f1230b27767ba7a01e5b66102945141ceca9"},
+    {file = "openai-1.66.3.tar.gz", hash = "sha256:8dde3aebe2d081258d4159c4cb27bdc13b5bb3f7ea2201d9bd940b9a89faf0c9"},
 ]
 
 [package.dependencies]
@@ -1880,8 +1866,6 @@ version = "3.10.15"
 description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy"
 optional = true
 python-versions = ">=3.8"
-groups = ["main"]
-markers = "extra == \"proxy\""
 files = [
     {file = "orjson-3.10.15-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:552c883d03ad185f720d0c09583ebde257e41b9521b74ff40e08b7dec4559c04"},
     {file = "orjson-3.10.15-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:616e3e8d438d02e4854f70bfdc03a6bcdb697358dbaa6bcd19cbe24d24ece1f8"},
@@ -1970,7 +1954,6 @@ version = "24.2"
 description = "Core utilities for Python packages"
 optional = false
 python-versions = ">=3.8"
-groups = ["main", "dev"]
 files = [
     {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"},
     {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"},
@@ -1982,7 +1965,6 @@ version = "0.12.1"
 description = "Utility library for gitignore style pattern matching of file paths."
 optional = false
 python-versions = ">=3.8"
-groups = ["dev"]
 files = [
     {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"},
     {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"},
@@ -1994,8 +1976,6 @@ version = "1.3.10"
 description = "Resolve a name to an object."
 optional = false
 python-versions = ">=3.6"
-groups = ["main"]
-markers = "python_version < \"3.9\""
 files = [
     {file = "pkgutil_resolve_name-1.3.10-py3-none-any.whl", hash = "sha256:ca27cc078d25c5ad71a9de0a7a330146c4e014c2462d9af19c6b828280649c5e"},
     {file = "pkgutil_resolve_name-1.3.10.tar.gz", hash = "sha256:357d6c9e6a755653cfd78893817c0853af365dd51ec97f3d358a819373bbd174"},
@@ -2007,7 +1987,6 @@ version = "4.3.6"
 description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`."
 optional = false
 python-versions = ">=3.8"
-groups = ["dev"]
 files = [
     {file = "platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb"},
     {file = "platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907"},
@@ -2024,7 +2003,6 @@ version = "1.5.0"
 description = "plugin and hook calling mechanisms for python"
 optional = false
 python-versions = ">=3.8"
-groups = ["dev"]
 files = [
     {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"},
     {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"},
@@ -2040,8 +2018,6 @@ version = "2.10.1"
 description = "Wraps the portalocker recipe for easy usage"
 optional = true
 python-versions = ">=3.8"
-groups = ["main"]
-markers = "extra == \"extra-proxy\""
 files = [
     {file = "portalocker-2.10.1-py3-none-any.whl", hash = "sha256:53a5984ebc86a025552264b459b46a2086e269b21823cb572f8f28ee759e45bf"},
     {file = "portalocker-2.10.1.tar.gz", hash = "sha256:ef1bf844e878ab08aee7e40184156e1151f228f103aa5c6bd0724cc330960f8f"},
@@ -2061,8 +2037,6 @@ version = "0.11.0"
 description = "Prisma Client Python is an auto-generated and fully type-safe database client"
 optional = true
 python-versions = ">=3.7.0"
-groups = ["main"]
-markers = "extra == \"extra-proxy\""
 files = [
     {file = "prisma-0.11.0-py3-none-any.whl", hash = "sha256:22bb869e59a2968b99f3483bb417717273ffbc569fd1e9ceed95e5614cbaf53a"},
     {file = "prisma-0.11.0.tar.gz", hash = "sha256:3f2f2fd2361e1ec5ff655f2a04c7860c2f2a5bc4c91f78ca9c5c6349735bf693"},
@@ -2088,7 +2062,6 @@ version = "0.2.0"
 description = "Accelerated property cache"
 optional = false
 python-versions = ">=3.8"
-groups = ["main"]
 files = [
     {file = "propcache-0.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:c5869b8fd70b81835a6f187c5fdbe67917a04d7e52b6e7cc4e5fe39d55c39d58"},
     {file = "propcache-0.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:952e0d9d07609d9c5be361f33b0d6d650cd2bae393aabb11d9b719364521984b"},
@@ -2192,19 +2165,17 @@ files = [
 
 [[package]]
 name = "proto-plus"
-version = "1.26.0"
+version = "1.26.1"
 description = "Beautiful, Pythonic protocol buffers"
 optional = true
 python-versions = ">=3.7"
-groups = ["main"]
-markers = "extra == \"extra-proxy\""
 files = [
-    {file = "proto_plus-1.26.0-py3-none-any.whl", hash = "sha256:bf2dfaa3da281fc3187d12d224c707cb57214fb2c22ba854eb0c105a3fb2d4d7"},
-    {file = "proto_plus-1.26.0.tar.gz", hash = "sha256:6e93d5f5ca267b54300880fff156b6a3386b3fa3f43b1da62e680fc0c586ef22"},
+    {file = "proto_plus-1.26.1-py3-none-any.whl", hash = "sha256:13285478c2dcf2abb829db158e1047e2f1e8d63a077d94263c2b88b043c75a66"},
+    {file = "proto_plus-1.26.1.tar.gz", hash = "sha256:21a515a4c4c0088a773899e23c7bbade3d18f9c66c73edd4c7ee3816bc96a012"},
 ]
 
 [package.dependencies]
-protobuf = ">=3.19.0,<6.0.0dev"
+protobuf = ">=3.19.0,<7.0.0"
 
 [package.extras]
 testing = ["google-api-core (>=1.31.5)"]
@@ -2215,8 +2186,6 @@ version = "5.29.3"
 description = ""
 optional = true
 python-versions = ">=3.8"
-groups = ["main"]
-markers = "extra == \"extra-proxy\""
 files = [
     {file = "protobuf-5.29.3-cp310-abi3-win32.whl", hash = "sha256:3ea51771449e1035f26069c4c7fd51fba990d07bc55ba80701c78f886bf9c888"},
     {file = "protobuf-5.29.3-cp310-abi3-win_amd64.whl", hash = "sha256:a4fa6f80816a9a0678429e84973f2f98cbc218cca434abe8db2ad0bffc98503a"},
@@ -2237,8 +2206,6 @@ version = "0.6.1"
 description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)"
 optional = true
 python-versions = ">=3.8"
-groups = ["main"]
-markers = "extra == \"extra-proxy\""
 files = [
     {file = "pyasn1-0.6.1-py3-none-any.whl", hash = "sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629"},
     {file = "pyasn1-0.6.1.tar.gz", hash = "sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034"},
@@ -2250,8 +2217,6 @@ version = "0.4.1"
 description = "A collection of ASN.1-based protocols modules"
 optional = true
 python-versions = ">=3.8"
-groups = ["main"]
-markers = "extra == \"extra-proxy\""
 files = [
     {file = "pyasn1_modules-0.4.1-py3-none-any.whl", hash = "sha256:49bfa96b45a292b711e986f222502c1c9a5e1f4e568fc30e2574a6c7d07838fd"},
     {file = "pyasn1_modules-0.4.1.tar.gz", hash = "sha256:c28e2dbf9c06ad61c71a075c7e0f9fd0f1b0bb2d2ad4377f240d33ac2ab60a7c"},
@@ -2266,7 +2231,6 @@ version = "2.11.1"
 description = "Python style guide checker"
 optional = false
 python-versions = ">=3.8"
-groups = ["dev"]
 files = [
     {file = "pycodestyle-2.11.1-py2.py3-none-any.whl", hash = "sha256:44fe31000b2d866f2e41841b18528a505fbd7fef9017b04eff4e2648a0fadc67"},
     {file = "pycodestyle-2.11.1.tar.gz", hash = "sha256:41ba0e7afc9752dfb53ced5489e89f8186be00e599e712660695b7a75ff2663f"},
@@ -2278,8 +2242,6 @@ version = "2.22"
 description = "C parser in Python"
 optional = true
 python-versions = ">=3.8"
-groups = ["main"]
-markers = "extra == \"proxy\" or extra == \"extra-proxy\" and platform_python_implementation != \"PyPy\""
 files = [
     {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"},
     {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"},
@@ -2291,7 +2253,6 @@ version = "2.10.6"
 description = "Data validation using Python type hints"
 optional = false
 python-versions = ">=3.8"
-groups = ["main"]
 files = [
     {file = "pydantic-2.10.6-py3-none-any.whl", hash = "sha256:427d664bf0b8a2b34ff5dd0f5a18df00591adcee7198fbd71981054cef37b584"},
     {file = "pydantic-2.10.6.tar.gz", hash = "sha256:ca5daa827cce33de7a42be142548b0096bf05a7e7b365aebfa5f8eeec7128236"},
@@ -2313,7 +2274,6 @@ version = "2.27.2"
 description = "Core functionality for Pydantic validation and serialization"
 optional = false
 python-versions = ">=3.8"
-groups = ["main"]
 files = [
     {file = "pydantic_core-2.27.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2d367ca20b2f14095a8f4fa1210f5a7b78b8a20009ecced6b12818f455b1e9fa"},
     {file = "pydantic_core-2.27.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:491a2b73db93fab69731eaee494f320faa4e093dbed776be1a829c2eb222c34c"},
@@ -2426,7 +2386,6 @@ version = "3.1.0"
 description = "passive checker of Python programs"
 optional = false
 python-versions = ">=3.8"
-groups = ["dev"]
 files = [
     {file = "pyflakes-3.1.0-py2.py3-none-any.whl", hash = "sha256:4132f6d49cb4dae6819e5379898f2b8cce3c5f23994194c24b77d5da2e36f774"},
     {file = "pyflakes-3.1.0.tar.gz", hash = "sha256:a0aae034c444db0071aa077972ba4768d40c830d9539fd45bf4cd3f8f6992efc"},
@@ -2438,8 +2397,6 @@ version = "2.9.0"
 description = "JSON Web Token implementation in Python"
 optional = true
 python-versions = ">=3.8"
-groups = ["main"]
-markers = "extra == \"proxy\" or extra == \"extra-proxy\""
 files = [
     {file = "PyJWT-2.9.0-py3-none-any.whl", hash = "sha256:3b02fb0f44517787776cf48f2ae25d8e14f300e6d7545a4315cee571a415e850"},
     {file = "pyjwt-2.9.0.tar.gz", hash = "sha256:7e1e5b56cc735432a7369cbfa0efe50fa113ebecdc04ae6922deba8b84582d0c"},
@@ -2460,8 +2417,6 @@ version = "1.5.0"
 description = "Python binding to the Networking and Cryptography (NaCl) library"
 optional = true
 python-versions = ">=3.6"
-groups = ["main"]
-markers = "extra == \"proxy\""
 files = [
     {file = "PyNaCl-1.5.0-cp36-abi3-macosx_10_10_universal2.whl", hash = "sha256:401002a4aaa07c9414132aaed7f6836ff98f59277a234704ff66878c2ee4a0d1"},
     {file = "PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:52cb72a79269189d4e0dc537556f4740f7f0a9ec41c1322598799b0bdad4ef92"},
@@ -2488,7 +2443,6 @@ version = "7.4.4"
 description = "pytest: simple powerful testing with Python"
 optional = false
 python-versions = ">=3.7"
-groups = ["dev"]
 files = [
     {file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"},
     {file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"},
@@ -2511,7 +2465,6 @@ version = "3.14.0"
 description = "Thin-wrapper around the mock package for easier use with pytest"
 optional = false
 python-versions = ">=3.8"
-groups = ["dev"]
 files = [
     {file = "pytest-mock-3.14.0.tar.gz", hash = "sha256:2719255a1efeceadbc056d6bf3df3d1c5015530fb40cf347c0f9afac88410bd0"},
     {file = "pytest_mock-3.14.0-py3-none-any.whl", hash = "sha256:0b72c38033392a5f4621342fe11e9219ac11ec9d375f8e2a0c164539e0d70f6f"},
@@ -2529,7 +2482,6 @@ version = "1.0.1"
 description = "Read key-value pairs from a .env file and set them as environment variables"
 optional = false
 python-versions = ">=3.8"
-groups = ["main"]
 files = [
     {file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"},
     {file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"},
@@ -2544,8 +2496,6 @@ version = "0.0.18"
 description = "A streaming multipart parser for Python"
 optional = true
 python-versions = ">=3.8"
-groups = ["main"]
-markers = "extra == \"proxy\""
 files = [
     {file = "python_multipart-0.0.18-py3-none-any.whl", hash = "sha256:efe91480f485f6a361427a541db4796f9e1591afc0fb8e7a4ba06bfbc6708996"},
     {file = "python_multipart-0.0.18.tar.gz", hash = "sha256:7a68db60c8bfb82e460637fa4750727b45af1d5e2ed215593f917f64694d34fe"},
@@ -2553,31 +2503,27 @@ files = [
 
 [[package]]
 name = "pywin32"
-version = "308"
+version = "309"
 description = "Python for Window Extensions"
 optional = true
 python-versions = "*"
-groups = ["main"]
-markers = "extra == \"extra-proxy\" and platform_system == \"Windows\""
 files = [
-    {file = "pywin32-308-cp310-cp310-win32.whl", hash = "sha256:796ff4426437896550d2981b9c2ac0ffd75238ad9ea2d3bfa67a1abd546d262e"},
-    {file = "pywin32-308-cp310-cp310-win_amd64.whl", hash = "sha256:4fc888c59b3c0bef905ce7eb7e2106a07712015ea1c8234b703a088d46110e8e"},
-    {file = "pywin32-308-cp310-cp310-win_arm64.whl", hash = "sha256:a5ab5381813b40f264fa3495b98af850098f814a25a63589a8e9eb12560f450c"},
-    {file = "pywin32-308-cp311-cp311-win32.whl", hash = "sha256:5d8c8015b24a7d6855b1550d8e660d8daa09983c80e5daf89a273e5c6fb5095a"},
-    {file = "pywin32-308-cp311-cp311-win_amd64.whl", hash = "sha256:575621b90f0dc2695fec346b2d6302faebd4f0f45c05ea29404cefe35d89442b"},
-    {file = "pywin32-308-cp311-cp311-win_arm64.whl", hash = "sha256:100a5442b7332070983c4cd03f2e906a5648a5104b8a7f50175f7906efd16bb6"},
-    {file = "pywin32-308-cp312-cp312-win32.whl", hash = "sha256:587f3e19696f4bf96fde9d8a57cec74a57021ad5f204c9e627e15c33ff568897"},
-    {file = "pywin32-308-cp312-cp312-win_amd64.whl", hash = "sha256:00b3e11ef09ede56c6a43c71f2d31857cf7c54b0ab6e78ac659497abd2834f47"},
-    {file = "pywin32-308-cp312-cp312-win_arm64.whl", hash = "sha256:9b4de86c8d909aed15b7011182c8cab38c8850de36e6afb1f0db22b8959e3091"},
-    {file = "pywin32-308-cp313-cp313-win32.whl", hash = "sha256:1c44539a37a5b7b21d02ab34e6a4d314e0788f1690d65b48e9b0b89f31abbbed"},
-    {file = "pywin32-308-cp313-cp313-win_amd64.whl", hash = "sha256:fd380990e792eaf6827fcb7e187b2b4b1cede0585e3d0c9e84201ec27b9905e4"},
-    {file = "pywin32-308-cp313-cp313-win_arm64.whl", hash = "sha256:ef313c46d4c18dfb82a2431e3051ac8f112ccee1a34f29c263c583c568db63cd"},
-    {file = "pywin32-308-cp37-cp37m-win32.whl", hash = "sha256:1f696ab352a2ddd63bd07430080dd598e6369152ea13a25ebcdd2f503a38f1ff"},
-    {file = "pywin32-308-cp37-cp37m-win_amd64.whl", hash = "sha256:13dcb914ed4347019fbec6697a01a0aec61019c1046c2b905410d197856326a6"},
-    {file = "pywin32-308-cp38-cp38-win32.whl", hash = "sha256:5794e764ebcabf4ff08c555b31bd348c9025929371763b2183172ff4708152f0"},
-    {file = "pywin32-308-cp38-cp38-win_amd64.whl", hash = "sha256:3b92622e29d651c6b783e368ba7d6722b1634b8e70bd376fd7610fe1992e19de"},
-    {file = "pywin32-308-cp39-cp39-win32.whl", hash = "sha256:7873ca4dc60ab3287919881a7d4f88baee4a6e639aa6962de25a98ba6b193341"},
-    {file = "pywin32-308-cp39-cp39-win_amd64.whl", hash = "sha256:71b3322d949b4cc20776436a9c9ba0eeedcbc9c650daa536df63f0ff111bb920"},
+    {file = "pywin32-309-cp310-cp310-win32.whl", hash = "sha256:5b78d98550ca093a6fe7ab6d71733fbc886e2af9d4876d935e7f6e1cd6577ac9"},
+    {file = "pywin32-309-cp310-cp310-win_amd64.whl", hash = "sha256:728d08046f3d65b90d4c77f71b6fbb551699e2005cc31bbffd1febd6a08aa698"},
+    {file = "pywin32-309-cp310-cp310-win_arm64.whl", hash = "sha256:c667bcc0a1e6acaca8984eb3e2b6e42696fc035015f99ff8bc6c3db4c09a466a"},
+    {file = "pywin32-309-cp311-cp311-win32.whl", hash = "sha256:d5df6faa32b868baf9ade7c9b25337fa5eced28eb1ab89082c8dae9c48e4cd51"},
+    {file = "pywin32-309-cp311-cp311-win_amd64.whl", hash = "sha256:e7ec2cef6df0926f8a89fd64959eba591a1eeaf0258082065f7bdbe2121228db"},
+    {file = "pywin32-309-cp311-cp311-win_arm64.whl", hash = "sha256:54ee296f6d11db1627216e9b4d4c3231856ed2d9f194c82f26c6cb5650163f4c"},
+    {file = "pywin32-309-cp312-cp312-win32.whl", hash = "sha256:de9acacced5fa82f557298b1fed5fef7bd49beee04190f68e1e4783fbdc19926"},
+    {file = "pywin32-309-cp312-cp312-win_amd64.whl", hash = "sha256:6ff9eebb77ffc3d59812c68db33c0a7817e1337e3537859499bd27586330fc9e"},
+    {file = "pywin32-309-cp312-cp312-win_arm64.whl", hash = "sha256:619f3e0a327b5418d833f44dc87859523635cf339f86071cc65a13c07be3110f"},
+    {file = "pywin32-309-cp313-cp313-win32.whl", hash = "sha256:008bffd4afd6de8ca46c6486085414cc898263a21a63c7f860d54c9d02b45c8d"},
+    {file = "pywin32-309-cp313-cp313-win_amd64.whl", hash = "sha256:bd0724f58492db4cbfbeb1fcd606495205aa119370c0ddc4f70e5771a3ab768d"},
+    {file = "pywin32-309-cp313-cp313-win_arm64.whl", hash = "sha256:8fd9669cfd41863b688a1bc9b1d4d2d76fd4ba2128be50a70b0ea66b8d37953b"},
+    {file = "pywin32-309-cp38-cp38-win32.whl", hash = "sha256:617b837dc5d9dfa7e156dbfa7d3906c009a2881849a80a9ae7519f3dd8c6cb86"},
+    {file = "pywin32-309-cp38-cp38-win_amd64.whl", hash = "sha256:0be3071f555480fbfd86a816a1a773880ee655bf186aa2931860dbb44e8424f8"},
+    {file = "pywin32-309-cp39-cp39-win32.whl", hash = "sha256:72ae9ae3a7a6473223589a1621f9001fe802d59ed227fd6a8503c9af67c1d5f4"},
+    {file = "pywin32-309-cp39-cp39-win_amd64.whl", hash = "sha256:88bc06d6a9feac70783de64089324568ecbc65866e2ab318eab35da3811fd7ef"},
 ]
 
 [[package]]
@@ -2586,7 +2532,6 @@ version = "6.0.2"
 description = "YAML parser and emitter for Python"
 optional = false
 python-versions = ">=3.8"
-groups = ["main"]
 files = [
     {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"},
     {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"},
@@ -2649,8 +2594,6 @@ version = "5.2.1"
 description = "Python client for Redis database and key-value store"
 optional = true
 python-versions = ">=3.8"
-groups = ["main"]
-markers = "extra == \"proxy\""
 files = [
     {file = "redis-5.2.1-py3-none-any.whl", hash = "sha256:ee7e1056b9aea0f04c6c2ed59452947f34c4940ee025f5dd83e6a6418b6989e4"},
     {file = "redis-5.2.1.tar.gz", hash = "sha256:16f2e22dff21d5125e8481515e386711a34cbec50f0e44413dd7d9c060a54e0f"},
@@ -2669,7 +2612,6 @@ version = "0.35.1"
 description = "JSON Referencing + Python"
 optional = false
 python-versions = ">=3.8"
-groups = ["main"]
 files = [
     {file = "referencing-0.35.1-py3-none-any.whl", hash = "sha256:eda6d3234d62814d1c64e305c1331c9a3a6132da475ab6382eaa997b21ee75de"},
     {file = "referencing-0.35.1.tar.gz", hash = "sha256:25b42124a6c8b632a425174f24087783efb348a6f1e0008e63cd4466fedf703c"},
@@ -2685,7 +2627,6 @@ version = "2024.11.6"
 description = "Alternative regular expression module, to replace re."
 optional = false
 python-versions = ">=3.8"
-groups = ["main"]
 files = [
     {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ff590880083d60acc0433f9c3f713c51f7ac6ebb9adf889c79a261ecf541aa91"},
     {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:658f90550f38270639e83ce492f27d2c8d2cd63805c65a13a14d36ca126753f0"},
@@ -2789,7 +2730,6 @@ version = "2.31.0"
 description = "Python HTTP for Humans."
 optional = false
 python-versions = ">=3.7"
-groups = ["main"]
 files = [
     {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"},
     {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"},
@@ -2811,8 +2751,6 @@ version = "0.8.0"
 description = "Resend Python SDK"
 optional = true
 python-versions = ">=3.7"
-groups = ["main"]
-markers = "extra == \"extra-proxy\""
 files = [
     {file = "resend-0.8.0-py2.py3-none-any.whl", hash = "sha256:adc1515dadf4f4fc6b90db55a237f0f37fc56fd74287a986519a8a187fdb661d"},
     {file = "resend-0.8.0.tar.gz", hash = "sha256:94142394701724dbcfcd8f760f675c662a1025013e741dd7cc773ca885526257"},
@@ -2827,7 +2765,6 @@ version = "0.20.1"
 description = "Python bindings to Rust's persistent data structures (rpds)"
 optional = false
 python-versions = ">=3.8"
-groups = ["main"]
 files = [
     {file = "rpds_py-0.20.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:a649dfd735fff086e8a9d0503a9f0c7d01b7912a333c7ae77e1515c08c146dad"},
     {file = "rpds_py-0.20.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f16bc1334853e91ddaaa1217045dd7be166170beec337576818461268a3de67f"},
@@ -2940,8 +2877,6 @@ version = "2.1.0"
 description = "RQ is a simple, lightweight, library for creating background jobs, and processing them."
 optional = true
 python-versions = ">=3.8"
-groups = ["main"]
-markers = "extra == \"proxy\""
 files = [
     {file = "rq-2.1.0-py3-none-any.whl", hash = "sha256:3c6892c6ca848e5fb47c1875399a66f13656bf0e123bf725d9aa9a12718e2fdf"},
     {file = "rq-2.1.0.tar.gz", hash = "sha256:764585b6cab69ef1412f4aee523347e5aa7ece3ca175c118b1d92223dd8c2826"},
@@ -2957,8 +2892,6 @@ version = "4.9"
 description = "Pure-Python RSA implementation"
 optional = true
 python-versions = ">=3.6,<4"
-groups = ["main"]
-markers = "extra == \"extra-proxy\""
 files = [
     {file = "rsa-4.9-py3-none-any.whl", hash = "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7"},
     {file = "rsa-4.9.tar.gz", hash = "sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21"},
@@ -2973,8 +2906,6 @@ version = "1.17.0"
 description = "Python 2 and 3 compatibility utilities"
 optional = true
 python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
-groups = ["main"]
-markers = "extra == \"extra-proxy\""
 files = [
     {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"},
     {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"},
@@ -2986,7 +2917,6 @@ version = "1.3.1"
 description = "Sniff out which async library your code is running under"
 optional = false
 python-versions = ">=3.7"
-groups = ["main"]
 files = [
     {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"},
     {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
@@ -2998,8 +2928,6 @@ version = "0.44.0"
 description = "The little ASGI library that shines."
 optional = true
 python-versions = ">=3.8"
-groups = ["main"]
-markers = "extra == \"proxy\""
 files = [
     {file = "starlette-0.44.0-py3-none-any.whl", hash = "sha256:19edeb75844c16dcd4f9dd72f22f9108c1539f3fc9c4c88885654fef64f85aea"},
     {file = "starlette-0.44.0.tar.gz", hash = "sha256:e35166950a3ccccc701962fe0711db0bc14f2ecd37c6f9fe5e3eae0cbaea8715"},
@@ -3018,7 +2946,6 @@ version = "0.7.0"
 description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models"
 optional = false
 python-versions = ">=3.8"
-groups = ["main"]
 files = [
     {file = "tiktoken-0.7.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:485f3cc6aba7c6b6ce388ba634fbba656d9ee27f766216f45146beb4ac18b25f"},
     {file = "tiktoken-0.7.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e54be9a2cd2f6d6ffa3517b064983fb695c9a9d8aa7d574d1ef3c3f931a99225"},
@@ -3071,7 +2998,6 @@ version = "0.21.0"
 description = ""
 optional = false
 python-versions = ">=3.7"
-groups = ["main"]
 files = [
     {file = "tokenizers-0.21.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:3c4c93eae637e7d2aaae3d376f06085164e1660f89304c0ab2b1d08a406636b2"},
     {file = "tokenizers-0.21.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:f53ea537c925422a2e0e92a24cce96f6bc5046bbef24a1652a5edc8ba975f62e"},
@@ -3104,8 +3030,6 @@ version = "2.2.1"
 description = "A lil' TOML parser"
 optional = false
 python-versions = ">=3.8"
-groups = ["dev"]
-markers = "python_version < \"3.11\""
 files = [
     {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"},
     {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"},
@@ -3147,8 +3071,6 @@ version = "0.13.2"
 description = "Style preserving TOML library"
 optional = true
 python-versions = ">=3.8"
-groups = ["main"]
-markers = "extra == \"extra-proxy\""
 files = [
     {file = "tomlkit-0.13.2-py3-none-any.whl", hash = "sha256:7a974427f6e119197f670fbbbeae7bef749a6c14e793db934baefc1b5f03efde"},
     {file = "tomlkit-0.13.2.tar.gz", hash = "sha256:fff5fe59a87295b278abd31bec92c15d9bc4a06885ab12bcea52c71119392e79"},
@@ -3160,7 +3082,6 @@ version = "4.67.1"
 description = "Fast, Extensible Progress Meter"
 optional = false
 python-versions = ">=3.7"
-groups = ["main"]
 files = [
     {file = "tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2"},
     {file = "tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"},
@@ -3182,7 +3103,6 @@ version = "4.12.2"
 description = "Backported and Experimental Type Hints for Python 3.8+"
 optional = false
 python-versions = ">=3.8"
-groups = ["main", "dev"]
 files = [
     {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"},
     {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"},
@@ -3194,8 +3114,6 @@ version = "2025.1"
 description = "Provider of IANA time zone data"
 optional = true
 python-versions = ">=2"
-groups = ["main"]
-markers = "extra == \"proxy\" and platform_system == \"Windows\""
 files = [
     {file = "tzdata-2025.1-py2.py3-none-any.whl", hash = "sha256:7e127113816800496f027041c570f50bcd464a020098a3b6b199517772303639"},
     {file = "tzdata-2025.1.tar.gz", hash = "sha256:24894909e88cdb28bd1636c6887801df64cb485bd593f2fd83ef29075a81d694"},
@@ -3207,8 +3125,6 @@ version = "5.2"
 description = "tzinfo object for the local timezone"
 optional = true
 python-versions = ">=3.8"
-groups = ["main"]
-markers = "extra == \"proxy\""
 files = [
     {file = "tzlocal-5.2-py3-none-any.whl", hash = "sha256:49816ef2fe65ea8ac19d19aa7a1ae0551c834303d5014c6d5a62e4cbda8047b8"},
     {file = "tzlocal-5.2.tar.gz", hash = "sha256:8d399205578f1a9342816409cc1e46a93ebd5755e39ea2d85334bea911bf0e6e"},
@@ -3227,7 +3143,6 @@ version = "2.2.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.8"
-groups = ["main"]
 files = [
     {file = "urllib3-2.2.3-py3-none-any.whl", hash = "sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac"},
     {file = "urllib3-2.2.3.tar.gz", hash = "sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9"},
@@ -3245,8 +3160,6 @@ version = "0.29.0"
 description = "The lightning-fast ASGI server."
 optional = true
 python-versions = ">=3.8"
-groups = ["main"]
-markers = "extra == \"proxy\""
 files = [
     {file = "uvicorn-0.29.0-py3-none-any.whl", hash = "sha256:2c2aac7ff4f4365c206fd773a39bf4ebd1047c238f8b8268ad996829323473de"},
     {file = "uvicorn-0.29.0.tar.gz", hash = "sha256:6a69214c0b6a087462412670b3ef21224fa48cae0e452b5883e8e8bdfdd11dd0"},
@@ -3266,8 +3179,6 @@ version = "0.21.0"
 description = "Fast implementation of asyncio event loop on top of libuv"
 optional = true
 python-versions = ">=3.8.0"
-groups = ["main"]
-markers = "extra == \"proxy\""
 files = [
     {file = "uvloop-0.21.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ec7e6b09a6fdded42403182ab6b832b71f4edaf7f37a9a0e371a01db5f0cb45f"},
     {file = "uvloop-0.21.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:196274f2adb9689a289ad7d65700d37df0c0930fd8e4e743fa4834e850d7719d"},
@@ -3319,7 +3230,6 @@ version = "1.15.2"
 description = "Yet another URL library"
 optional = false
 python-versions = ">=3.8"
-groups = ["main"]
 files = [
     {file = "yarl-1.15.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e4ee8b8639070ff246ad3649294336b06db37a94bdea0d09ea491603e0be73b8"},
     {file = "yarl-1.15.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a7cf963a357c5f00cb55b1955df8bbe68d2f2f65de065160a1c26b85a1e44172"},
@@ -3432,7 +3342,6 @@ version = "3.20.2"
 description = "Backport of pathlib-compatible object wrapper for zip files"
 optional = false
 python-versions = ">=3.8"
-groups = ["main"]
 files = [
     {file = "zipp-3.20.2-py3-none-any.whl", hash = "sha256:a817ac80d6cf4b23bf7f2828b7cabf326f15a001bea8b1f9b49631780ba28350"},
     {file = "zipp-3.20.2.tar.gz", hash = "sha256:bc9eb26f4506fda01b81bcde0ca78103b6e62f991b381fec825435c836edbc29"},
@@ -3451,6 +3360,6 @@ extra-proxy = ["azure-identity", "azure-keyvault-secrets", "google-cloud-kms", "
 proxy = ["PyJWT", "apscheduler", "backoff", "cryptography", "fastapi", "fastapi-sso", "gunicorn", "orjson", "pynacl", "python-multipart", "pyyaml", "rq", "uvicorn", "uvloop"]
 
 [metadata]
-lock-version = "2.1"
+lock-version = "2.0"
 python-versions = ">=3.8.1,<4.0, !=3.9.7"
-content-hash = "c4bb19825304caad644188c6bbdc6ad8c0da91c02eac05c96c17691219c754cc"
+content-hash = "0fe10b223236f198823e8cc3457176211293d58e653cd430f74ff079ef38b756"
diff --git a/pyproject.toml b/pyproject.toml
index 77b0ac55a3..47a9073e30 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "1.63.7"
+version = "1.63.8"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT"
@@ -21,7 +21,7 @@ Documentation = "https://docs.litellm.ai"
 [tool.poetry.dependencies]
 python = ">=3.8.1,<4.0, !=3.9.7"
 httpx = ">=0.23.0"
-openai = ">=1.61.0"
+openai = ">=1.66.1"
 python-dotenv = ">=0.2.0"
 tiktoken = ">=0.7.0"
 importlib-metadata = ">=6.8.0"
@@ -96,7 +96,7 @@ requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.commitizen]
-version = "1.63.7"
+version = "1.63.8"
 version_files = [
     "pyproject.toml:^version"
 ]
diff --git a/requirements.txt b/requirements.txt
index 3d695d1766..dcdddff117 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 # LITELLM PROXY DEPENDENCIES #
 anyio==4.4.0 # openai + http req.
 httpx==0.27.0 # Pin Httpx dependency
-openai==1.61.0  # openai req. 
+openai==1.66.1  # openai req. 
 fastapi==0.115.5 # server dep
 backoff==2.2.1 # server dep
 pyyaml==6.0.2 # server dep
diff --git a/schema.prisma b/schema.prisma
index fedbb271da..e453e74b46 100644
--- a/schema.prisma
+++ b/schema.prisma
@@ -29,6 +29,18 @@ model LiteLLM_BudgetTable {
   organization_membership LiteLLM_OrganizationMembership[] // budgets of Users within a Organization 
 }
 
+// Models on proxy
+model LiteLLM_CredentialsTable {
+  credential_id String @id @default(uuid())
+  credential_name String @unique
+  credential_values Json
+  credential_info Json? 
+  created_at    DateTime               @default(now()) @map("created_at")
+  created_by String
+  updated_at    DateTime               @default(now()) @updatedAt @map("updated_at")
+  updated_by String
+}
+
 // Models on proxy
 model LiteLLM_ProxyModelTable {
   model_id String @id @default(uuid())
diff --git a/tests/litellm/llms/azure/test_azure_common_utils.py b/tests/litellm/llms/azure/test_azure_common_utils.py
new file mode 100644
index 0000000000..21fa3b37ee
--- /dev/null
+++ b/tests/litellm/llms/azure/test_azure_common_utils.py
@@ -0,0 +1,457 @@
+import json
+import os
+import sys
+import traceback
+from typing import Callable, Optional
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+sys.path.insert(
+    0, os.path.abspath("../../../..")
+)  # Adds the parent directory to the system path
+import litellm
+from litellm.llms.azure.common_utils import BaseAzureLLM
+from litellm.types.utils import CallTypes
+
+
+# Mock the necessary dependencies
+@pytest.fixture
+def setup_mocks():
+    with patch(
+        "litellm.llms.azure.common_utils.get_azure_ad_token_from_entrata_id"
+    ) as mock_entrata_token, patch(
+        "litellm.llms.azure.common_utils.get_azure_ad_token_from_username_password"
+    ) as mock_username_password_token, patch(
+        "litellm.llms.azure.common_utils.get_azure_ad_token_from_oidc"
+    ) as mock_oidc_token, patch(
+        "litellm.llms.azure.common_utils.get_azure_ad_token_provider"
+    ) as mock_token_provider, patch(
+        "litellm.llms.azure.common_utils.litellm"
+    ) as mock_litellm, patch(
+        "litellm.llms.azure.common_utils.verbose_logger"
+    ) as mock_logger, patch(
+        "litellm.llms.azure.common_utils.select_azure_base_url_or_endpoint"
+    ) as mock_select_url:
+
+        # Configure mocks
+        mock_litellm.AZURE_DEFAULT_API_VERSION = "2023-05-15"
+        mock_litellm.enable_azure_ad_token_refresh = False
+
+        mock_entrata_token.return_value = lambda: "mock-entrata-token"
+        mock_username_password_token.return_value = (
+            lambda: "mock-username-password-token"
+        )
+        mock_oidc_token.return_value = "mock-oidc-token"
+        mock_token_provider.return_value = lambda: "mock-default-token"
+
+        mock_select_url.side_effect = (
+            lambda azure_client_params, **kwargs: azure_client_params
+        )
+
+        yield {
+            "entrata_token": mock_entrata_token,
+            "username_password_token": mock_username_password_token,
+            "oidc_token": mock_oidc_token,
+            "token_provider": mock_token_provider,
+            "litellm": mock_litellm,
+            "logger": mock_logger,
+            "select_url": mock_select_url,
+        }
+
+
+def test_initialize_with_api_key(setup_mocks):
+    # Test with api_key provided
+    result = BaseAzureLLM().initialize_azure_sdk_client(
+        litellm_params={},
+        api_key="test-api-key",
+        api_base="https://test.openai.azure.com",
+        model_name="gpt-4",
+        api_version="2023-06-01",
+    )
+
+    # Verify expected result
+    assert result["api_key"] == "test-api-key"
+    assert result["azure_endpoint"] == "https://test.openai.azure.com"
+    assert result["api_version"] == "2023-06-01"
+    assert "azure_ad_token" in result
+    assert result["azure_ad_token"] is None
+
+
+def test_initialize_with_tenant_credentials(setup_mocks):
+    # Test with tenant_id, client_id, and client_secret provided
+    result = BaseAzureLLM().initialize_azure_sdk_client(
+        litellm_params={
+            "tenant_id": "test-tenant-id",
+            "client_id": "test-client-id",
+            "client_secret": "test-client-secret",
+        },
+        api_key=None,
+        api_base="https://test.openai.azure.com",
+        model_name="gpt-4",
+        api_version=None,
+    )
+
+    # Verify that get_azure_ad_token_from_entrata_id was called
+    setup_mocks["entrata_token"].assert_called_once_with(
+        tenant_id="test-tenant-id",
+        client_id="test-client-id",
+        client_secret="test-client-secret",
+    )
+
+    # Verify expected result
+    assert result["api_key"] is None
+    assert result["azure_endpoint"] == "https://test.openai.azure.com"
+    assert "azure_ad_token_provider" in result
+
+
+def test_initialize_with_username_password(setup_mocks):
+    # Test with azure_username, azure_password, and client_id provided
+    result = BaseAzureLLM().initialize_azure_sdk_client(
+        litellm_params={
+            "azure_username": "test-username",
+            "azure_password": "test-password",
+            "client_id": "test-client-id",
+        },
+        api_key=None,
+        api_base="https://test.openai.azure.com",
+        model_name="gpt-4",
+        api_version=None,
+    )
+
+    # Verify that get_azure_ad_token_from_username_password was called
+    setup_mocks["username_password_token"].assert_called_once_with(
+        azure_username="test-username",
+        azure_password="test-password",
+        client_id="test-client-id",
+    )
+
+    # Verify expected result
+    assert "azure_ad_token_provider" in result
+
+
+def test_initialize_with_oidc_token(setup_mocks):
+    # Test with azure_ad_token that starts with "oidc/"
+    result = BaseAzureLLM().initialize_azure_sdk_client(
+        litellm_params={"azure_ad_token": "oidc/test-token"},
+        api_key=None,
+        api_base="https://test.openai.azure.com",
+        model_name="gpt-4",
+        api_version=None,
+    )
+
+    # Verify that get_azure_ad_token_from_oidc was called
+    setup_mocks["oidc_token"].assert_called_once_with("oidc/test-token")
+
+    # Verify expected result
+    assert result["azure_ad_token"] == "mock-oidc-token"
+
+
+def test_initialize_with_enable_token_refresh(setup_mocks):
+    # Enable token refresh
+    setup_mocks["litellm"].enable_azure_ad_token_refresh = True
+
+    # Test with token refresh enabled
+    result = BaseAzureLLM().initialize_azure_sdk_client(
+        litellm_params={},
+        api_key=None,
+        api_base="https://test.openai.azure.com",
+        model_name="gpt-4",
+        api_version=None,
+    )
+
+    # Verify that get_azure_ad_token_provider was called
+    setup_mocks["token_provider"].assert_called_once()
+
+    # Verify expected result
+    assert "azure_ad_token_provider" in result
+
+
+def test_initialize_with_token_refresh_error(setup_mocks):
+    # Enable token refresh but make it raise an error
+    setup_mocks["litellm"].enable_azure_ad_token_refresh = True
+    setup_mocks["token_provider"].side_effect = ValueError("Token provider error")
+
+    # Test with token refresh enabled but raising error
+    result = BaseAzureLLM().initialize_azure_sdk_client(
+        litellm_params={},
+        api_key=None,
+        api_base="https://test.openai.azure.com",
+        model_name="gpt-4",
+        api_version=None,
+    )
+
+    # Verify error was logged
+    setup_mocks["logger"].debug.assert_any_call(
+        "Azure AD Token Provider could not be used."
+    )
+
+
+def test_api_version_from_env_var(setup_mocks):
+    # Test api_version from environment variable
+    with patch.dict(os.environ, {"AZURE_API_VERSION": "2023-07-01"}):
+        result = BaseAzureLLM().initialize_azure_sdk_client(
+            litellm_params={},
+            api_key="test-api-key",
+            api_base="https://test.openai.azure.com",
+            model_name="gpt-4",
+            api_version=None,
+        )
+
+    # Verify expected result
+    assert result["api_version"] == "2023-07-01"
+
+
+def test_select_azure_base_url_called(setup_mocks):
+    # Test that select_azure_base_url_or_endpoint is called
+    result = BaseAzureLLM().initialize_azure_sdk_client(
+        litellm_params={},
+        api_key="test-api-key",
+        api_base="https://test.openai.azure.com",
+        model_name="gpt-4",
+        api_version="2023-06-01",
+    )
+
+    # Verify that select_azure_base_url_or_endpoint was called
+    setup_mocks["select_url"].assert_called_once()
+
+
+@pytest.mark.parametrize(
+    "call_type",
+    [
+        call_type
+        for call_type in CallTypes.__members__.values()
+        if call_type.name.startswith("a")
+        and call_type.name
+        not in [
+            "amoderation",
+            "arerank",
+            "arealtime",
+            "anthropic_messages",
+            "add_message",
+            "arun_thread_stream",
+            "aresponses",
+        ]
+    ],
+)
+@pytest.mark.asyncio
+async def test_ensure_initialize_azure_sdk_client_always_used(call_type):
+    from litellm.router import Router
+
+    # Create a router with an Azure model
+    azure_model_name = "azure/chatgpt-v-2"
+    router = Router(
+        model_list=[
+            {
+                "model_name": "gpt-3.5-turbo",
+                "litellm_params": {
+                    "model": azure_model_name,
+                    "api_key": "test-api-key",
+                    "api_version": os.getenv("AZURE_API_VERSION", "2023-05-15"),
+                    "api_base": os.getenv(
+                        "AZURE_API_BASE", "https://test.openai.azure.com"
+                    ),
+                },
+            }
+        ],
+    )
+
+    # Prepare test input based on call type
+    test_inputs = {
+        "acompletion": {
+            "messages": [{"role": "user", "content": "Hello, how are you?"}]
+        },
+        "atext_completion": {"prompt": "Hello, how are you?"},
+        "aimage_generation": {"prompt": "Hello, how are you?"},
+        "aembedding": {"input": "Hello, how are you?"},
+        "arerank": {"input": "Hello, how are you?"},
+        "atranscription": {"file": "path/to/file"},
+        "aspeech": {"input": "Hello, how are you?", "voice": "female"},
+        "acreate_batch": {
+            "completion_window": 10,
+            "endpoint": "https://test.openai.azure.com",
+            "input_file_id": "123",
+        },
+        "aretrieve_batch": {"batch_id": "123"},
+        "aget_assistants": {"custom_llm_provider": "azure"},
+        "acreate_assistants": {"custom_llm_provider": "azure"},
+        "adelete_assistant": {"custom_llm_provider": "azure", "assistant_id": "123"},
+        "acreate_thread": {"custom_llm_provider": "azure"},
+        "aget_thread": {"custom_llm_provider": "azure", "thread_id": "123"},
+        "a_add_message": {
+            "custom_llm_provider": "azure",
+            "thread_id": "123",
+            "role": "user",
+            "content": "Hello, how are you?",
+        },
+        "aget_messages": {"custom_llm_provider": "azure", "thread_id": "123"},
+        "arun_thread": {
+            "custom_llm_provider": "azure",
+            "assistant_id": "123",
+            "thread_id": "123",
+        },
+        "acreate_file": {
+            "custom_llm_provider": "azure",
+            "file": MagicMock(),
+            "purpose": "assistants",
+        },
+    }
+
+    # Get appropriate input for this call type
+    input_kwarg = test_inputs.get(call_type.value, {})
+
+    patch_target = "litellm.main.azure_chat_completions.initialize_azure_sdk_client"
+    if call_type == CallTypes.atranscription:
+        patch_target = (
+            "litellm.main.azure_audio_transcriptions.initialize_azure_sdk_client"
+        )
+    elif call_type == CallTypes.arerank:
+        patch_target = (
+            "litellm.rerank_api.main.azure_rerank.initialize_azure_sdk_client"
+        )
+    elif call_type == CallTypes.acreate_batch or call_type == CallTypes.aretrieve_batch:
+        patch_target = (
+            "litellm.batches.main.azure_batches_instance.initialize_azure_sdk_client"
+        )
+    elif (
+        call_type == CallTypes.aget_assistants
+        or call_type == CallTypes.acreate_assistants
+        or call_type == CallTypes.adelete_assistant
+        or call_type == CallTypes.acreate_thread
+        or call_type == CallTypes.aget_thread
+        or call_type == CallTypes.a_add_message
+        or call_type == CallTypes.aget_messages
+        or call_type == CallTypes.arun_thread
+    ):
+        patch_target = (
+            "litellm.assistants.main.azure_assistants_api.initialize_azure_sdk_client"
+        )
+    elif call_type == CallTypes.acreate_file or call_type == CallTypes.afile_content:
+        patch_target = (
+            "litellm.files.main.azure_files_instance.initialize_azure_sdk_client"
+        )
+
+    # Mock the initialize_azure_sdk_client function
+    with patch(patch_target) as mock_init_azure:
+        # Also mock async_function_with_fallbacks to prevent actual API calls
+        # Call the appropriate router method
+        try:
+            get_attr = getattr(router, call_type.value, None)
+            if get_attr is None:
+                pytest.skip(
+                    f"Skipping {call_type.value} because it is not supported on Router"
+                )
+            await getattr(router, call_type.value)(
+                model="gpt-3.5-turbo",
+                **input_kwarg,
+                num_retries=0,
+                azure_ad_token="oidc/test-token",
+            )
+        except Exception as e:
+            traceback.print_exc()
+
+        # Verify initialize_azure_sdk_client was called
+        mock_init_azure.assert_called_once()
+
+        # Verify it was called with the right model name
+        calls = mock_init_azure.call_args_list
+        azure_calls = [call for call in calls]
+
+        litellm_params = azure_calls[0].kwargs["litellm_params"]
+        print("litellm_params", litellm_params)
+
+        assert (
+            "azure_ad_token" in litellm_params
+        ), "azure_ad_token not found in parameters"
+        assert (
+            litellm_params["azure_ad_token"] == "oidc/test-token"
+        ), "azure_ad_token is not correct"
+
+        # More detailed verification (optional)
+        for call in azure_calls:
+            assert "api_key" in call.kwargs, "api_key not found in parameters"
+            assert "api_base" in call.kwargs, "api_base not found in parameters"
+
+
+@pytest.mark.parametrize(
+    "call_type",
+    [
+        CallTypes.atext_completion,
+        CallTypes.acompletion,
+    ],
+)
+@pytest.mark.asyncio
+async def test_ensure_initialize_azure_sdk_client_always_used_azure_text(call_type):
+    from litellm.router import Router
+
+    # Create a router with an Azure model
+    azure_model_name = "azure_text/chatgpt-v-2"
+    router = Router(
+        model_list=[
+            {
+                "model_name": "gpt-3.5-turbo",
+                "litellm_params": {
+                    "model": azure_model_name,
+                    "api_key": "test-api-key",
+                    "api_version": os.getenv("AZURE_API_VERSION", "2023-05-15"),
+                    "api_base": os.getenv(
+                        "AZURE_API_BASE", "https://test.openai.azure.com"
+                    ),
+                },
+            }
+        ],
+    )
+
+    # Prepare test input based on call type
+    test_inputs = {
+        "acompletion": {
+            "messages": [{"role": "user", "content": "Hello, how are you?"}]
+        },
+        "atext_completion": {"prompt": "Hello, how are you?"},
+    }
+
+    # Get appropriate input for this call type
+    input_kwarg = test_inputs.get(call_type.value, {})
+
+    patch_target = "litellm.main.azure_text_completions.initialize_azure_sdk_client"
+
+    # Mock the initialize_azure_sdk_client function
+    with patch(patch_target) as mock_init_azure:
+        # Also mock async_function_with_fallbacks to prevent actual API calls
+        # Call the appropriate router method
+        try:
+            get_attr = getattr(router, call_type.value, None)
+            if get_attr is None:
+                pytest.skip(
+                    f"Skipping {call_type.value} because it is not supported on Router"
+                )
+            await getattr(router, call_type.value)(
+                model="gpt-3.5-turbo",
+                **input_kwarg,
+                num_retries=0,
+                azure_ad_token="oidc/test-token",
+            )
+        except Exception as e:
+            traceback.print_exc()
+
+        # Verify initialize_azure_sdk_client was called
+        mock_init_azure.assert_called_once()
+
+        # Verify it was called with the right model name
+        calls = mock_init_azure.call_args_list
+        azure_calls = [call for call in calls]
+
+        litellm_params = azure_calls[0].kwargs["litellm_params"]
+        print("litellm_params", litellm_params)
+
+        assert (
+            "azure_ad_token" in litellm_params
+        ), "azure_ad_token not found in parameters"
+        assert (
+            litellm_params["azure_ad_token"] == "oidc/test-token"
+        ), "azure_ad_token is not correct"
+
+        # More detailed verification (optional)
+        for call in azure_calls:
+            assert "api_key" in call.kwargs, "api_key not found in parameters"
+            assert "api_base" in call.kwargs, "api_base not found in parameters"
diff --git a/tests/litellm/llms/openai/responses/test_openai_responses_transformation.py b/tests/litellm/llms/openai/responses/test_openai_responses_transformation.py
new file mode 100644
index 0000000000..b4a6cd974e
--- /dev/null
+++ b/tests/litellm/llms/openai/responses/test_openai_responses_transformation.py
@@ -0,0 +1,239 @@
+import json
+import os
+import sys
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import httpx
+import pytest
+
+sys.path.insert(
+    0, os.path.abspath("../../../../..")
+)  # Adds the parent directory to the system path
+
+from litellm.llms.openai.responses.transformation import OpenAIResponsesAPIConfig
+from litellm.types.llms.openai import (
+    OutputTextDeltaEvent,
+    ResponseCompletedEvent,
+    ResponsesAPIRequestParams,
+    ResponsesAPIResponse,
+    ResponsesAPIStreamEvents,
+)
+
+
+class TestOpenAIResponsesAPIConfig:
+    def setup_method(self):
+        self.config = OpenAIResponsesAPIConfig()
+        self.model = "gpt-4o"
+        self.logging_obj = MagicMock()
+
+    def test_map_openai_params(self):
+        """Test that parameters are correctly mapped"""
+        test_params = {"input": "Hello world", "temperature": 0.7, "stream": True}
+
+        result = self.config.map_openai_params(
+            response_api_optional_params=test_params,
+            model=self.model,
+            drop_params=False,
+        )
+
+        # The function should return the params unchanged
+        assert result == test_params
+
+    def validate_responses_api_request_params(self, params, expected_fields):
+        """
+        Validate that the params dict has the expected structure of ResponsesAPIRequestParams
+
+        Args:
+            params: The dict to validate
+            expected_fields: Dict of field names and their expected values
+        """
+        # Check that it's a dict
+        assert isinstance(params, dict), "Result should be a dict"
+
+        # Check expected fields have correct values
+        for field, value in expected_fields.items():
+            assert field in params, f"Missing expected field: {field}"
+            assert (
+                params[field] == value
+            ), f"Field {field} has value {params[field]}, expected {value}"
+
+    def test_transform_responses_api_request(self):
+        """Test request transformation"""
+        input_text = "What is the capital of France?"
+        optional_params = {"temperature": 0.7, "stream": True}
+
+        result = self.config.transform_responses_api_request(
+            model=self.model,
+            input=input_text,
+            response_api_optional_request_params=optional_params,
+            litellm_params={},
+            headers={},
+        )
+
+        # Validate the result has the expected structure and values
+        expected_fields = {
+            "model": self.model,
+            "input": input_text,
+            "temperature": 0.7,
+            "stream": True,
+        }
+
+        self.validate_responses_api_request_params(result, expected_fields)
+
+    def test_transform_streaming_response(self):
+        """Test streaming response transformation"""
+        # Test with a text delta event
+        chunk = {
+            "type": "response.output_text.delta",
+            "item_id": "item_123",
+            "output_index": 0,
+            "content_index": 0,
+            "delta": "Hello",
+        }
+
+        result = self.config.transform_streaming_response(
+            model=self.model, parsed_chunk=chunk, logging_obj=self.logging_obj
+        )
+
+        assert isinstance(result, OutputTextDeltaEvent)
+        assert result.type == ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA
+        assert result.delta == "Hello"
+        assert result.item_id == "item_123"
+
+        # Test with a completed event - providing all required fields
+        completed_chunk = {
+            "type": "response.completed",
+            "response": {
+                "id": "resp_123",
+                "created_at": 1234567890,
+                "model": "gpt-4o",
+                "object": "response",
+                "output": [],
+                "parallel_tool_calls": False,
+                "error": None,
+                "incomplete_details": None,
+                "instructions": None,
+                "metadata": None,
+                "temperature": 0.7,
+                "tool_choice": "auto",
+                "tools": [],
+                "top_p": 1.0,
+                "max_output_tokens": None,
+                "previous_response_id": None,
+                "reasoning": None,
+                "status": "completed",
+                "text": None,
+                "truncation": "auto",
+                "usage": None,
+                "user": None,
+            },
+        }
+
+        # Mock the get_event_model_class to avoid validation issues in tests
+        with patch.object(
+            OpenAIResponsesAPIConfig, "get_event_model_class"
+        ) as mock_get_class:
+            mock_get_class.return_value = ResponseCompletedEvent
+
+            result = self.config.transform_streaming_response(
+                model=self.model,
+                parsed_chunk=completed_chunk,
+                logging_obj=self.logging_obj,
+            )
+
+            assert result.type == ResponsesAPIStreamEvents.RESPONSE_COMPLETED
+            assert result.response.id == "resp_123"
+
+    def test_validate_environment(self):
+        """Test that validate_environment correctly sets the Authorization header"""
+        # Test with provided API key
+        headers = {}
+        api_key = "test_api_key"
+
+        result = self.config.validate_environment(
+            headers=headers, model=self.model, api_key=api_key
+        )
+
+        assert "Authorization" in result
+        assert result["Authorization"] == f"Bearer {api_key}"
+
+        # Test with empty headers
+        headers = {}
+
+        with patch("litellm.api_key", "litellm_api_key"):
+            result = self.config.validate_environment(headers=headers, model=self.model)
+
+            assert "Authorization" in result
+            assert result["Authorization"] == "Bearer litellm_api_key"
+
+        # Test with existing headers
+        headers = {"Content-Type": "application/json"}
+
+        with patch("litellm.openai_key", "openai_key"):
+            with patch("litellm.api_key", None):
+                result = self.config.validate_environment(
+                    headers=headers, model=self.model
+                )
+
+                assert "Authorization" in result
+                assert result["Authorization"] == "Bearer openai_key"
+                assert "Content-Type" in result
+                assert result["Content-Type"] == "application/json"
+
+        # Test with environment variable
+        headers = {}
+
+        with patch("litellm.api_key", None):
+            with patch("litellm.openai_key", None):
+                with patch(
+                    "litellm.llms.openai.responses.transformation.get_secret_str",
+                    return_value="env_api_key",
+                ):
+                    result = self.config.validate_environment(
+                        headers=headers, model=self.model
+                    )
+
+                    assert "Authorization" in result
+                    assert result["Authorization"] == "Bearer env_api_key"
+
+    def test_get_complete_url(self):
+        """Test that get_complete_url returns the correct URL"""
+        # Test with provided API base
+        api_base = "https://custom-openai.example.com/v1"
+
+        result = self.config.get_complete_url(api_base=api_base, model=self.model)
+
+        assert result == "https://custom-openai.example.com/v1/responses"
+
+        # Test with litellm.api_base
+        with patch("litellm.api_base", "https://litellm-api-base.example.com/v1"):
+            result = self.config.get_complete_url(api_base=None, model=self.model)
+
+            assert result == "https://litellm-api-base.example.com/v1/responses"
+
+        # Test with environment variable
+        with patch("litellm.api_base", None):
+            with patch(
+                "litellm.llms.openai.responses.transformation.get_secret_str",
+                return_value="https://env-api-base.example.com/v1",
+            ):
+                result = self.config.get_complete_url(api_base=None, model=self.model)
+
+                assert result == "https://env-api-base.example.com/v1/responses"
+
+        # Test with default API base
+        with patch("litellm.api_base", None):
+            with patch(
+                "litellm.llms.openai.responses.transformation.get_secret_str",
+                return_value=None,
+            ):
+                result = self.config.get_complete_url(api_base=None, model=self.model)
+
+                assert result == "https://api.openai.com/v1/responses"
+
+        # Test with trailing slash in API base
+        api_base = "https://custom-openai.example.com/v1/"
+
+        result = self.config.get_complete_url(api_base=api_base, model=self.model)
+
+        assert result == "https://custom-openai.example.com/v1/responses"
diff --git a/tests/litellm/responses/test_responses_utils.py b/tests/litellm/responses/test_responses_utils.py
new file mode 100644
index 0000000000..3567f609e7
--- /dev/null
+++ b/tests/litellm/responses/test_responses_utils.py
@@ -0,0 +1,150 @@
+import json
+import os
+import sys
+
+import pytest
+from fastapi.testclient import TestClient
+
+sys.path.insert(
+    0, os.path.abspath("../../..")
+)  # Adds the parent directory to the system path
+
+import litellm
+from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
+from litellm.llms.openai.responses.transformation import OpenAIResponsesAPIConfig
+from litellm.responses.utils import ResponseAPILoggingUtils, ResponsesAPIRequestUtils
+from litellm.types.llms.openai import ResponsesAPIOptionalRequestParams
+from litellm.types.utils import Usage
+
+
+class TestResponsesAPIRequestUtils:
+    def test_get_optional_params_responses_api(self):
+        """Test that optional parameters are correctly processed for responses API"""
+        # Setup
+        model = "gpt-4o"
+        config = OpenAIResponsesAPIConfig()
+        optional_params = ResponsesAPIOptionalRequestParams(
+            {"temperature": 0.7, "max_output_tokens": 100}
+        )
+
+        # Execute
+        result = ResponsesAPIRequestUtils.get_optional_params_responses_api(
+            model=model,
+            responses_api_provider_config=config,
+            response_api_optional_params=optional_params,
+        )
+
+        # Assert
+        assert result == optional_params
+        assert "temperature" in result
+        assert result["temperature"] == 0.7
+        assert "max_output_tokens" in result
+        assert result["max_output_tokens"] == 100
+
+    def test_get_optional_params_responses_api_unsupported_param(self):
+        """Test that unsupported parameters raise an error"""
+        # Setup
+        model = "gpt-4o"
+        config = OpenAIResponsesAPIConfig()
+        optional_params = ResponsesAPIOptionalRequestParams(
+            {"temperature": 0.7, "unsupported_param": "value"}
+        )
+
+        # Execute and Assert
+        with pytest.raises(litellm.UnsupportedParamsError) as excinfo:
+            ResponsesAPIRequestUtils.get_optional_params_responses_api(
+                model=model,
+                responses_api_provider_config=config,
+                response_api_optional_params=optional_params,
+            )
+
+        assert "unsupported_param" in str(excinfo.value)
+        assert model in str(excinfo.value)
+
+    def test_get_requested_response_api_optional_param(self):
+        """Test filtering parameters to only include those in ResponsesAPIOptionalRequestParams"""
+        # Setup
+        params = {
+            "temperature": 0.7,
+            "max_output_tokens": 100,
+            "invalid_param": "value",
+            "model": "gpt-4o",  # This is not in ResponsesAPIOptionalRequestParams
+        }
+
+        # Execute
+        result = ResponsesAPIRequestUtils.get_requested_response_api_optional_param(
+            params
+        )
+
+        # Assert
+        assert "temperature" in result
+        assert "max_output_tokens" in result
+        assert "invalid_param" not in result
+        assert "model" not in result
+        assert result["temperature"] == 0.7
+        assert result["max_output_tokens"] == 100
+
+
+class TestResponseAPILoggingUtils:
+    def test_is_response_api_usage_true(self):
+        """Test identification of Response API usage format"""
+        # Setup
+        usage = {"input_tokens": 10, "output_tokens": 20}
+
+        # Execute
+        result = ResponseAPILoggingUtils._is_response_api_usage(usage)
+
+        # Assert
+        assert result is True
+
+    def test_is_response_api_usage_false(self):
+        """Test identification of non-Response API usage format"""
+        # Setup
+        usage = {"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30}
+
+        # Execute
+        result = ResponseAPILoggingUtils._is_response_api_usage(usage)
+
+        # Assert
+        assert result is False
+
+    def test_transform_response_api_usage_to_chat_usage(self):
+        """Test transformation from Response API usage to Chat usage format"""
+        # Setup
+        usage = {
+            "input_tokens": 10,
+            "output_tokens": 20,
+            "total_tokens": 30,
+            "output_tokens_details": {"reasoning_tokens": 5},
+        }
+
+        # Execute
+        result = ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
+            usage
+        )
+
+        # Assert
+        assert isinstance(result, Usage)
+        assert result.prompt_tokens == 10
+        assert result.completion_tokens == 20
+        assert result.total_tokens == 30
+
+    def test_transform_response_api_usage_with_none_values(self):
+        """Test transformation handles None values properly"""
+        # Setup
+        usage = {
+            "input_tokens": 0,  # Changed from None to 0
+            "output_tokens": 20,
+            "total_tokens": 20,
+            "output_tokens_details": {"reasoning_tokens": 5},
+        }
+
+        # Execute
+        result = ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
+            usage
+        )
+
+        # Assert
+        assert result.prompt_tokens == 0
+        assert result.completion_tokens == 20
+        assert result.total_tokens == 20
diff --git a/tests/llm_responses_api_testing/conftest.py b/tests/llm_responses_api_testing/conftest.py
new file mode 100644
index 0000000000..b3561d8a62
--- /dev/null
+++ b/tests/llm_responses_api_testing/conftest.py
@@ -0,0 +1,63 @@
+# conftest.py
+
+import importlib
+import os
+import sys
+
+import pytest
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+import litellm
+
+
+@pytest.fixture(scope="function", autouse=True)
+def setup_and_teardown():
+    """
+    This fixture reloads litellm before every function. To speed up testing by removing callbacks being chained.
+    """
+    curr_dir = os.getcwd()  # Get the current working directory
+    sys.path.insert(
+        0, os.path.abspath("../..")
+    )  # Adds the project directory to the system path
+
+    import litellm
+    from litellm import Router
+
+    importlib.reload(litellm)
+
+    try:
+        if hasattr(litellm, "proxy") and hasattr(litellm.proxy, "proxy_server"):
+            import litellm.proxy.proxy_server
+
+            importlib.reload(litellm.proxy.proxy_server)
+    except Exception as e:
+        print(f"Error reloading litellm.proxy.proxy_server: {e}")
+
+    import asyncio
+
+    loop = asyncio.get_event_loop_policy().new_event_loop()
+    asyncio.set_event_loop(loop)
+    print(litellm)
+    # from litellm import Router, completion, aembedding, acompletion, embedding
+    yield
+
+    # Teardown code (executes after the yield point)
+    loop.close()  # Close the loop created earlier
+    asyncio.set_event_loop(None)  # Remove the reference to the loop
+
+
+def pytest_collection_modifyitems(config, items):
+    # Separate tests in 'test_amazing_proxy_custom_logger.py' and other tests
+    custom_logger_tests = [
+        item for item in items if "custom_logger" in item.parent.name
+    ]
+    other_tests = [item for item in items if "custom_logger" not in item.parent.name]
+
+    # Sort tests based on their names
+    custom_logger_tests.sort(key=lambda x: x.name)
+    other_tests.sort(key=lambda x: x.name)
+
+    # Reorder the items list
+    items[:] = custom_logger_tests + other_tests
diff --git a/tests/llm_responses_api_testing/test_openai_responses_api.py b/tests/llm_responses_api_testing/test_openai_responses_api.py
new file mode 100644
index 0000000000..b711c93f80
--- /dev/null
+++ b/tests/llm_responses_api_testing/test_openai_responses_api.py
@@ -0,0 +1,797 @@
+import os
+import sys
+import pytest
+import asyncio
+from typing import Optional
+from unittest.mock import patch, AsyncMock
+
+sys.path.insert(0, os.path.abspath("../.."))
+import litellm
+from litellm.integrations.custom_logger import CustomLogger
+import json
+from litellm.types.utils import StandardLoggingPayload
+from litellm.types.llms.openai import (
+    ResponseCompletedEvent,
+    ResponsesAPIResponse,
+    ResponseTextConfig,
+    ResponseAPIUsage,
+    IncompleteDetails,
+)
+from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
+
+
+def validate_responses_api_response(response, final_chunk: bool = False):
+    """
+    Validate that a response from litellm.responses() or litellm.aresponses()
+    conforms to the expected ResponsesAPIResponse structure.
+
+    Args:
+        response: The response object to validate
+
+    Raises:
+        AssertionError: If the response doesn't match the expected structure
+    """
+    # Validate response structure
+    print("response=", json.dumps(response, indent=4, default=str))
+    assert isinstance(
+        response, ResponsesAPIResponse
+    ), "Response should be an instance of ResponsesAPIResponse"
+
+    # Required fields
+    assert "id" in response and isinstance(
+        response["id"], str
+    ), "Response should have a string 'id' field"
+    assert "created_at" in response and isinstance(
+        response["created_at"], (int, float)
+    ), "Response should have a numeric 'created_at' field"
+    assert "output" in response and isinstance(
+        response["output"], list
+    ), "Response should have a list 'output' field"
+    assert "parallel_tool_calls" in response and isinstance(
+        response["parallel_tool_calls"], bool
+    ), "Response should have a boolean 'parallel_tool_calls' field"
+
+    # Optional fields with their expected types
+    optional_fields = {
+        "error": (dict, type(None)),  # error can be dict or None
+        "incomplete_details": (IncompleteDetails, type(None)),
+        "instructions": (str, type(None)),
+        "metadata": dict,
+        "model": str,
+        "object": str,
+        "temperature": (int, float),
+        "tool_choice": (dict, str),
+        "tools": list,
+        "top_p": (int, float),
+        "max_output_tokens": (int, type(None)),
+        "previous_response_id": (str, type(None)),
+        "reasoning": dict,
+        "status": str,
+        "text": ResponseTextConfig,
+        "truncation": str,
+        "usage": ResponseAPIUsage,
+        "user": (str, type(None)),
+    }
+    if final_chunk is False:
+        optional_fields["usage"] = type(None)
+
+    for field, expected_type in optional_fields.items():
+        if field in response:
+            assert isinstance(
+                response[field], expected_type
+            ), f"Field '{field}' should be of type {expected_type}, but got {type(response[field])}"
+
+    # Check if output has at least one item
+    if final_chunk is True:
+        assert (
+            len(response["output"]) > 0
+        ), "Response 'output' field should have at least one item"
+
+    return True  # Return True if validation passes
+
+
+@pytest.mark.parametrize("sync_mode", [True, False])
+@pytest.mark.asyncio
+async def test_basic_openai_responses_api(sync_mode):
+    litellm._turn_on_debug()
+
+    if sync_mode:
+        response = litellm.responses(
+            model="gpt-4o", input="Basic ping", max_output_tokens=20
+        )
+    else:
+        response = await litellm.aresponses(
+            model="gpt-4o", input="Basic ping", max_output_tokens=20
+        )
+
+    print("litellm response=", json.dumps(response, indent=4, default=str))
+
+    # Use the helper function to validate the response
+    validate_responses_api_response(response, final_chunk=True)
+
+
+@pytest.mark.parametrize("sync_mode", [True])
+@pytest.mark.asyncio
+async def test_basic_openai_responses_api_streaming(sync_mode):
+    litellm._turn_on_debug()
+
+    if sync_mode:
+        response = litellm.responses(
+            model="gpt-4o",
+            input="Basic ping",
+            stream=True,
+        )
+        for event in response:
+            print("litellm response=", json.dumps(event, indent=4, default=str))
+    else:
+        response = await litellm.aresponses(
+            model="gpt-4o",
+            input="Basic ping",
+            stream=True,
+        )
+        async for event in response:
+            print("litellm response=", json.dumps(event, indent=4, default=str))
+
+
+class TestCustomLogger(CustomLogger):
+    def __init__(
+        self,
+    ):
+        self.standard_logging_object: Optional[StandardLoggingPayload] = None
+
+    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
+        print("in async_log_success_event")
+        print("kwargs=", json.dumps(kwargs, indent=4, default=str))
+        self.standard_logging_object = kwargs["standard_logging_object"]
+        pass
+
+
+def validate_standard_logging_payload(
+    slp: StandardLoggingPayload, response: ResponsesAPIResponse, request_model: str
+):
+    """
+    Validate that a StandardLoggingPayload object matches the expected response
+
+    Args:
+        slp (StandardLoggingPayload): The standard logging payload object to validate
+        response (dict): The litellm response to compare against
+        request_model (str): The model name that was requested
+    """
+    # Validate payload exists
+    assert slp is not None, "Standard logging payload should not be None"
+
+    # Validate token counts
+    print("response=", json.dumps(response, indent=4, default=str))
+    assert (
+        slp["prompt_tokens"] == response["usage"]["input_tokens"]
+    ), "Prompt tokens mismatch"
+    assert (
+        slp["completion_tokens"] == response["usage"]["output_tokens"]
+    ), "Completion tokens mismatch"
+    assert (
+        slp["total_tokens"]
+        == response["usage"]["input_tokens"] + response["usage"]["output_tokens"]
+    ), "Total tokens mismatch"
+
+    # Validate spend and response metadata
+    assert slp["response_cost"] > 0, "Response cost should be greater than 0"
+    assert slp["id"] == response["id"], "Response ID mismatch"
+    assert slp["model"] == request_model, "Model name mismatch"
+
+    # Validate messages
+    assert slp["messages"] == [{"content": "hi", "role": "user"}], "Messages mismatch"
+
+    # Validate complete response structure
+    validate_responses_match(slp["response"], response)
+
+
+@pytest.mark.asyncio
+async def test_basic_openai_responses_api_streaming_with_logging():
+    litellm._turn_on_debug()
+    litellm.set_verbose = True
+    test_custom_logger = TestCustomLogger()
+    litellm.callbacks = [test_custom_logger]
+    request_model = "gpt-4o"
+    response = await litellm.aresponses(
+        model=request_model,
+        input="hi",
+        stream=True,
+    )
+    final_response: Optional[ResponseCompletedEvent] = None
+    async for event in response:
+        if event.type == "response.completed":
+            final_response = event
+        print("litellm response=", json.dumps(event, indent=4, default=str))
+
+    print("sleeping for 2 seconds...")
+    await asyncio.sleep(2)
+    print(
+        "standard logging payload=",
+        json.dumps(test_custom_logger.standard_logging_object, indent=4, default=str),
+    )
+
+    assert final_response is not None
+    assert test_custom_logger.standard_logging_object is not None
+
+    validate_standard_logging_payload(
+        slp=test_custom_logger.standard_logging_object,
+        response=final_response.response,
+        request_model=request_model,
+    )
+
+
+def validate_responses_match(slp_response, litellm_response):
+    """Validate that the standard logging payload OpenAI response matches the litellm response"""
+    # Validate core fields
+    assert slp_response["id"] == litellm_response["id"], "ID mismatch"
+    assert slp_response["model"] == litellm_response["model"], "Model mismatch"
+    assert (
+        slp_response["created_at"] == litellm_response["created_at"]
+    ), "Created at mismatch"
+
+    # Validate usage
+    assert (
+        slp_response["usage"]["input_tokens"]
+        == litellm_response["usage"]["input_tokens"]
+    ), "Input tokens mismatch"
+    assert (
+        slp_response["usage"]["output_tokens"]
+        == litellm_response["usage"]["output_tokens"]
+    ), "Output tokens mismatch"
+    assert (
+        slp_response["usage"]["total_tokens"]
+        == litellm_response["usage"]["total_tokens"]
+    ), "Total tokens mismatch"
+
+    # Validate output/messages
+    assert len(slp_response["output"]) == len(
+        litellm_response["output"]
+    ), "Output length mismatch"
+    for slp_msg, litellm_msg in zip(slp_response["output"], litellm_response["output"]):
+        assert slp_msg["role"] == litellm_msg.role, "Message role mismatch"
+        # Access the content's text field for the litellm response
+        litellm_content = litellm_msg.content[0].text if litellm_msg.content else ""
+        assert (
+            slp_msg["content"][0]["text"] == litellm_content
+        ), f"Message content mismatch. Expected {litellm_content}, Got {slp_msg['content']}"
+        assert slp_msg["status"] == litellm_msg.status, "Message status mismatch"
+
+
+@pytest.mark.asyncio
+async def test_basic_openai_responses_api_non_streaming_with_logging():
+    litellm._turn_on_debug()
+    litellm.set_verbose = True
+    test_custom_logger = TestCustomLogger()
+    litellm.callbacks = [test_custom_logger]
+    request_model = "gpt-4o"
+    response = await litellm.aresponses(
+        model=request_model,
+        input="hi",
+    )
+
+    print("litellm response=", json.dumps(response, indent=4, default=str))
+    print("response hidden params=", response._hidden_params)
+
+    print("sleeping for 2 seconds...")
+    await asyncio.sleep(2)
+    print(
+        "standard logging payload=",
+        json.dumps(test_custom_logger.standard_logging_object, indent=4, default=str),
+    )
+
+    assert response is not None
+    assert test_custom_logger.standard_logging_object is not None
+
+    validate_standard_logging_payload(
+        test_custom_logger.standard_logging_object, response, request_model
+    )
+
+
+def validate_stream_event(event):
+    """
+    Validate that a streaming event from litellm.responses() or litellm.aresponses()
+    with stream=True conforms to the expected structure based on its event type.
+
+    Args:
+        event: The streaming event object to validate
+
+    Raises:
+        AssertionError: If the event doesn't match the expected structure for its type
+    """
+    # Common validation for all event types
+    assert hasattr(event, "type"), "Event should have a 'type' attribute"
+
+    # Type-specific validation
+    if event.type == "response.created" or event.type == "response.in_progress":
+        assert hasattr(
+            event, "response"
+        ), f"{event.type} event should have a 'response' attribute"
+        validate_responses_api_response(event.response, final_chunk=False)
+
+    elif event.type == "response.completed":
+        assert hasattr(
+            event, "response"
+        ), "response.completed event should have a 'response' attribute"
+        validate_responses_api_response(event.response, final_chunk=True)
+        # Usage is guaranteed only on the completed event
+        assert (
+            "usage" in event.response
+        ), "response.completed event should have usage information"
+        print("Usage in event.response=", event.response["usage"])
+        assert isinstance(event.response["usage"], ResponseAPIUsage)
+    elif event.type == "response.failed" or event.type == "response.incomplete":
+        assert hasattr(
+            event, "response"
+        ), f"{event.type} event should have a 'response' attribute"
+
+    elif (
+        event.type == "response.output_item.added"
+        or event.type == "response.output_item.done"
+    ):
+        assert hasattr(
+            event, "output_index"
+        ), f"{event.type} event should have an 'output_index' attribute"
+        assert hasattr(
+            event, "item"
+        ), f"{event.type} event should have an 'item' attribute"
+
+    elif (
+        event.type == "response.content_part.added"
+        or event.type == "response.content_part.done"
+    ):
+        assert hasattr(
+            event, "item_id"
+        ), f"{event.type} event should have an 'item_id' attribute"
+        assert hasattr(
+            event, "output_index"
+        ), f"{event.type} event should have an 'output_index' attribute"
+        assert hasattr(
+            event, "content_index"
+        ), f"{event.type} event should have a 'content_index' attribute"
+        assert hasattr(
+            event, "part"
+        ), f"{event.type} event should have a 'part' attribute"
+
+    elif event.type == "response.output_text.delta":
+        assert hasattr(
+            event, "item_id"
+        ), f"{event.type} event should have an 'item_id' attribute"
+        assert hasattr(
+            event, "output_index"
+        ), f"{event.type} event should have an 'output_index' attribute"
+        assert hasattr(
+            event, "content_index"
+        ), f"{event.type} event should have a 'content_index' attribute"
+        assert hasattr(
+            event, "delta"
+        ), f"{event.type} event should have a 'delta' attribute"
+
+    elif event.type == "response.output_text.annotation.added":
+        assert hasattr(
+            event, "item_id"
+        ), f"{event.type} event should have an 'item_id' attribute"
+        assert hasattr(
+            event, "output_index"
+        ), f"{event.type} event should have an 'output_index' attribute"
+        assert hasattr(
+            event, "content_index"
+        ), f"{event.type} event should have a 'content_index' attribute"
+        assert hasattr(
+            event, "annotation_index"
+        ), f"{event.type} event should have an 'annotation_index' attribute"
+        assert hasattr(
+            event, "annotation"
+        ), f"{event.type} event should have an 'annotation' attribute"
+
+    elif event.type == "response.output_text.done":
+        assert hasattr(
+            event, "item_id"
+        ), f"{event.type} event should have an 'item_id' attribute"
+        assert hasattr(
+            event, "output_index"
+        ), f"{event.type} event should have an 'output_index' attribute"
+        assert hasattr(
+            event, "content_index"
+        ), f"{event.type} event should have a 'content_index' attribute"
+        assert hasattr(
+            event, "text"
+        ), f"{event.type} event should have a 'text' attribute"
+
+    elif event.type == "response.refusal.delta":
+        assert hasattr(
+            event, "item_id"
+        ), f"{event.type} event should have an 'item_id' attribute"
+        assert hasattr(
+            event, "output_index"
+        ), f"{event.type} event should have an 'output_index' attribute"
+        assert hasattr(
+            event, "content_index"
+        ), f"{event.type} event should have a 'content_index' attribute"
+        assert hasattr(
+            event, "delta"
+        ), f"{event.type} event should have a 'delta' attribute"
+
+    elif event.type == "response.refusal.done":
+        assert hasattr(
+            event, "item_id"
+        ), f"{event.type} event should have an 'item_id' attribute"
+        assert hasattr(
+            event, "output_index"
+        ), f"{event.type} event should have an 'output_index' attribute"
+        assert hasattr(
+            event, "content_index"
+        ), f"{event.type} event should have a 'content_index' attribute"
+        assert hasattr(
+            event, "refusal"
+        ), f"{event.type} event should have a 'refusal' attribute"
+
+    elif event.type == "response.function_call_arguments.delta":
+        assert hasattr(
+            event, "item_id"
+        ), f"{event.type} event should have an 'item_id' attribute"
+        assert hasattr(
+            event, "output_index"
+        ), f"{event.type} event should have an 'output_index' attribute"
+        assert hasattr(
+            event, "delta"
+        ), f"{event.type} event should have a 'delta' attribute"
+
+    elif event.type == "response.function_call_arguments.done":
+        assert hasattr(
+            event, "item_id"
+        ), f"{event.type} event should have an 'item_id' attribute"
+        assert hasattr(
+            event, "output_index"
+        ), f"{event.type} event should have an 'output_index' attribute"
+        assert hasattr(
+            event, "arguments"
+        ), f"{event.type} event should have an 'arguments' attribute"
+
+    elif event.type in [
+        "response.file_search_call.in_progress",
+        "response.file_search_call.searching",
+        "response.file_search_call.completed",
+        "response.web_search_call.in_progress",
+        "response.web_search_call.searching",
+        "response.web_search_call.completed",
+    ]:
+        assert hasattr(
+            event, "output_index"
+        ), f"{event.type} event should have an 'output_index' attribute"
+        assert hasattr(
+            event, "item_id"
+        ), f"{event.type} event should have an 'item_id' attribute"
+
+    elif event.type == "error":
+        assert hasattr(
+            event, "message"
+        ), "Error event should have a 'message' attribute"
+    return True  # Return True if validation passes
+
+
+@pytest.mark.parametrize("sync_mode", [True, False])
+@pytest.mark.asyncio
+async def test_openai_responses_api_streaming_validation(sync_mode):
+    """Test that validates each streaming event from the responses API"""
+    litellm._turn_on_debug()
+
+    event_types_seen = set()
+
+    if sync_mode:
+        response = litellm.responses(
+            model="gpt-4o",
+            input="Tell me about artificial intelligence in 3 sentences.",
+            stream=True,
+        )
+        for event in response:
+            print(f"Validating event type: {event.type}")
+            validate_stream_event(event)
+            event_types_seen.add(event.type)
+    else:
+        response = await litellm.aresponses(
+            model="gpt-4o",
+            input="Tell me about artificial intelligence in 3 sentences.",
+            stream=True,
+        )
+        async for event in response:
+            print(f"Validating event type: {event.type}")
+            validate_stream_event(event)
+            event_types_seen.add(event.type)
+
+    # At minimum, we should see these core event types
+    required_events = {"response.created", "response.completed"}
+
+    missing_events = required_events - event_types_seen
+    assert not missing_events, f"Missing required event types: {missing_events}"
+
+    print(f"Successfully validated all event types: {event_types_seen}")
+
+
+@pytest.mark.parametrize("sync_mode", [True, False])
+@pytest.mark.asyncio
+async def test_openai_responses_litellm_router(sync_mode):
+    """
+    Test the OpenAI responses API with LiteLLM Router in both sync and async modes
+    """
+    litellm._turn_on_debug()
+    router = litellm.Router(
+        model_list=[
+            {
+                "model_name": "gpt4o-special-alias",
+                "litellm_params": {
+                    "model": "gpt-4o",
+                    "api_key": os.getenv("OPENAI_API_KEY"),
+                },
+            }
+        ]
+    )
+
+    # Call the handler
+    if sync_mode:
+        response = router.responses(
+            model="gpt4o-special-alias",
+            input="Hello, can you tell me a short joke?",
+            max_output_tokens=100,
+        )
+        print("SYNC MODE RESPONSE=", response)
+    else:
+        response = await router.aresponses(
+            model="gpt4o-special-alias",
+            input="Hello, can you tell me a short joke?",
+            max_output_tokens=100,
+        )
+
+    print(
+        f"Router {'sync' if sync_mode else 'async'} response=",
+        json.dumps(response, indent=4, default=str),
+    )
+
+    # Use the helper function to validate the response
+    validate_responses_api_response(response, final_chunk=True)
+
+    return response
+
+
+@pytest.mark.parametrize("sync_mode", [True, False])
+@pytest.mark.asyncio
+async def test_openai_responses_litellm_router_streaming(sync_mode):
+    """
+    Test the OpenAI responses API with streaming through LiteLLM Router
+    """
+    litellm._turn_on_debug()
+    router = litellm.Router(
+        model_list=[
+            {
+                "model_name": "gpt4o-special-alias",
+                "litellm_params": {
+                    "model": "gpt-4o",
+                    "api_key": os.getenv("OPENAI_API_KEY"),
+                },
+            }
+        ]
+    )
+
+    event_types_seen = set()
+
+    if sync_mode:
+        response = router.responses(
+            model="gpt4o-special-alias",
+            input="Tell me about artificial intelligence in 2 sentences.",
+            stream=True,
+        )
+        for event in response:
+            print(f"Validating event type: {event.type}")
+            validate_stream_event(event)
+            event_types_seen.add(event.type)
+    else:
+        response = await router.aresponses(
+            model="gpt4o-special-alias",
+            input="Tell me about artificial intelligence in 2 sentences.",
+            stream=True,
+        )
+        async for event in response:
+            print(f"Validating event type: {event.type}")
+            validate_stream_event(event)
+            event_types_seen.add(event.type)
+
+    # At minimum, we should see these core event types
+    required_events = {"response.created", "response.completed"}
+
+    missing_events = required_events - event_types_seen
+    assert not missing_events, f"Missing required event types: {missing_events}"
+
+    print(f"Successfully validated all event types: {event_types_seen}")
+
+
+@pytest.mark.asyncio
+async def test_openai_responses_litellm_router_no_metadata():
+    """
+    Test that metadata is not passed through when using the Router for responses API
+    """
+    mock_response = {
+        "id": "resp_123",
+        "object": "response",
+        "created_at": 1741476542,
+        "status": "completed",
+        "model": "gpt-4o",
+        "output": [
+            {
+                "type": "message",
+                "id": "msg_123",
+                "status": "completed",
+                "role": "assistant",
+                "content": [
+                    {"type": "output_text", "text": "Hello world!", "annotations": []}
+                ],
+            }
+        ],
+        "parallel_tool_calls": True,
+        "usage": {
+            "input_tokens": 10,
+            "output_tokens": 20,
+            "total_tokens": 30,
+            "output_tokens_details": {"reasoning_tokens": 0},
+        },
+        "text": {"format": {"type": "text"}},
+        # Adding all required fields
+        "error": None,
+        "incomplete_details": None,
+        "instructions": None,
+        "metadata": {},
+        "temperature": 1.0,
+        "tool_choice": "auto",
+        "tools": [],
+        "top_p": 1.0,
+        "max_output_tokens": None,
+        "previous_response_id": None,
+        "reasoning": {"effort": None, "summary": None},
+        "truncation": "disabled",
+        "user": None,
+    }
+
+    class MockResponse:
+        def __init__(self, json_data, status_code):
+            self._json_data = json_data
+            self.status_code = status_code
+            self.text = str(json_data)
+
+        def json(self):  # Changed from async to sync
+            return self._json_data
+
+    with patch(
+        "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post",
+        new_callable=AsyncMock,
+    ) as mock_post:
+        # Configure the mock to return our response
+        mock_post.return_value = MockResponse(mock_response, 200)
+
+        litellm._turn_on_debug()
+        router = litellm.Router(
+            model_list=[
+                {
+                    "model_name": "gpt4o-special-alias",
+                    "litellm_params": {
+                        "model": "gpt-4o",
+                        "api_key": "fake-key",
+                    },
+                }
+            ]
+        )
+
+        # Call the handler with metadata
+        await router.aresponses(
+            model="gpt4o-special-alias",
+            input="Hello, can you tell me a short joke?",
+        )
+
+        # Check the request body
+        request_body = mock_post.call_args.kwargs["data"]
+        print("Request body:", json.dumps(request_body, indent=4))
+
+        loaded_request_body = json.loads(request_body)
+        print("Loaded request body:", json.dumps(loaded_request_body, indent=4))
+
+        # Assert metadata is not in the request
+        assert (
+            loaded_request_body["metadata"] == None
+        ), "metadata should not be in the request body"
+        mock_post.assert_called_once()
+
+
+@pytest.mark.asyncio
+async def test_openai_responses_litellm_router_with_metadata():
+    """
+    Test that metadata is correctly passed through when explicitly provided to the Router for responses API
+    """
+    test_metadata = {
+        "user_id": "123",
+        "conversation_id": "abc",
+        "custom_field": "test_value",
+    }
+
+    mock_response = {
+        "id": "resp_123",
+        "object": "response",
+        "created_at": 1741476542,
+        "status": "completed",
+        "model": "gpt-4o",
+        "output": [
+            {
+                "type": "message",
+                "id": "msg_123",
+                "status": "completed",
+                "role": "assistant",
+                "content": [
+                    {"type": "output_text", "text": "Hello world!", "annotations": []}
+                ],
+            }
+        ],
+        "parallel_tool_calls": True,
+        "usage": {
+            "input_tokens": 10,
+            "output_tokens": 20,
+            "total_tokens": 30,
+            "output_tokens_details": {"reasoning_tokens": 0},
+        },
+        "text": {"format": {"type": "text"}},
+        "error": None,
+        "incomplete_details": None,
+        "instructions": None,
+        "metadata": test_metadata,  # Include the test metadata in response
+        "temperature": 1.0,
+        "tool_choice": "auto",
+        "tools": [],
+        "top_p": 1.0,
+        "max_output_tokens": None,
+        "previous_response_id": None,
+        "reasoning": {"effort": None, "summary": None},
+        "truncation": "disabled",
+        "user": None,
+    }
+
+    class MockResponse:
+        def __init__(self, json_data, status_code):
+            self._json_data = json_data
+            self.status_code = status_code
+            self.text = str(json_data)
+
+        def json(self):
+            return self._json_data
+
+    with patch(
+        "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post",
+        new_callable=AsyncMock,
+    ) as mock_post:
+        # Configure the mock to return our response
+        mock_post.return_value = MockResponse(mock_response, 200)
+
+        litellm._turn_on_debug()
+        router = litellm.Router(
+            model_list=[
+                {
+                    "model_name": "gpt4o-special-alias",
+                    "litellm_params": {
+                        "model": "gpt-4o",
+                        "api_key": "fake-key",
+                    },
+                }
+            ]
+        )
+
+        # Call the handler with metadata
+        await router.aresponses(
+            model="gpt4o-special-alias",
+            input="Hello, can you tell me a short joke?",
+            metadata=test_metadata,
+        )
+
+        # Check the request body
+        request_body = mock_post.call_args.kwargs["data"]
+        loaded_request_body = json.loads(request_body)
+        print("Request body:", json.dumps(loaded_request_body, indent=4))
+
+        # Assert metadata matches exactly what was passed
+        assert (
+            loaded_request_body["metadata"] == test_metadata
+        ), "metadata in request body should match what was passed"
+        mock_post.assert_called_once()
diff --git a/tests/llm_translation/base_llm_unit_tests.py b/tests/llm_translation/base_llm_unit_tests.py
index f91ef0eae9..32f631daad 100644
--- a/tests/llm_translation/base_llm_unit_tests.py
+++ b/tests/llm_translation/base_llm_unit_tests.py
@@ -868,10 +868,13 @@ class BaseLLMChatTest(ABC):
         except Exception as e:
             pytest.fail(f"Error occurred: {e}")
 
+    @pytest.mark.flaky(retries=3, delay=1)
     @pytest.mark.asyncio
     async def test_completion_cost(self):
         from litellm import completion_cost
 
+        litellm._turn_on_debug()
+
         os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
         litellm.model_cost = litellm.get_model_cost_map(url="")
 
diff --git a/tests/llm_translation/test_azure_openai.py b/tests/llm_translation/test_azure_openai.py
index d4715b8906..d289c892a0 100644
--- a/tests/llm_translation/test_azure_openai.py
+++ b/tests/llm_translation/test_azure_openai.py
@@ -522,7 +522,7 @@ async def test_async_azure_max_retries_0(
 @pytest.mark.parametrize("max_retries", [0, 4])
 @pytest.mark.parametrize("stream", [True, False])
 @pytest.mark.parametrize("sync_mode", [True, False])
-@patch("litellm.llms.azure.completion.handler.select_azure_base_url_or_endpoint")
+@patch("litellm.llms.azure.common_utils.select_azure_base_url_or_endpoint")
 @pytest.mark.asyncio
 async def test_azure_instruct(
     mock_select_azure_base_url_or_endpoint, max_retries, stream, sync_mode
@@ -556,12 +556,11 @@ async def test_azure_instruct(
 
 
 @pytest.mark.parametrize("max_retries", [0, 4])
-@pytest.mark.parametrize("stream", [True, False])
 @pytest.mark.parametrize("sync_mode", [True, False])
-@patch("litellm.llms.azure.azure.select_azure_base_url_or_endpoint")
+@patch("litellm.llms.azure.common_utils.select_azure_base_url_or_endpoint")
 @pytest.mark.asyncio
 async def test_azure_embedding_max_retries_0(
-    mock_select_azure_base_url_or_endpoint, max_retries, stream, sync_mode
+    mock_select_azure_base_url_or_endpoint, max_retries, sync_mode
 ):
     from litellm import aembedding, embedding
 
@@ -569,7 +568,6 @@ async def test_azure_embedding_max_retries_0(
         "model": "azure/azure-embedding-model",
         "input": "Hello world",
         "max_retries": max_retries,
-        "stream": stream,
     }
 
     try:
@@ -581,6 +579,10 @@ async def test_azure_embedding_max_retries_0(
         print(e)
 
     mock_select_azure_base_url_or_endpoint.assert_called_once()
+    print(
+        "mock_select_azure_base_url_or_endpoint.call_args.kwargs",
+        mock_select_azure_base_url_or_endpoint.call_args.kwargs,
+    )
     assert (
         mock_select_azure_base_url_or_endpoint.call_args.kwargs["azure_client_params"][
             "max_retries"
diff --git a/tests/local_testing/test_completion.py b/tests/local_testing/test_completion.py
index a0a6af281d..5fe4984c17 100644
--- a/tests/local_testing/test_completion.py
+++ b/tests/local_testing/test_completion.py
@@ -2933,13 +2933,19 @@ def test_completion_azure():
 # test_completion_azure()
 
 
+@pytest.mark.skip(
+    reason="this is bad test. It doesn't actually fail if the token is not set in the header. "
+)
 def test_azure_openai_ad_token():
+    import time
+
     # this tests if the azure ad token is set in the request header
     # the request can fail since azure ad tokens expire after 30 mins, but the header MUST have the azure ad token
     # we use litellm.input_callbacks for this test
     def tester(
         kwargs,  # kwargs to completion
     ):
+        print("inside kwargs")
         print(kwargs["additional_args"])
         if kwargs["additional_args"]["headers"]["Authorization"] != "Bearer gm":
             pytest.fail("AZURE AD TOKEN Passed but not set in request header")
@@ -2962,7 +2968,9 @@ def test_azure_openai_ad_token():
         litellm.input_callback = []
     except Exception as e:
         litellm.input_callback = []
-        pytest.fail(f"An exception occurs - {str(e)}")
+        pass
+
+    time.sleep(1)
 
 
 # test_azure_openai_ad_token()
diff --git a/tests/local_testing/test_completion_cost.py b/tests/local_testing/test_completion_cost.py
index 33fc6cfd3a..d4efade9e3 100644
--- a/tests/local_testing/test_completion_cost.py
+++ b/tests/local_testing/test_completion_cost.py
@@ -2769,6 +2769,7 @@ def test_add_known_models():
     )
 
 
+@pytest.mark.skip(reason="flaky test")
 def test_bedrock_cost_calc_with_region():
     from litellm import completion
 
diff --git a/tests/local_testing/test_pass_through_endpoints.py b/tests/local_testing/test_pass_through_endpoints.py
index 0215e295be..ae9644afb8 100644
--- a/tests/local_testing/test_pass_through_endpoints.py
+++ b/tests/local_testing/test_pass_through_endpoints.py
@@ -329,3 +329,71 @@ async def test_aaapass_through_endpoint_pass_through_keys_langfuse(
         setattr(
             litellm.proxy.proxy_server, "proxy_logging_obj", original_proxy_logging_obj
         )
+
+@pytest.mark.asyncio
+async def test_pass_through_endpoint_bing(client, monkeypatch):
+    import litellm
+
+    captured_requests = []
+
+    async def mock_bing_request(*args, **kwargs):
+
+        captured_requests.append((args, kwargs))
+        mock_response = httpx.Response(
+            200,
+            json={
+                "_type": "SearchResponse",
+                "queryContext": {"originalQuery": "bob barker"},
+                "webPages": {
+                    "webSearchUrl": "https://www.bing.com/search?q=bob+barker",
+                    "totalEstimatedMatches": 12000000,
+                    "value": [],
+                },
+            },
+        )
+        mock_response.request = Mock(spec=httpx.Request)
+        return mock_response
+
+    monkeypatch.setattr("httpx.AsyncClient.request", mock_bing_request)
+
+    # Define a pass-through endpoint
+    pass_through_endpoints = [
+        {
+            "path": "/bing/search",
+            "target": "https://api.bing.microsoft.com/v7.0/search?setLang=en-US&mkt=en-US",
+            "headers": {"Ocp-Apim-Subscription-Key": "XX"},
+            "forward_headers": True,
+            # Additional settings
+            "merge_query_params": True,
+            "auth": True,
+        },
+        {
+            "path": "/bing/search-no-merge-params",
+            "target": "https://api.bing.microsoft.com/v7.0/search?setLang=en-US&mkt=en-US",
+            "headers": {"Ocp-Apim-Subscription-Key": "XX"},
+            "forward_headers": True,
+        },
+    ]
+
+    # Initialize the pass-through endpoint
+    await initialize_pass_through_endpoints(pass_through_endpoints)
+    general_settings: Optional[dict] = (
+        getattr(litellm.proxy.proxy_server, "general_settings", {}) or {}
+    )
+    general_settings.update({"pass_through_endpoints": pass_through_endpoints})
+    setattr(litellm.proxy.proxy_server, "general_settings", general_settings)
+
+    # Make 2 requests thru the pass-through endpoint
+    client.get("/bing/search?q=bob+barker")
+    client.get("/bing/search-no-merge-params?q=bob+barker")
+
+    first_transformed_url = captured_requests[0][1]["url"]
+    second_transformed_url = captured_requests[1][1]["url"]
+
+    # Assert the response
+    assert (
+        first_transformed_url
+        == "https://api.bing.microsoft.com/v7.0/search?q=bob+barker&setLang=en-US&mkt=en-US"
+        and second_transformed_url
+        == "https://api.bing.microsoft.com/v7.0/search?setLang=en-US&mkt=en-US"
+    )
diff --git a/tests/local_testing/test_router.py b/tests/local_testing/test_router.py
index 4deb589439..20a2f28c95 100644
--- a/tests/local_testing/test_router.py
+++ b/tests/local_testing/test_router.py
@@ -194,6 +194,9 @@ def test_router_specific_model_via_id():
     router.completion(model="1234", messages=[{"role": "user", "content": "Hey!"}])
 
 
+@pytest.mark.skip(
+    reason="Router no longer creates clients, this is delegated to the provider integration."
+)
 def test_router_azure_ai_client_init():
 
     _deployment = {
@@ -219,6 +222,9 @@ def test_router_azure_ai_client_init():
     assert not isinstance(_client, AsyncAzureOpenAI)
 
 
+@pytest.mark.skip(
+    reason="Router no longer creates clients, this is delegated to the provider integration."
+)
 def test_router_azure_ad_token_provider():
     _deployment = {
         "model_name": "gpt-4o_2024-05-13",
@@ -247,8 +253,10 @@ def test_router_azure_ad_token_provider():
         assert isinstance(_client, AsyncAzureOpenAI)
         assert _client._azure_ad_token_provider is not None
         assert isinstance(_client._azure_ad_token_provider.__closure__, tuple)
-        assert isinstance(_client._azure_ad_token_provider.__closure__[0].cell_contents._credential,
-                        getattr(identity, os.environ["AZURE_CREDENTIAL"]))
+        assert isinstance(
+            _client._azure_ad_token_provider.__closure__[0].cell_contents._credential,
+            getattr(identity, os.environ["AZURE_CREDENTIAL"]),
+        )
 
 
 def test_router_sensitive_keys():
@@ -312,91 +320,6 @@ def test_router_order():
         assert response._hidden_params["model_id"] == "1"
 
 
-@pytest.mark.parametrize("num_retries", [None, 2])
-@pytest.mark.parametrize("max_retries", [None, 4])
-def test_router_num_retries_init(num_retries, max_retries):
-    """
-    - test when num_retries set v/s not
-    - test client value when max retries set v/s not
-    """
-    router = Router(
-        model_list=[
-            {
-                "model_name": "gpt-3.5-turbo",  # openai model name
-                "litellm_params": {  # params for litellm completion/embedding call
-                    "model": "azure/chatgpt-v-2",
-                    "api_key": "bad-key",
-                    "api_version": os.getenv("AZURE_API_VERSION"),
-                    "api_base": os.getenv("AZURE_API_BASE"),
-                    "max_retries": max_retries,
-                },
-                "model_info": {"id": 12345},
-            },
-        ],
-        num_retries=num_retries,
-    )
-
-    if num_retries is not None:
-        assert router.num_retries == num_retries
-    else:
-        assert router.num_retries == openai.DEFAULT_MAX_RETRIES
-
-    model_client = router._get_client(
-        {"model_info": {"id": 12345}}, client_type="async", kwargs={}
-    )
-
-    if max_retries is not None:
-        assert getattr(model_client, "max_retries") == max_retries
-    else:
-        assert getattr(model_client, "max_retries") == 0
-
-
-@pytest.mark.parametrize(
-    "timeout", [10, 1.0, httpx.Timeout(timeout=300.0, connect=20.0)]
-)
-@pytest.mark.parametrize("ssl_verify", [True, False])
-def test_router_timeout_init(timeout, ssl_verify):
-    """
-    Allow user to pass httpx.Timeout
-
-    related issue - https://github.com/BerriAI/litellm/issues/3162
-    """
-    litellm.ssl_verify = ssl_verify
-
-    router = Router(
-        model_list=[
-            {
-                "model_name": "test-model",
-                "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
-                    "api_key": os.getenv("AZURE_API_KEY"),
-                    "api_base": os.getenv("AZURE_API_BASE"),
-                    "api_version": os.getenv("AZURE_API_VERSION"),
-                    "timeout": timeout,
-                },
-                "model_info": {"id": 1234},
-            }
-        ]
-    )
-
-    model_client = router._get_client(
-        deployment={"model_info": {"id": 1234}}, client_type="sync_client", kwargs={}
-    )
-
-    assert getattr(model_client, "timeout") == timeout
-
-    print(f"vars model_client: {vars(model_client)}")
-    http_client = getattr(model_client, "_client")
-    print(f"http client: {vars(http_client)}, ssl_Verify={ssl_verify}")
-    if ssl_verify == False:
-        assert http_client._transport._pool._ssl_context.verify_mode.name == "CERT_NONE"
-    else:
-        assert (
-            http_client._transport._pool._ssl_context.verify_mode.name
-            == "CERT_REQUIRED"
-        )
-
-
 @pytest.mark.parametrize("sync_mode", [False, True])
 @pytest.mark.asyncio
 async def test_router_retries(sync_mode):
@@ -445,6 +368,9 @@ async def test_router_retries(sync_mode):
         "https://Mistral-large-nmefg-serverless.eastus2.inference.ai.azure.com",
     ],
 )
+@pytest.mark.skip(
+    reason="Router no longer creates clients, this is delegated to the provider integration."
+)
 def test_router_azure_ai_studio_init(mistral_api_base):
     router = Router(
         model_list=[
@@ -460,16 +386,21 @@ def test_router_azure_ai_studio_init(mistral_api_base):
         ]
     )
 
-    model_client = router._get_client(
-        deployment={"model_info": {"id": 1234}}, client_type="sync_client", kwargs={}
+    # model_client = router._get_client(
+    #     deployment={"model_info": {"id": 1234}}, client_type="sync_client", kwargs={}
+    # )
+    # url = getattr(model_client, "_base_url")
+    # uri_reference = str(getattr(url, "_uri_reference"))
+
+    # print(f"uri_reference: {uri_reference}")
+
+    # assert "/v1/" in uri_reference
+    # assert uri_reference.count("v1") == 1
+    response = router.completion(
+        model="azure/mistral-large-latest",
+        messages=[{"role": "user", "content": "Hey, how's it going?"}],
     )
-    url = getattr(model_client, "_base_url")
-    uri_reference = str(getattr(url, "_uri_reference"))
-
-    print(f"uri_reference: {uri_reference}")
-
-    assert "/v1/" in uri_reference
-    assert uri_reference.count("v1") == 1
+    assert response is not None
 
 
 def test_exception_raising():
diff --git a/tests/local_testing/test_router_client_init.py b/tests/local_testing/test_router_client_init.py
index 0249358e91..1440dfecaa 100644
--- a/tests/local_testing/test_router_client_init.py
+++ b/tests/local_testing/test_router_client_init.py
@@ -137,6 +137,7 @@ def test_router_init_azure_service_principal_with_secret_with_environment_variab
     mocked_os_lib: MagicMock,
     mocked_credential: MagicMock,
     mocked_get_bearer_token_provider: MagicMock,
+    monkeypatch,
 ) -> None:
     """
     Test router initialization and sample completion using Azure Service Principal with Secret authentication workflow,
@@ -145,6 +146,7 @@ def test_router_init_azure_service_principal_with_secret_with_environment_variab
     To allow for local testing without real credentials, first must mock Azure SDK authentication functions
     and environment variables.
     """
+    monkeypatch.delenv("AZURE_API_KEY", raising=False)
     litellm.enable_azure_ad_token_refresh = True
     # mock the token provider function
     mocked_func_generating_token = MagicMock(return_value="test_token")
@@ -182,25 +184,25 @@ def test_router_init_azure_service_principal_with_secret_with_environment_variab
     # initialize the router
     router = Router(model_list=model_list)
 
-    # first check if environment variables were used at all
-    mocked_environ.assert_called()
-    # then check if the client was initialized with the correct environment variables
-    mocked_credential.assert_called_with(
-        **{
-            "client_id": environment_variables_expected_to_use["AZURE_CLIENT_ID"],
-            "client_secret": environment_variables_expected_to_use[
-                "AZURE_CLIENT_SECRET"
-            ],
-            "tenant_id": environment_variables_expected_to_use["AZURE_TENANT_ID"],
-        }
-    )
-    # check if the token provider was called at all
-    mocked_get_bearer_token_provider.assert_called()
-    # then check if the token provider was initialized with the mocked credential
-    for call_args in mocked_get_bearer_token_provider.call_args_list:
-        assert call_args.args[0] == mocked_credential.return_value
-    # however, at this point token should not be fetched yet
-    mocked_func_generating_token.assert_not_called()
+    # # first check if environment variables were used at all
+    # mocked_environ.assert_called()
+    # # then check if the client was initialized with the correct environment variables
+    # mocked_credential.assert_called_with(
+    #     **{
+    #         "client_id": environment_variables_expected_to_use["AZURE_CLIENT_ID"],
+    #         "client_secret": environment_variables_expected_to_use[
+    #             "AZURE_CLIENT_SECRET"
+    #         ],
+    #         "tenant_id": environment_variables_expected_to_use["AZURE_TENANT_ID"],
+    #     }
+    # )
+    # # check if the token provider was called at all
+    # mocked_get_bearer_token_provider.assert_called()
+    # # then check if the token provider was initialized with the mocked credential
+    # for call_args in mocked_get_bearer_token_provider.call_args_list:
+    #     assert call_args.args[0] == mocked_credential.return_value
+    # # however, at this point token should not be fetched yet
+    # mocked_func_generating_token.assert_not_called()
 
     # now let's try to make a completion call
     deployment = model_list[0]
diff --git a/tests/local_testing/test_router_init.py b/tests/local_testing/test_router_init.py
index 4fce5cbfcc..00b2daa764 100644
--- a/tests/local_testing/test_router_init.py
+++ b/tests/local_testing/test_router_init.py
@@ -1,704 +1,704 @@
-# this tests if the router is initialized correctly
-import asyncio
-import os
-import sys
-import time
-import traceback
-
-import pytest
-
-sys.path.insert(
-    0, os.path.abspath("../..")
-)  # Adds the parent directory to the system path
-from collections import defaultdict
-from concurrent.futures import ThreadPoolExecutor
-
-from dotenv import load_dotenv
-
-import litellm
-from litellm import Router
-
-load_dotenv()
-
-# every time we load the router we should have 4 clients:
-# Async
-# Sync
-# Async + Stream
-# Sync + Stream
-
-
-def test_init_clients():
-    litellm.set_verbose = True
-    import logging
-
-    from litellm._logging import verbose_router_logger
-
-    verbose_router_logger.setLevel(logging.DEBUG)
-    try:
-        print("testing init 4 clients with diff timeouts")
-        model_list = [
-            {
-                "model_name": "gpt-3.5-turbo",
-                "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
-                    "api_key": os.getenv("AZURE_API_KEY"),
-                    "api_version": os.getenv("AZURE_API_VERSION"),
-                    "api_base": os.getenv("AZURE_API_BASE"),
-                    "timeout": 0.01,
-                    "stream_timeout": 0.000_001,
-                    "max_retries": 7,
-                },
-            },
-        ]
-        router = Router(model_list=model_list, set_verbose=True)
-        for elem in router.model_list:
-            model_id = elem["model_info"]["id"]
-            assert router.cache.get_cache(f"{model_id}_client") is not None
-            assert router.cache.get_cache(f"{model_id}_async_client") is not None
-            assert router.cache.get_cache(f"{model_id}_stream_client") is not None
-            assert router.cache.get_cache(f"{model_id}_stream_async_client") is not None
-
-            # check if timeout for stream/non stream clients is set correctly
-            async_client = router.cache.get_cache(f"{model_id}_async_client")
-            stream_async_client = router.cache.get_cache(
-                f"{model_id}_stream_async_client"
-            )
-
-            assert async_client.timeout == 0.01
-            assert stream_async_client.timeout == 0.000_001
-            print(vars(async_client))
-            print()
-            print(async_client._base_url)
-            assert (
-                async_client._base_url
-                == "https://openai-gpt-4-test-v-1.openai.azure.com/openai/"
-            )
-            assert (
-                stream_async_client._base_url
-                == "https://openai-gpt-4-test-v-1.openai.azure.com/openai/"
-            )
-
-        print("PASSED !")
-
-    except Exception as e:
-        traceback.print_exc()
-        pytest.fail(f"Error occurred: {e}")
-
-
-# test_init_clients()
-
-
-def test_init_clients_basic():
-    litellm.set_verbose = True
-    try:
-        print("Test basic client init")
-        model_list = [
-            {
-                "model_name": "gpt-3.5-turbo",
-                "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
-                    "api_key": os.getenv("AZURE_API_KEY"),
-                    "api_version": os.getenv("AZURE_API_VERSION"),
-                    "api_base": os.getenv("AZURE_API_BASE"),
-                },
-            },
-        ]
-        router = Router(model_list=model_list)
-        for elem in router.model_list:
-            model_id = elem["model_info"]["id"]
-            assert router.cache.get_cache(f"{model_id}_client") is not None
-            assert router.cache.get_cache(f"{model_id}_async_client") is not None
-            assert router.cache.get_cache(f"{model_id}_stream_client") is not None
-            assert router.cache.get_cache(f"{model_id}_stream_async_client") is not None
-        print("PASSED !")
-
-        # see if we can init clients without timeout or max retries set
-    except Exception as e:
-        traceback.print_exc()
-        pytest.fail(f"Error occurred: {e}")
-
-
-# test_init_clients_basic()
-
-
-def test_init_clients_basic_azure_cloudflare():
-    # init azure + cloudflare
-    # init OpenAI gpt-3.5
-    # init OpenAI text-embedding
-    # init OpenAI comptaible - Mistral/mistral-medium
-    # init OpenAI compatible - xinference/bge
-    litellm.set_verbose = True
-    try:
-        print("Test basic client init")
-        model_list = [
-            {
-                "model_name": "azure-cloudflare",
-                "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
-                    "api_key": os.getenv("AZURE_API_KEY"),
-                    "api_version": os.getenv("AZURE_API_VERSION"),
-                    "api_base": "https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1",
-                },
-            },
-            {
-                "model_name": "gpt-openai",
-                "litellm_params": {
-                    "model": "gpt-3.5-turbo",
-                    "api_key": os.getenv("OPENAI_API_KEY"),
-                },
-            },
-            {
-                "model_name": "text-embedding-ada-002",
-                "litellm_params": {
-                    "model": "text-embedding-ada-002",
-                    "api_key": os.getenv("OPENAI_API_KEY"),
-                },
-            },
-            {
-                "model_name": "mistral",
-                "litellm_params": {
-                    "model": "mistral/mistral-tiny",
-                    "api_key": os.getenv("MISTRAL_API_KEY"),
-                },
-            },
-            {
-                "model_name": "bge-base-en",
-                "litellm_params": {
-                    "model": "xinference/bge-base-en",
-                    "api_base": "http://127.0.0.1:9997/v1",
-                    "api_key": os.getenv("OPENAI_API_KEY"),
-                },
-            },
-        ]
-        router = Router(model_list=model_list)
-        for elem in router.model_list:
-            model_id = elem["model_info"]["id"]
-            assert router.cache.get_cache(f"{model_id}_client") is not None
-            assert router.cache.get_cache(f"{model_id}_async_client") is not None
-            assert router.cache.get_cache(f"{model_id}_stream_client") is not None
-            assert router.cache.get_cache(f"{model_id}_stream_async_client") is not None
-        print("PASSED !")
-
-        # see if we can init clients without timeout or max retries set
-    except Exception as e:
-        traceback.print_exc()
-        pytest.fail(f"Error occurred: {e}")
-
-
-# test_init_clients_basic_azure_cloudflare()
-
-
-def test_timeouts_router():
-    """
-    Test the timeouts of the router with multiple clients. This HASas to raise a timeout error
-    """
-    import openai
-
-    litellm.set_verbose = True
-    try:
-        print("testing init 4 clients with diff timeouts")
-        model_list = [
-            {
-                "model_name": "gpt-3.5-turbo",
-                "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
-                    "api_key": os.getenv("AZURE_API_KEY"),
-                    "api_version": os.getenv("AZURE_API_VERSION"),
-                    "api_base": os.getenv("AZURE_API_BASE"),
-                    "timeout": 0.000001,
-                    "stream_timeout": 0.000_001,
-                },
-            },
-        ]
-        router = Router(model_list=model_list, num_retries=0)
-
-        print("PASSED !")
-
-        async def test():
-            try:
-                await router.acompletion(
-                    model="gpt-3.5-turbo",
-                    messages=[
-                        {"role": "user", "content": "hello, write a 20 pg essay"}
-                    ],
-                )
-            except Exception as e:
-                raise e
-
-        asyncio.run(test())
-    except openai.APITimeoutError as e:
-        print(
-            "Passed: Raised correct exception. Got openai.APITimeoutError\nGood Job", e
-        )
-        print(type(e))
-        pass
-    except Exception as e:
-        pytest.fail(
-            f"Did not raise error `openai.APITimeoutError`. Instead raised error type: {type(e)}, Error: {e}"
-        )
-
-
-# test_timeouts_router()
-
-
-def test_stream_timeouts_router():
-    """
-    Test the stream timeouts router. See if it selected the correct client with stream timeout
-    """
-    import openai
-
-    litellm.set_verbose = True
-    try:
-        print("testing init 4 clients with diff timeouts")
-        model_list = [
-            {
-                "model_name": "gpt-3.5-turbo",
-                "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
-                    "api_key": os.getenv("AZURE_API_KEY"),
-                    "api_version": os.getenv("AZURE_API_VERSION"),
-                    "api_base": os.getenv("AZURE_API_BASE"),
-                    "timeout": 200,  # regular calls will not timeout, stream calls will
-                    "stream_timeout": 10,
-                },
-            },
-        ]
-        router = Router(model_list=model_list)
-
-        print("PASSED !")
-        data = {
-            "model": "gpt-3.5-turbo",
-            "messages": [{"role": "user", "content": "hello, write a 20 pg essay"}],
-            "stream": True,
-        }
-        selected_client = router._get_client(
-            deployment=router.model_list[0],
-            kwargs=data,
-            client_type=None,
-        )
-        print("Select client timeout", selected_client.timeout)
-        assert selected_client.timeout == 10
-
-        # make actual call
-        response = router.completion(**data)
-
-        for chunk in response:
-            print(f"chunk: {chunk}")
-    except openai.APITimeoutError as e:
-        print(
-            "Passed: Raised correct exception. Got openai.APITimeoutError\nGood Job", e
-        )
-        print(type(e))
-        pass
-    except Exception as e:
-        pytest.fail(
-            f"Did not raise error `openai.APITimeoutError`. Instead raised error type: {type(e)}, Error: {e}"
-        )
-
-
-# test_stream_timeouts_router()
-
-
-def test_xinference_embedding():
-    # [Test Init Xinference] this tests if we init xinference on the router correctly
-    # [Test Exception Mapping] tests that xinference is an openai comptiable provider
-    print("Testing init xinference")
-    print(
-        "this tests if we create an OpenAI client for Xinference, with the correct API BASE"
-    )
-
-    model_list = [
-        {
-            "model_name": "xinference",
-            "litellm_params": {
-                "model": "xinference/bge-base-en",
-                "api_base": "os.environ/XINFERENCE_API_BASE",
-            },
-        }
-    ]
-
-    router = Router(model_list=model_list)
-
-    print(router.model_list)
-    print(router.model_list[0])
-
-    assert (
-        router.model_list[0]["litellm_params"]["api_base"] == "http://0.0.0.0:9997"
-    )  # set in env
-
-    openai_client = router._get_client(
-        deployment=router.model_list[0],
-        kwargs={"input": ["hello"], "model": "xinference"},
-    )
-
-    assert openai_client._base_url == "http://0.0.0.0:9997"
-    assert "xinference" in litellm.openai_compatible_providers
-    print("passed")
-
-
-# test_xinference_embedding()
-
-
-def test_router_init_gpt_4_vision_enhancements():
-    try:
-        # tests base_url set when any base_url with /openai/deployments passed to router
-        print("Testing Azure GPT_Vision enhancements")
-
-        model_list = [
-            {
-                "model_name": "gpt-4-vision-enhancements",
-                "litellm_params": {
-                    "model": "azure/gpt-4-vision",
-                    "api_key": os.getenv("AZURE_API_KEY"),
-                    "base_url": "https://gpt-4-vision-resource.openai.azure.com/openai/deployments/gpt-4-vision/extensions/",
-                    "dataSources": [
-                        {
-                            "type": "AzureComputerVision",
-                            "parameters": {
-                                "endpoint": "os.environ/AZURE_VISION_ENHANCE_ENDPOINT",
-                                "key": "os.environ/AZURE_VISION_ENHANCE_KEY",
-                            },
-                        }
-                    ],
-                },
-            }
-        ]
-
-        router = Router(model_list=model_list)
-
-        print(router.model_list)
-        print(router.model_list[0])
-
-        assert (
-            router.model_list[0]["litellm_params"]["base_url"]
-            == "https://gpt-4-vision-resource.openai.azure.com/openai/deployments/gpt-4-vision/extensions/"
-        )  # set in env
-
-        assert (
-            router.model_list[0]["litellm_params"]["dataSources"][0]["parameters"][
-                "endpoint"
-            ]
-            == os.environ["AZURE_VISION_ENHANCE_ENDPOINT"]
-        )
-
-        assert (
-            router.model_list[0]["litellm_params"]["dataSources"][0]["parameters"][
-                "key"
-            ]
-            == os.environ["AZURE_VISION_ENHANCE_KEY"]
-        )
-
-        azure_client = router._get_client(
-            deployment=router.model_list[0],
-            kwargs={"stream": True, "model": "gpt-4-vision-enhancements"},
-            client_type="async",
-        )
-
-        assert (
-            azure_client._base_url
-            == "https://gpt-4-vision-resource.openai.azure.com/openai/deployments/gpt-4-vision/extensions/"
-        )
-        print("passed")
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-
-
-@pytest.mark.parametrize("sync_mode", [True, False])
-@pytest.mark.asyncio
-async def test_openai_with_organization(sync_mode):
-    try:
-        print("Testing OpenAI with organization")
-        model_list = [
-            {
-                "model_name": "openai-bad-org",
-                "litellm_params": {
-                    "model": "gpt-3.5-turbo",
-                    "organization": "org-ikDc4ex8NB",
-                },
-            },
-            {
-                "model_name": "openai-good-org",
-                "litellm_params": {"model": "gpt-3.5-turbo"},
-            },
-        ]
-
-        router = Router(model_list=model_list)
-
-        print(router.model_list)
-        print(router.model_list[0])
-
-        if sync_mode:
-            openai_client = router._get_client(
-                deployment=router.model_list[0],
-                kwargs={"input": ["hello"], "model": "openai-bad-org"},
-            )
-            print(vars(openai_client))
-
-            assert openai_client.organization == "org-ikDc4ex8NB"
-
-            # bad org raises error
-
-            try:
-                response = router.completion(
-                    model="openai-bad-org",
-                    messages=[{"role": "user", "content": "this is a test"}],
-                )
-                pytest.fail(
-                    "Request should have failed - This organization does not exist"
-                )
-            except Exception as e:
-                print("Got exception: " + str(e))
-                assert "header should match organization for API key" in str(
-                    e
-                ) or "No such organization" in str(e)
-
-            # good org works
-            response = router.completion(
-                model="openai-good-org",
-                messages=[{"role": "user", "content": "this is a test"}],
-                max_tokens=5,
-            )
-        else:
-            openai_client = router._get_client(
-                deployment=router.model_list[0],
-                kwargs={"input": ["hello"], "model": "openai-bad-org"},
-                client_type="async",
-            )
-            print(vars(openai_client))
-
-            assert openai_client.organization == "org-ikDc4ex8NB"
-
-            # bad org raises error
-
-            try:
-                response = await router.acompletion(
-                    model="openai-bad-org",
-                    messages=[{"role": "user", "content": "this is a test"}],
-                )
-                pytest.fail(
-                    "Request should have failed - This organization does not exist"
-                )
-            except Exception as e:
-                print("Got exception: " + str(e))
-                assert "header should match organization for API key" in str(
-                    e
-                ) or "No such organization" in str(e)
-
-            # good org works
-            response = await router.acompletion(
-                model="openai-good-org",
-                messages=[{"role": "user", "content": "this is a test"}],
-                max_tokens=5,
-            )
-
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-
-
-def test_init_clients_azure_command_r_plus():
-    # This tests that the router uses the OpenAI client for Azure/Command-R+
-    # For azure/command-r-plus we need to use openai.OpenAI because of how the Azure provider requires requests being sent
-    litellm.set_verbose = True
-    import logging
-
-    from litellm._logging import verbose_router_logger
-
-    verbose_router_logger.setLevel(logging.DEBUG)
-    try:
-        print("testing init 4 clients with diff timeouts")
-        model_list = [
-            {
-                "model_name": "gpt-3.5-turbo",
-                "litellm_params": {
-                    "model": "azure/command-r-plus",
-                    "api_key": os.getenv("AZURE_COHERE_API_KEY"),
-                    "api_base": os.getenv("AZURE_COHERE_API_BASE"),
-                    "timeout": 0.01,
-                    "stream_timeout": 0.000_001,
-                    "max_retries": 7,
-                },
-            },
-        ]
-        router = Router(model_list=model_list, set_verbose=True)
-        for elem in router.model_list:
-            model_id = elem["model_info"]["id"]
-            async_client = router.cache.get_cache(f"{model_id}_async_client")
-            stream_async_client = router.cache.get_cache(
-                f"{model_id}_stream_async_client"
-            )
-            # Assert the Async Clients used are OpenAI clients and not Azure
-            # For using Azure/Command-R-Plus and Azure/Mistral the clients NEED to be OpenAI clients used
-            # this is weirdness introduced on Azure's side
-
-            assert "openai.AsyncOpenAI" in str(async_client)
-            assert "openai.AsyncOpenAI" in str(stream_async_client)
-        print("PASSED !")
-
-    except Exception as e:
-        traceback.print_exc()
-        pytest.fail(f"Error occurred: {e}")
-
-
-@pytest.mark.asyncio
-async def test_aaaaatext_completion_with_organization():
-    try:
-        print("Testing Text OpenAI with organization")
-        model_list = [
-            {
-                "model_name": "openai-bad-org",
-                "litellm_params": {
-                    "model": "text-completion-openai/gpt-3.5-turbo-instruct",
-                    "api_key": os.getenv("OPENAI_API_KEY", None),
-                    "organization": "org-ikDc4ex8NB",
-                },
-            },
-            {
-                "model_name": "openai-good-org",
-                "litellm_params": {
-                    "model": "text-completion-openai/gpt-3.5-turbo-instruct",
-                    "api_key": os.getenv("OPENAI_API_KEY", None),
-                    "organization": os.getenv("OPENAI_ORGANIZATION", None),
-                },
-            },
-        ]
-
-        router = Router(model_list=model_list)
-
-        print(router.model_list)
-        print(router.model_list[0])
-
-        openai_client = router._get_client(
-            deployment=router.model_list[0],
-            kwargs={"input": ["hello"], "model": "openai-bad-org"},
-        )
-        print(vars(openai_client))
-
-        assert openai_client.organization == "org-ikDc4ex8NB"
-
-        # bad org raises error
-
-        try:
-            response = await router.atext_completion(
-                model="openai-bad-org",
-                prompt="this is a test",
-            )
-            pytest.fail("Request should have failed - This organization does not exist")
-        except Exception as e:
-            print("Got exception: " + str(e))
-            assert "header should match organization for API key" in str(
-                e
-            ) or "No such organization" in str(e)
-
-        # good org works
-        response = await router.atext_completion(
-            model="openai-good-org",
-            prompt="this is a test",
-            max_tokens=5,
-        )
-        print("working response: ", response)
-
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-
-
-def test_init_clients_async_mode():
-    litellm.set_verbose = True
-    import logging
-
-    from litellm._logging import verbose_router_logger
-    from litellm.types.router import RouterGeneralSettings
-
-    verbose_router_logger.setLevel(logging.DEBUG)
-    try:
-        print("testing init 4 clients with diff timeouts")
-        model_list = [
-            {
-                "model_name": "gpt-3.5-turbo",
-                "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
-                    "api_key": os.getenv("AZURE_API_KEY"),
-                    "api_version": os.getenv("AZURE_API_VERSION"),
-                    "api_base": os.getenv("AZURE_API_BASE"),
-                    "timeout": 0.01,
-                    "stream_timeout": 0.000_001,
-                    "max_retries": 7,
-                },
-            },
-        ]
-        router = Router(
-            model_list=model_list,
-            set_verbose=True,
-            router_general_settings=RouterGeneralSettings(async_only_mode=True),
-        )
-        for elem in router.model_list:
-            model_id = elem["model_info"]["id"]
-
-            # sync clients not initialized in async_only_mode=True
-            assert router.cache.get_cache(f"{model_id}_client") is None
-            assert router.cache.get_cache(f"{model_id}_stream_client") is None
-
-            # only async clients initialized in async_only_mode=True
-            assert router.cache.get_cache(f"{model_id}_async_client") is not None
-            assert router.cache.get_cache(f"{model_id}_stream_async_client") is not None
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-
-
-@pytest.mark.parametrize(
-    "environment,expected_models",
-    [
-        ("development", ["gpt-3.5-turbo"]),
-        ("production", ["gpt-4", "gpt-3.5-turbo", "gpt-4o"]),
-    ],
-)
-def test_init_router_with_supported_environments(environment, expected_models):
-    """
-    Tests that the correct models are setup on router when LITELLM_ENVIRONMENT is set
-    """
-    os.environ["LITELLM_ENVIRONMENT"] = environment
-    model_list = [
-        {
-            "model_name": "gpt-3.5-turbo",
-            "litellm_params": {
-                "model": "azure/chatgpt-v-2",
-                "api_key": os.getenv("AZURE_API_KEY"),
-                "api_version": os.getenv("AZURE_API_VERSION"),
-                "api_base": os.getenv("AZURE_API_BASE"),
-                "timeout": 0.01,
-                "stream_timeout": 0.000_001,
-                "max_retries": 7,
-            },
-            "model_info": {"supported_environments": ["development", "production"]},
-        },
-        {
-            "model_name": "gpt-4",
-            "litellm_params": {
-                "model": "openai/gpt-4",
-                "api_key": os.getenv("OPENAI_API_KEY"),
-                "timeout": 0.01,
-                "stream_timeout": 0.000_001,
-                "max_retries": 7,
-            },
-            "model_info": {"supported_environments": ["production"]},
-        },
-        {
-            "model_name": "gpt-4o",
-            "litellm_params": {
-                "model": "openai/gpt-4o",
-                "api_key": os.getenv("OPENAI_API_KEY"),
-                "timeout": 0.01,
-                "stream_timeout": 0.000_001,
-                "max_retries": 7,
-            },
-            "model_info": {"supported_environments": ["production"]},
-        },
-    ]
-    router = Router(model_list=model_list, set_verbose=True)
-    _model_list = router.get_model_names()
-
-    print("model_list: ", _model_list)
-    print("expected_models: ", expected_models)
-
-    assert set(_model_list) == set(expected_models)
-
-    os.environ.pop("LITELLM_ENVIRONMENT")
+# # this tests if the router is initialized correctly
+# import asyncio
+# import os
+# import sys
+# import time
+# import traceback
+
+# import pytest
+
+# sys.path.insert(
+#     0, os.path.abspath("../..")
+# )  # Adds the parent directory to the system path
+# from collections import defaultdict
+# from concurrent.futures import ThreadPoolExecutor
+
+# from dotenv import load_dotenv
+
+# import litellm
+# from litellm import Router
+
+# load_dotenv()
+
+# # every time we load the router we should have 4 clients:
+# # Async
+# # Sync
+# # Async + Stream
+# # Sync + Stream
+
+
+# def test_init_clients():
+#     litellm.set_verbose = True
+#     import logging
+
+#     from litellm._logging import verbose_router_logger
+
+#     verbose_router_logger.setLevel(logging.DEBUG)
+#     try:
+#         print("testing init 4 clients with diff timeouts")
+#         model_list = [
+#             {
+#                 "model_name": "gpt-3.5-turbo",
+#                 "litellm_params": {
+#                     "model": "azure/chatgpt-v-2",
+#                     "api_key": os.getenv("AZURE_API_KEY"),
+#                     "api_version": os.getenv("AZURE_API_VERSION"),
+#                     "api_base": os.getenv("AZURE_API_BASE"),
+#                     "timeout": 0.01,
+#                     "stream_timeout": 0.000_001,
+#                     "max_retries": 7,
+#                 },
+#             },
+#         ]
+#         router = Router(model_list=model_list, set_verbose=True)
+#         for elem in router.model_list:
+#             model_id = elem["model_info"]["id"]
+#             assert router.cache.get_cache(f"{model_id}_client") is not None
+#             assert router.cache.get_cache(f"{model_id}_async_client") is not None
+#             assert router.cache.get_cache(f"{model_id}_stream_client") is not None
+#             assert router.cache.get_cache(f"{model_id}_stream_async_client") is not None
+
+#             # check if timeout for stream/non stream clients is set correctly
+#             async_client = router.cache.get_cache(f"{model_id}_async_client")
+#             stream_async_client = router.cache.get_cache(
+#                 f"{model_id}_stream_async_client"
+#             )
+
+#             assert async_client.timeout == 0.01
+#             assert stream_async_client.timeout == 0.000_001
+#             print(vars(async_client))
+#             print()
+#             print(async_client._base_url)
+#             assert (
+#                 async_client._base_url
+#                 == "https://openai-gpt-4-test-v-1.openai.azure.com/openai/"
+#             )
+#             assert (
+#                 stream_async_client._base_url
+#                 == "https://openai-gpt-4-test-v-1.openai.azure.com/openai/"
+#             )
+
+#         print("PASSED !")
+
+#     except Exception as e:
+#         traceback.print_exc()
+#         pytest.fail(f"Error occurred: {e}")
+
+
+# # test_init_clients()
+
+
+# def test_init_clients_basic():
+#     litellm.set_verbose = True
+#     try:
+#         print("Test basic client init")
+#         model_list = [
+#             {
+#                 "model_name": "gpt-3.5-turbo",
+#                 "litellm_params": {
+#                     "model": "azure/chatgpt-v-2",
+#                     "api_key": os.getenv("AZURE_API_KEY"),
+#                     "api_version": os.getenv("AZURE_API_VERSION"),
+#                     "api_base": os.getenv("AZURE_API_BASE"),
+#                 },
+#             },
+#         ]
+#         router = Router(model_list=model_list)
+#         for elem in router.model_list:
+#             model_id = elem["model_info"]["id"]
+#             assert router.cache.get_cache(f"{model_id}_client") is not None
+#             assert router.cache.get_cache(f"{model_id}_async_client") is not None
+#             assert router.cache.get_cache(f"{model_id}_stream_client") is not None
+#             assert router.cache.get_cache(f"{model_id}_stream_async_client") is not None
+#         print("PASSED !")
+
+#         # see if we can init clients without timeout or max retries set
+#     except Exception as e:
+#         traceback.print_exc()
+#         pytest.fail(f"Error occurred: {e}")
+
+
+# # test_init_clients_basic()
+
+
+# def test_init_clients_basic_azure_cloudflare():
+#     # init azure + cloudflare
+#     # init OpenAI gpt-3.5
+#     # init OpenAI text-embedding
+#     # init OpenAI comptaible - Mistral/mistral-medium
+#     # init OpenAI compatible - xinference/bge
+#     litellm.set_verbose = True
+#     try:
+#         print("Test basic client init")
+#         model_list = [
+#             {
+#                 "model_name": "azure-cloudflare",
+#                 "litellm_params": {
+#                     "model": "azure/chatgpt-v-2",
+#                     "api_key": os.getenv("AZURE_API_KEY"),
+#                     "api_version": os.getenv("AZURE_API_VERSION"),
+#                     "api_base": "https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1",
+#                 },
+#             },
+#             {
+#                 "model_name": "gpt-openai",
+#                 "litellm_params": {
+#                     "model": "gpt-3.5-turbo",
+#                     "api_key": os.getenv("OPENAI_API_KEY"),
+#                 },
+#             },
+#             {
+#                 "model_name": "text-embedding-ada-002",
+#                 "litellm_params": {
+#                     "model": "text-embedding-ada-002",
+#                     "api_key": os.getenv("OPENAI_API_KEY"),
+#                 },
+#             },
+#             {
+#                 "model_name": "mistral",
+#                 "litellm_params": {
+#                     "model": "mistral/mistral-tiny",
+#                     "api_key": os.getenv("MISTRAL_API_KEY"),
+#                 },
+#             },
+#             {
+#                 "model_name": "bge-base-en",
+#                 "litellm_params": {
+#                     "model": "xinference/bge-base-en",
+#                     "api_base": "http://127.0.0.1:9997/v1",
+#                     "api_key": os.getenv("OPENAI_API_KEY"),
+#                 },
+#             },
+#         ]
+#         router = Router(model_list=model_list)
+#         for elem in router.model_list:
+#             model_id = elem["model_info"]["id"]
+#             assert router.cache.get_cache(f"{model_id}_client") is not None
+#             assert router.cache.get_cache(f"{model_id}_async_client") is not None
+#             assert router.cache.get_cache(f"{model_id}_stream_client") is not None
+#             assert router.cache.get_cache(f"{model_id}_stream_async_client") is not None
+#         print("PASSED !")
+
+#         # see if we can init clients without timeout or max retries set
+#     except Exception as e:
+#         traceback.print_exc()
+#         pytest.fail(f"Error occurred: {e}")
+
+
+# # test_init_clients_basic_azure_cloudflare()
+
+
+# def test_timeouts_router():
+#     """
+#     Test the timeouts of the router with multiple clients. This HASas to raise a timeout error
+#     """
+#     import openai
+
+#     litellm.set_verbose = True
+#     try:
+#         print("testing init 4 clients with diff timeouts")
+#         model_list = [
+#             {
+#                 "model_name": "gpt-3.5-turbo",
+#                 "litellm_params": {
+#                     "model": "azure/chatgpt-v-2",
+#                     "api_key": os.getenv("AZURE_API_KEY"),
+#                     "api_version": os.getenv("AZURE_API_VERSION"),
+#                     "api_base": os.getenv("AZURE_API_BASE"),
+#                     "timeout": 0.000001,
+#                     "stream_timeout": 0.000_001,
+#                 },
+#             },
+#         ]
+#         router = Router(model_list=model_list, num_retries=0)
+
+#         print("PASSED !")
+
+#         async def test():
+#             try:
+#                 await router.acompletion(
+#                     model="gpt-3.5-turbo",
+#                     messages=[
+#                         {"role": "user", "content": "hello, write a 20 pg essay"}
+#                     ],
+#                 )
+#             except Exception as e:
+#                 raise e
+
+#         asyncio.run(test())
+#     except openai.APITimeoutError as e:
+#         print(
+#             "Passed: Raised correct exception. Got openai.APITimeoutError\nGood Job", e
+#         )
+#         print(type(e))
+#         pass
+#     except Exception as e:
+#         pytest.fail(
+#             f"Did not raise error `openai.APITimeoutError`. Instead raised error type: {type(e)}, Error: {e}"
+#         )
+
+
+# # test_timeouts_router()
+
+
+# def test_stream_timeouts_router():
+#     """
+#     Test the stream timeouts router. See if it selected the correct client with stream timeout
+#     """
+#     import openai
+
+#     litellm.set_verbose = True
+#     try:
+#         print("testing init 4 clients with diff timeouts")
+#         model_list = [
+#             {
+#                 "model_name": "gpt-3.5-turbo",
+#                 "litellm_params": {
+#                     "model": "azure/chatgpt-v-2",
+#                     "api_key": os.getenv("AZURE_API_KEY"),
+#                     "api_version": os.getenv("AZURE_API_VERSION"),
+#                     "api_base": os.getenv("AZURE_API_BASE"),
+#                     "timeout": 200,  # regular calls will not timeout, stream calls will
+#                     "stream_timeout": 10,
+#                 },
+#             },
+#         ]
+#         router = Router(model_list=model_list)
+
+#         print("PASSED !")
+#         data = {
+#             "model": "gpt-3.5-turbo",
+#             "messages": [{"role": "user", "content": "hello, write a 20 pg essay"}],
+#             "stream": True,
+#         }
+#         selected_client = router._get_client(
+#             deployment=router.model_list[0],
+#             kwargs=data,
+#             client_type=None,
+#         )
+#         print("Select client timeout", selected_client.timeout)
+#         assert selected_client.timeout == 10
+
+#         # make actual call
+#         response = router.completion(**data)
+
+#         for chunk in response:
+#             print(f"chunk: {chunk}")
+#     except openai.APITimeoutError as e:
+#         print(
+#             "Passed: Raised correct exception. Got openai.APITimeoutError\nGood Job", e
+#         )
+#         print(type(e))
+#         pass
+#     except Exception as e:
+#         pytest.fail(
+#             f"Did not raise error `openai.APITimeoutError`. Instead raised error type: {type(e)}, Error: {e}"
+#         )
+
+
+# # test_stream_timeouts_router()
+
+
+# def test_xinference_embedding():
+#     # [Test Init Xinference] this tests if we init xinference on the router correctly
+#     # [Test Exception Mapping] tests that xinference is an openai comptiable provider
+#     print("Testing init xinference")
+#     print(
+#         "this tests if we create an OpenAI client for Xinference, with the correct API BASE"
+#     )
+
+#     model_list = [
+#         {
+#             "model_name": "xinference",
+#             "litellm_params": {
+#                 "model": "xinference/bge-base-en",
+#                 "api_base": "os.environ/XINFERENCE_API_BASE",
+#             },
+#         }
+#     ]
+
+#     router = Router(model_list=model_list)
+
+#     print(router.model_list)
+#     print(router.model_list[0])
+
+#     assert (
+#         router.model_list[0]["litellm_params"]["api_base"] == "http://0.0.0.0:9997"
+#     )  # set in env
+
+#     openai_client = router._get_client(
+#         deployment=router.model_list[0],
+#         kwargs={"input": ["hello"], "model": "xinference"},
+#     )
+
+#     assert openai_client._base_url == "http://0.0.0.0:9997"
+#     assert "xinference" in litellm.openai_compatible_providers
+#     print("passed")
+
+
+# # test_xinference_embedding()
+
+
+# def test_router_init_gpt_4_vision_enhancements():
+#     try:
+#         # tests base_url set when any base_url with /openai/deployments passed to router
+#         print("Testing Azure GPT_Vision enhancements")
+
+#         model_list = [
+#             {
+#                 "model_name": "gpt-4-vision-enhancements",
+#                 "litellm_params": {
+#                     "model": "azure/gpt-4-vision",
+#                     "api_key": os.getenv("AZURE_API_KEY"),
+#                     "base_url": "https://gpt-4-vision-resource.openai.azure.com/openai/deployments/gpt-4-vision/extensions/",
+#                     "dataSources": [
+#                         {
+#                             "type": "AzureComputerVision",
+#                             "parameters": {
+#                                 "endpoint": "os.environ/AZURE_VISION_ENHANCE_ENDPOINT",
+#                                 "key": "os.environ/AZURE_VISION_ENHANCE_KEY",
+#                             },
+#                         }
+#                     ],
+#                 },
+#             }
+#         ]
+
+#         router = Router(model_list=model_list)
+
+#         print(router.model_list)
+#         print(router.model_list[0])
+
+#         assert (
+#             router.model_list[0]["litellm_params"]["base_url"]
+#             == "https://gpt-4-vision-resource.openai.azure.com/openai/deployments/gpt-4-vision/extensions/"
+#         )  # set in env
+
+#         assert (
+#             router.model_list[0]["litellm_params"]["dataSources"][0]["parameters"][
+#                 "endpoint"
+#             ]
+#             == os.environ["AZURE_VISION_ENHANCE_ENDPOINT"]
+#         )
+
+#         assert (
+#             router.model_list[0]["litellm_params"]["dataSources"][0]["parameters"][
+#                 "key"
+#             ]
+#             == os.environ["AZURE_VISION_ENHANCE_KEY"]
+#         )
+
+#         azure_client = router._get_client(
+#             deployment=router.model_list[0],
+#             kwargs={"stream": True, "model": "gpt-4-vision-enhancements"},
+#             client_type="async",
+#         )
+
+#         assert (
+#             azure_client._base_url
+#             == "https://gpt-4-vision-resource.openai.azure.com/openai/deployments/gpt-4-vision/extensions/"
+#         )
+#         print("passed")
+#     except Exception as e:
+#         pytest.fail(f"Error occurred: {e}")
+
+
+# @pytest.mark.parametrize("sync_mode", [True, False])
+# @pytest.mark.asyncio
+# async def test_openai_with_organization(sync_mode):
+#     try:
+#         print("Testing OpenAI with organization")
+#         model_list = [
+#             {
+#                 "model_name": "openai-bad-org",
+#                 "litellm_params": {
+#                     "model": "gpt-3.5-turbo",
+#                     "organization": "org-ikDc4ex8NB",
+#                 },
+#             },
+#             {
+#                 "model_name": "openai-good-org",
+#                 "litellm_params": {"model": "gpt-3.5-turbo"},
+#             },
+#         ]
+
+#         router = Router(model_list=model_list)
+
+#         print(router.model_list)
+#         print(router.model_list[0])
+
+#         if sync_mode:
+#             openai_client = router._get_client(
+#                 deployment=router.model_list[0],
+#                 kwargs={"input": ["hello"], "model": "openai-bad-org"},
+#             )
+#             print(vars(openai_client))
+
+#             assert openai_client.organization == "org-ikDc4ex8NB"
+
+#             # bad org raises error
+
+#             try:
+#                 response = router.completion(
+#                     model="openai-bad-org",
+#                     messages=[{"role": "user", "content": "this is a test"}],
+#                 )
+#                 pytest.fail(
+#                     "Request should have failed - This organization does not exist"
+#                 )
+#             except Exception as e:
+#                 print("Got exception: " + str(e))
+#                 assert "header should match organization for API key" in str(
+#                     e
+#                 ) or "No such organization" in str(e)
+
+#             # good org works
+#             response = router.completion(
+#                 model="openai-good-org",
+#                 messages=[{"role": "user", "content": "this is a test"}],
+#                 max_tokens=5,
+#             )
+#         else:
+#             openai_client = router._get_client(
+#                 deployment=router.model_list[0],
+#                 kwargs={"input": ["hello"], "model": "openai-bad-org"},
+#                 client_type="async",
+#             )
+#             print(vars(openai_client))
+
+#             assert openai_client.organization == "org-ikDc4ex8NB"
+
+#             # bad org raises error
+
+#             try:
+#                 response = await router.acompletion(
+#                     model="openai-bad-org",
+#                     messages=[{"role": "user", "content": "this is a test"}],
+#                 )
+#                 pytest.fail(
+#                     "Request should have failed - This organization does not exist"
+#                 )
+#             except Exception as e:
+#                 print("Got exception: " + str(e))
+#                 assert "header should match organization for API key" in str(
+#                     e
+#                 ) or "No such organization" in str(e)
+
+#             # good org works
+#             response = await router.acompletion(
+#                 model="openai-good-org",
+#                 messages=[{"role": "user", "content": "this is a test"}],
+#                 max_tokens=5,
+#             )
+
+#     except Exception as e:
+#         pytest.fail(f"Error occurred: {e}")
+
+
+# def test_init_clients_azure_command_r_plus():
+#     # This tests that the router uses the OpenAI client for Azure/Command-R+
+#     # For azure/command-r-plus we need to use openai.OpenAI because of how the Azure provider requires requests being sent
+#     litellm.set_verbose = True
+#     import logging
+
+#     from litellm._logging import verbose_router_logger
+
+#     verbose_router_logger.setLevel(logging.DEBUG)
+#     try:
+#         print("testing init 4 clients with diff timeouts")
+#         model_list = [
+#             {
+#                 "model_name": "gpt-3.5-turbo",
+#                 "litellm_params": {
+#                     "model": "azure/command-r-plus",
+#                     "api_key": os.getenv("AZURE_COHERE_API_KEY"),
+#                     "api_base": os.getenv("AZURE_COHERE_API_BASE"),
+#                     "timeout": 0.01,
+#                     "stream_timeout": 0.000_001,
+#                     "max_retries": 7,
+#                 },
+#             },
+#         ]
+#         router = Router(model_list=model_list, set_verbose=True)
+#         for elem in router.model_list:
+#             model_id = elem["model_info"]["id"]
+#             async_client = router.cache.get_cache(f"{model_id}_async_client")
+#             stream_async_client = router.cache.get_cache(
+#                 f"{model_id}_stream_async_client"
+#             )
+#             # Assert the Async Clients used are OpenAI clients and not Azure
+#             # For using Azure/Command-R-Plus and Azure/Mistral the clients NEED to be OpenAI clients used
+#             # this is weirdness introduced on Azure's side
+
+#             assert "openai.AsyncOpenAI" in str(async_client)
+#             assert "openai.AsyncOpenAI" in str(stream_async_client)
+#         print("PASSED !")
+
+#     except Exception as e:
+#         traceback.print_exc()
+#         pytest.fail(f"Error occurred: {e}")
+
+
+# @pytest.mark.asyncio
+# async def test_aaaaatext_completion_with_organization():
+#     try:
+#         print("Testing Text OpenAI with organization")
+#         model_list = [
+#             {
+#                 "model_name": "openai-bad-org",
+#                 "litellm_params": {
+#                     "model": "text-completion-openai/gpt-3.5-turbo-instruct",
+#                     "api_key": os.getenv("OPENAI_API_KEY", None),
+#                     "organization": "org-ikDc4ex8NB",
+#                 },
+#             },
+#             {
+#                 "model_name": "openai-good-org",
+#                 "litellm_params": {
+#                     "model": "text-completion-openai/gpt-3.5-turbo-instruct",
+#                     "api_key": os.getenv("OPENAI_API_KEY", None),
+#                     "organization": os.getenv("OPENAI_ORGANIZATION", None),
+#                 },
+#             },
+#         ]
+
+#         router = Router(model_list=model_list)
+
+#         print(router.model_list)
+#         print(router.model_list[0])
+
+#         openai_client = router._get_client(
+#             deployment=router.model_list[0],
+#             kwargs={"input": ["hello"], "model": "openai-bad-org"},
+#         )
+#         print(vars(openai_client))
+
+#         assert openai_client.organization == "org-ikDc4ex8NB"
+
+#         # bad org raises error
+
+#         try:
+#             response = await router.atext_completion(
+#                 model="openai-bad-org",
+#                 prompt="this is a test",
+#             )
+#             pytest.fail("Request should have failed - This organization does not exist")
+#         except Exception as e:
+#             print("Got exception: " + str(e))
+#             assert "header should match organization for API key" in str(
+#                 e
+#             ) or "No such organization" in str(e)
+
+#         # good org works
+#         response = await router.atext_completion(
+#             model="openai-good-org",
+#             prompt="this is a test",
+#             max_tokens=5,
+#         )
+#         print("working response: ", response)
+
+#     except Exception as e:
+#         pytest.fail(f"Error occurred: {e}")
+
+
+# def test_init_clients_async_mode():
+#     litellm.set_verbose = True
+#     import logging
+
+#     from litellm._logging import verbose_router_logger
+#     from litellm.types.router import RouterGeneralSettings
+
+#     verbose_router_logger.setLevel(logging.DEBUG)
+#     try:
+#         print("testing init 4 clients with diff timeouts")
+#         model_list = [
+#             {
+#                 "model_name": "gpt-3.5-turbo",
+#                 "litellm_params": {
+#                     "model": "azure/chatgpt-v-2",
+#                     "api_key": os.getenv("AZURE_API_KEY"),
+#                     "api_version": os.getenv("AZURE_API_VERSION"),
+#                     "api_base": os.getenv("AZURE_API_BASE"),
+#                     "timeout": 0.01,
+#                     "stream_timeout": 0.000_001,
+#                     "max_retries": 7,
+#                 },
+#             },
+#         ]
+#         router = Router(
+#             model_list=model_list,
+#             set_verbose=True,
+#             router_general_settings=RouterGeneralSettings(async_only_mode=True),
+#         )
+#         for elem in router.model_list:
+#             model_id = elem["model_info"]["id"]
+
+#             # sync clients not initialized in async_only_mode=True
+#             assert router.cache.get_cache(f"{model_id}_client") is None
+#             assert router.cache.get_cache(f"{model_id}_stream_client") is None
+
+#             # only async clients initialized in async_only_mode=True
+#             assert router.cache.get_cache(f"{model_id}_async_client") is not None
+#             assert router.cache.get_cache(f"{model_id}_stream_async_client") is not None
+#     except Exception as e:
+#         pytest.fail(f"Error occurred: {e}")
+
+
+# @pytest.mark.parametrize(
+#     "environment,expected_models",
+#     [
+#         ("development", ["gpt-3.5-turbo"]),
+#         ("production", ["gpt-4", "gpt-3.5-turbo", "gpt-4o"]),
+#     ],
+# )
+# def test_init_router_with_supported_environments(environment, expected_models):
+#     """
+#     Tests that the correct models are setup on router when LITELLM_ENVIRONMENT is set
+#     """
+#     os.environ["LITELLM_ENVIRONMENT"] = environment
+#     model_list = [
+#         {
+#             "model_name": "gpt-3.5-turbo",
+#             "litellm_params": {
+#                 "model": "azure/chatgpt-v-2",
+#                 "api_key": os.getenv("AZURE_API_KEY"),
+#                 "api_version": os.getenv("AZURE_API_VERSION"),
+#                 "api_base": os.getenv("AZURE_API_BASE"),
+#                 "timeout": 0.01,
+#                 "stream_timeout": 0.000_001,
+#                 "max_retries": 7,
+#             },
+#             "model_info": {"supported_environments": ["development", "production"]},
+#         },
+#         {
+#             "model_name": "gpt-4",
+#             "litellm_params": {
+#                 "model": "openai/gpt-4",
+#                 "api_key": os.getenv("OPENAI_API_KEY"),
+#                 "timeout": 0.01,
+#                 "stream_timeout": 0.000_001,
+#                 "max_retries": 7,
+#             },
+#             "model_info": {"supported_environments": ["production"]},
+#         },
+#         {
+#             "model_name": "gpt-4o",
+#             "litellm_params": {
+#                 "model": "openai/gpt-4o",
+#                 "api_key": os.getenv("OPENAI_API_KEY"),
+#                 "timeout": 0.01,
+#                 "stream_timeout": 0.000_001,
+#                 "max_retries": 7,
+#             },
+#             "model_info": {"supported_environments": ["production"]},
+#         },
+#     ]
+#     router = Router(model_list=model_list, set_verbose=True)
+#     _model_list = router.get_model_names()
+
+#     print("model_list: ", _model_list)
+#     print("expected_models: ", expected_models)
+
+#     assert set(_model_list) == set(expected_models)
+
+#     os.environ.pop("LITELLM_ENVIRONMENT")
diff --git a/tests/local_testing/test_router_utils.py b/tests/local_testing/test_router_utils.py
index 7c2bbdc2a1..a94f5ceca9 100644
--- a/tests/local_testing/test_router_utils.py
+++ b/tests/local_testing/test_router_utils.py
@@ -418,3 +418,21 @@ def test_router_handle_clientside_credential():
 
     assert new_deployment.litellm_params.api_key == "123"
     assert len(router.get_model_list()) == 2
+
+
+def test_router_get_async_openai_model_client():
+    router = Router(
+        model_list=[
+            {
+                "model_name": "gemini/*",
+                "litellm_params": {
+                    "model": "gemini/*",
+                    "api_base": "https://api.gemini.com",
+                },
+            }
+        ]
+    )
+    model_client = router._get_async_openai_model_client(
+        deployment=MagicMock(), kwargs={}
+    )
+    assert model_client is None
diff --git a/tests/router_unit_tests/test_router_endpoints.py b/tests/router_unit_tests/test_router_endpoints.py
index e80b7dc3a8..a7f6df9ae2 100644
--- a/tests/router_unit_tests/test_router_endpoints.py
+++ b/tests/router_unit_tests/test_router_endpoints.py
@@ -315,14 +315,20 @@ async def test_router_with_empty_choices(model_list):
     assert response is not None
 
 
-@pytest.mark.asyncio
-async def test_ageneric_api_call_with_fallbacks_basic():
+@pytest.mark.parametrize("sync_mode", [True, False])
+def test_generic_api_call_with_fallbacks_basic(sync_mode):
     """
-    Test the _ageneric_api_call_with_fallbacks method with a basic successful call
+    Test both the sync and async versions of generic_api_call_with_fallbacks with a basic successful call
     """
-    # Create a mock function that will be passed to _ageneric_api_call_with_fallbacks
-    mock_function = AsyncMock()
-    mock_function.__name__ = "test_function"
+    # Create a mock function that will be passed to generic_api_call_with_fallbacks
+    if sync_mode:
+        from unittest.mock import Mock
+
+        mock_function = Mock()
+        mock_function.__name__ = "test_function"
+    else:
+        mock_function = AsyncMock()
+        mock_function.__name__ = "test_function"
 
     # Create a mock response
     mock_response = {
@@ -347,13 +353,23 @@ async def test_ageneric_api_call_with_fallbacks_basic():
         ]
     )
 
-    # Call the _ageneric_api_call_with_fallbacks method
-    response = await router._ageneric_api_call_with_fallbacks(
-        model="test-model-alias",
-        original_function=mock_function,
-        messages=[{"role": "user", "content": "Hello"}],
-        max_tokens=100,
-    )
+    # Call the appropriate generic_api_call_with_fallbacks method
+    if sync_mode:
+        response = router._generic_api_call_with_fallbacks(
+            model="test-model-alias",
+            original_function=mock_function,
+            messages=[{"role": "user", "content": "Hello"}],
+            max_tokens=100,
+        )
+    else:
+        response = asyncio.run(
+            router._ageneric_api_call_with_fallbacks(
+                model="test-model-alias",
+                original_function=mock_function,
+                messages=[{"role": "user", "content": "Hello"}],
+                max_tokens=100,
+            )
+        )
 
     # Verify the mock function was called
     mock_function.assert_called_once()
@@ -510,3 +526,36 @@ async def test__aadapter_completion():
 
         # Verify async_routing_strategy_pre_call_checks was called
         router.async_routing_strategy_pre_call_checks.assert_called_once()
+
+
+def test_initialize_router_endpoints():
+    """
+    Test that initialize_router_endpoints correctly sets up all router endpoints
+    """
+    # Create a router with a basic model
+    router = Router(
+        model_list=[
+            {
+                "model_name": "test-model",
+                "litellm_params": {
+                    "model": "anthropic/test-model",
+                    "api_key": "fake-api-key",
+                },
+            }
+        ]
+    )
+
+    # Explicitly call initialize_router_endpoints
+    router.initialize_router_endpoints()
+
+    # Verify all expected endpoints are initialized
+    assert hasattr(router, "amoderation")
+    assert hasattr(router, "aanthropic_messages")
+    assert hasattr(router, "aresponses")
+    assert hasattr(router, "responses")
+
+    # Verify the endpoints are callable
+    assert callable(router.amoderation)
+    assert callable(router.aanthropic_messages)
+    assert callable(router.aresponses)
+    assert callable(router.responses)
diff --git a/tests/router_unit_tests/test_router_helper_utils.py b/tests/router_unit_tests/test_router_helper_utils.py
index f12371baeb..782f0d8fbb 100644
--- a/tests/router_unit_tests/test_router_helper_utils.py
+++ b/tests/router_unit_tests/test_router_helper_utils.py
@@ -338,18 +338,6 @@ def test_update_kwargs_with_default_litellm_params(model_list):
     assert kwargs["metadata"]["key2"] == "value2"
 
 
-def test_get_async_openai_model_client(model_list):
-    """Test if the '_get_async_openai_model_client' function is working correctly"""
-    router = Router(model_list=model_list)
-    deployment = router.get_deployment_by_model_group_name(
-        model_group_name="gpt-3.5-turbo"
-    )
-    model_client = router._get_async_openai_model_client(
-        deployment=deployment, kwargs={}
-    )
-    assert model_client is not None
-
-
 def test_get_timeout(model_list):
     """Test if the '_get_timeout' function is working correctly"""
     router = Router(model_list=model_list)
diff --git a/ui/litellm-dashboard/src/components/add_model/add_model_tab.tsx b/ui/litellm-dashboard/src/components/add_model/add_model_tab.tsx
index d2c71ba908..c699c161e3 100644
--- a/ui/litellm-dashboard/src/components/add_model/add_model_tab.tsx
+++ b/ui/litellm-dashboard/src/components/add_model/add_model_tab.tsx
@@ -8,7 +8,7 @@ import ProviderSpecificFields from "./provider_specific_fields";
 import AdvancedSettings from "./advanced_settings";
 import { Providers, providerLogoMap, getPlaceholder } from "../provider_info_helpers";
 import type { Team } from "../key_team_helpers/key_list";
-
+import { CredentialItem } from "../networking";
 interface AddModelTabProps {
   form: FormInstance;
   handleOk: () => void;
@@ -21,6 +21,7 @@ interface AddModelTabProps {
   showAdvancedSettings: boolean;
   setShowAdvancedSettings: (show: boolean) => void;
   teams: Team[] | null;
+  credentials: CredentialItem[];
 }
 
 const { Title, Link } = Typography;
@@ -37,6 +38,7 @@ const AddModelTab: React.FC<AddModelTabProps> = ({
   showAdvancedSettings,
   setShowAdvancedSettings,
   teams,
+  credentials,
 }) => {
   return (
     <>
@@ -108,10 +110,67 @@ const AddModelTab: React.FC<AddModelTabProps> = ({
                   {/* Conditionally Render "Public Model Name" */}
                   <ConditionalPublicModelName  />
 
-                  <ProviderSpecificFields
-                    selectedProvider={selectedProvider}
-                    uploadProps={uploadProps}
-                  />
+                  {/* Credentials */}
+                  <div className="mb-4">
+                    <Typography.Text className="text-sm text-gray-500 mb-2">
+                      Either select existing credentials OR enter new provider credentials below
+                    </Typography.Text>
+                  </div>
+
+                  <Form.Item
+                    label="Existing Credentials"
+                    name="litellm_credential_name"
+                  >
+                    <AntdSelect
+                      showSearch
+                      placeholder="Select or search for existing credentials"
+                      optionFilterProp="children"
+                      filterOption={(input, option) =>
+                        (option?.label ?? '').toLowerCase().includes(input.toLowerCase())
+                      }
+                      options={[
+                        { value: null, label: 'None' },
+                        ...credentials.map((credential) => ({
+                          value: credential.credential_name,
+                          label: credential.credential_name
+                        }))
+                      ]}
+                      allowClear
+                    />
+                  </Form.Item>
+
+                  <div className="flex items-center my-4">
+                    <div className="flex-grow border-t border-gray-200"></div>
+                    <span className="px-4 text-gray-500 text-sm">OR</span>
+                    <div className="flex-grow border-t border-gray-200"></div>
+                  </div>
+
+                  <Form.Item
+                    noStyle
+                    shouldUpdate={(prevValues, currentValues) => 
+                      prevValues.litellm_credential_name !== currentValues.litellm_credential_name ||
+                      prevValues.provider !== currentValues.provider
+                    }
+                  >
+                    {({ getFieldValue }) => {
+                      const credentialName = getFieldValue('litellm_credential_name');
+                      console.log("🔑 Credential Name Changed:", credentialName);
+                      // Only show provider specific fields if no credentials selected
+                      if (!credentialName) {
+                        return (
+                          <ProviderSpecificFields
+                            selectedProvider={selectedProvider}
+                            uploadProps={uploadProps}
+                          />
+                        );
+                      }
+                      return (
+                        <div className="text-gray-500 text-sm text-center">
+                          Using existing credentials - no additional provider fields needed
+                        </div>
+                      );
+                    }}
+                  </Form.Item>
                   <AdvancedSettings 
                     showAdvancedSettings={showAdvancedSettings}
                     setShowAdvancedSettings={setShowAdvancedSettings}
diff --git a/ui/litellm-dashboard/src/components/leftnav.tsx b/ui/litellm-dashboard/src/components/leftnav.tsx
index 5990a50f9e..46540401a9 100644
--- a/ui/litellm-dashboard/src/components/leftnav.tsx
+++ b/ui/litellm-dashboard/src/components/leftnav.tsx
@@ -20,6 +20,7 @@ import {
   SafetyOutlined,
   ExperimentOutlined,
   ThunderboltOutlined,
+  LockOutlined
 } from '@ant-design/icons';
 import { old_admin_roles, v2_admin_role_names, all_admin_roles, rolesAllowedToSeeUsage, rolesWithWriteAccess } from '../utils/roles';
 
@@ -68,6 +69,7 @@ const menuItems: MenuItem[] = [
       { key: "10", page: "budgets", label: "Budgets", icon: <BankOutlined />, roles: all_admin_roles },
       { key: "11", page: "guardrails", label: "Guardrails", icon: <SafetyOutlined />, roles: all_admin_roles },
       { key: "18", page: "transform-request", label: "Playground", icon: <ThunderboltOutlined />, roles: all_admin_roles },
+      { key: "19", page: "credentials", label: "Credentials", icon: <LockOutlined />, roles: all_admin_roles },
     ]
   },
   {
diff --git a/ui/litellm-dashboard/src/components/model_add/add_credentials_tab.tsx b/ui/litellm-dashboard/src/components/model_add/add_credentials_tab.tsx
new file mode 100644
index 0000000000..175eaa7a5a
--- /dev/null
+++ b/ui/litellm-dashboard/src/components/model_add/add_credentials_tab.tsx
@@ -0,0 +1,162 @@
+import React, { useState } from "react";
+import { 
+  Card, 
+  Form, 
+  Button, 
+  Tooltip, 
+  Typography, 
+  Select as AntdSelect, 
+  Input, 
+  Switch, 
+  Modal 
+} from "antd";
+import type { UploadProps } from "antd/es/upload";
+import { Providers, providerLogoMap } from "../provider_info_helpers";
+import type { FormInstance } from "antd";
+import ProviderSpecificFields from "../add_model/provider_specific_fields";
+import { TextInput } from "@tremor/react";
+import { CredentialItem } from "../networking";
+const { Title, Link } = Typography;
+
+interface AddCredentialsModalProps {
+  isVisible: boolean;
+  onCancel: () => void;
+  onAddCredential: (values: any) => void;
+  onUpdateCredential: (values: any) => void;
+  uploadProps: UploadProps;
+  addOrEdit: "add" | "edit";
+  existingCredential: CredentialItem | null;
+}
+
+const AddCredentialsModal: React.FC<AddCredentialsModalProps> = ({
+  isVisible,
+  onCancel,
+  onAddCredential,
+  onUpdateCredential,
+  uploadProps,
+  addOrEdit,
+  existingCredential
+}) => {
+  const [form] = Form.useForm();
+  const [selectedProvider, setSelectedProvider] = useState<Providers>(Providers.OpenAI);
+  const [showAdvancedSettings, setShowAdvancedSettings] = useState(false);
+
+  console.log(`existingCredential in add credentials tab: ${JSON.stringify(existingCredential)}`);
+
+  const handleSubmit = (values: any) => {
+    if (addOrEdit === "add") {
+      onAddCredential(values);
+    } else {
+      onUpdateCredential(values);
+    }
+    form.resetFields();
+  };
+
+  return (
+    <Modal
+      title={addOrEdit === "add" ? "Add New Credential" : "Edit Credential"}
+      visible={isVisible}
+      onCancel={() => {
+        onCancel();
+        form.resetFields();
+      }}
+      footer={null}
+      width={600}
+    >
+      <Form
+        form={form}
+        onFinish={handleSubmit}
+        layout="vertical"
+      >
+        {/* Credential Name */}
+        <Form.Item
+          label="Credential Name:"
+          name="credential_name"
+          rules={[{ required: true, message: "Credential name is required" }]}
+          initialValue={existingCredential?.credential_name}
+        >
+          <TextInput 
+            placeholder="Enter a friendly name for these credentials"
+            disabled={existingCredential?.credential_name ? true : false}
+          />
+        </Form.Item>
+
+        {/* Provider Selection */}
+        <Form.Item
+          rules={[{ required: true, message: "Required" }]}
+          label="Provider:"
+          name="custom_llm_provider"
+          tooltip="Helper to auto-populate provider specific fields"
+        >
+          <AntdSelect
+            value={existingCredential?.credential_info.custom_llm_provider || selectedProvider}
+            onChange={(value) => {
+              setSelectedProvider(value as Providers);
+            }}
+          >
+            {Object.entries(Providers).map(([providerEnum, providerDisplayName]) => (
+              <AntdSelect.Option
+                key={providerEnum}
+                value={providerEnum}
+              >
+                <div className="flex items-center space-x-2">
+                  <img
+                    src={providerLogoMap[providerDisplayName]}
+                    alt={`${providerEnum} logo`}
+                    className="w-5 h-5"
+                    onError={(e) => {
+                      const target = e.target as HTMLImageElement;
+                      const parent = target.parentElement;
+                      if (parent) {
+                        const fallbackDiv = document.createElement('div');
+                        fallbackDiv.className = 'w-5 h-5 rounded-full bg-gray-200 flex items-center justify-center text-xs';
+                        fallbackDiv.textContent = providerDisplayName.charAt(0);
+                        parent.replaceChild(fallbackDiv, target);
+                      }
+                    }}
+                  />
+                  <span>{providerDisplayName}</span>
+                </div>
+              </AntdSelect.Option>
+            ))}
+          </AntdSelect>
+        </Form.Item>
+
+        
+
+        <ProviderSpecificFields
+          selectedProvider={selectedProvider}
+          uploadProps={uploadProps}
+        />
+
+        {/* Modal Footer */}
+        <div className="flex justify-between items-center">
+          <Tooltip title="Get help on our github">
+            <Link href="https://github.com/BerriAI/litellm/issues">
+              Need Help?
+            </Link>
+          </Tooltip>
+          
+          <div>
+            <Button 
+              onClick={() => {
+                onCancel();
+                form.resetFields();
+              }} 
+              style={{ marginRight: 10 }}
+            >
+              Cancel
+            </Button>
+            <Button 
+              htmlType="submit"
+            >
+              {addOrEdit === "add" ? "Add Credential" : "Update Credential"}
+            </Button>
+          </div>
+        </div>
+      </Form>
+    </Modal>
+  );
+};
+
+export default AddCredentialsModal;
\ No newline at end of file
diff --git a/ui/litellm-dashboard/src/components/model_add/credentials.tsx b/ui/litellm-dashboard/src/components/model_add/credentials.tsx
new file mode 100644
index 0000000000..c1d6c92c46
--- /dev/null
+++ b/ui/litellm-dashboard/src/components/model_add/credentials.tsx
@@ -0,0 +1,231 @@
+import React, { useState, useEffect } from "react";
+import {
+  Table,
+  TableBody,
+  TableCell,
+  TableHead,
+  TableHeaderCell,
+  TableRow,
+  Card,
+  Text,
+  Badge,
+  Button
+} from "@tremor/react";
+import {
+  InformationCircleIcon,
+  PencilAltIcon,
+  PencilIcon,
+  RefreshIcon,
+  StatusOnlineIcon,
+  TrashIcon,
+} from "@heroicons/react/outline";
+import { UploadProps } from "antd/es/upload";
+import { PlusIcon } from "@heroicons/react/solid";
+import { credentialListCall, credentialCreateCall, credentialDeleteCall, credentialUpdateCall, CredentialItem, CredentialsResponse } from "@/components/networking"; // Assume this is your networking function
+import AddCredentialsTab from "./add_credentials_tab";
+import { Form, message } from "antd";
+interface CredentialsPanelProps {
+  accessToken: string | null;
+  uploadProps: UploadProps;
+  credentialList: CredentialItem[];
+  fetchCredentials: (accessToken: string) => Promise<void>;
+}
+
+
+
+const CredentialsPanel: React.FC<CredentialsPanelProps> = ({ accessToken, uploadProps, credentialList, fetchCredentials }) => {
+  const [isAddModalOpen, setIsAddModalOpen] = useState(false);
+  const [isUpdateModalOpen, setIsUpdateModalOpen] = useState(false);
+  const [selectedCredential, setSelectedCredential] = useState<CredentialItem | null>(null);
+  const [form] = Form.useForm();
+  console.log(`selectedCredential in credentials panel: ${JSON.stringify(selectedCredential)}`);
+
+  const restrictedFields = ['credential_name', 'custom_llm_provider'];
+  const handleUpdateCredential = async (values: any) => {
+    if (!accessToken) {
+      console.error('No access token found');
+      return;
+    }
+
+    const filter_credential_values = Object.entries(values)
+      .filter(([key]) => !restrictedFields.includes(key))
+      .reduce((acc, [key, value]) => ({ ...acc, [key]: value }), {});
+    // Transform form values into credential structure
+    const newCredential = {
+      credential_name: values.credential_name,
+      credential_values: filter_credential_values,
+      credential_info: {
+        custom_llm_provider: values.custom_llm_provider,
+      }
+    };
+
+    const response = await credentialUpdateCall(accessToken, values.credential_name, newCredential);
+    message.success('Credential updated successfully');
+    console.log(`response: ${JSON.stringify(response)}`);
+    setIsUpdateModalOpen(false);
+    fetchCredentials(accessToken);
+  }
+
+  const handleAddCredential = async (values: any) => {
+    if (!accessToken) {
+      console.error('No access token found');
+      return;
+    }
+
+    const filter_credential_values = Object.entries(values)
+      .filter(([key]) => !restrictedFields.includes(key))
+      .reduce((acc, [key, value]) => ({ ...acc, [key]: value }), {});
+    // Transform form values into credential structure
+    const newCredential = {
+      credential_name: values.credential_name,
+      credential_values: filter_credential_values,
+      credential_info: {
+        custom_llm_provider: values.custom_llm_provider,
+      }
+    };
+
+    // Add to list and close modal
+    const response = await credentialCreateCall(accessToken, newCredential);
+    message.success('Credential added successfully');
+    console.log(`response: ${JSON.stringify(response)}`);
+    setIsAddModalOpen(false);
+    fetchCredentials(accessToken);
+  };
+
+  
+
+  useEffect(() => {
+    if (!accessToken) {
+      return;
+    }
+    fetchCredentials(accessToken);
+  }, [accessToken]);
+
+  const renderProviderBadge = (provider: string) => {
+    const providerColors: Record<string, string> = {
+      'openai': 'blue',
+      'azure': 'indigo',
+      'anthropic': 'purple',
+      'default': 'gray'
+    };
+
+    const color = providerColors[provider.toLowerCase()] || providerColors['default'];
+    return (
+      <Badge color={color as any} size="xs">
+        {provider}
+      </Badge>
+    );
+  };
+
+
+  const handleDeleteCredential = async (credentialName: string) => {
+    if (!accessToken) {
+      console.error('No access token found');
+      return;
+    }
+    const response = await credentialDeleteCall(accessToken, credentialName);
+    console.log(`response: ${JSON.stringify(response)}`);
+    message.success('Credential deleted successfully');
+    fetchCredentials(accessToken);
+  };
+
+  return (
+    <div className="w-full mx-auto flex-auto overflow-y-auto m-8 p-2">
+      <div className="flex justify-between items-center mb-4">
+        <Text>
+          Configured credentials for different AI providers. Add and manage your API credentials.{" "}
+          <a 
+            href="https://docs.litellm.ai/docs/credentials" 
+            target="_blank" 
+            rel="noopener noreferrer" 
+            className="text-blue-500 hover:text-blue-700 underline"
+          >
+            Docs
+          </a>
+        </Text>
+      </div>
+
+      <Card>
+        <Table>
+          <TableHead>
+            <TableRow>
+              <TableHeaderCell>Credential Name</TableHeaderCell>
+              <TableHeaderCell>Provider</TableHeaderCell>
+              <TableHeaderCell>Description</TableHeaderCell>
+            </TableRow>
+          </TableHead>
+          <TableBody>
+            {(!credentialList || credentialList.length === 0) ? (
+              <TableRow>
+                <TableCell colSpan={4} className="text-center py-4 text-gray-500">
+                  No credentials configured
+                </TableCell>
+              </TableRow>
+            ) : (
+              credentialList.map((credential: CredentialItem, index: number) => (
+                <TableRow key={index}>
+                  <TableCell>{credential.credential_name}</TableCell>
+                  <TableCell>
+                    {renderProviderBadge(credential.credential_info?.custom_llm_provider as string || '-')}
+                  </TableCell>
+                  <TableCell>{credential.credential_info?.description || '-'}</TableCell>
+                  <TableCell>
+                    <Button
+                      icon={PencilAltIcon}
+                      variant="light" 
+                      size="sm"
+                      onClick={() => {
+                        console.log(`credential being set: ${JSON.stringify(credential)}`);
+                        setSelectedCredential(credential);
+                        setIsUpdateModalOpen(true);
+                      }}
+                    />
+                    <Button
+                      icon={TrashIcon}
+                      variant="light"
+                      size="sm"
+                      onClick={() => handleDeleteCredential(credential.credential_name)}
+                    />
+                  </TableCell>
+                </TableRow>
+              ))
+            )}
+          </TableBody>
+        </Table>
+      </Card>
+      <Button 
+        onClick={() => setIsAddModalOpen(true)}
+        className="mt-4"
+      >
+        Add Credential
+      </Button>
+
+      
+
+      {isAddModalOpen && (
+        <AddCredentialsTab
+          onAddCredential={handleAddCredential}
+          isVisible={isAddModalOpen}
+          onCancel={() => setIsAddModalOpen(false)}
+          uploadProps={uploadProps}
+          addOrEdit="add"
+          onUpdateCredential={handleUpdateCredential}
+          existingCredential={null}
+        />
+      )}
+      {isUpdateModalOpen && (
+        <AddCredentialsTab 
+          onAddCredential={handleAddCredential}
+          isVisible={isUpdateModalOpen}
+          existingCredential={selectedCredential}
+          onUpdateCredential={handleUpdateCredential}
+          uploadProps={uploadProps}
+          onCancel={() => setIsUpdateModalOpen(false)}
+          addOrEdit="edit"
+        />
+      )}
+    </div>
+  );
+};
+
+export default CredentialsPanel;
\ No newline at end of file
diff --git a/ui/litellm-dashboard/src/components/model_dashboard.tsx b/ui/litellm-dashboard/src/components/model_dashboard.tsx
index 86df8f7442..0a9c26ac4a 100644
--- a/ui/litellm-dashboard/src/components/model_dashboard.tsx
+++ b/ui/litellm-dashboard/src/components/model_dashboard.tsx
@@ -16,13 +16,14 @@ import {
   AccordionHeader,
   AccordionBody,
 } from "@tremor/react";
-
+import { CredentialItem, credentialListCall, CredentialsResponse } from "./networking";
 
 import ConditionalPublicModelName from "./add_model/conditional_public_model_name";
 import LiteLLMModelNameField from "./add_model/litellm_model_name";
 import AdvancedSettings from "./add_model/advanced_settings";
 import ProviderSpecificFields from "./add_model/provider_specific_fields";
 import { handleAddModelSubmit } from "./add_model/handle_add_model_submit";
+import CredentialsPanel from "@/components/model_add/credentials";
 import { getDisplayModelName } from "./view_model/model_name_display";
 import EditModelModal, { handleEditModelSubmit } from "./edit_model/edit_model_modal";
 import {
@@ -234,6 +235,8 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
 
   const [allEndUsers, setAllEndUsers] = useState<any[]>([]);
 
+  const [credentialsList, setCredentialsList] = useState<CredentialItem[]>([]);
+
   // Add state for advanced settings visibility
   const [showAdvancedSettings, setShowAdvancedSettings] = useState<boolean>(false);
 
@@ -373,6 +376,16 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
     }
   };
 
+  const fetchCredentials = async (accessToken: string) => {
+    try {
+      const response: CredentialsResponse = await credentialListCall(accessToken);
+      console.log(`credentials: ${JSON.stringify(response)}`);
+      setCredentialsList(response.credentials);
+    } catch (error) {
+      console.error('Error fetching credentials:', error);
+    }
+  };
+
 
   useEffect(() => {
     updateModelMetrics(
@@ -1034,11 +1047,13 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
             <div className="flex">
               <Tab>All Models</Tab>
               <Tab>Add Model</Tab>
+              <Tab>LLM Credentials</Tab>
               <Tab>
                 <pre>/health Models</pre>
               </Tab>
               <Tab>Model Analytics</Tab>
               <Tab>Model Retry Settings</Tab>
+              
             </div>
 
             <div className="flex items-center space-x-2">
@@ -1124,8 +1139,12 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
                 showAdvancedSettings={showAdvancedSettings}
                 setShowAdvancedSettings={setShowAdvancedSettings}
                 teams={teams}
+                credentials={credentialsList}
               />
             </TabPanel>
+            <TabPanel>
+              <CredentialsPanel accessToken={accessToken} uploadProps={uploadProps} credentialList={credentialsList} fetchCredentials={fetchCredentials} />
+            </TabPanel>
             <TabPanel>
               <Card>
                 <Text>
@@ -1139,6 +1158,257 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
                 )}
               </Card>
             </TabPanel>
+            <TabPanel>
+              <Grid numItems={4} className="mt-2 mb-2">
+                <Col>
+                  <Text>Select Time Range</Text>
+                  <DateRangePicker
+                    enableSelect={true}
+                    value={dateValue}
+                    className="mr-2"
+                    onValueChange={(value) => {
+                      setDateValue(value);
+                      updateModelMetrics(
+                        selectedModelGroup,
+                        value.from,
+                        value.to
+                      ); // Call updateModelMetrics with the new date range
+                    }}
+                  />
+                </Col>
+                <Col className="ml-2">
+                  <Text>Select Model Group</Text>
+                  <Select
+                    defaultValue={
+                      selectedModelGroup
+                        ? selectedModelGroup
+                        : availableModelGroups[0]
+                    }
+                    value={
+                      selectedModelGroup
+                        ? selectedModelGroup
+                        : availableModelGroups[0]
+                    }
+                  >
+                    {availableModelGroups.map((group, idx) => (
+                      <SelectItem
+                        key={idx}
+                        value={group}
+                        onClick={() =>
+                          updateModelMetrics(group, dateValue.from, dateValue.to)
+                        }
+                      >
+                        {group}
+                      </SelectItem>
+                    ))}
+                  </Select>
+                </Col>
+                <Col>
+                <Popover
+                  trigger="click" content={FilterByContent}
+                  overlayStyle={{
+                    width: "20vw"
+                  }}
+                  >
+                <Button
+                icon={FilterIcon}
+                size="md"
+                variant="secondary"
+                className="mt-4 ml-2"
+                style={{
+                  border: "none",
+                }}
+                onClick={() => setShowAdvancedFilters(true)}
+                  >
+                </Button>      
+                </Popover>
+                </Col>
+
+                </Grid>
+
+
+              <Grid numItems={2}>
+                <Col>
+                  <Card className="mr-2 max-h-[400px] min-h-[400px]">
+                    <TabGroup>
+                      <TabList variant="line" defaultValue="1">
+                        <Tab value="1">Avg. Latency per Token</Tab>
+                        <Tab value="2">Time to first token</Tab>
+                      </TabList>
+                      <TabPanels>
+                        <TabPanel>
+                          <p className="text-gray-500 italic"> (seconds/token)</p>
+                          <Text className="text-gray-500 italic mt-1 mb-1">
+                            average Latency for successfull requests divided by
+                            the total tokens
+                          </Text>
+                          {modelMetrics && modelMetricsCategories && (
+                            <AreaChart
+                              title="Model Latency"
+                              className="h-72"
+                              data={modelMetrics}
+                              showLegend={false}
+                              index="date"
+                              categories={modelMetricsCategories}
+                              connectNulls={true}
+                              customTooltip={customTooltip}
+                            />
+                          )}
+                        </TabPanel>
+                        <TabPanel>
+                          <TimeToFirstToken
+                            modelMetrics={streamingModelMetrics}
+                            modelMetricsCategories={
+                              streamingModelMetricsCategories
+                            }
+                            customTooltip={customTooltip}
+                            premiumUser={premiumUser}
+                          />
+                        </TabPanel>
+                      </TabPanels>
+                    </TabGroup>
+                  </Card>
+                </Col>
+                <Col>
+                  <Card className="ml-2 max-h-[400px] min-h-[400px]  overflow-y-auto">
+                    <Table>
+                      <TableHead>
+                        <TableRow>
+                          <TableHeaderCell>Deployment</TableHeaderCell>
+                          <TableHeaderCell>Success Responses</TableHeaderCell>
+                          <TableHeaderCell>
+                            Slow Responses <p>Success Responses taking 600+s</p>
+                          </TableHeaderCell>
+                        </TableRow>
+                      </TableHead>
+                      <TableBody>
+                        {slowResponsesData.map((metric, idx) => (
+                          <TableRow key={idx}>
+                            <TableCell>{metric.api_base}</TableCell>
+                            <TableCell>{metric.total_count}</TableCell>
+                            <TableCell>{metric.slow_count}</TableCell>
+                          </TableRow>
+                        ))}
+                      </TableBody>
+                    </Table>
+                  </Card>
+                </Col>
+              </Grid>
+              <Grid numItems={1} className="gap-2 w-full mt-2">
+              <Card>
+
+              <Title>All Exceptions for {selectedModelGroup}</Title>
+               
+              <BarChart
+                      className="h-60"
+                      data={modelExceptions}
+                      index="model"
+                      categories={allExceptions}
+                      stack={true}
+                      
+                      yAxisWidth={30}
+                /> 
+                            </Card>
+            
+              </Grid>
+
+
+              <Grid numItems={1} className="gap-2 w-full mt-2">
+                  <Card>
+                  <Title>All Up Rate Limit Errors (429) for {selectedModelGroup}</Title>
+                  <Grid numItems={1}>
+                  <Col>
+                  <Subtitle style={{ fontSize: "15px", fontWeight: "normal", color: "#535452"}}>Num Rate Limit Errors { (globalExceptionData.sum_num_rate_limit_exceptions)}</Subtitle>
+                  <BarChart
+                      className="h-40"
+                      data={globalExceptionData.daily_data}
+                      index="date"
+                      colors={['rose']}
+                      categories={['num_rate_limit_exceptions']}
+                      onValueChange={(v) => console.log(v)}
+                    />
+                    </Col>
+                    <Col>
+
+                 
+
+                  </Col>
+
+                  </Grid>
+                  
+
+                  </Card>
+
+                  {
+                    premiumUser ? ( 
+                      <>
+                      {globalExceptionPerDeployment.map((globalActivity, index) => (
+                    <Card key={index}>
+                      <Title>{globalActivity.api_base ? globalActivity.api_base : "Unknown API Base"}</Title>
+                      <Grid numItems={1}>
+                        <Col>
+                          <Subtitle style={{ fontSize: "15px", fontWeight: "normal", color: "#535452"}}>Num Rate Limit Errors (429) {(globalActivity.sum_num_rate_limit_exceptions)}</Subtitle>
+                          <BarChart
+                            className="h-40"
+                            data={globalActivity.daily_data}
+                            index="date"
+                            colors={['rose']}
+                            categories={['num_rate_limit_exceptions']}
+                
+                            onValueChange={(v) => console.log(v)}
+                          />
+                          
+                        </Col>
+                      </Grid>
+                    </Card>
+                  ))}
+                      </>
+                    ) : 
+                    <>
+                    {globalExceptionPerDeployment && globalExceptionPerDeployment.length > 0 &&
+                      globalExceptionPerDeployment.slice(0, 1).map((globalActivity, index) => (
+                        <Card key={index}>
+                          <Title>✨ Rate Limit Errors by Deployment</Title>
+                          <p className="mb-2 text-gray-500 italic text-[12px]">Upgrade to see exceptions for all deployments</p>
+                          <Button variant="primary" className="mb-2">
+                            <a href="https://forms.gle/W3U4PZpJGFHWtHyA9" target="_blank">
+                              Get Free Trial
+                            </a>
+                          </Button>
+                          <Card>
+                          <Title>{globalActivity.api_base}</Title>
+                          <Grid numItems={1}>
+                            <Col>
+                              <Subtitle
+                                style={{
+                                  fontSize: "15px",
+                                  fontWeight: "normal",
+                                  color: "#535452",
+                                }}
+                              >
+                                Num Rate Limit Errors {(globalActivity.sum_num_rate_limit_exceptions)}
+                              </Subtitle>
+                              <BarChart
+                                  className="h-40"
+                                  data={globalActivity.daily_data}
+                                  index="date"
+                                  colors={['rose']}
+                                  categories={['num_rate_limit_exceptions']}
+                
+                                  onValueChange={(v) => console.log(v)}
+                                />
+                            </Col>
+                            
+                            
+                          </Grid>
+                          </Card>
+                        </Card>
+                      ))}
+                  </>
+                  }              
+                </Grid>
+                
+            </TabPanel>
             <TabPanel>
               <div className="flex items-center">
                 <Text>Filter by Public Model Name</Text>
@@ -1230,6 +1500,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
                 Save
               </Button>
             </TabPanel>
+            
           </TabPanels>
         </TabGroup>
       )}
diff --git a/ui/litellm-dashboard/src/components/model_dashboard/columns.tsx b/ui/litellm-dashboard/src/components/model_dashboard/columns.tsx
index 61048fa3b5..53f1aaa696 100644
--- a/ui/litellm-dashboard/src/components/model_dashboard/columns.tsx
+++ b/ui/litellm-dashboard/src/components/model_dashboard/columns.tsx
@@ -188,6 +188,22 @@ export const columns = (
       );
     },
   },
+  {
+    header: "Credentials",
+    accessorKey: "litellm_credential_name",
+    cell: ({ row }) => {
+      const model = row.original;
+      return model.litellm_params.litellm_credential_name ? (
+        <div className="overflow-hidden">
+          <Tooltip title={model.litellm_params.litellm_credential_name}>
+            {model.litellm_params.litellm_credential_name.slice(0, 7)}...
+          </Tooltip>
+        </div>
+      ) : (
+        "-"
+      );
+    },
+  },
   {
     header: "Status",
     accessorKey: "model_info.db_model",
diff --git a/ui/litellm-dashboard/src/components/model_dashboard/types.ts b/ui/litellm-dashboard/src/components/model_dashboard/types.ts
index 04ecc1f009..67802c456b 100644
--- a/ui/litellm-dashboard/src/components/model_dashboard/types.ts
+++ b/ui/litellm-dashboard/src/components/model_dashboard/types.ts
@@ -13,6 +13,7 @@ export interface LiteLLMParams {
   input_cost_per_token?: number;
   output_cost_per_token?: number;
   custom_llm_provider?: string;
+  litellm_credential_name?: string;
   [key: string]: any;
 }
 
@@ -41,4 +42,4 @@ export interface ModelDashboardProps {
   setModelData: (data: any) => void;
   premiumUser: boolean;
   teams: any[];
-} 
\ No newline at end of file
+}
diff --git a/ui/litellm-dashboard/src/components/model_metrics/time_to_first_token.tsx b/ui/litellm-dashboard/src/components/model_metrics/time_to_first_token.tsx
index 744b3d4a1e..3f1437e1c3 100644
--- a/ui/litellm-dashboard/src/components/model_metrics/time_to_first_token.tsx
+++ b/ui/litellm-dashboard/src/components/model_metrics/time_to_first_token.tsx
@@ -13,7 +13,7 @@ const TimeToFirstToken: React.FC<TimeToFirstTokenProps> = ({
   customTooltip,
   premiumUser,
 }) => {
-  return premiumUser ? (
+  return (
     <LineChart
       title="Time to First token (s)"
       className="h-72"
@@ -25,18 +25,6 @@ const TimeToFirstToken: React.FC<TimeToFirstTokenProps> = ({
       connectNulls={true}
       customTooltip={customTooltip}
     />
-  ) : (
-    <div>
-      <Callout title="✨ Enterprise Feature" color="teal" className="mt-2 mb-4">
-        Enterprise features are available for users with a specific license,
-        please contact LiteLLM to unlock this limitation.
-      </Callout>
-      <Button variant="primary">
-        <a href="https://forms.gle/W3U4PZpJGFHWtHyA9" target="_blank">
-          Get in touch
-        </a>
-      </Button>
-    </div>
   );
 };
 
diff --git a/ui/litellm-dashboard/src/components/networking.tsx b/ui/litellm-dashboard/src/components/networking.tsx
index 4673e064f5..46099906ac 100644
--- a/ui/litellm-dashboard/src/components/networking.tsx
+++ b/ui/litellm-dashboard/src/components/networking.tsx
@@ -36,6 +36,21 @@ export interface Organization {
   members: any[] | null;
 }
 
+export interface CredentialItem {
+  credential_name: string;
+  credential_values: object;
+  credential_info: {
+    custom_llm_provider?: string;
+    description?: string;
+    required?: boolean;
+  };
+}
+
+export interface CredentialsResponse {
+  credentials: CredentialItem[];
+}
+
+
 const baseUrl = "/"; // Assuming the base URL is the root
 
 
@@ -2527,6 +2542,158 @@ export const teamCreateCall = async (
   }
 };
 
+export const credentialCreateCall = async (
+  accessToken: string,
+  formValues: Record<string, any> // Assuming formValues is an object
+) => {
+  try {
+    console.log("Form Values in credentialCreateCall:", formValues); // Log the form values before making the API call
+    if (formValues.metadata) {
+      console.log("formValues.metadata:", formValues.metadata);
+      // if there's an exception JSON.parse, show it in the message
+      try {
+        formValues.metadata = JSON.parse(formValues.metadata);
+      } catch (error) {
+        throw new Error("Failed to parse metadata: " + error);
+      }
+    }
+
+    const url = proxyBaseUrl ? `${proxyBaseUrl}/credentials` : `/credentials`;
+    const response = await fetch(url, {
+      method: "POST",
+      headers: {
+        [globalLitellmHeaderName]: `Bearer ${accessToken}`,
+        "Content-Type": "application/json",
+      },
+      body: JSON.stringify({
+        ...formValues, // Include formValues in the request body
+      }),
+    });
+
+    if (!response.ok) {
+      const errorData = await response.text();
+      handleError(errorData);
+      console.error("Error response from the server:", errorData);
+      throw new Error("Network response was not ok");
+    }
+
+    const data = await response.json();
+    console.log("API Response:", data);
+    return data;
+    // Handle success - you might want to update some state or UI based on the created key
+  } catch (error) {
+    console.error("Failed to create key:", error);
+    throw error;
+  }
+};
+
+export const credentialListCall = async (
+  accessToken: String, 
+) => {
+  /**
+   * Get all available teams on proxy
+   */
+  try {
+    let url = proxyBaseUrl ? `${proxyBaseUrl}/credentials` : `/credentials`;
+    console.log("in credentialListCall");
+
+    const response = await fetch(url, {
+      method: "GET",
+      headers: {
+        [globalLitellmHeaderName]: `Bearer ${accessToken}`,
+        "Content-Type": "application/json",
+      },
+    });
+
+    if (!response.ok) {
+      const errorData = await response.text();
+      handleError(errorData);
+      throw new Error("Network response was not ok");
+    }
+
+    const data = await response.json();
+    console.log("/credentials API Response:", data);
+    return data;
+    // Handle success - you might want to update some state or UI based on the created key
+  } catch (error) {
+    console.error("Failed to create key:", error);
+    throw error;
+  }
+};
+
+export const credentialDeleteCall = async (accessToken: String, credentialName: String) => {
+  try {
+    const url = proxyBaseUrl ? `${proxyBaseUrl}/credentials/${credentialName}` : `/credentials/${credentialName}`;
+    console.log("in credentialDeleteCall:", credentialName);
+    const response = await fetch(url, {
+      method: "DELETE",
+      headers: {
+        [globalLitellmHeaderName]: `Bearer ${accessToken}`,
+        "Content-Type": "application/json",
+      },
+    });
+
+    if (!response.ok) {
+      const errorData = await response.text();
+      handleError(errorData);
+      throw new Error("Network response was not ok");
+    }
+    const data = await response.json();
+    console.log(data);
+    return data;
+    // Handle success - you might want to update some state or UI based on the created key
+  } catch (error) {
+    console.error("Failed to delete key:", error);
+    throw error;
+  }
+};
+
+export const credentialUpdateCall = async (
+  accessToken: string,
+  credentialName: string,
+  formValues: Record<string, any> // Assuming formValues is an object
+) => {
+  try {
+    console.log("Form Values in credentialUpdateCall:", formValues); // Log the form values before making the API call
+    if (formValues.metadata) {
+      console.log("formValues.metadata:", formValues.metadata);
+      // if there's an exception JSON.parse, show it in the message
+      try {
+        formValues.metadata = JSON.parse(formValues.metadata);
+      } catch (error) {
+        throw new Error("Failed to parse metadata: " + error);
+      }
+    }
+
+    const url = proxyBaseUrl ? `${proxyBaseUrl}/credentials/${credentialName}` : `/credentials/${credentialName}`;
+    const response = await fetch(url, {
+      method: "PUT",
+      headers: {
+        [globalLitellmHeaderName]: `Bearer ${accessToken}`,
+        "Content-Type": "application/json",
+      },
+      body: JSON.stringify({
+        ...formValues, // Include formValues in the request body
+      }),
+    });
+
+    if (!response.ok) {
+      const errorData = await response.text();
+      handleError(errorData);
+      console.error("Error response from the server:", errorData);
+      throw new Error("Network response was not ok");
+    }
+
+    const data = await response.json();
+    console.log("API Response:", data);
+    return data;
+    // Handle success - you might want to update some state or UI based on the created key
+  } catch (error) {
+    console.error("Failed to create key:", error);
+    throw error;
+  }
+};
+
 export const keyUpdateCall = async (
   accessToken: string,
   formValues: Record<string, any> // Assuming formValues is an object
diff --git a/ui/litellm-dashboard/src/components/team/team_info.tsx b/ui/litellm-dashboard/src/components/team/team_info.tsx
index d268f137f6..fd7f08210a 100644
--- a/ui/litellm-dashboard/src/components/team/team_info.tsx
+++ b/ui/litellm-dashboard/src/components/team/team_info.tsx
@@ -184,7 +184,7 @@ const TeamInfoView: React.FC<TeamInfoProps> = ({
         max_budget: values.max_budget,
         budget_duration: values.budget_duration,
         metadata: {
-          ...teamData?.team_info?.metadata,
+          ...values.metadata,
           guardrails: values.guardrails || []
         }
       };