diff --git a/.circleci/config.yml b/.circleci/config.yml index 0a12aa73b8..886e121f35 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -49,7 +49,7 @@ jobs: pip install opentelemetry-api==1.25.0 pip install opentelemetry-sdk==1.25.0 pip install opentelemetry-exporter-otlp==1.25.0 - pip install openai==1.66.1 + pip install openai==1.68.2 pip install prisma==0.11.0 pip install "detect_secrets==1.5.0" pip install "httpx==0.24.1" @@ -168,7 +168,7 @@ jobs: pip install opentelemetry-api==1.25.0 pip install opentelemetry-sdk==1.25.0 pip install opentelemetry-exporter-otlp==1.25.0 - pip install openai==1.66.1 + pip install openai==1.68.2 pip install prisma==0.11.0 pip install "detect_secrets==1.5.0" pip install "httpx==0.24.1" @@ -268,7 +268,7 @@ jobs: pip install opentelemetry-api==1.25.0 pip install opentelemetry-sdk==1.25.0 pip install opentelemetry-exporter-otlp==1.25.0 - pip install openai==1.66.1 + pip install openai==1.68.2 pip install prisma==0.11.0 pip install "detect_secrets==1.5.0" pip install "httpx==0.24.1" @@ -513,7 +513,7 @@ jobs: pip install opentelemetry-api==1.25.0 pip install opentelemetry-sdk==1.25.0 pip install opentelemetry-exporter-otlp==1.25.0 - pip install openai==1.66.1 + pip install openai==1.68.2 pip install prisma==0.11.0 pip install "detect_secrets==1.5.0" pip install "httpx==0.24.1" @@ -680,6 +680,50 @@ jobs: paths: - llm_translation_coverage.xml - llm_translation_coverage + mcp_testing: + docker: + - image: cimg/python:3.11 + auth: + username: ${DOCKERHUB_USERNAME} + password: ${DOCKERHUB_PASSWORD} + working_directory: ~/project + + steps: + - checkout + - run: + name: Install Dependencies + command: | + python -m pip install --upgrade pip + python -m pip install -r requirements.txt + pip install "pytest==7.3.1" + pip install "pytest-retry==1.6.3" + pip install "pytest-cov==5.0.0" + pip install "pytest-asyncio==0.21.1" + pip install "respx==0.21.1" + pip install "pydantic==2.7.2" + pip install "mcp==1.4.1" + # Run pytest and generate JUnit XML report + - run: + name: Run tests + command: | + pwd + ls + python -m pytest -vv tests/mcp_tests --cov=litellm --cov-report=xml -x -s -v --junitxml=test-results/junit.xml --durations=5 + no_output_timeout: 120m + - run: + name: Rename the coverage files + command: | + mv coverage.xml mcp_coverage.xml + mv .coverage mcp_coverage + + # Store test results + - store_test_results: + path: test-results + - persist_to_workspace: + root: . 
+ paths: + - mcp_coverage.xml + - mcp_coverage llm_responses_api_testing: docker: - image: cimg/python:3.11 @@ -744,6 +788,8 @@ jobs: pip install "pytest-asyncio==0.21.1" pip install "respx==0.21.1" pip install "hypercorn==0.17.3" + pip install "pydantic==2.7.2" + pip install "mcp==1.4.1" # Run pytest and generate JUnit XML report - run: name: Run tests @@ -1278,7 +1324,7 @@ jobs: pip install "aiodynamo==23.10.1" pip install "asyncio==3.4.3" pip install "PyGithub==1.59.1" - pip install "openai==1.66.1" + pip install "openai==1.68.2" - run: name: Install Grype command: | @@ -1353,7 +1399,7 @@ jobs: command: | pwd ls - python -m pytest -s -vv tests/*.py -x --junitxml=test-results/junit.xml --durations=5 --ignore=tests/otel_tests --ignore=tests/pass_through_tests --ignore=tests/proxy_admin_ui_tests --ignore=tests/load_tests --ignore=tests/llm_translation --ignore=tests/llm_responses_api_testing --ignore=tests/image_gen_tests --ignore=tests/pass_through_unit_tests + python -m pytest -s -vv tests/*.py -x --junitxml=test-results/junit.xml --durations=5 --ignore=tests/otel_tests --ignore=tests/pass_through_tests --ignore=tests/proxy_admin_ui_tests --ignore=tests/load_tests --ignore=tests/llm_translation --ignore=tests/llm_responses_api_testing --ignore=tests/mcp_tests --ignore=tests/image_gen_tests --ignore=tests/pass_through_unit_tests no_output_timeout: 120m # Store test results @@ -1414,7 +1460,7 @@ jobs: pip install "aiodynamo==23.10.1" pip install "asyncio==3.4.3" pip install "PyGithub==1.59.1" - pip install "openai==1.66.1" + pip install "openai==1.68.2" # Run pytest and generate JUnit XML report - run: name: Build Docker image @@ -1536,7 +1582,7 @@ jobs: pip install "aiodynamo==23.10.1" pip install "asyncio==3.4.3" pip install "PyGithub==1.59.1" - pip install "openai==1.66.1" + pip install "openai==1.68.2" - run: name: Build Docker image command: docker build -t my-app:latest -f ./docker/Dockerfile.database . @@ -1965,7 +2011,7 @@ jobs: pip install "pytest-asyncio==0.21.1" pip install "google-cloud-aiplatform==1.43.0" pip install aiohttp - pip install "openai==1.66.1" + pip install "openai==1.68.2" pip install "assemblyai==0.37.0" python -m pip install --upgrade pip pip install "pydantic==2.7.1" @@ -2112,7 +2158,7 @@ jobs: python -m venv venv . 
venv/bin/activate pip install coverage - coverage combine llm_translation_coverage llm_responses_api_coverage logging_coverage litellm_router_coverage local_testing_coverage litellm_assistants_api_coverage auth_ui_unit_tests_coverage langfuse_coverage caching_coverage litellm_proxy_unit_tests_coverage image_gen_coverage pass_through_unit_tests_coverage batches_coverage litellm_proxy_security_tests_coverage + coverage combine llm_translation_coverage llm_responses_api_coverage mcp_coverage logging_coverage litellm_router_coverage local_testing_coverage litellm_assistants_api_coverage auth_ui_unit_tests_coverage langfuse_coverage caching_coverage litellm_proxy_unit_tests_coverage image_gen_coverage pass_through_unit_tests_coverage batches_coverage litellm_proxy_security_tests_coverage coverage xml - codecov/upload: file: ./coverage.xml @@ -2241,7 +2287,7 @@ jobs: pip install "pytest-retry==1.6.3" pip install "pytest-asyncio==0.21.1" pip install aiohttp - pip install "openai==1.66.1" + pip install "openai==1.68.2" python -m pip install --upgrade pip pip install "pydantic==2.7.1" pip install "pytest==7.3.1" @@ -2473,6 +2519,12 @@ workflows: only: - main - /litellm_.*/ + - mcp_testing: + filters: + branches: + only: + - main + - /litellm_.*/ - llm_responses_api_testing: filters: branches: @@ -2518,6 +2570,7 @@ workflows: - upload-coverage: requires: - llm_translation_testing + - mcp_testing - llm_responses_api_testing - litellm_mapped_tests - batches_testing @@ -2577,6 +2630,7 @@ workflows: - load_testing - test_bad_database_url - llm_translation_testing + - mcp_testing - llm_responses_api_testing - litellm_mapped_tests - batches_testing diff --git a/.circleci/requirements.txt b/.circleci/requirements.txt index e63fb9dd9a..356a9840f5 100644 --- a/.circleci/requirements.txt +++ b/.circleci/requirements.txt @@ -1,5 +1,5 @@ # used by CI/CD testing -openai==1.66.1 +openai==1.68.2 python-dotenv tiktoken importlib_metadata diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index d50aefa8bb..6c887178d5 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -10,7 +10,7 @@ **Please complete all items before asking a LiteLLM maintainer to review your PR** -- [ ] I have Added testing in the `tests/litellm/` directory, **Adding at least 1 test is a hard requirement** - [see details](https://docs.litellm.ai/docs/extras/contributing_code) +- [ ] I have Added testing in the [`tests/litellm/`](https://github.com/BerriAI/litellm/tree/main/tests/litellm) directory, **Adding at least 1 test is a hard requirement** - [see details](https://docs.litellm.ai/docs/extras/contributing_code) - [ ] I have added a screenshot of my new test passing locally - [ ] My PR passes all unit tests on (`make test-unit`)[https://docs.litellm.ai/docs/extras/contributing_code] - [ ] My PR's scope is as isolated as possible, it only solves 1 specific problem diff --git a/docs/my-website/docs/mcp.md b/docs/my-website/docs/mcp.md index 42489477cf..9f3343e9cd 100644 --- a/docs/my-website/docs/mcp.md +++ b/docs/my-website/docs/mcp.md @@ -1,14 +1,291 @@ -import Image from '@theme/IdealImage'; import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; +import Image from '@theme/IdealImage'; + +# /mcp [BETA] - Model Context Protocol + +Use Model Context Protocol with LiteLLM + + + +
+  LiteLLM MCP Architecture: Use MCP tools with all LiteLLM supported models
-# /mcp Model Context Protocol [BETA] ## Overview -LiteLLM's MCP implementation allows you to define tools that can be called by any MCP compatible client. Define your `mcp_tools` with LiteLLM and all your clients can `list` and `call` available tools. +LiteLLM acts as an MCP bridge to utilize MCP tools with all LiteLLM supported models. LiteLLM offers the following features for using MCP: -## How it works +- **List** Available MCP Tools: OpenAI clients can view all available MCP tools + - `litellm.experimental_mcp_client.load_mcp_tools` to list all available MCP tools +- **Call** MCP Tools: OpenAI clients can call MCP tools + - `litellm.experimental_mcp_client.call_openai_tool` to call an OpenAI tool on an MCP server + + +## Usage + +### 1. List Available MCP Tools + +In this example we'll use `litellm.experimental_mcp_client.load_mcp_tools` to list all available MCP tools on any MCP server. This method can be used in two ways: + +- `format="mcp"` - (default) Return MCP tools + - Returns: `mcp.types.Tool` +- `format="openai"` - Return MCP tools converted to OpenAI API compatible tools. Allows use with OpenAI endpoints. + - Returns: `openai.types.chat.ChatCompletionToolParam` + + + +```python title="MCP Client List Tools" showLineNumbers +# Create server parameters for stdio connection +from mcp import ClientSession, StdioServerParameters +from mcp.client.stdio import stdio_client +import json +import os +import litellm +from litellm import experimental_mcp_client + + +server_params = StdioServerParameters( + command="python3", + # Make sure to update to the full absolute path to your mcp_server.py file + args=["./mcp_server.py"], +) + +async with stdio_client(server_params) as (read, write): + async with ClientSession(read, write) as session: + # Initialize the connection + await session.initialize() + + # Get tools + tools = await experimental_mcp_client.load_mcp_tools(session=session, format="openai") + print("MCP TOOLS: ", tools) + + messages = [{"role": "user", "content": "what's (3 + 5)"}] + llm_response = await litellm.acompletion( + model="gpt-4o", + api_key=os.getenv("OPENAI_API_KEY"), + messages=messages, + tools=tools, + ) + print("LLM RESPONSE: ", json.dumps(llm_response, indent=4, default=str)) +``` + + + + + +In this example we'll walk through how you can use the OpenAI SDK pointed to the LiteLLM proxy to call MCP tools.
The key difference here is we use the OpenAI SDK to make the LLM API request + +```python title="MCP Client List Tools" showLineNumbers +# Create server parameters for stdio connection +from mcp import ClientSession, StdioServerParameters +from mcp.client.stdio import stdio_client +import os +from openai import OpenAI +from litellm import experimental_mcp_client + +server_params = StdioServerParameters( + command="python3", + # Make sure to update to the full absolute path to your mcp_server.py file + args=["./mcp_server.py"], +) + +async with stdio_client(server_params) as (read, write): + async with ClientSession(read, write) as session: + # Initialize the connection + await session.initialize() + + # Get tools using litellm mcp client + tools = await experimental_mcp_client.load_mcp_tools(session=session, format="openai") + print("MCP TOOLS: ", tools) + + # Use OpenAI SDK pointed to LiteLLM proxy + client = OpenAI( + api_key="your-api-key", # Your LiteLLM proxy API key + base_url="http://localhost:4000" # Your LiteLLM proxy URL + ) + + messages = [{"role": "user", "content": "what's (3 + 5)"}] + llm_response = client.chat.completions.create( + model="gpt-4", + messages=messages, + tools=tools + ) + print("LLM RESPONSE: ", llm_response) +``` + + + + +### 2. List and Call MCP Tools + +In this example we'll use +- `litellm.experimental_mcp_client.load_mcp_tools` to list all available MCP tools on any MCP server +- `litellm.experimental_mcp_client.call_openai_tool` to call an OpenAI tool on an MCP server + +The first llm response returns a list of OpenAI tools. We take the first tool call from the LLM response and pass it to `litellm.experimental_mcp_client.call_openai_tool` to call the tool on the MCP server. + +#### How `litellm.experimental_mcp_client.call_openai_tool` works + +- Accepts an OpenAI Tool Call from the LLM response +- Converts the OpenAI Tool Call to an MCP Tool +- Calls the MCP Tool on the MCP server +- Returns the result of the MCP Tool call + + + + +```python title="MCP Client List and Call Tools" showLineNumbers +# Create server parameters for stdio connection +from mcp import ClientSession, StdioServerParameters +from mcp.client.stdio import stdio_client +import os +import litellm +from litellm import experimental_mcp_client + + +server_params = StdioServerParameters( + command="python3", + # Make sure to update to the full absolute path to your mcp_server.py file + args=["./mcp_server.py"], +) + +async with stdio_client(server_params) as (read, write): + async with ClientSession(read, write) as session: + # Initialize the connection + await session.initialize() + + # Get tools + tools = await experimental_mcp_client.load_mcp_tools(session=session, format="openai") + print("MCP TOOLS: ", tools) + + messages = [{"role": "user", "content": "what's (3 + 5)"}] + llm_response = await litellm.acompletion( + model="gpt-4o", + api_key=os.getenv("OPENAI_API_KEY"), + messages=messages, + tools=tools, + ) + print("LLM RESPONSE: ", json.dumps(llm_response, indent=4, default=str)) + + openai_tool = llm_response["choices"][0]["message"]["tool_calls"][0] + # Call the tool using MCP client + call_result = await experimental_mcp_client.call_openai_tool( + session=session, + openai_tool=openai_tool, + ) + print("MCP TOOL CALL RESULT: ", call_result) + + # send the tool result to the LLM + messages.append(llm_response["choices"][0]["message"]) + messages.append( + { + "role": "tool", + "content": str(call_result.content[0].text), + "tool_call_id": openai_tool["id"], + } + ) + 
print("final messages with tool result: ", messages) + llm_response = await litellm.acompletion( + model="gpt-4o", + api_key=os.getenv("OPENAI_API_KEY"), + messages=messages, + tools=tools, + ) + print( + "FINAL LLM RESPONSE: ", json.dumps(llm_response, indent=4, default=str) + ) +``` + + + + +In this example we'll walk through how you can use the OpenAI SDK pointed to the LiteLLM proxy to call MCP tools. The key difference here is we use the OpenAI SDK to make the LLM API request + +```python title="MCP Client with OpenAI SDK" showLineNumbers +# Create server parameters for stdio connection +from mcp import ClientSession, StdioServerParameters +from mcp.client.stdio import stdio_client +import os +from openai import OpenAI +from litellm import experimental_mcp_client + +server_params = StdioServerParameters( + command="python3", + # Make sure to update to the full absolute path to your mcp_server.py file + args=["./mcp_server.py"], +) + +async with stdio_client(server_params) as (read, write): + async with ClientSession(read, write) as session: + # Initialize the connection + await session.initialize() + + # Get tools using litellm mcp client + tools = await experimental_mcp_client.load_mcp_tools(session=session, format="openai") + print("MCP TOOLS: ", tools) + + # Use OpenAI SDK pointed to LiteLLM proxy + client = OpenAI( + api_key="your-api-key", # Your LiteLLM proxy API key + base_url="http://localhost:8000" # Your LiteLLM proxy URL + ) + + messages = [{"role": "user", "content": "what's (3 + 5)"}] + llm_response = client.chat.completions.create( + model="gpt-4", + messages=messages, + tools=tools + ) + print("LLM RESPONSE: ", llm_response) + + # Get the first tool call + tool_call = llm_response.choices[0].message.tool_calls[0] + + # Call the tool using MCP client + call_result = await experimental_mcp_client.call_openai_tool( + session=session, + openai_tool=tool_call.model_dump(), + ) + print("MCP TOOL CALL RESULT: ", call_result) + + # Send the tool result back to the LLM + messages.append(llm_response.choices[0].message.model_dump()) + messages.append({ + "role": "tool", + "content": str(call_result.content[0].text), + "tool_call_id": tool_call.id, + }) + + final_response = client.chat.completions.create( + model="gpt-4", + messages=messages, + tools=tools + ) + print("FINAL RESPONSE: ", final_response) +``` + + + + +## Upcoming Features + +:::info + +**This feature is not live as yet** this is a beta interface. Expect this to be live on litellm `v1.63.15` and above. + +::: + + +### Expose MCP tools on LiteLLM Proxy Server + +This allows you to define tools that can be called by any MCP compatible client. Define your mcp_tools with LiteLLM and all your clients can list and call available tools. + +#### How it works LiteLLM exposes the following MCP endpoints: @@ -25,9 +302,9 @@ When MCP clients connect to LiteLLM they can follow this workflow: 6. LiteLLM makes the tool calls to the appropriate handlers 7. LiteLLM returns the tool call results to the MCP client -## Quick Start +#### Usage -### 1. Define your tools on mcp_tools +#### 1. Define your tools on mcp_tools LiteLLM allows you to define your tools on the `mcp_tools` section in your config.yaml file. All tools listed here will be available to MCP clients (when they connect to LiteLLM and call `list_tools`). @@ -56,7 +333,7 @@ mcp_tools: handler: "mcp_tools.get_current_time" ``` -### 2. Define a handler for your tool +#### 2. Define a handler for your tool Create a new file called `mcp_tools.py` and add this code. 
The key method here is `get_current_time` which gets executed when the `get_current_time` tool is called. @@ -82,7 +359,7 @@ def get_current_time(format: str = "short"): return current_time.strftime('%H:%M') ``` -### 3. Start LiteLLM Gateway +#### 3. Start LiteLLM Gateway @@ -114,7 +391,7 @@ litellm --config config.yaml --detailed_debug -### 3. Make an LLM API request +#### 4. Make an LLM API request @@ -161,11 +438,11 @@ if __name__ == "__main__": ``` -## Specification for `mcp_tools` +### Specification for `mcp_tools` The `mcp_tools` section in your LiteLLM config defines tools that can be called by MCP-compatible clients. -### Tool Definition Format +#### Tool Definition Format ```yaml mcp_tools: @@ -175,14 +452,14 @@ mcp_tools: handler: string # Required: Path to the function that implements the tool ``` -### Field Details +#### Field Details - `name`: A unique identifier for the tool - `description`: A clear description of what the tool does, used by LLMs to determine when to call it - `input_schema`: JSON Schema object defining the expected input parameters - `handler`: String path to the Python function that implements the tool (e.g., "module.submodule.function_name") -### Example Tool Definition +#### Example Tool Definition ```yaml mcp_tools: diff --git a/docs/my-website/docs/pass_through/vertex_ai.md b/docs/my-website/docs/pass_through/vertex_ai.md index ce366af541..4918d889ed 100644 --- a/docs/my-website/docs/pass_through/vertex_ai.md +++ b/docs/my-website/docs/pass_through/vertex_ai.md @@ -15,6 +15,91 @@ Pass-through endpoints for Vertex AI - call provider-specific endpoint, in nativ Just replace `https://REGION-aiplatform.googleapis.com` with `LITELLM_PROXY_BASE_URL/vertex_ai` +LiteLLM supports 3 flows for calling Vertex AI endpoints via pass-through: + +1. **Specific Credentials**: Admin sets passthrough credentials for a specific project/region. + +2. **Default Credentials**: Admin sets default credentials. + +3. **Client-Side Credentials**: User can send client-side credentials through to Vertex AI (default behavior - if no default or mapped credentials are found, the request is passed through directly). + + +## Example Usage + + + + +```yaml +model_list: + - model_name: gemini-1.0-pro + litellm_params: + model: vertex_ai/gemini-1.0-pro + vertex_project: adroit-crow-413218 + vertex_region: us-central1 + vertex_credentials: /path/to/credentials.json + use_in_pass_through: true # 👈 KEY CHANGE +``` + + + + + + + +```yaml +default_vertex_config: + vertex_project: adroit-crow-413218 + vertex_region: us-central1 + vertex_credentials: /path/to/credentials.json +``` + + + +```bash +export DEFAULT_VERTEXAI_PROJECT="adroit-crow-413218" +export DEFAULT_VERTEXAI_LOCATION="us-central1" +export DEFAULT_GOOGLE_APPLICATION_CREDENTIALS="/path/to/credentials.json" +``` + + + + + + +Try Gemini 2.0 Flash (curl) + +``` +MODEL_ID="gemini-2.0-flash-001" +PROJECT_ID="YOUR_PROJECT_ID" +``` + +```bash +curl \ + -X POST \ + -H "Authorization: Bearer $(gcloud auth application-default print-access-token)" \ + -H "Content-Type: application/json" \ + "${LITELLM_PROXY_BASE_URL}/vertex_ai/v1/projects/${PROJECT_ID}/locations/us-central1/publishers/google/models/${MODEL_ID}:streamGenerateContent" -d \ + $'{ + "contents": { + "role": "user", + "parts": [ + { + "fileData": { + "mimeType": "image/png", + "fileUri": "gs://generativeai-downloads/images/scones.jpg" + } + }, + { + "text": "Describe this picture." 
+ } + ] + } + }' +``` + + + + #### **Example Usage** @@ -22,7 +107,7 @@ Just replace `https://REGION-aiplatform.googleapis.com` with `LITELLM_PROXY_BASE ```bash -curl http://localhost:4000/vertex_ai/publishers/google/models/gemini-1.0-pro:generateContent \ +curl http://localhost:4000/vertex_ai/vertex_ai/v1/projects/${PROJECT_ID}/locations/us-central1/publishers/google/models/${MODEL_ID}:generateContent \ -H "Content-Type: application/json" \ -H "x-litellm-api-key: Bearer sk-1234" \ -d '{ @@ -101,7 +186,7 @@ litellm Let's call the Google AI Studio token counting endpoint ```bash -curl http://localhost:4000/vertex-ai/publishers/google/models/gemini-1.0-pro:generateContent \ +curl http://localhost:4000/vertex-ai/v1/projects/${PROJECT_ID}/locations/us-central1/publishers/google/models/gemini-1.0-pro:generateContent \ -H "Content-Type: application/json" \ -H "Authorization: Bearer sk-1234" \ -d '{ @@ -140,7 +225,7 @@ LiteLLM Proxy Server supports two methods of authentication to Vertex AI: ```shell -curl http://localhost:4000/vertex_ai/publishers/google/models/gemini-1.5-flash-001:generateContent \ +curl http://localhost:4000/vertex_ai/v1/projects/${PROJECT_ID}/locations/us-central1/publishers/google/models/gemini-1.5-flash-001:generateContent \ -H "Content-Type: application/json" \ -H "x-litellm-api-key: Bearer sk-1234" \ -d '{"contents":[{"role": "user", "parts":[{"text": "hi"}]}]}' @@ -152,7 +237,7 @@ curl http://localhost:4000/vertex_ai/publishers/google/models/gemini-1.5-flash-0 ```shell -curl http://localhost:4000/vertex_ai/publishers/google/models/textembedding-gecko@001:predict \ +curl http://localhost:4000/vertex_ai/v1/projects/${PROJECT_ID}/locations/us-central1/publishers/google/models/textembedding-gecko@001:predict \ -H "Content-Type: application/json" \ -H "x-litellm-api-key: Bearer sk-1234" \ -d '{"instances":[{"content": "gm"}]}' @@ -162,7 +247,7 @@ curl http://localhost:4000/vertex_ai/publishers/google/models/textembedding-geck ### Imagen API ```shell -curl http://localhost:4000/vertex_ai/publishers/google/models/imagen-3.0-generate-001:predict \ +curl http://localhost:4000/vertex_ai/v1/projects/${PROJECT_ID}/locations/us-central1/publishers/google/models/imagen-3.0-generate-001:predict \ -H "Content-Type: application/json" \ -H "x-litellm-api-key: Bearer sk-1234" \ -d '{"instances":[{"prompt": "make an otter"}], "parameters": {"sampleCount": 1}}' @@ -172,7 +257,7 @@ curl http://localhost:4000/vertex_ai/publishers/google/models/imagen-3.0-generat ### Count Tokens API ```shell -curl http://localhost:4000/vertex_ai/publishers/google/models/gemini-1.5-flash-001:countTokens \ +curl http://localhost:4000/vertex_ai/v1/projects/${PROJECT_ID}/locations/us-central1/publishers/google/models/gemini-1.5-flash-001:countTokens \ -H "Content-Type: application/json" \ -H "x-litellm-api-key: Bearer sk-1234" \ -d '{"contents":[{"role": "user", "parts":[{"text": "hi"}]}]}' @@ -183,7 +268,7 @@ Create Fine Tuning Job ```shell -curl http://localhost:4000/vertex_ai/tuningJobs \ +curl http://localhost:4000/vertex_ai/v1/projects/${PROJECT_ID}/locations/us-central1/publishers/google/models/gemini-1.5-flash-001:tuningJobs \ -H "Content-Type: application/json" \ -H "x-litellm-api-key: Bearer sk-1234" \ -d '{ @@ -243,7 +328,7 @@ Expected Response ```bash -curl http://localhost:4000/vertex_ai/publishers/google/models/gemini-1.0-pro:generateContent \ +curl http://localhost:4000/vertex_ai/v1/projects/${PROJECT_ID}/locations/us-central1/publishers/google/models/gemini-1.0-pro:generateContent \ -H 
"Content-Type: application/json" \ -H "x-litellm-api-key: Bearer sk-1234" \ -d '{ @@ -268,7 +353,7 @@ tags: ["vertex-js-sdk", "pass-through-endpoint"] ```bash -curl http://localhost:4000/vertex-ai/publishers/google/models/gemini-1.0-pro:generateContent \ +curl http://localhost:4000/vertex_ai/v1/projects/${PROJECT_ID}/locations/us-central1/publishers/google/models/gemini-1.0-pro:generateContent \ -H "Content-Type: application/json" \ -H "x-litellm-api-key: Bearer sk-1234" \ -H "tags: vertex-js-sdk,pass-through-endpoint" \ diff --git a/docs/my-website/docs/providers/predibase.md b/docs/my-website/docs/providers/predibase.md index 31713aef1e..9f25309c19 100644 --- a/docs/my-website/docs/providers/predibase.md +++ b/docs/my-website/docs/providers/predibase.md @@ -230,7 +230,7 @@ response = completion( model="predibase/llama-3-8b-instruct", messages = [{ "content": "Hello, how are you?","role": "user"}], adapter_id="my_repo/3", - adapter_soruce="pbase", + adapter_source="pbase", ) ``` diff --git a/docs/my-website/docs/proxy/image_handling.md b/docs/my-website/docs/proxy/image_handling.md new file mode 100644 index 0000000000..300ab0bc38 --- /dev/null +++ b/docs/my-website/docs/proxy/image_handling.md @@ -0,0 +1,21 @@ +import Image from '@theme/IdealImage'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# Image URL Handling + + + +Some LLM API's don't support url's for images, but do support base-64 strings. + +For those, LiteLLM will: + +1. Detect a URL being passed +2. Check if the LLM API supports a URL +3. Else, will download the base64 +4. Send the provider a base64 string. + + +LiteLLM also caches this result, in-memory to reduce latency for subsequent calls. + +The limit for an in-memory cache is 1MB. \ No newline at end of file diff --git a/docs/my-website/docs/proxy/release_cycle.md b/docs/my-website/docs/proxy/release_cycle.md index 947a4ae6b3..c5782087f2 100644 --- a/docs/my-website/docs/proxy/release_cycle.md +++ b/docs/my-website/docs/proxy/release_cycle.md @@ -4,9 +4,17 @@ Litellm Proxy has the following release cycle: - `v1.x.x-nightly`: These are releases which pass ci/cd. - `v1.x.x.rc`: These are releases which pass ci/cd + [manual review](https://github.com/BerriAI/litellm/discussions/8495#discussioncomment-12180711). -- `v1.x.x` OR `v1.x.x-stable`: These are releases which pass ci/cd + manual review + 3 days of production testing. +- `v1.x.x:main-stable`: These are releases which pass ci/cd + manual review + 3 days of production testing. -In production, we recommend using the latest `v1.x.x` release. +In production, we recommend using the latest `v1.x.x:main-stable` release. -Follow our release notes [here](https://github.com/BerriAI/litellm/releases). \ No newline at end of file +Follow our release notes [here](https://github.com/BerriAI/litellm/releases). + + +## FAQ + +### Is there a release schedule for LiteLLM stable release? 
+ +Stable releases come out every week (typically Sunday) + diff --git a/docs/my-website/img/image_handling.png b/docs/my-website/img/image_handling.png new file mode 100644 index 0000000000..bd56206911 Binary files /dev/null and b/docs/my-website/img/image_handling.png differ diff --git a/docs/my-website/img/litellm_mcp.png b/docs/my-website/img/litellm_mcp.png new file mode 100644 index 0000000000..cef822eeb2 Binary files /dev/null and b/docs/my-website/img/litellm_mcp.png differ diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index baae2dfe6d..bff3ad41a2 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -53,7 +53,7 @@ const sidebars = { { type: "category", label: "Architecture", - items: ["proxy/architecture", "proxy/db_info", "router_architecture", "proxy/user_management_heirarchy", "proxy/jwt_auth_arch"], + items: ["proxy/architecture", "proxy/db_info", "router_architecture", "proxy/user_management_heirarchy", "proxy/jwt_auth_arch", "proxy/image_handling"], }, { type: "link", @@ -293,6 +293,7 @@ const sidebars = { "text_completion", "embedding/supported_embedding", "anthropic_unified", + "mcp", { type: "category", label: "/images", diff --git a/litellm/__init__.py b/litellm/__init__.py index 762a058c7e..25da650440 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -2,7 +2,7 @@ import warnings warnings.filterwarnings("ignore", message=".*conflict with protected namespace.*") -### INIT VARIABLES ######### +### INIT VARIABLES ########## import threading import os from typing import Callable, List, Optional, Dict, Union, Any, Literal, get_args diff --git a/litellm/caching/in_memory_cache.py b/litellm/caching/in_memory_cache.py index 9fca969226..5e09fe845f 100644 --- a/litellm/caching/in_memory_cache.py +++ b/litellm/caching/in_memory_cache.py @@ -9,9 +9,13 @@ Has 4 methods: """ import json +import sys import time -from typing import List, Optional +from typing import Any, List, Optional +from pydantic import BaseModel + +from ..constants import MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB from .base_cache import BaseCache @@ -22,6 +26,7 @@ class InMemoryCache(BaseCache): default_ttl: Optional[ int ] = 600, # default ttl is 10 minutes. At maximum litellm rate limiting logic requires objects to be in memory for 1 minute + max_size_per_item: Optional[int] = 1024, # 1MB = 1024KB ): """ max_size_in_memory [int]: Maximum number of items in cache. done to prevent memory leaks. 
Use 200 items as a default @@ -30,11 +35,53 @@ class InMemoryCache(BaseCache): max_size_in_memory or 200 ) # set an upper bound of 200 items in-memory self.default_ttl = default_ttl or 600 + self.max_size_per_item = ( + max_size_per_item or MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB + ) # 1MB = 1024KB # in-memory cache self.cache_dict: dict = {} self.ttl_dict: dict = {} + def check_value_size(self, value: Any): + """ + Check if value size exceeds max_size_per_item (1MB) + Returns True if value size is acceptable, False otherwise + """ + try: + # Fast path for common primitive types that are typically small + if ( + isinstance(value, (bool, int, float, str)) + and len(str(value)) < self.max_size_per_item * 512 + ): # Conservative estimate + return True + + # Direct size check for bytes objects + if isinstance(value, bytes): + return sys.getsizeof(value) / 1024 <= self.max_size_per_item + + # Handle special types without full conversion when possible + if hasattr(value, "__sizeof__"): # Use __sizeof__ if available + size = value.__sizeof__() / 1024 + return size <= self.max_size_per_item + + # Fallback for complex types + if isinstance(value, BaseModel) and hasattr( + value, "model_dump" + ): # Pydantic v2 + value = value.model_dump() + elif hasattr(value, "isoformat"): # datetime objects + return True # datetime strings are always small + + # Only convert to JSON if absolutely necessary + if not isinstance(value, (str, bytes)): + value = json.dumps(value, default=str) + + return sys.getsizeof(value) / 1024 <= self.max_size_per_item + + except Exception: + return False + def evict_cache(self): """ Eviction policy: @@ -61,6 +108,8 @@ class InMemoryCache(BaseCache): if len(self.cache_dict) >= self.max_size_in_memory: # only evict when cache is full self.evict_cache() + if not self.check_value_size(value): + return self.cache_dict[key] = value if "ttl" in kwargs and kwargs["ttl"] is not None: diff --git a/litellm/constants.py b/litellm/constants.py index eb59858d43..da66f897c9 100644 --- a/litellm/constants.py +++ b/litellm/constants.py @@ -14,6 +14,7 @@ DEFAULT_REPLICATE_POLLING_DELAY_SECONDS = 1 DEFAULT_IMAGE_TOKEN_COUNT = 250 DEFAULT_IMAGE_WIDTH = 300 DEFAULT_IMAGE_HEIGHT = 300 +MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB = 1024 # 1MB = 1024KB SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000 # Minimum number of requests to consider "reasonable traffic". Used for single-deployment cooldown logic. #### RELIABILITY #### REPEATED_STREAMING_CHUNK_LIMIT = 100 # catch if model starts looping the same chunk while streaming. Uses high default to prevent false positives. diff --git a/litellm/experimental_mcp_client/Readme.md b/litellm/experimental_mcp_client/Readme.md new file mode 100644 index 0000000000..4fbd624369 --- /dev/null +++ b/litellm/experimental_mcp_client/Readme.md @@ -0,0 +1,6 @@ +# LiteLLM MCP Client + +LiteLLM MCP Client is a client that allows you to use MCP tools with LiteLLM. 
+ + + diff --git a/litellm/experimental_mcp_client/__init__.py b/litellm/experimental_mcp_client/__init__.py new file mode 100644 index 0000000000..7110d5375e --- /dev/null +++ b/litellm/experimental_mcp_client/__init__.py @@ -0,0 +1,3 @@ +from .tools import call_openai_tool, load_mcp_tools + +__all__ = ["load_mcp_tools", "call_openai_tool"] diff --git a/litellm/experimental_mcp_client/client.py b/litellm/experimental_mcp_client/client.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/litellm/experimental_mcp_client/tools.py b/litellm/experimental_mcp_client/tools.py new file mode 100644 index 0000000000..f4ebbf4af4 --- /dev/null +++ b/litellm/experimental_mcp_client/tools.py @@ -0,0 +1,109 @@ +import json +from typing import List, Literal, Union + +from mcp import ClientSession +from mcp.types import CallToolRequestParams as MCPCallToolRequestParams +from mcp.types import CallToolResult as MCPCallToolResult +from mcp.types import Tool as MCPTool +from openai.types.chat import ChatCompletionToolParam +from openai.types.shared_params.function_definition import FunctionDefinition + +from litellm.types.utils import ChatCompletionMessageToolCall + + +######################################################## +# List MCP Tool functions +######################################################## +def transform_mcp_tool_to_openai_tool(mcp_tool: MCPTool) -> ChatCompletionToolParam: + """Convert an MCP tool to an OpenAI tool.""" + return ChatCompletionToolParam( + type="function", + function=FunctionDefinition( + name=mcp_tool.name, + description=mcp_tool.description or "", + parameters=mcp_tool.inputSchema, + strict=False, + ), + ) + + +async def load_mcp_tools( + session: ClientSession, format: Literal["mcp", "openai"] = "mcp" +) -> Union[List[MCPTool], List[ChatCompletionToolParam]]: + """ + Load all available MCP tools + + Args: + session: The MCP session to use + format: The format to convert the tools to + By default, the tools are returned in MCP format. + + If format is set to "openai", the tools are converted to OpenAI API compatible tools. 
+ """ + tools = await session.list_tools() + if format == "openai": + return [ + transform_mcp_tool_to_openai_tool(mcp_tool=tool) for tool in tools.tools + ] + return tools.tools + + +######################################################## +# Call MCP Tool functions +######################################################## + + +async def call_mcp_tool( + session: ClientSession, + call_tool_request_params: MCPCallToolRequestParams, +) -> MCPCallToolResult: + """Call an MCP tool.""" + tool_result = await session.call_tool( + name=call_tool_request_params.name, + arguments=call_tool_request_params.arguments, + ) + return tool_result + + +def _get_function_arguments(function: FunctionDefinition) -> dict: + """Helper to safely get and parse function arguments.""" + arguments = function.get("arguments", {}) + if isinstance(arguments, str): + try: + arguments = json.loads(arguments) + except json.JSONDecodeError: + arguments = {} + return arguments if isinstance(arguments, dict) else {} + + +def _transform_openai_tool_call_to_mcp_tool_call_request( + openai_tool: ChatCompletionMessageToolCall, +) -> MCPCallToolRequestParams: + """Convert an OpenAI ChatCompletionMessageToolCall to an MCP CallToolRequestParams.""" + function = openai_tool["function"] + return MCPCallToolRequestParams( + name=function["name"], + arguments=_get_function_arguments(function), + ) + + +async def call_openai_tool( + session: ClientSession, + openai_tool: ChatCompletionMessageToolCall, +) -> MCPCallToolResult: + """ + Call an OpenAI tool using MCP client. + + Args: + session: The MCP session to use + openai_tool: The OpenAI tool to call. You can get this from the `choices[0].message.tool_calls[0]` of the response from the OpenAI API. + Returns: + The result of the MCP tool call. + """ + mcp_tool_call_request_params = _transform_openai_tool_call_to_mcp_tool_call_request( + openai_tool=openai_tool, + ) + return await call_mcp_tool( + session=session, + call_tool_request_params=mcp_tool_call_request_params, + ) diff --git a/litellm/litellm_core_utils/llm_response_utils/convert_dict_to_response.py b/litellm/litellm_core_utils/llm_response_utils/convert_dict_to_response.py index ebb1032a19..d33af2a477 100644 --- a/litellm/litellm_core_utils/llm_response_utils/convert_dict_to_response.py +++ b/litellm/litellm_core_utils/llm_response_utils/convert_dict_to_response.py @@ -494,6 +494,7 @@ def convert_to_model_response_object( # noqa: PLR0915 provider_specific_fields=provider_specific_fields, reasoning_content=reasoning_content, thinking_blocks=thinking_blocks, + annotations=choice["message"].get("annotations", None), ) finish_reason = choice.get("finish_reason", None) if finish_reason is None: diff --git a/litellm/litellm_core_utils/model_param_helper.py b/litellm/litellm_core_utils/model_param_helper.py index 09a2c15a77..d792ede282 100644 --- a/litellm/litellm_core_utils/model_param_helper.py +++ b/litellm/litellm_core_utils/model_param_helper.py @@ -1,6 +1,5 @@ from typing import Set -from openai.types.audio.transcription_create_params import TranscriptionCreateParams from openai.types.chat.completion_create_params import ( CompletionCreateParamsNonStreaming, CompletionCreateParamsStreaming, @@ -13,6 +12,7 @@ from openai.types.completion_create_params import ( ) from openai.types.embedding_create_params import EmbeddingCreateParams +from litellm._logging import verbose_logger from litellm.types.rerank import RerankRequest @@ -84,8 +84,10 @@ class ModelParamHelper: This follows the OpenAI API Spec """ all_chat_completion_kwargs 
= set( - CompletionCreateParamsNonStreaming.__annotations__.keys() - ).union(set(CompletionCreateParamsStreaming.__annotations__.keys())) + getattr(CompletionCreateParamsNonStreaming, "__annotations__", {}).keys() + ).union( + set(getattr(CompletionCreateParamsStreaming, "__annotations__", {}).keys()) + ) return all_chat_completion_kwargs @staticmethod @@ -96,8 +98,16 @@ class ModelParamHelper: This follows the OpenAI API Spec """ all_text_completion_kwargs = set( - TextCompletionCreateParamsNonStreaming.__annotations__.keys() - ).union(set(TextCompletionCreateParamsStreaming.__annotations__.keys())) + getattr( + TextCompletionCreateParamsNonStreaming, "__annotations__", {} + ).keys() + ).union( + set( + getattr( + TextCompletionCreateParamsStreaming, "__annotations__", {} + ).keys() + ) + ) return all_text_completion_kwargs @staticmethod @@ -114,7 +124,7 @@ class ModelParamHelper: This follows the OpenAI API Spec """ - return set(EmbeddingCreateParams.__annotations__.keys()) + return set(getattr(EmbeddingCreateParams, "__annotations__", {}).keys()) @staticmethod def _get_litellm_supported_transcription_kwargs() -> Set[str]: @@ -123,7 +133,19 @@ class ModelParamHelper: This follows the OpenAI API Spec """ - return set(TranscriptionCreateParams.__annotations__.keys()) + try: + from openai.types.audio.transcription_create_params import ( + TranscriptionCreateParamsNonStreaming, + TranscriptionCreateParamsStreaming, + ) + non_streaming_kwargs = set(getattr(TranscriptionCreateParamsNonStreaming, "__annotations__", {}).keys()) + streaming_kwargs = set(getattr(TranscriptionCreateParamsStreaming, "__annotations__", {}).keys()) + + all_transcription_kwargs = non_streaming_kwargs.union(streaming_kwargs) + return all_transcription_kwargs + except Exception as e: + verbose_logger.warning("Error getting transcription kwargs %s", str(e)) + return set() @staticmethod def _get_exclude_kwargs() -> Set[str]: diff --git a/litellm/litellm_core_utils/streaming_handler.py b/litellm/litellm_core_utils/streaming_handler.py index 56e64d1859..a11e5af12b 100644 --- a/litellm/litellm_core_utils/streaming_handler.py +++ b/litellm/litellm_core_utils/streaming_handler.py @@ -799,6 +799,10 @@ class CustomStreamWrapper: "provider_specific_fields" in response_obj and response_obj["provider_specific_fields"] is not None ) + or ( + "annotations" in model_response.choices[0].delta + and model_response.choices[0].delta.annotations is not None + ) ): return True else: @@ -939,7 +943,6 @@ class CustomStreamWrapper: and model_response.choices[0].delta.audio is not None ): return model_response - else: if hasattr(model_response, "usage"): self.chunks.append(model_response) diff --git a/litellm/llms/anthropic/chat/transformation.py b/litellm/llms/anthropic/chat/transformation.py index 383c1cd3e5..1a77c453f4 100644 --- a/litellm/llms/anthropic/chat/transformation.py +++ b/litellm/llms/anthropic/chat/transformation.py @@ -387,7 +387,7 @@ class AnthropicConfig(BaseConfig): _input_schema["additionalProperties"] = True _input_schema["properties"] = {} else: - _input_schema["properties"] = {"values": json_schema} + _input_schema.update(cast(AnthropicInputSchema, json_schema)) _tool = AnthropicMessagesTool( name=RESPONSE_FORMAT_TOOL_NAME, input_schema=_input_schema diff --git a/litellm/llms/base_llm/responses/transformation.py b/litellm/llms/base_llm/responses/transformation.py index c41d63842b..29555c55da 100644 --- a/litellm/llms/base_llm/responses/transformation.py +++ b/litellm/llms/base_llm/responses/transformation.py @@ -7,7 +7,6 
@@ import httpx from litellm.types.llms.openai import ( ResponseInputParam, ResponsesAPIOptionalRequestParams, - ResponsesAPIRequestParams, ResponsesAPIResponse, ResponsesAPIStreamingResponse, ) @@ -97,7 +96,7 @@ class BaseResponsesAPIConfig(ABC): response_api_optional_request_params: Dict, litellm_params: GenericLiteLLMParams, headers: dict, - ) -> ResponsesAPIRequestParams: + ) -> Dict: pass @abstractmethod @@ -131,3 +130,12 @@ class BaseResponsesAPIConfig(ABC): message=error_message, headers=headers, ) + + def should_fake_stream( + self, + model: Optional[str], + stream: Optional[bool], + custom_llm_provider: Optional[str] = None, + ) -> bool: + """Returns True if litellm should fake a stream for the given model and stream value""" + return False diff --git a/litellm/llms/custom_httpx/llm_http_handler.py b/litellm/llms/custom_httpx/llm_http_handler.py index 01fe36acda..00caf55207 100644 --- a/litellm/llms/custom_httpx/llm_http_handler.py +++ b/litellm/llms/custom_httpx/llm_http_handler.py @@ -20,6 +20,7 @@ from litellm.llms.custom_httpx.http_handler import ( ) from litellm.responses.streaming_iterator import ( BaseResponsesAPIStreamingIterator, + MockResponsesAPIStreamingIterator, ResponsesAPIStreamingIterator, SyncResponsesAPIStreamingIterator, ) @@ -978,6 +979,7 @@ class BaseLLMHTTPHandler: timeout: Optional[Union[float, httpx.Timeout]] = None, client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, _is_async: bool = False, + fake_stream: bool = False, ) -> Union[ ResponsesAPIResponse, BaseResponsesAPIStreamingIterator, @@ -1003,6 +1005,7 @@ class BaseLLMHTTPHandler: extra_body=extra_body, timeout=timeout, client=client if isinstance(client, AsyncHTTPHandler) else None, + fake_stream=fake_stream, ) if client is None or not isinstance(client, HTTPHandler): @@ -1051,14 +1054,27 @@ class BaseLLMHTTPHandler: try: if stream: # For streaming, use stream=True in the request + if fake_stream is True: + stream, data = self._prepare_fake_stream_request( + stream=stream, + data=data, + fake_stream=fake_stream, + ) response = sync_httpx_client.post( url=api_base, headers=headers, data=json.dumps(data), timeout=timeout or response_api_optional_request_params.get("timeout"), - stream=True, + stream=stream, ) + if fake_stream is True: + return MockResponsesAPIStreamingIterator( + response=response, + model=model, + logging_obj=logging_obj, + responses_api_provider_config=responses_api_provider_config, + ) return SyncResponsesAPIStreamingIterator( response=response, @@ -1100,6 +1116,7 @@ class BaseLLMHTTPHandler: extra_body: Optional[Dict[str, Any]] = None, timeout: Optional[Union[float, httpx.Timeout]] = None, client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, + fake_stream: bool = False, ) -> Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]: """ Async version of the responses API handler. 
@@ -1145,22 +1162,36 @@ class BaseLLMHTTPHandler: "headers": headers, }, ) - # Check if streaming is requested stream = response_api_optional_request_params.get("stream", False) try: if stream: # For streaming, we need to use stream=True in the request + if fake_stream is True: + stream, data = self._prepare_fake_stream_request( + stream=stream, + data=data, + fake_stream=fake_stream, + ) + response = await async_httpx_client.post( url=api_base, headers=headers, data=json.dumps(data), timeout=timeout or response_api_optional_request_params.get("timeout"), - stream=True, + stream=stream, ) + if fake_stream is True: + return MockResponsesAPIStreamingIterator( + response=response, + model=model, + logging_obj=logging_obj, + responses_api_provider_config=responses_api_provider_config, + ) + # Return the streaming iterator return ResponsesAPIStreamingIterator( response=response, @@ -1177,6 +1208,7 @@ class BaseLLMHTTPHandler: timeout=timeout or response_api_optional_request_params.get("timeout"), ) + except Exception as e: raise self._handle_error( e=e, @@ -1189,6 +1221,21 @@ class BaseLLMHTTPHandler: logging_obj=logging_obj, ) + def _prepare_fake_stream_request( + self, + stream: bool, + data: dict, + fake_stream: bool, + ) -> Tuple[bool, dict]: + """ + Handles preparing a request when `fake_stream` is True. + """ + if fake_stream is True: + stream = False + data.pop("stream", None) + return stream, data + return stream, data + def _handle_error( self, e: Exception, diff --git a/litellm/llms/openai/responses/transformation.py b/litellm/llms/openai/responses/transformation.py index ce4052dc19..e062c0c9fa 100644 --- a/litellm/llms/openai/responses/transformation.py +++ b/litellm/llms/openai/responses/transformation.py @@ -65,10 +65,12 @@ class OpenAIResponsesAPIConfig(BaseResponsesAPIConfig): response_api_optional_request_params: Dict, litellm_params: GenericLiteLLMParams, headers: dict, - ) -> ResponsesAPIRequestParams: + ) -> Dict: """No transform applied since inputs are in OpenAI spec already""" - return ResponsesAPIRequestParams( - model=model, input=input, **response_api_optional_request_params + return dict( + ResponsesAPIRequestParams( + model=model, input=input, **response_api_optional_request_params + ) ) def transform_response_api_response( @@ -188,3 +190,27 @@ class OpenAIResponsesAPIConfig(BaseResponsesAPIConfig): raise ValueError(f"Unknown event type: {event_type}") return model_class + + def should_fake_stream( + self, + model: Optional[str], + stream: Optional[bool], + custom_llm_provider: Optional[str] = None, + ) -> bool: + if stream is not True: + return False + if model is not None: + try: + if ( + litellm.utils.supports_native_streaming( + model=model, + custom_llm_provider=custom_llm_provider, + ) + is False + ): + return True + except Exception as e: + verbose_logger.debug( + f"Error getting model info in OpenAIResponsesAPIConfig: {e}" + ) + return False diff --git a/litellm/llms/sagemaker/chat/handler.py b/litellm/llms/sagemaker/chat/handler.py index 3a90a15093..c827a8a5f7 100644 --- a/litellm/llms/sagemaker/chat/handler.py +++ b/litellm/llms/sagemaker/chat/handler.py @@ -5,6 +5,7 @@ from typing import Callable, Optional, Union import httpx from litellm.llms.bedrock.base_aws_llm import BaseAWSLLM +from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler from litellm.utils import ModelResponse, get_secret from ..common_utils import AWSEventStreamDecoder @@ -125,6 +126,7 @@ class SagemakerChatHandler(BaseAWSLLM): logger_fn=None, acompletion: bool = 
False, headers: dict = {}, + client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, ): # pop streaming if it's in the optional params as 'stream' raises an error with sagemaker @@ -173,4 +175,5 @@ class SagemakerChatHandler(BaseAWSLLM): custom_endpoint=True, custom_llm_provider="sagemaker_chat", streaming_decoder=custom_stream_decoder, # type: ignore + client=client, ) diff --git a/litellm/llms/vertex_ai/common_utils.py b/litellm/llms/vertex_ai/common_utils.py index f7149c349a..a3f91fbacc 100644 --- a/litellm/llms/vertex_ai/common_utils.py +++ b/litellm/llms/vertex_ai/common_utils.py @@ -1,3 +1,4 @@ +import re from typing import Dict, List, Literal, Optional, Tuple, Union import httpx @@ -280,3 +281,81 @@ def _convert_vertex_datetime_to_openai_datetime(vertex_datetime: str) -> int: dt = datetime.strptime(vertex_datetime, "%Y-%m-%dT%H:%M:%S.%fZ") # Convert to Unix timestamp (seconds since epoch) return int(dt.timestamp()) + + +def get_vertex_project_id_from_url(url: str) -> Optional[str]: + """ + Get the vertex project id from the url + + `https://${LOCATION}-aiplatform.googleapis.com/v1/projects/${PROJECT_ID}/locations/${LOCATION}/publishers/google/models/${MODEL_ID}:streamGenerateContent` + """ + match = re.search(r"/projects/([^/]+)", url) + return match.group(1) if match else None + + +def get_vertex_location_from_url(url: str) -> Optional[str]: + """ + Get the vertex location from the url + + `https://${LOCATION}-aiplatform.googleapis.com/v1/projects/${PROJECT_ID}/locations/${LOCATION}/publishers/google/models/${MODEL_ID}:streamGenerateContent` + """ + match = re.search(r"/locations/([^/]+)", url) + return match.group(1) if match else None + + +def replace_project_and_location_in_route( + requested_route: str, vertex_project: str, vertex_location: str +) -> str: + """ + Replace project and location values in the route with the provided values + """ + # Replace project and location values while keeping route structure + modified_route = re.sub( + r"/projects/[^/]+/locations/[^/]+/", + f"/projects/{vertex_project}/locations/{vertex_location}/", + requested_route, + ) + return modified_route + + +def construct_target_url( + base_url: str, + requested_route: str, + vertex_location: Optional[str], + vertex_project: Optional[str], +) -> httpx.URL: + """ + Allow user to specify their own project id / location. + + If missing, use defaults + + Handle cachedContent scenario - https://github.com/BerriAI/litellm/issues/5460 + + Constructed Url: + POST https://LOCATION-aiplatform.googleapis.com/{version}/projects/PROJECT_ID/locations/LOCATION/cachedContents + """ + new_base_url = httpx.URL(base_url) + if "locations" in requested_route: # contains the target project id + location + if vertex_project and vertex_location: + requested_route = replace_project_and_location_in_route( + requested_route, vertex_project, vertex_location + ) + return new_base_url.copy_with(path=requested_route) + + """ + - Add endpoint version (e.g. 
v1beta for cachedContent, v1 for rest) + - Add default project id + - Add default location + """ + vertex_version: Literal["v1", "v1beta1"] = "v1" + if "cachedContent" in requested_route: + vertex_version = "v1beta1" + + base_requested_route = "{}/projects/{}/locations/{}".format( + vertex_version, vertex_project, vertex_location + ) + + updated_requested_route = "/" + base_requested_route + requested_route + + updated_url = new_base_url.copy_with(path=updated_requested_route) + return updated_url diff --git a/litellm/main.py b/litellm/main.py index 6cc1057bb4..1826f2df78 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -2604,6 +2604,7 @@ def completion( # type: ignore # noqa: PLR0915 encoding=encoding, logging_obj=logging, acompletion=acompletion, + client=client, ) ## RESPONSE OBJECT diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index f2ca9156ad..1d4353e3ed 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -15,6 +15,12 @@ "supports_prompt_caching": true, "supports_response_schema": true, "supports_system_messages": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.0000, + "search_context_size_medium": 0.0000, + "search_context_size_high": 0.0000 + }, "deprecation_date": "date when the model becomes deprecated in the format YYYY-MM-DD" }, "omni-moderation-latest": { @@ -74,7 +80,63 @@ "supports_vision": true, "supports_prompt_caching": true, "supports_system_messages": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.030, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.050 + } + }, + "gpt-4o-search-preview-2025-03-11": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.000010, + "input_cost_per_token_batches": 0.00000125, + "output_cost_per_token_batches": 0.00000500, + "cache_read_input_token_cost": 0.00000125, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.030, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.050 + } + }, + "gpt-4o-search-preview": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.000010, + "input_cost_per_token_batches": 0.00000125, + "output_cost_per_token_batches": 0.00000500, + "cache_read_input_token_cost": 0.00000125, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.030, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.050 + } }, "gpt-4.5-preview": { "max_tokens": 16384, @@ -199,7 +261,63 @@ 
"supports_vision": true, "supports_prompt_caching": true, "supports_system_messages": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.025, + "search_context_size_medium": 0.0275, + "search_context_size_high": 0.030 + } + }, + "gpt-4o-mini-search-preview-2025-03-11":{ + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.00000060, + "input_cost_per_token_batches": 0.000000075, + "output_cost_per_token_batches": 0.00000030, + "cache_read_input_token_cost": 0.000000075, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.025, + "search_context_size_medium": 0.0275, + "search_context_size_high": 0.030 + } + }, + "gpt-4o-mini-search-preview": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.00000060, + "input_cost_per_token_batches": 0.000000075, + "output_cost_per_token_batches": 0.00000030, + "cache_read_input_token_cost": 0.000000075, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.025, + "search_context_size_medium": 0.0275, + "search_context_size_high": 0.030 + } }, "gpt-4o-mini-2024-07-18": { "max_tokens": 16384, @@ -218,7 +336,12 @@ "supports_vision": true, "supports_prompt_caching": true, "supports_system_messages": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "search_context_cost_per_query": { + "search_context_size_low": 30.00, + "search_context_size_medium": 35.00, + "search_context_size_high": 50.00 + } }, "o1-pro": { "max_tokens": 100000, @@ -425,7 +548,13 @@ "supports_vision": true, "supports_prompt_caching": true, "supports_system_messages": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.030, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.050 + } }, "gpt-4o-2024-11-20": { "max_tokens": 16384, @@ -1426,6 +1555,25 @@ "supports_vision": false, "supports_prompt_caching": true }, + "azure/gpt-4.5-preview": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.000075, + "output_cost_per_token": 0.00015, + "input_cost_per_token_batches": 0.0000375, + "output_cost_per_token_batches": 0.000075, + "cache_read_input_token_cost": 0.0000375, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, "azure/gpt-4o": { "max_tokens": 
16384, "max_input_tokens": 128000, @@ -2091,6 +2239,18 @@ "mode": "chat", "supports_tool_choice": true }, + "azure_ai/mistral-small-2503": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, + "litellm_provider": "azure_ai", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "supports_tool_choice": true + }, "azure_ai/mistral-large-2407": { "max_tokens": 4096, "max_input_tokens": 128000, diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index 86172ae269..cd49647464 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -15,4 +15,12 @@ router_settings: redis_password: os.environ/REDIS_PASSWORD redis_port: os.environ/REDIS_PORT - +general_settings: + enable_jwt_auth: True + litellm_jwtauth: + admin_jwt_scope: "ai.admin" + # team_id_jwt_field: "client_id" # 👈 CAN BE ANY FIELD + user_id_jwt_field: "sub" # 👈 CAN BE ANY FIELD + org_id_jwt_field: "org_id" # 👈 CAN BE ANY FIELD + end_user_id_jwt_field: "customer_id" # 👈 CAN BE ANY FIELD + user_id_upsert: True \ No newline at end of file diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py index 255e37186d..220a0d5ddb 100644 --- a/litellm/proxy/_types.py +++ b/litellm/proxy/_types.py @@ -1631,7 +1631,7 @@ class LiteLLM_UserTable(LiteLLMPydanticObjectBase): class LiteLLM_UserTableFiltered(BaseModel): # done to avoid exposing sensitive data user_id: str - user_email: str + user_email: Optional[str] = None class LiteLLM_UserTableWithKeyCount(LiteLLM_UserTable): diff --git a/litellm/proxy/auth/auth_checks.py b/litellm/proxy/auth/auth_checks.py index f029511dd2..80cfb03de4 100644 --- a/litellm/proxy/auth/auth_checks.py +++ b/litellm/proxy/auth/auth_checks.py @@ -14,7 +14,7 @@ import time import traceback from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, cast -from fastapi import status +from fastapi import Request, status from pydantic import BaseModel import litellm @@ -74,6 +74,7 @@ async def common_checks( llm_router: Optional[Router], proxy_logging_obj: ProxyLogging, valid_token: Optional[UserAPIKeyAuth], + request: Request, ) -> bool: """ Common checks across jwt + key-based auth. 
@@ -198,9 +199,134 @@ async def common_checks( user_object=user_object, route=route, request_body=request_body ) + token_team = getattr(valid_token, "team_id", None) + token_type: Literal["ui", "api"] = ( + "ui" if token_team is not None and token_team == "litellm-dashboard" else "api" + ) + _is_route_allowed = _is_allowed_route( + route=route, + token_type=token_type, + user_obj=user_object, + request=request, + request_data=request_body, + valid_token=valid_token, + ) + return True +def _is_ui_route( + route: str, + user_obj: Optional[LiteLLM_UserTable] = None, +) -> bool: + """ + - Check if the route is a UI used route + """ + # this token is only used for managing the ui + allowed_routes = LiteLLMRoutes.ui_routes.value + # check if the current route startswith any of the allowed routes + if ( + route is not None + and isinstance(route, str) + and any(route.startswith(allowed_route) for allowed_route in allowed_routes) + ): + # Do something if the current route starts with any of the allowed routes + return True + elif any( + RouteChecks._route_matches_pattern(route=route, pattern=allowed_route) + for allowed_route in allowed_routes + ): + return True + return False + + +def _get_user_role( + user_obj: Optional[LiteLLM_UserTable], +) -> Optional[LitellmUserRoles]: + if user_obj is None: + return None + + _user = user_obj + + _user_role = _user.user_role + try: + role = LitellmUserRoles(_user_role) + except ValueError: + return LitellmUserRoles.INTERNAL_USER + + return role + + +def _is_api_route_allowed( + route: str, + request: Request, + request_data: dict, + valid_token: Optional[UserAPIKeyAuth], + user_obj: Optional[LiteLLM_UserTable] = None, +) -> bool: + """ + - Route b/w api token check and normal token check + """ + _user_role = _get_user_role(user_obj=user_obj) + + if valid_token is None: + raise Exception("Invalid proxy server token passed. valid_token=None.") + + if not _is_user_proxy_admin(user_obj=user_obj): # if non-admin + RouteChecks.non_proxy_admin_allowed_routes_check( + user_obj=user_obj, + _user_role=_user_role, + route=route, + request=request, + request_data=request_data, + valid_token=valid_token, + ) + return True + + +def _is_user_proxy_admin(user_obj: Optional[LiteLLM_UserTable]): + if user_obj is None: + return False + + if ( + user_obj.user_role is not None + and user_obj.user_role == LitellmUserRoles.PROXY_ADMIN.value + ): + return True + + if ( + user_obj.user_role is not None + and user_obj.user_role == LitellmUserRoles.PROXY_ADMIN.value + ): + return True + + return False + + +def _is_allowed_route( + route: str, + token_type: Literal["ui", "api"], + request: Request, + request_data: dict, + valid_token: Optional[UserAPIKeyAuth], + user_obj: Optional[LiteLLM_UserTable] = None, +) -> bool: + """ + - Route b/w ui token check and normal token check + """ + + if token_type == "ui" and _is_ui_route(route=route, user_obj=user_obj): + return True + else: + return _is_api_route_allowed( + route=route, + request=request, + request_data=request_data, + valid_token=valid_token, + user_obj=user_obj, + ) + + def _allowed_routes_check(user_route: str, allowed_routes: list) -> bool: """ Return if a user is allowed to access route. Helper function for `allowed_routes_check`. 
diff --git a/litellm/proxy/auth/auth_utils.py b/litellm/proxy/auth/auth_utils.py index 91fcaf7e11..2c4b122d3a 100644 --- a/litellm/proxy/auth/auth_utils.py +++ b/litellm/proxy/auth/auth_utils.py @@ -321,6 +321,7 @@ async def check_if_request_size_is_safe(request: Request) -> bool: from litellm.proxy.proxy_server import general_settings, premium_user max_request_size_mb = general_settings.get("max_request_size_mb", None) + if max_request_size_mb is not None: # Check if premium user if premium_user is not True: diff --git a/litellm/proxy/auth/route_checks.py b/litellm/proxy/auth/route_checks.py index a18a7ab5e1..8f956abb72 100644 --- a/litellm/proxy/auth/route_checks.py +++ b/litellm/proxy/auth/route_checks.py @@ -24,7 +24,6 @@ class RouteChecks: route: str, request: Request, valid_token: UserAPIKeyAuth, - api_key: str, request_data: dict, ): """ diff --git a/litellm/proxy/auth/user_api_key_auth.py b/litellm/proxy/auth/user_api_key_auth.py index ace0bf4948..b78619ae65 100644 --- a/litellm/proxy/auth/user_api_key_auth.py +++ b/litellm/proxy/auth/user_api_key_auth.py @@ -25,7 +25,9 @@ from litellm.litellm_core_utils.dd_tracing import tracer from litellm.proxy._types import * from litellm.proxy.auth.auth_checks import ( _cache_key_object, + _get_user_role, _handle_failed_db_connection_for_get_key_object, + _is_user_proxy_admin, _virtual_key_max_budget_check, _virtual_key_soft_budget_check, can_key_call_model, @@ -48,7 +50,6 @@ from litellm.proxy.auth.auth_utils import ( from litellm.proxy.auth.handle_jwt import JWTAuthManager, JWTHandler from litellm.proxy.auth.oauth2_check import check_oauth2_token from litellm.proxy.auth.oauth2_proxy_hook import handle_oauth2_proxy_request -from litellm.proxy.auth.route_checks import RouteChecks from litellm.proxy.auth.service_account_checks import service_account_checks from litellm.proxy.common_utils.http_parsing_utils import _read_request_body from litellm.proxy.utils import PrismaClient, ProxyLogging @@ -98,86 +99,6 @@ def _get_bearer_token( return api_key -def _is_ui_route( - route: str, - user_obj: Optional[LiteLLM_UserTable] = None, -) -> bool: - """ - - Check if the route is a UI used route - """ - # this token is only used for managing the ui - allowed_routes = LiteLLMRoutes.ui_routes.value - # check if the current route startswith any of the allowed routes - if ( - route is not None - and isinstance(route, str) - and any(route.startswith(allowed_route) for allowed_route in allowed_routes) - ): - # Do something if the current route starts with any of the allowed routes - return True - elif any( - RouteChecks._route_matches_pattern(route=route, pattern=allowed_route) - for allowed_route in allowed_routes - ): - return True - return False - - -def _is_api_route_allowed( - route: str, - request: Request, - request_data: dict, - api_key: str, - valid_token: Optional[UserAPIKeyAuth], - user_obj: Optional[LiteLLM_UserTable] = None, -) -> bool: - """ - - Route b/w api token check and normal token check - """ - _user_role = _get_user_role(user_obj=user_obj) - - if valid_token is None: - raise Exception("Invalid proxy server token passed. 
valid_token=None.") - - if not _is_user_proxy_admin(user_obj=user_obj): # if non-admin - RouteChecks.non_proxy_admin_allowed_routes_check( - user_obj=user_obj, - _user_role=_user_role, - route=route, - request=request, - request_data=request_data, - api_key=api_key, - valid_token=valid_token, - ) - return True - - -def _is_allowed_route( - route: str, - token_type: Literal["ui", "api"], - request: Request, - request_data: dict, - api_key: str, - valid_token: Optional[UserAPIKeyAuth], - user_obj: Optional[LiteLLM_UserTable] = None, -) -> bool: - """ - - Route b/w ui token check and normal token check - """ - - if token_type == "ui" and _is_ui_route(route=route, user_obj=user_obj): - return True - else: - return _is_api_route_allowed( - route=route, - request=request, - request_data=request_data, - api_key=api_key, - valid_token=valid_token, - user_obj=user_obj, - ) - - async def user_api_key_auth_websocket(websocket: WebSocket): # Accept the WebSocket connection @@ -328,6 +249,8 @@ async def _user_api_key_auth_builder( # noqa: PLR0915 parent_otel_span: Optional[Span] = None start_time = datetime.now() route: str = get_request_route(request=request) + valid_token: Optional[UserAPIKeyAuth] = None + try: # get the request body @@ -470,22 +393,8 @@ async def _user_api_key_auth_builder( # noqa: PLR0915 user_role=LitellmUserRoles.PROXY_ADMIN, parent_otel_span=parent_otel_span, ) - # run through common checks - _ = await common_checks( - request_body=request_data, - team_object=team_object, - user_object=user_object, - end_user_object=end_user_object, - general_settings=general_settings, - global_proxy_spend=global_proxy_spend, - route=route, - llm_router=llm_router, - proxy_logging_obj=proxy_logging_obj, - valid_token=None, - ) - # return UserAPIKeyAuth object - return UserAPIKeyAuth( + valid_token = UserAPIKeyAuth( api_key=None, team_id=team_id, team_tpm_limit=( @@ -501,6 +410,23 @@ async def _user_api_key_auth_builder( # noqa: PLR0915 parent_otel_span=parent_otel_span, end_user_id=end_user_id, ) + # run through common checks + _ = await common_checks( + request=request, + request_body=request_data, + team_object=team_object, + user_object=user_object, + end_user_object=end_user_object, + general_settings=general_settings, + global_proxy_spend=global_proxy_spend, + route=route, + llm_router=llm_router, + proxy_logging_obj=proxy_logging_obj, + valid_token=valid_token, + ) + + # return UserAPIKeyAuth object + return cast(UserAPIKeyAuth, valid_token) #### ELSE #### ## CHECK PASS-THROUGH ENDPOINTS ## @@ -1038,6 +964,7 @@ async def _user_api_key_auth_builder( # noqa: PLR0915 ) ) _ = await common_checks( + request=request, request_body=request_data, team_object=_team_obj, user_object=user_obj, @@ -1075,23 +1002,6 @@ async def _user_api_key_auth_builder( # noqa: PLR0915 # check if token is from litellm-ui, litellm ui makes keys to allow users to login with sso. 
These keys can only be used for LiteLLM UI functions # sso/login, ui/login, /key functions and /user functions # this will never be allowed to call /chat/completions - token_team = getattr(valid_token, "team_id", None) - token_type: Literal["ui", "api"] = ( - "ui" - if token_team is not None and token_team == "litellm-dashboard" - else "api" - ) - _is_route_allowed = _is_allowed_route( - route=route, - token_type=token_type, - user_obj=user_obj, - request=request, - request_data=request_data, - api_key=api_key, - valid_token=valid_token, - ) - if not _is_route_allowed: - raise HTTPException(401, detail="Invalid route for UI token") if valid_token is None: # No token was found when looking up in the DB @@ -1242,42 +1152,6 @@ async def _return_user_api_key_auth_obj( return UserAPIKeyAuth(**user_api_key_kwargs) -def _is_user_proxy_admin(user_obj: Optional[LiteLLM_UserTable]): - if user_obj is None: - return False - - if ( - user_obj.user_role is not None - and user_obj.user_role == LitellmUserRoles.PROXY_ADMIN.value - ): - return True - - if ( - user_obj.user_role is not None - and user_obj.user_role == LitellmUserRoles.PROXY_ADMIN.value - ): - return True - - return False - - -def _get_user_role( - user_obj: Optional[LiteLLM_UserTable], -) -> Optional[LitellmUserRoles]: - if user_obj is None: - return None - - _user = user_obj - - _user_role = _user.user_role - try: - role = LitellmUserRoles(_user_role) - except ValueError: - return LitellmUserRoles.INTERNAL_USER - - return role - - def get_api_key_from_custom_header( request: Request, custom_litellm_key_header_name: str ) -> str: diff --git a/litellm/proxy/common_request_processing.py b/litellm/proxy/common_request_processing.py index 7f131efb04..fcc13509ce 100644 --- a/litellm/proxy/common_request_processing.py +++ b/litellm/proxy/common_request_processing.py @@ -57,7 +57,9 @@ class ProxyBaseLLMRequestProcessing: "x-litellm-call-id": call_id, "x-litellm-model-id": model_id, "x-litellm-cache-key": cache_key, - "x-litellm-model-api-base": api_base, + "x-litellm-model-api-base": ( + api_base.split("?")[0] if api_base else None + ), # don't include query params, risk of leaking sensitive info "x-litellm-version": version, "x-litellm-model-region": model_region, "x-litellm-response-cost": str(response_cost), diff --git a/litellm/proxy/management_endpoints/internal_user_endpoints.py b/litellm/proxy/management_endpoints/internal_user_endpoints.py index 43d8273dee..e9be169cdc 100644 --- a/litellm/proxy/management_endpoints/internal_user_endpoints.py +++ b/litellm/proxy/management_endpoints/internal_user_endpoints.py @@ -1240,4 +1240,5 @@ async def ui_view_users( return [LiteLLM_UserTableFiltered(**user.model_dump()) for user in users] except Exception as e: + verbose_proxy_logger.exception(f"Error searching users: {str(e)}") raise HTTPException(status_code=500, detail=f"Error searching users: {str(e)}") diff --git a/litellm/proxy/management_endpoints/team_endpoints.py b/litellm/proxy/management_endpoints/team_endpoints.py index 1994e27ecf..f5bcc6ba11 100644 --- a/litellm/proxy/management_endpoints/team_endpoints.py +++ b/litellm/proxy/management_endpoints/team_endpoints.py @@ -470,7 +470,7 @@ async def update_team( if existing_team_row is None: raise HTTPException( - status_code=400, + status_code=404, detail={"error": f"Team not found, passed team_id={data.team_id}"}, ) @@ -1137,14 +1137,16 @@ async def delete_team( team_rows: List[LiteLLM_TeamTable] = [] for team_id in data.team_ids: try: - team_row_base: BaseModel = ( + team_row_base: 
Optional[BaseModel] = ( await prisma_client.db.litellm_teamtable.find_unique( where={"team_id": team_id} ) ) + if team_row_base is None: + raise Exception except Exception: raise HTTPException( - status_code=400, + status_code=404, detail={"error": f"Team not found, passed team_id={team_id}"}, ) team_row_pydantic = LiteLLM_TeamTable(**team_row_base.model_dump()) diff --git a/litellm/proxy/pass_through_endpoints/common_utils.py b/litellm/proxy/pass_through_endpoints/common_utils.py new file mode 100644 index 0000000000..3a3783dd57 --- /dev/null +++ b/litellm/proxy/pass_through_endpoints/common_utils.py @@ -0,0 +1,16 @@ +from fastapi import Request + + +def get_litellm_virtual_key(request: Request) -> str: + """ + Extract and format API key from request headers. + Prioritizes x-litellm-api-key over Authorization header. + + + Vertex JS SDK uses `Authorization` header, we use `x-litellm-api-key` to pass litellm virtual key + + """ + litellm_api_key = request.headers.get("x-litellm-api-key") + if litellm_api_key: + return f"Bearer {litellm_api_key}" + return request.headers.get("Authorization", "") diff --git a/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py b/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py index 4724c7f9d1..c4d96b67f6 100644 --- a/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py +++ b/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py @@ -12,10 +12,13 @@ import httpx from fastapi import APIRouter, Depends, HTTPException, Request, Response import litellm +from litellm._logging import verbose_proxy_logger from litellm.constants import BEDROCK_AGENT_RUNTIME_PASS_THROUGH_ROUTES +from litellm.llms.vertex_ai.vertex_llm_base import VertexBase from litellm.proxy._types import * from litellm.proxy.auth.route_checks import RouteChecks from litellm.proxy.auth.user_api_key_auth import user_api_key_auth +from litellm.proxy.pass_through_endpoints.common_utils import get_litellm_virtual_key from litellm.proxy.pass_through_endpoints.pass_through_endpoints import ( create_pass_through_route, ) @@ -23,6 +26,7 @@ from litellm.secret_managers.main import get_secret_str from .passthrough_endpoint_router import PassthroughEndpointRouter +vertex_llm_base = VertexBase() router = APIRouter() default_vertex_config = None @@ -417,6 +421,138 @@ async def azure_proxy_route( ) +@router.api_route( + "/vertex-ai/{endpoint:path}", + methods=["GET", "POST", "PUT", "DELETE", "PATCH"], + tags=["Vertex AI Pass-through", "pass-through"], + include_in_schema=False, +) +@router.api_route( + "/vertex_ai/{endpoint:path}", + methods=["GET", "POST", "PUT", "DELETE", "PATCH"], + tags=["Vertex AI Pass-through", "pass-through"], +) +async def vertex_proxy_route( + endpoint: str, + request: Request, + fastapi_response: Response, +): + """ + Call LiteLLM proxy via Vertex AI SDK. 
+ + [Docs](https://docs.litellm.ai/docs/pass_through/vertex_ai) + """ + from litellm.llms.vertex_ai.common_utils import ( + construct_target_url, + get_vertex_location_from_url, + get_vertex_project_id_from_url, + ) + + encoded_endpoint = httpx.URL(endpoint).path + verbose_proxy_logger.debug("requested endpoint %s", endpoint) + headers: dict = {} + api_key_to_use = get_litellm_virtual_key(request=request) + user_api_key_dict = await user_api_key_auth( + request=request, + api_key=api_key_to_use, + ) + vertex_project: Optional[str] = get_vertex_project_id_from_url(endpoint) + vertex_location: Optional[str] = get_vertex_location_from_url(endpoint) + vertex_credentials = passthrough_endpoint_router.get_vertex_credentials( + project_id=vertex_project, + location=vertex_location, + ) + + headers_passed_through = False + # Use headers from the incoming request if no vertex credentials are found + if vertex_credentials is None or vertex_credentials.vertex_project is None: + headers = dict(request.headers) or {} + headers_passed_through = True + verbose_proxy_logger.debug( + "default_vertex_config not set, incoming request headers %s", headers + ) + base_target_url = f"https://{vertex_location}-aiplatform.googleapis.com/" + headers.pop("content-length", None) + headers.pop("host", None) + else: + vertex_project = vertex_credentials.vertex_project + vertex_location = vertex_credentials.vertex_location + vertex_credentials_str = vertex_credentials.vertex_credentials + + # Construct base URL for the target endpoint + base_target_url = f"https://{vertex_location}-aiplatform.googleapis.com/" + + _auth_header, vertex_project = await vertex_llm_base._ensure_access_token_async( + credentials=vertex_credentials_str, + project_id=vertex_project, + custom_llm_provider="vertex_ai_beta", + ) + + auth_header, _ = vertex_llm_base._get_token_and_url( + model="", + auth_header=_auth_header, + gemini_api_key=None, + vertex_credentials=vertex_credentials_str, + vertex_project=vertex_project, + vertex_location=vertex_location, + stream=False, + custom_llm_provider="vertex_ai_beta", + api_base="", + ) + + headers = { + "Authorization": f"Bearer {auth_header}", + } + + request_route = encoded_endpoint + verbose_proxy_logger.debug("request_route %s", request_route) + + # Ensure endpoint starts with '/' for proper URL construction + if not encoded_endpoint.startswith("/"): + encoded_endpoint = "/" + encoded_endpoint + + # Construct the full target URL using httpx + updated_url = construct_target_url( + base_url=base_target_url, + requested_route=encoded_endpoint, + vertex_location=vertex_location, + vertex_project=vertex_project, + ) + + verbose_proxy_logger.debug("updated url %s", updated_url) + + ## check for streaming + target = str(updated_url) + is_streaming_request = False + if "stream" in str(updated_url): + is_streaming_request = True + target += "?alt=sse" + + ## CREATE PASS-THROUGH + endpoint_func = create_pass_through_route( + endpoint=endpoint, + target=target, + custom_headers=headers, + ) # dynamically construct pass-through endpoint based on incoming path + + try: + received_value = await endpoint_func( + request, + fastapi_response, + user_api_key_dict, + stream=is_streaming_request, # type: ignore + ) + except Exception as e: + if headers_passed_through: + raise Exception( + f"No credentials found on proxy for this request. 
Headers were passed through directly but request failed with error: {str(e)}" + ) + else: + raise e + + return received_value + + @router.api_route( "/openai/{endpoint:path}", methods=["GET", "POST", "PUT", "DELETE", "PATCH"], diff --git a/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py b/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py index b13d614678..a13b0dc216 100644 --- a/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py +++ b/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py @@ -1,6 +1,7 @@ import ast import asyncio import json +import uuid from base64 import b64encode from datetime import datetime from typing import Dict, List, Optional, Union @@ -284,7 +285,9 @@ class HttpPassThroughEndpointHelpers: @staticmethod def get_response_headers( - headers: httpx.Headers, litellm_call_id: Optional[str] = None + headers: httpx.Headers, + litellm_call_id: Optional[str] = None, + custom_headers: Optional[dict] = None, ) -> dict: excluded_headers = {"transfer-encoding", "content-encoding"} @@ -295,6 +298,8 @@ class HttpPassThroughEndpointHelpers: } if litellm_call_id: return_headers["x-litellm-call-id"] = litellm_call_id + if custom_headers: + return_headers.update(custom_headers) return return_headers @@ -365,8 +370,9 @@ async def pass_through_request( # noqa: PLR0915 query_params: Optional[dict] = None, stream: Optional[bool] = None, ): + litellm_call_id = str(uuid.uuid4()) + url: Optional[httpx.URL] = None try: - import uuid from litellm.litellm_core_utils.litellm_logging import Logging from litellm.proxy.proxy_server import proxy_logging_obj @@ -416,8 +422,6 @@ async def pass_through_request( # noqa: PLR0915 ) async_client = async_client_obj.client - litellm_call_id = str(uuid.uuid4()) - # create logging object start_time = datetime.now() logging_obj = Logging( @@ -596,15 +600,31 @@ async def pass_through_request( # noqa: PLR0915 ) ) + ## CUSTOM HEADERS - `x-litellm-*` + custom_headers = ProxyBaseLLMRequestProcessing.get_custom_headers( + user_api_key_dict=user_api_key_dict, + call_id=litellm_call_id, + model_id=None, + cache_key=None, + api_base=str(url._uri_reference), + ) + return Response( content=content, status_code=response.status_code, headers=HttpPassThroughEndpointHelpers.get_response_headers( headers=response.headers, - litellm_call_id=litellm_call_id, + custom_headers=custom_headers, ), ) except Exception as e: + custom_headers = ProxyBaseLLMRequestProcessing.get_custom_headers( + user_api_key_dict=user_api_key_dict, + call_id=litellm_call_id, + model_id=None, + cache_key=None, + api_base=str(url._uri_reference) if url else None, + ) verbose_proxy_logger.exception( "litellm.proxy.proxy_server.pass_through_endpoint(): Exception occured - {}".format( str(e) @@ -616,6 +636,7 @@ async def pass_through_request( # noqa: PLR0915 type=getattr(e, "type", "None"), param=getattr(e, "param", "None"), code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST), + headers=custom_headers, ) else: error_msg = f"{str(e)}" @@ -624,6 +645,7 @@ async def pass_through_request( # noqa: PLR0915 type=getattr(e, "type", "None"), param=getattr(e, "param", "None"), code=getattr(e, "status_code", 500), + headers=custom_headers, ) diff --git a/litellm/proxy/pass_through_endpoints/passthrough_endpoint_router.py b/litellm/proxy/pass_through_endpoints/passthrough_endpoint_router.py index adf7d0f30c..89cccfc071 100644 --- a/litellm/proxy/pass_through_endpoints/passthrough_endpoint_router.py +++ 
b/litellm/proxy/pass_through_endpoints/passthrough_endpoint_router.py @@ -1,7 +1,9 @@ from typing import Dict, Optional -from litellm._logging import verbose_logger +from litellm._logging import verbose_router_logger from litellm.secret_managers.main import get_secret_str +from litellm.types.llms.vertex_ai import VERTEX_CREDENTIALS_TYPES +from litellm.types.passthrough_endpoints.vertex_ai import VertexPassThroughCredentials class PassthroughEndpointRouter: @@ -11,6 +13,10 @@ class PassthroughEndpointRouter: def __init__(self): self.credentials: Dict[str, str] = {} + self.deployment_key_to_vertex_credentials: Dict[ + str, VertexPassThroughCredentials + ] = {} + self.default_vertex_config: Optional[VertexPassThroughCredentials] = None def set_pass_through_credentials( self, @@ -45,14 +51,14 @@ class PassthroughEndpointRouter: custom_llm_provider=custom_llm_provider, region_name=region_name, ) - verbose_logger.debug( + verbose_router_logger.debug( f"Pass-through llm endpoints router, looking for credentials for {credential_name}" ) if credential_name in self.credentials: - verbose_logger.debug(f"Found credentials for {credential_name}") + verbose_router_logger.debug(f"Found credentials for {credential_name}") return self.credentials[credential_name] else: - verbose_logger.debug( + verbose_router_logger.debug( f"No credentials found for {credential_name}, looking for env variable" ) _env_variable_name = ( @@ -62,6 +68,100 @@ class PassthroughEndpointRouter: ) return get_secret_str(_env_variable_name) + def _get_vertex_env_vars(self) -> VertexPassThroughCredentials: + """ + Helper to get vertex pass through config from environment variables + + The following environment variables are used: + - DEFAULT_VERTEXAI_PROJECT (project id) + - DEFAULT_VERTEXAI_LOCATION (location) + - DEFAULT_GOOGLE_APPLICATION_CREDENTIALS (path to credentials file) + """ + return VertexPassThroughCredentials( + vertex_project=get_secret_str("DEFAULT_VERTEXAI_PROJECT"), + vertex_location=get_secret_str("DEFAULT_VERTEXAI_LOCATION"), + vertex_credentials=get_secret_str("DEFAULT_GOOGLE_APPLICATION_CREDENTIALS"), + ) + + def set_default_vertex_config(self, config: Optional[dict] = None): + """Sets vertex configuration from provided config and/or environment variables + + Args: + config (Optional[dict]): Configuration dictionary + Example: { + "vertex_project": "my-project-123", + "vertex_location": "us-central1", + "vertex_credentials": "os.environ/GOOGLE_CREDS" + } + """ + # Initialize config dictionary if None + if config is None: + self.default_vertex_config = self._get_vertex_env_vars() + return + + if isinstance(config, dict): + for key, value in config.items(): + if isinstance(value, str) and value.startswith("os.environ/"): + config[key] = get_secret_str(value) + + self.default_vertex_config = VertexPassThroughCredentials(**config) + + def add_vertex_credentials( + self, + project_id: str, + location: str, + vertex_credentials: VERTEX_CREDENTIALS_TYPES, + ): + """ + Add the vertex credentials for the given project-id, location + """ + + deployment_key = self._get_deployment_key( + project_id=project_id, + location=location, + ) + if deployment_key is None: + verbose_router_logger.debug( + "No deployment key found for project-id, location" + ) + return + vertex_pass_through_credentials = VertexPassThroughCredentials( + vertex_project=project_id, + vertex_location=location, + vertex_credentials=vertex_credentials, + ) + self.deployment_key_to_vertex_credentials[deployment_key] = ( + vertex_pass_through_credentials + ) + 
+ def _get_deployment_key( + self, project_id: Optional[str], location: Optional[str] + ) -> Optional[str]: + """ + Get the deployment key for the given project-id, location + """ + if project_id is None or location is None: + return None + return f"{project_id}-{location}" + + def get_vertex_credentials( + self, project_id: Optional[str], location: Optional[str] + ) -> Optional[VertexPassThroughCredentials]: + """ + Get the vertex credentials for the given project-id, location + """ + deployment_key = self._get_deployment_key( + project_id=project_id, + location=location, + ) + + if deployment_key is None: + return self.default_vertex_config + if deployment_key in self.deployment_key_to_vertex_credentials: + return self.deployment_key_to_vertex_credentials[deployment_key] + else: + return self.default_vertex_config + def _get_credential_name_for_provider( self, custom_llm_provider: str, diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 9185b2d22a..e7cc131fa9 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -239,6 +239,9 @@ from litellm.proxy.openai_files_endpoints.files_endpoints import ( router as openai_files_router, ) from litellm.proxy.openai_files_endpoints.files_endpoints import set_files_config +from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import ( + passthrough_endpoint_router, +) from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import ( router as llm_passthrough_router, ) @@ -276,8 +279,6 @@ from litellm.proxy.utils import ( from litellm.proxy.vertex_ai_endpoints.langfuse_endpoints import ( router as langfuse_router, ) -from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import router as vertex_router -from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import set_default_vertex_config from litellm.router import ( AssistantsTypedDict, Deployment, @@ -2119,7 +2120,9 @@ class ProxyConfig: ## default config for vertex ai routes default_vertex_config = config.get("default_vertex_config", None) - set_default_vertex_config(config=default_vertex_config) + passthrough_endpoint_router.set_default_vertex_config( + config=default_vertex_config + ) ## ROUTER SETTINGS (e.g. routing_strategy, ...) 
router_settings = config.get("router_settings", None) @@ -8170,7 +8173,6 @@ app.include_router(batches_router) app.include_router(rerank_router) app.include_router(fine_tuning_router) app.include_router(credential_router) -app.include_router(vertex_router) app.include_router(llm_passthrough_router) app.include_router(mcp_router) app.include_router(anthropic_router) diff --git a/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py b/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py deleted file mode 100644 index 7444e3d1c1..0000000000 --- a/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py +++ /dev/null @@ -1,274 +0,0 @@ -import traceback -from typing import Optional - -import httpx -from fastapi import APIRouter, HTTPException, Request, Response, status - -import litellm -from litellm._logging import verbose_proxy_logger -from litellm.fine_tuning.main import vertex_fine_tuning_apis_instance -from litellm.proxy._types import * -from litellm.proxy.auth.user_api_key_auth import user_api_key_auth -from litellm.proxy.pass_through_endpoints.pass_through_endpoints import ( - create_pass_through_route, -) -from litellm.secret_managers.main import get_secret_str -from litellm.types.passthrough_endpoints.vertex_ai import * - -from .vertex_passthrough_router import VertexPassThroughRouter - -router = APIRouter() -vertex_pass_through_router = VertexPassThroughRouter() - -default_vertex_config: VertexPassThroughCredentials = VertexPassThroughCredentials() - - -def _get_vertex_env_vars() -> VertexPassThroughCredentials: - """ - Helper to get vertex pass through config from environment variables - - The following environment variables are used: - - DEFAULT_VERTEXAI_PROJECT (project id) - - DEFAULT_VERTEXAI_LOCATION (location) - - DEFAULT_GOOGLE_APPLICATION_CREDENTIALS (path to credentials file) - """ - return VertexPassThroughCredentials( - vertex_project=get_secret_str("DEFAULT_VERTEXAI_PROJECT"), - vertex_location=get_secret_str("DEFAULT_VERTEXAI_LOCATION"), - vertex_credentials=get_secret_str("DEFAULT_GOOGLE_APPLICATION_CREDENTIALS"), - ) - - -def set_default_vertex_config(config: Optional[dict] = None): - """Sets vertex configuration from provided config and/or environment variables - - Args: - config (Optional[dict]): Configuration dictionary - Example: { - "vertex_project": "my-project-123", - "vertex_location": "us-central1", - "vertex_credentials": "os.environ/GOOGLE_CREDS" - } - """ - global default_vertex_config - - # Initialize config dictionary if None - if config is None: - default_vertex_config = _get_vertex_env_vars() - return - - if isinstance(config, dict): - for key, value in config.items(): - if isinstance(value, str) and value.startswith("os.environ/"): - config[key] = litellm.get_secret(value) - - _set_default_vertex_config(VertexPassThroughCredentials(**config)) - - -def _set_default_vertex_config( - vertex_pass_through_credentials: VertexPassThroughCredentials, -): - global default_vertex_config - default_vertex_config = vertex_pass_through_credentials - - -def exception_handler(e: Exception): - verbose_proxy_logger.error( - "litellm.proxy.proxy_server.v1/projects/tuningJobs(): Exception occurred - {}".format( - str(e) - ) - ) - verbose_proxy_logger.debug(traceback.format_exc()) - if isinstance(e, HTTPException): - return ProxyException( - message=getattr(e, "message", str(e.detail)), - type=getattr(e, "type", "None"), - param=getattr(e, "param", "None"), - code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST), - ) - else: - error_msg = f"{str(e)}" - return 
ProxyException( - message=getattr(e, "message", error_msg), - type=getattr(e, "type", "None"), - param=getattr(e, "param", "None"), - code=getattr(e, "status_code", 500), - ) - - -def construct_target_url( - base_url: str, - requested_route: str, - default_vertex_location: Optional[str], - default_vertex_project: Optional[str], -) -> httpx.URL: - """ - Allow user to specify their own project id / location. - - If missing, use defaults - - Handle cachedContent scenario - https://github.com/BerriAI/litellm/issues/5460 - - Constructed Url: - POST https://LOCATION-aiplatform.googleapis.com/{version}/projects/PROJECT_ID/locations/LOCATION/cachedContents - """ - new_base_url = httpx.URL(base_url) - if "locations" in requested_route: # contains the target project id + location - updated_url = new_base_url.copy_with(path=requested_route) - return updated_url - """ - - Add endpoint version (e.g. v1beta for cachedContent, v1 for rest) - - Add default project id - - Add default location - """ - vertex_version: Literal["v1", "v1beta1"] = "v1" - if "cachedContent" in requested_route: - vertex_version = "v1beta1" - - base_requested_route = "{}/projects/{}/locations/{}".format( - vertex_version, default_vertex_project, default_vertex_location - ) - - updated_requested_route = "/" + base_requested_route + requested_route - - updated_url = new_base_url.copy_with(path=updated_requested_route) - return updated_url - - -@router.api_route( - "/vertex-ai/{endpoint:path}", - methods=["GET", "POST", "PUT", "DELETE", "PATCH"], - tags=["Vertex AI Pass-through", "pass-through"], - include_in_schema=False, -) -@router.api_route( - "/vertex_ai/{endpoint:path}", - methods=["GET", "POST", "PUT", "DELETE", "PATCH"], - tags=["Vertex AI Pass-through", "pass-through"], -) -async def vertex_proxy_route( - endpoint: str, - request: Request, - fastapi_response: Response, -): - """ - Call LiteLLM proxy via Vertex AI SDK. 
- - [Docs](https://docs.litellm.ai/docs/pass_through/vertex_ai) - """ - encoded_endpoint = httpx.URL(endpoint).path - verbose_proxy_logger.debug("requested endpoint %s", endpoint) - headers: dict = {} - api_key_to_use = get_litellm_virtual_key(request=request) - user_api_key_dict = await user_api_key_auth( - request=request, - api_key=api_key_to_use, - ) - - vertex_project: Optional[str] = ( - VertexPassThroughRouter._get_vertex_project_id_from_url(endpoint) - ) - vertex_location: Optional[str] = ( - VertexPassThroughRouter._get_vertex_location_from_url(endpoint) - ) - vertex_credentials = vertex_pass_through_router.get_vertex_credentials( - project_id=vertex_project, - location=vertex_location, - ) - - # Use headers from the incoming request if no vertex credentials are found - if vertex_credentials.vertex_project is None: - headers = dict(request.headers) or {} - verbose_proxy_logger.debug( - "default_vertex_config not set, incoming request headers %s", headers - ) - base_target_url = f"https://{vertex_location}-aiplatform.googleapis.com/" - headers.pop("content-length", None) - headers.pop("host", None) - else: - vertex_project = vertex_credentials.vertex_project - vertex_location = vertex_credentials.vertex_location - vertex_credentials_str = vertex_credentials.vertex_credentials - - # Construct base URL for the target endpoint - base_target_url = f"https://{vertex_location}-aiplatform.googleapis.com/" - - _auth_header, vertex_project = ( - await vertex_fine_tuning_apis_instance._ensure_access_token_async( - credentials=vertex_credentials_str, - project_id=vertex_project, - custom_llm_provider="vertex_ai_beta", - ) - ) - - auth_header, _ = vertex_fine_tuning_apis_instance._get_token_and_url( - model="", - auth_header=_auth_header, - gemini_api_key=None, - vertex_credentials=vertex_credentials_str, - vertex_project=vertex_project, - vertex_location=vertex_location, - stream=False, - custom_llm_provider="vertex_ai_beta", - api_base="", - ) - - headers = { - "Authorization": f"Bearer {auth_header}", - } - - request_route = encoded_endpoint - verbose_proxy_logger.debug("request_route %s", request_route) - - # Ensure endpoint starts with '/' for proper URL construction - if not encoded_endpoint.startswith("/"): - encoded_endpoint = "/" + encoded_endpoint - - # Construct the full target URL using httpx - updated_url = construct_target_url( - base_url=base_target_url, - requested_route=encoded_endpoint, - default_vertex_location=vertex_location, - default_vertex_project=vertex_project, - ) - # base_url = httpx.URL(base_target_url) - # updated_url = base_url.copy_with(path=encoded_endpoint) - - verbose_proxy_logger.debug("updated url %s", updated_url) - - ## check for streaming - target = str(updated_url) - is_streaming_request = False - if "stream" in str(updated_url): - is_streaming_request = True - target += "?alt=sse" - - ## CREATE PASS-THROUGH - endpoint_func = create_pass_through_route( - endpoint=endpoint, - target=target, - custom_headers=headers, - ) # dynamically construct pass-through endpoint based on incoming path - received_value = await endpoint_func( - request, - fastapi_response, - user_api_key_dict, - stream=is_streaming_request, # type: ignore - ) - - return received_value - - -def get_litellm_virtual_key(request: Request) -> str: - """ - Extract and format API key from request headers. - Prioritizes x-litellm-api-key over Authorization header. 
- - - Vertex JS SDK uses `Authorization` header, we use `x-litellm-api-key` to pass litellm virtual key - - """ - litellm_api_key = request.headers.get("x-litellm-api-key") - if litellm_api_key: - return f"Bearer {litellm_api_key}" - return request.headers.get("Authorization", "") diff --git a/litellm/proxy/vertex_ai_endpoints/vertex_passthrough_router.py b/litellm/proxy/vertex_ai_endpoints/vertex_passthrough_router.py deleted file mode 100644 index 0273a62047..0000000000 --- a/litellm/proxy/vertex_ai_endpoints/vertex_passthrough_router.py +++ /dev/null @@ -1,121 +0,0 @@ -import json -import re -from typing import Dict, Optional - -from litellm._logging import verbose_proxy_logger -from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import ( - VertexPassThroughCredentials, -) -from litellm.types.llms.vertex_ai import VERTEX_CREDENTIALS_TYPES - - -class VertexPassThroughRouter: - """ - Vertex Pass Through Router for Vertex AI pass-through endpoints - - - - if request specifies a project-id, location -> use credentials corresponding to the project-id, location - - if request does not specify a project-id, location -> use credentials corresponding to the DEFAULT_VERTEXAI_PROJECT, DEFAULT_VERTEXAI_LOCATION - """ - - def __init__(self): - """ - Initialize the VertexPassThroughRouter - Stores the vertex credentials for each deployment key - ``` - { - "project_id-location": VertexPassThroughCredentials, - "adroit-crow-us-central1": VertexPassThroughCredentials, - } - ``` - """ - self.deployment_key_to_vertex_credentials: Dict[ - str, VertexPassThroughCredentials - ] = {} - pass - - def get_vertex_credentials( - self, project_id: Optional[str], location: Optional[str] - ) -> VertexPassThroughCredentials: - """ - Get the vertex credentials for the given project-id, location - """ - from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import ( - default_vertex_config, - ) - - deployment_key = self._get_deployment_key( - project_id=project_id, - location=location, - ) - if deployment_key is None: - return default_vertex_config - if deployment_key in self.deployment_key_to_vertex_credentials: - return self.deployment_key_to_vertex_credentials[deployment_key] - else: - return default_vertex_config - - def add_vertex_credentials( - self, - project_id: str, - location: str, - vertex_credentials: VERTEX_CREDENTIALS_TYPES, - ): - """ - Add the vertex credentials for the given project-id, location - """ - from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import ( - _set_default_vertex_config, - ) - - deployment_key = self._get_deployment_key( - project_id=project_id, - location=location, - ) - if deployment_key is None: - verbose_proxy_logger.debug( - "No deployment key found for project-id, location" - ) - return - vertex_pass_through_credentials = VertexPassThroughCredentials( - vertex_project=project_id, - vertex_location=location, - vertex_credentials=vertex_credentials, - ) - self.deployment_key_to_vertex_credentials[deployment_key] = ( - vertex_pass_through_credentials - ) - verbose_proxy_logger.debug( - f"self.deployment_key_to_vertex_credentials: {json.dumps(self.deployment_key_to_vertex_credentials, indent=4, default=str)}" - ) - _set_default_vertex_config(vertex_pass_through_credentials) - - def _get_deployment_key( - self, project_id: Optional[str], location: Optional[str] - ) -> Optional[str]: - """ - Get the deployment key for the given project-id, location - """ - if project_id is None or location is None: - return None - return f"{project_id}-{location}" - - @staticmethod - def 
_get_vertex_project_id_from_url(url: str) -> Optional[str]: - """ - Get the vertex project id from the url - - `https://${LOCATION}-aiplatform.googleapis.com/v1/projects/${PROJECT_ID}/locations/${LOCATION}/publishers/google/models/${MODEL_ID}:streamGenerateContent` - """ - match = re.search(r"/projects/([^/]+)", url) - return match.group(1) if match else None - - @staticmethod - def _get_vertex_location_from_url(url: str) -> Optional[str]: - """ - Get the vertex location from the url - - `https://${LOCATION}-aiplatform.googleapis.com/v1/projects/${PROJECT_ID}/locations/${LOCATION}/publishers/google/models/${MODEL_ID}:streamGenerateContent` - """ - match = re.search(r"/locations/([^/]+)", url) - return match.group(1) if match else None diff --git a/litellm/responses/main.py b/litellm/responses/main.py index 43f37bdbc6..aec2f8fe4a 100644 --- a/litellm/responses/main.py +++ b/litellm/responses/main.py @@ -232,6 +232,9 @@ def responses( timeout=timeout or request_timeout, _is_async=_is_async, client=kwargs.get("client"), + fake_stream=responses_api_provider_config.should_fake_stream( + model=model, stream=stream, custom_llm_provider=custom_llm_provider + ), ) return response diff --git a/litellm/responses/streaming_iterator.py b/litellm/responses/streaming_iterator.py index c016e71e7e..3039efb9f7 100644 --- a/litellm/responses/streaming_iterator.py +++ b/litellm/responses/streaming_iterator.py @@ -11,6 +11,7 @@ from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLogging from litellm.litellm_core_utils.thread_pool_executor import executor from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig from litellm.types.llms.openai import ( + ResponseCompletedEvent, ResponsesAPIStreamEvents, ResponsesAPIStreamingResponse, ) @@ -207,3 +208,63 @@ class SyncResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator): start_time=self.start_time, end_time=datetime.now(), ) + + +class MockResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator): + """ + mock iterator - some models like o1-pro do not support streaming, we need to fake a stream + """ + + def __init__( + self, + response: httpx.Response, + model: str, + responses_api_provider_config: BaseResponsesAPIConfig, + logging_obj: LiteLLMLoggingObj, + ): + self.raw_http_response = response + super().__init__( + response=response, + model=model, + responses_api_provider_config=responses_api_provider_config, + logging_obj=logging_obj, + ) + self.is_done = False + + def __aiter__(self): + return self + + async def __anext__(self) -> ResponsesAPIStreamingResponse: + if self.is_done: + raise StopAsyncIteration + self.is_done = True + transformed_response = ( + self.responses_api_provider_config.transform_response_api_response( + model=self.model, + raw_response=self.raw_http_response, + logging_obj=self.logging_obj, + ) + ) + return ResponseCompletedEvent( + type=ResponsesAPIStreamEvents.RESPONSE_COMPLETED, + response=transformed_response, + ) + + def __iter__(self): + return self + + def __next__(self) -> ResponsesAPIStreamingResponse: + if self.is_done: + raise StopIteration + self.is_done = True + transformed_response = ( + self.responses_api_provider_config.transform_response_api_response( + model=self.model, + raw_response=self.raw_http_response, + logging_obj=self.logging_obj, + ) + ) + return ResponseCompletedEvent( + type=ResponsesAPIStreamEvents.RESPONSE_COMPLETED, + response=transformed_response, + ) diff --git a/litellm/router.py b/litellm/router.py index a395c851dd..af7b00e79d 100644 --- 
a/litellm/router.py +++ b/litellm/router.py @@ -4495,11 +4495,11 @@ class Router: Each provider uses diff .env vars for pass-through endpoints, this helper uses the deployment credentials to set the .env vars for pass-through endpoints """ if deployment.litellm_params.use_in_pass_through is True: - if custom_llm_provider == "vertex_ai": - from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import ( - vertex_pass_through_router, - ) + from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import ( + passthrough_endpoint_router, + ) + if custom_llm_provider == "vertex_ai": if ( deployment.litellm_params.vertex_project is None or deployment.litellm_params.vertex_location is None @@ -4508,16 +4508,12 @@ class Router: raise ValueError( "vertex_project, vertex_location, and vertex_credentials must be set in litellm_params for pass-through endpoints" ) - vertex_pass_through_router.add_vertex_credentials( + passthrough_endpoint_router.add_vertex_credentials( project_id=deployment.litellm_params.vertex_project, location=deployment.litellm_params.vertex_location, vertex_credentials=deployment.litellm_params.vertex_credentials, ) else: - from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import ( - passthrough_endpoint_router, - ) - passthrough_endpoint_router.set_pass_through_credentials( custom_llm_provider=custom_llm_provider, api_base=deployment.litellm_params.api_base, diff --git a/litellm/router_utils/handle_error.py b/litellm/router_utils/handle_error.py index e1055a9d0f..132440cbc3 100644 --- a/litellm/router_utils/handle_error.py +++ b/litellm/router_utils/handle_error.py @@ -1,7 +1,9 @@ from typing import TYPE_CHECKING, Any, Optional from litellm._logging import verbose_router_logger -from litellm.router_utils.cooldown_handlers import _async_get_cooldown_deployments +from litellm.router_utils.cooldown_handlers import ( + _async_get_cooldown_deployments_with_debug_info, +) from litellm.types.integrations.slack_alerting import AlertType from litellm.types.router import RouterRateLimitError @@ -75,7 +77,7 @@ async def async_raise_no_deployment_exception( _cooldown_time = litellm_router_instance.cooldown_cache.get_min_cooldown( model_ids=model_ids, parent_otel_span=parent_otel_span ) - _cooldown_list = await _async_get_cooldown_deployments( + _cooldown_list = await _async_get_cooldown_deployments_with_debug_info( litellm_router_instance=litellm_router_instance, parent_otel_span=parent_otel_span, ) diff --git a/litellm/types/llms/openai.py b/litellm/types/llms/openai.py index 4b0be9d5fe..e58f573227 100644 --- a/litellm/types/llms/openai.py +++ b/litellm/types/llms/openai.py @@ -382,6 +382,28 @@ class ChatCompletionThinkingBlock(TypedDict, total=False): cache_control: Optional[Union[dict, ChatCompletionCachedContent]] +class ChatCompletionAnnotationURLCitation(TypedDict, total=False): + end_index: int + """The index of the last character of the URL citation in the message.""" + + start_index: int + """The index of the first character of the URL citation in the message.""" + + title: str + """The title of the web resource.""" + + url: str + """The URL of the web resource.""" + + +class ChatCompletionAnnotation(TypedDict, total=False): + type: Literal["url_citation"] + """The type of the URL citation. 
Always `url_citation`.""" + + url_citation: ChatCompletionAnnotationURLCitation + """A URL citation when using web search.""" + + class OpenAIChatCompletionTextObject(TypedDict): type: Literal["text"] text: str diff --git a/litellm/types/utils.py b/litellm/types/utils.py index a665428561..8821d2c80b 100644 --- a/litellm/types/utils.py +++ b/litellm/types/utils.py @@ -7,6 +7,7 @@ from typing import Any, Dict, List, Literal, Optional, Tuple, Union from aiohttp import FormData from openai._models import BaseModel as OpenAIObject from openai.types.audio.transcription_create_params import FileTypes # type: ignore +from openai.types.chat.chat_completion import ChatCompletion from openai.types.completion_usage import ( CompletionTokensDetails, CompletionUsage, @@ -27,6 +28,7 @@ from ..litellm_core_utils.core_helpers import map_finish_reason from .guardrails import GuardrailEventHooks from .llms.openai import ( Batch, + ChatCompletionAnnotation, ChatCompletionThinkingBlock, ChatCompletionToolCallChunk, ChatCompletionUsageBlock, @@ -527,6 +529,7 @@ class Message(OpenAIObject): provider_specific_fields: Optional[Dict[str, Any]] = Field( default=None, exclude=True ) + annotations: Optional[List[ChatCompletionAnnotation]] = None def __init__( self, @@ -538,6 +541,7 @@ class Message(OpenAIObject): provider_specific_fields: Optional[Dict[str, Any]] = None, reasoning_content: Optional[str] = None, thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None, + annotations: Optional[List[ChatCompletionAnnotation]] = None, **params, ): init_values: Dict[str, Any] = { @@ -566,6 +570,9 @@ class Message(OpenAIObject): if thinking_blocks is not None: init_values["thinking_blocks"] = thinking_blocks + if annotations is not None: + init_values["annotations"] = annotations + if reasoning_content is not None: init_values["reasoning_content"] = reasoning_content @@ -623,6 +630,7 @@ class Delta(OpenAIObject): audio: Optional[ChatCompletionAudioResponse] = None, reasoning_content: Optional[str] = None, thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None, + annotations: Optional[List[ChatCompletionAnnotation]] = None, **params, ): super(Delta, self).__init__(**params) @@ -633,6 +641,7 @@ class Delta(OpenAIObject): self.function_call: Optional[Union[FunctionCall, Any]] = None self.tool_calls: Optional[List[Union[ChatCompletionDeltaToolCall, Any]]] = None self.audio: Optional[ChatCompletionAudioResponse] = None + self.annotations: Optional[List[ChatCompletionAnnotation]] = None if reasoning_content is not None: self.reasoning_content = reasoning_content @@ -646,6 +655,12 @@ class Delta(OpenAIObject): # ensure default response matches OpenAI spec del self.thinking_blocks + # Add annotations to the delta, ensure they are only on Delta if they exist (Match OpenAI spec) + if annotations is not None: + self.annotations = annotations + else: + del self.annotations + if function_call is not None and isinstance(function_call, dict): self.function_call = FunctionCall(**function_call) else: diff --git a/litellm/utils.py b/litellm/utils.py index 677cfe7684..03e69acf4e 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -1975,6 +1975,60 @@ def supports_system_messages(model: str, custom_llm_provider: Optional[str]) -> ) +def supports_web_search(model: str, custom_llm_provider: Optional[str]) -> bool: + """ + Check if the given model supports web search and return a boolean value. + + Parameters: + model (str): The model name to be checked. + custom_llm_provider (str): The provider to be checked. 
+ + Returns: + bool: True if the model supports web search, False otherwise. + + Raises: + Exception: If the given model is not found in model_prices_and_context_window.json. + """ + return _supports_factory( + model=model, + custom_llm_provider=custom_llm_provider, + key="supports_web_search", + ) + + +def supports_native_streaming(model: str, custom_llm_provider: Optional[str]) -> bool: + """ + Check if the given model supports native streaming and return a boolean value. + + Parameters: + model (str): The model name to be checked. + custom_llm_provider (str): The provider to be checked. + + Returns: + bool: True if the model supports native streaming, False otherwise. + + Raises: + Exception: If the given model is not found in model_prices_and_context_window.json. + """ + try: + model, custom_llm_provider, _, _ = litellm.get_llm_provider( + model=model, custom_llm_provider=custom_llm_provider + ) + + model_info = _get_model_info_helper( + model=model, custom_llm_provider=custom_llm_provider + ) + supports_native_streaming = model_info.get("supports_native_streaming", True) + if supports_native_streaming is None: + supports_native_streaming = True + return supports_native_streaming + except Exception as e: + verbose_logger.debug( + f"Model not found or error in checking supports_native_streaming support. You passed model={model}, custom_llm_provider={custom_llm_provider}. Error: {str(e)}" + ) + return False + + def supports_response_schema( model: str, custom_llm_provider: Optional[str] = None ) -> bool: diff --git a/mcp_servers.json b/mcp_servers.json new file mode 100644 index 0000000000..c196815747 --- /dev/null +++ b/mcp_servers.json @@ -0,0 +1,16 @@ +{ + "brave-search": { + "command": "docker", + "args": [ + "run", + "-i", + "--rm", + "-e", + "BRAVE_API_KEY", + "mcp/brave-search" + ], + "env": { + "BRAVE_API_KEY": "YOUR_API_KEY_HERE" + } + } +} \ No newline at end of file diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index f2ca9156ad..1d4353e3ed 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -15,6 +15,12 @@ "supports_prompt_caching": true, "supports_response_schema": true, "supports_system_messages": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.0000, + "search_context_size_medium": 0.0000, + "search_context_size_high": 0.0000 + }, "deprecation_date": "date when the model becomes deprecated in the format YYYY-MM-DD" }, "omni-moderation-latest": { @@ -74,7 +80,63 @@ "supports_vision": true, "supports_prompt_caching": true, "supports_system_messages": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.030, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.050 + } + }, + "gpt-4o-search-preview-2025-03-11": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.000010, + "input_cost_per_token_batches": 0.00000125, + "output_cost_per_token_batches": 0.00000500, + "cache_read_input_token_cost": 0.00000125, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + 
"supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.030, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.050 + } + }, + "gpt-4o-search-preview": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.000010, + "input_cost_per_token_batches": 0.00000125, + "output_cost_per_token_batches": 0.00000500, + "cache_read_input_token_cost": 0.00000125, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.030, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.050 + } }, "gpt-4.5-preview": { "max_tokens": 16384, @@ -199,7 +261,63 @@ "supports_vision": true, "supports_prompt_caching": true, "supports_system_messages": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.025, + "search_context_size_medium": 0.0275, + "search_context_size_high": 0.030 + } + }, + "gpt-4o-mini-search-preview-2025-03-11":{ + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.00000060, + "input_cost_per_token_batches": 0.000000075, + "output_cost_per_token_batches": 0.00000030, + "cache_read_input_token_cost": 0.000000075, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.025, + "search_context_size_medium": 0.0275, + "search_context_size_high": 0.030 + } + }, + "gpt-4o-mini-search-preview": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.00000060, + "input_cost_per_token_batches": 0.000000075, + "output_cost_per_token_batches": 0.00000030, + "cache_read_input_token_cost": 0.000000075, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.025, + "search_context_size_medium": 0.0275, + "search_context_size_high": 0.030 + } }, "gpt-4o-mini-2024-07-18": { "max_tokens": 16384, @@ -218,7 +336,12 @@ "supports_vision": true, "supports_prompt_caching": true, "supports_system_messages": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "search_context_cost_per_query": { + "search_context_size_low": 30.00, + "search_context_size_medium": 35.00, + "search_context_size_high": 50.00 + } }, "o1-pro": { "max_tokens": 100000, @@ -425,7 +548,13 @@ "supports_vision": true, "supports_prompt_caching": true, 
"supports_system_messages": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.030, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.050 + } }, "gpt-4o-2024-11-20": { "max_tokens": 16384, @@ -1426,6 +1555,25 @@ "supports_vision": false, "supports_prompt_caching": true }, + "azure/gpt-4.5-preview": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.000075, + "output_cost_per_token": 0.00015, + "input_cost_per_token_batches": 0.0000375, + "output_cost_per_token_batches": 0.000075, + "cache_read_input_token_cost": 0.0000375, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, "azure/gpt-4o": { "max_tokens": 16384, "max_input_tokens": 128000, @@ -2091,6 +2239,18 @@ "mode": "chat", "supports_tool_choice": true }, + "azure_ai/mistral-small-2503": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, + "litellm_provider": "azure_ai", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "supports_tool_choice": true + }, "azure_ai/mistral-large-2407": { "max_tokens": 4096, "max_input_tokens": 128000, diff --git a/poetry.lock b/poetry.lock index d270aa2d79..5834dd2e70 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.0.0 and should not be changed by hand. 
[[package]] name = "aiohappyeyeballs" @@ -6,6 +6,7 @@ version = "2.4.4" description = "Happy Eyeballs for asyncio" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "aiohappyeyeballs-2.4.4-py3-none-any.whl", hash = "sha256:a980909d50efcd44795c4afeca523296716d50cd756ddca6af8c65b996e27de8"}, {file = "aiohappyeyeballs-2.4.4.tar.gz", hash = "sha256:5fdd7d87889c63183afc18ce9271f9b0a7d32c2303e394468dd45d514a757745"}, @@ -17,6 +18,7 @@ version = "3.10.11" description = "Async http client/server framework (asyncio)" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "aiohttp-3.10.11-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5077b1a5f40ffa3ba1f40d537d3bec4383988ee51fbba6b74aa8fb1bc466599e"}, {file = "aiohttp-3.10.11-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8d6a14a4d93b5b3c2891fca94fa9d41b2322a68194422bef0dd5ec1e57d7d298"}, @@ -129,6 +131,7 @@ version = "1.3.1" description = "aiosignal: a list of registered asynchronous callbacks" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17"}, {file = "aiosignal-1.3.1.tar.gz", hash = "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc"}, @@ -143,6 +146,7 @@ version = "0.7.0" description = "Reusable constraint types to use with typing.Annotated" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"}, {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, @@ -157,6 +161,7 @@ version = "4.5.2" description = "High level compatibility layer for multiple asynchronous event loop implementations" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "anyio-4.5.2-py3-none-any.whl", hash = "sha256:c011ee36bc1e8ba40e5a81cb9df91925c218fe9b778554e0b56a21e1b5d4716f"}, {file = "anyio-4.5.2.tar.gz", hash = "sha256:23009af4ed04ce05991845451e11ef02fc7c5ed29179ac9a420e5ad0ac7ddc5b"}, @@ -179,6 +184,8 @@ version = "3.11.0" description = "In-process task scheduler with Cron-like capabilities" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "APScheduler-3.11.0-py3-none-any.whl", hash = "sha256:fc134ca32e50f5eadcc4938e3a4545ab19131435e851abb40b34d63d5141c6da"}, {file = "apscheduler-3.11.0.tar.gz", hash = "sha256:4c622d250b0955a65d5d0eb91c33e6d43fd879834bf541e0a18661ae60460133"}, @@ -207,6 +214,8 @@ version = "5.0.1" description = "Timeout context manager for asyncio programs" optional = false python-versions = ">=3.8" +groups = ["main"] +markers = "python_full_version < \"3.11.3\" and extra == \"proxy\" or python_version < \"3.11\"" files = [ {file = "async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c"}, {file = "async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"}, @@ -218,6 +227,7 @@ version = "25.3.0" description = "Classes Without Boilerplate" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3"}, {file = "attrs-25.3.0.tar.gz", hash = 
"sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b"}, @@ -237,6 +247,8 @@ version = "1.32.0" description = "Microsoft Azure Core Library for Python" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "azure_core-1.32.0-py3-none-any.whl", hash = "sha256:eac191a0efb23bfa83fddf321b27b122b4ec847befa3091fa736a5c32c50d7b4"}, {file = "azure_core-1.32.0.tar.gz", hash = "sha256:22b3c35d6b2dae14990f6c1be2912bf23ffe50b220e708a28ab1bb92b1c730e5"}, @@ -256,6 +268,8 @@ version = "1.21.0" description = "Microsoft Azure Identity Library for Python" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "azure_identity-1.21.0-py3-none-any.whl", hash = "sha256:258ea6325537352440f71b35c3dffe9d240eae4a5126c1b7ce5efd5766bd9fd9"}, {file = "azure_identity-1.21.0.tar.gz", hash = "sha256:ea22ce6e6b0f429bc1b8d9212d5b9f9877bd4c82f1724bfa910760612c07a9a6"}, @@ -274,6 +288,8 @@ version = "4.9.0" description = "Microsoft Azure Key Vault Secrets Client Library for Python" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "azure_keyvault_secrets-4.9.0-py3-none-any.whl", hash = "sha256:33c7e2aca2cc2092cebc8c6e96eca36a5cc30c767e16ea429c5fa21270e9fba6"}, {file = "azure_keyvault_secrets-4.9.0.tar.gz", hash = "sha256:2a03bb2ffd9a0d6c8ad1c330d9d0310113985a9de06607ece378fd72a5889fe1"}, @@ -290,6 +306,8 @@ version = "2.2.1" description = "Function decoration for backoff and retry" optional = true python-versions = ">=3.7,<4.0" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8"}, {file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"}, @@ -301,6 +319,8 @@ version = "0.2.1" description = "Backport of the standard library zoneinfo module" optional = true python-versions = ">=3.6" +groups = ["main"] +markers = "extra == \"proxy\" and python_version < \"3.9\"" files = [ {file = "backports.zoneinfo-0.2.1-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:da6013fd84a690242c310d77ddb8441a559e9cb3d3d59ebac9aca1a57b2e18bc"}, {file = "backports.zoneinfo-0.2.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:89a48c0d158a3cc3f654da4c2de1ceba85263fafb861b98b59040a5086259722"}, @@ -329,6 +349,7 @@ version = "23.12.1" description = "The uncompromising code formatter." optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "black-23.12.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e0aaf6041986767a5e0ce663c7a2f0e9eaf21e6ff87a5f95cbf3675bfd4c41d2"}, {file = "black-23.12.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c88b3711d12905b74206227109272673edce0cb29f27e1385f33b0163c414bba"}, @@ -375,6 +396,8 @@ version = "1.34.34" description = "The AWS SDK for Python" optional = true python-versions = ">= 3.8" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "boto3-1.34.34-py3-none-any.whl", hash = "sha256:33a8b6d9136fa7427160edb92d2e50f2035f04e9d63a2d1027349053e12626aa"}, {file = "boto3-1.34.34.tar.gz", hash = "sha256:b2f321e20966f021ec800b7f2c01287a3dd04fc5965acdfbaa9c505a24ca45d1"}, @@ -394,6 +417,8 @@ version = "1.34.162" description = "Low-level, data-driven core of boto 3." 
optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "botocore-1.34.162-py3-none-any.whl", hash = "sha256:2d918b02db88d27a75b48275e6fb2506e9adaaddbec1ffa6a8a0898b34e769be"}, {file = "botocore-1.34.162.tar.gz", hash = "sha256:adc23be4fb99ad31961236342b7cbf3c0bfc62532cd02852196032e8c0d682f3"}, @@ -416,6 +441,8 @@ version = "5.5.2" description = "Extensible memoizing collections and decorators" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "cachetools-5.5.2-py3-none-any.whl", hash = "sha256:d26a22bcc62eb95c3beabd9f1ee5e820d3d2704fe2967cbe350e20c8ffcd3f0a"}, {file = "cachetools-5.5.2.tar.gz", hash = "sha256:1a661caa9175d26759571b2e19580f9d6393969e5dfca11fdb1f947a23e640d4"}, @@ -427,6 +454,7 @@ version = "2025.1.31" description = "Python package for providing Mozilla's CA Bundle." optional = false python-versions = ">=3.6" +groups = ["main"] files = [ {file = "certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe"}, {file = "certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651"}, @@ -438,6 +466,8 @@ version = "1.17.1" description = "Foreign Function Interface for Python calling C code." optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"proxy\" or extra == \"extra-proxy\" and platform_python_implementation != \"PyPy\"" files = [ {file = "cffi-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14"}, {file = "cffi-1.17.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67"}, @@ -517,6 +547,7 @@ version = "3.4.1" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "charset_normalizer-3.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de"}, {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176"}, @@ -618,6 +649,7 @@ version = "8.1.8" description = "Composable command line interface toolkit" optional = false python-versions = ">=3.7" +groups = ["main", "dev"] files = [ {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"}, {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"}, @@ -632,10 +664,12 @@ version = "0.4.6" description = "Cross-platform colored terminal text." 
optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +groups = ["main", "dev"] files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +markers = {main = "platform_system == \"Windows\"", dev = "platform_system == \"Windows\" or sys_platform == \"win32\""} [[package]] name = "cryptography" @@ -643,6 +677,8 @@ version = "43.0.3" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "extra == \"proxy\" or extra == \"extra-proxy\"" files = [ {file = "cryptography-43.0.3-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:bf7a1932ac4176486eab36a19ed4c0492da5d97123f1406cf15e41b05e787d2e"}, {file = "cryptography-43.0.3-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:63efa177ff54aec6e1c0aefaa1a241232dcd37413835a9b674b6e3f0ae2bfd3e"}, @@ -692,6 +728,7 @@ version = "1.9.0" description = "Distro - an OS platform information API" optional = false python-versions = ">=3.6" +groups = ["main"] files = [ {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"}, {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"}, @@ -703,6 +740,8 @@ version = "2.6.1" description = "DNS toolkit" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "dnspython-2.6.1-py3-none-any.whl", hash = "sha256:5ef3b9680161f6fa89daf8ad451b5f1a33b18ae8a1c6778cdf4b43f08c0a6e50"}, {file = "dnspython-2.6.1.tar.gz", hash = "sha256:e8f0f9c23a7b7cb99ded64e6c3a6f3e701d78f50c55e002b839dea7225cff7cc"}, @@ -723,6 +762,8 @@ version = "2.2.0" description = "A robust email address syntax and deliverability validation library." 
optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "email_validator-2.2.0-py3-none-any.whl", hash = "sha256:561977c2d73ce3611850a06fa56b414621e0c8faa9d66f2611407d87465da631"}, {file = "email_validator-2.2.0.tar.gz", hash = "sha256:cb690f344c617a714f22e66ae771445a1ceb46821152df8e165c5f9a364582b7"}, @@ -738,6 +779,8 @@ version = "1.2.2" description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" +groups = ["main", "dev"] +markers = "python_version < \"3.11\"" files = [ {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, @@ -752,6 +795,8 @@ version = "0.115.11" description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "fastapi-0.115.11-py3-none-any.whl", hash = "sha256:32e1541b7b74602e4ef4a0260ecaf3aadf9d4f19590bba3e1bf2ac4666aa2c64"}, {file = "fastapi-0.115.11.tar.gz", hash = "sha256:cc81f03f688678b92600a65a5e618b93592c65005db37157147204d8924bf94f"}, @@ -772,6 +817,8 @@ version = "0.16.0" description = "FastAPI plugin to enable SSO to most common providers (such as Facebook login, Google login and login via Microsoft Office 365 Account)" optional = true python-versions = "<4.0,>=3.8" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "fastapi_sso-0.16.0-py3-none-any.whl", hash = "sha256:3a66a942474ef9756d3a9d8b945d55bd9faf99781facdb9b87a40b73d6d6b0c3"}, {file = "fastapi_sso-0.16.0.tar.gz", hash = "sha256:f3941f986347566b7d3747c710cf474a907f581bfb6697ff3bb3e44eb76b438c"}, @@ -790,6 +837,7 @@ version = "3.16.1" description = "A platform independent file lock." 
optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "filelock-3.16.1-py3-none-any.whl", hash = "sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0"}, {file = "filelock-3.16.1.tar.gz", hash = "sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435"}, @@ -806,6 +854,7 @@ version = "6.1.0" description = "the modular source code checker: pep8 pyflakes and co" optional = false python-versions = ">=3.8.1" +groups = ["dev"] files = [ {file = "flake8-6.1.0-py2.py3-none-any.whl", hash = "sha256:ffdfce58ea94c6580c77888a86506937f9a1a227dfcd15f245d694ae20a6b6e5"}, {file = "flake8-6.1.0.tar.gz", hash = "sha256:d5b3857f07c030bdb5bf41c7f53799571d75c4491748a3adcd47de929e34cd23"}, @@ -822,6 +871,7 @@ version = "1.5.0" description = "A list-like structure which implements collections.abc.MutableSequence" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "frozenlist-1.5.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5b6a66c18b5b9dd261ca98dffcb826a525334b2f29e7caa54e182255c5f6a65a"}, {file = "frozenlist-1.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d1b3eb7b05ea246510b43a7e53ed1653e55c2121019a97e60cad7efb881a97bb"}, @@ -923,6 +973,7 @@ version = "2025.3.0" description = "File-system specification" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "fsspec-2025.3.0-py3-none-any.whl", hash = "sha256:efb87af3efa9103f94ca91a7f8cb7a4df91af9f74fc106c9c7ea0efd7277c1b3"}, {file = "fsspec-2025.3.0.tar.gz", hash = "sha256:a935fd1ea872591f2b5148907d103488fc523295e6c64b835cfad8c3eca44972"}, @@ -962,6 +1013,8 @@ version = "2.24.2" description = "Google API client core library" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "google_api_core-2.24.2-py3-none-any.whl", hash = "sha256:810a63ac95f3c441b7c0e43d344e372887f62ce9071ba972eacf32672e072de9"}, {file = "google_api_core-2.24.2.tar.gz", hash = "sha256:81718493daf06d96d6bc76a91c23874dbf2fac0adbbf542831b805ee6e974696"}, @@ -997,6 +1050,8 @@ version = "2.38.0" description = "Google Authentication Library" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "google_auth-2.38.0-py2.py3-none-any.whl", hash = "sha256:e7dae6694313f434a2727bf2906f27ad259bae090d7aa896590d86feec3d9d4a"}, {file = "google_auth-2.38.0.tar.gz", hash = "sha256:8285113607d3b80a3f1543b75962447ba8a09fe85783432a784fdeef6ac094c4"}, @@ -1021,6 +1076,8 @@ version = "2.24.2" description = "Google Cloud Kms API client library" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "google_cloud_kms-2.24.2-py2.py3-none-any.whl", hash = "sha256:368209b035dfac691a467c1cf50986d8b1b26cac1166bdfbaa25d738df91ff7b"}, {file = "google_cloud_kms-2.24.2.tar.gz", hash = "sha256:e9e18bbfafd1a4035c76c03fb5ff03f4f57f596d08e1a9ede7e69ec0151b27a1"}, @@ -1039,6 +1096,8 @@ version = "1.69.2" description = "Common protobufs used in Google APIs" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "googleapis_common_protos-1.69.2-py3-none-any.whl", hash = "sha256:0b30452ff9c7a27d80bfc5718954063e8ab53dd3697093d3bc99581f5fd24212"}, {file = "googleapis_common_protos-1.69.2.tar.gz", hash = "sha256:3e1b904a27a33c821b4b749fd31d334c0c9c30e6113023d495e48979a3dc9c5f"}, @@ -1057,6 +1116,8 @@ version = "0.14.2" description = "IAM API client 
library" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "grpc_google_iam_v1-0.14.2-py3-none-any.whl", hash = "sha256:a3171468459770907926d56a440b2bb643eec1d7ba215f48f3ecece42b4d8351"}, {file = "grpc_google_iam_v1-0.14.2.tar.gz", hash = "sha256:b3e1fc387a1a329e41672197d0ace9de22c78dd7d215048c4c78712073f7bd20"}, @@ -1073,6 +1134,8 @@ version = "1.70.0" description = "HTTP/2-based RPC framework" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "grpcio-1.70.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:95469d1977429f45fe7df441f586521361e235982a0b39e33841549143ae2851"}, {file = "grpcio-1.70.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:ed9718f17fbdb472e33b869c77a16d0b55e166b100ec57b016dc7de9c8d236bf"}, @@ -1134,75 +1197,14 @@ files = [ [package.extras] protobuf = ["grpcio-tools (>=1.70.0)"] -[[package]] -name = "grpcio" -version = "1.71.0" -description = "HTTP/2-based RPC framework" -optional = true -python-versions = ">=3.9" -files = [ - {file = "grpcio-1.71.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:c200cb6f2393468142eb50ab19613229dcc7829b5ccee8b658a36005f6669fdd"}, - {file = "grpcio-1.71.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:b2266862c5ad664a380fbbcdbdb8289d71464c42a8c29053820ee78ba0119e5d"}, - {file = "grpcio-1.71.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:0ab8b2864396663a5b0b0d6d79495657ae85fa37dcb6498a2669d067c65c11ea"}, - {file = "grpcio-1.71.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c30f393f9d5ff00a71bb56de4aa75b8fe91b161aeb61d39528db6b768d7eac69"}, - {file = "grpcio-1.71.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f250ff44843d9a0615e350c77f890082102a0318d66a99540f54769c8766ab73"}, - {file = "grpcio-1.71.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e6d8de076528f7c43a2f576bc311799f89d795aa6c9b637377cc2b1616473804"}, - {file = "grpcio-1.71.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:9b91879d6da1605811ebc60d21ab6a7e4bae6c35f6b63a061d61eb818c8168f6"}, - {file = "grpcio-1.71.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f71574afdf944e6652203cd1badcda195b2a27d9c83e6d88dc1ce3cfb73b31a5"}, - {file = "grpcio-1.71.0-cp310-cp310-win32.whl", hash = "sha256:8997d6785e93308f277884ee6899ba63baafa0dfb4729748200fcc537858a509"}, - {file = "grpcio-1.71.0-cp310-cp310-win_amd64.whl", hash = "sha256:7d6ac9481d9d0d129224f6d5934d5832c4b1cddb96b59e7eba8416868909786a"}, - {file = "grpcio-1.71.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:d6aa986318c36508dc1d5001a3ff169a15b99b9f96ef5e98e13522c506b37eef"}, - {file = "grpcio-1.71.0-cp311-cp311-macosx_10_14_universal2.whl", hash = "sha256:d2c170247315f2d7e5798a22358e982ad6eeb68fa20cf7a820bb74c11f0736e7"}, - {file = "grpcio-1.71.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:e6f83a583ed0a5b08c5bc7a3fe860bb3c2eac1f03f1f63e0bc2091325605d2b7"}, - {file = "grpcio-1.71.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4be74ddeeb92cc87190e0e376dbc8fc7736dbb6d3d454f2fa1f5be1dee26b9d7"}, - {file = "grpcio-1.71.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4dd0dfbe4d5eb1fcfec9490ca13f82b089a309dc3678e2edabc144051270a66e"}, - {file = "grpcio-1.71.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a2242d6950dc892afdf9e951ed7ff89473aaf744b7d5727ad56bdaace363722b"}, - {file = 
"grpcio-1.71.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:0fa05ee31a20456b13ae49ad2e5d585265f71dd19fbd9ef983c28f926d45d0a7"}, - {file = "grpcio-1.71.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3d081e859fb1ebe176de33fc3adb26c7d46b8812f906042705346b314bde32c3"}, - {file = "grpcio-1.71.0-cp311-cp311-win32.whl", hash = "sha256:d6de81c9c00c8a23047136b11794b3584cdc1460ed7cbc10eada50614baa1444"}, - {file = "grpcio-1.71.0-cp311-cp311-win_amd64.whl", hash = "sha256:24e867651fc67717b6f896d5f0cac0ec863a8b5fb7d6441c2ab428f52c651c6b"}, - {file = "grpcio-1.71.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:0ff35c8d807c1c7531d3002be03221ff9ae15712b53ab46e2a0b4bb271f38537"}, - {file = "grpcio-1.71.0-cp312-cp312-macosx_10_14_universal2.whl", hash = "sha256:b78a99cd1ece4be92ab7c07765a0b038194ded2e0a26fd654591ee136088d8d7"}, - {file = "grpcio-1.71.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:dc1a1231ed23caac1de9f943d031f1bc38d0f69d2a3b243ea0d664fc1fbd7fec"}, - {file = "grpcio-1.71.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e6beeea5566092c5e3c4896c6d1d307fb46b1d4bdf3e70c8340b190a69198594"}, - {file = "grpcio-1.71.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5170929109450a2c031cfe87d6716f2fae39695ad5335d9106ae88cc32dc84c"}, - {file = "grpcio-1.71.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:5b08d03ace7aca7b2fadd4baf291139b4a5f058805a8327bfe9aece7253b6d67"}, - {file = "grpcio-1.71.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:f903017db76bf9cc2b2d8bdd37bf04b505bbccad6be8a81e1542206875d0e9db"}, - {file = "grpcio-1.71.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:469f42a0b410883185eab4689060a20488a1a0a00f8bbb3cbc1061197b4c5a79"}, - {file = "grpcio-1.71.0-cp312-cp312-win32.whl", hash = "sha256:ad9f30838550695b5eb302add33f21f7301b882937460dd24f24b3cc5a95067a"}, - {file = "grpcio-1.71.0-cp312-cp312-win_amd64.whl", hash = "sha256:652350609332de6dac4ece254e5d7e1ff834e203d6afb769601f286886f6f3a8"}, - {file = "grpcio-1.71.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:cebc1b34ba40a312ab480ccdb396ff3c529377a2fce72c45a741f7215bfe8379"}, - {file = "grpcio-1.71.0-cp313-cp313-macosx_10_14_universal2.whl", hash = "sha256:85da336e3649a3d2171e82f696b5cad2c6231fdd5bad52616476235681bee5b3"}, - {file = "grpcio-1.71.0-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:f9a412f55bb6e8f3bb000e020dbc1e709627dcb3a56f6431fa7076b4c1aab0db"}, - {file = "grpcio-1.71.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:47be9584729534660416f6d2a3108aaeac1122f6b5bdbf9fd823e11fe6fbaa29"}, - {file = "grpcio-1.71.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c9c80ac6091c916db81131d50926a93ab162a7e97e4428ffc186b6e80d6dda4"}, - {file = "grpcio-1.71.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:789d5e2a3a15419374b7b45cd680b1e83bbc1e52b9086e49308e2c0b5bbae6e3"}, - {file = "grpcio-1.71.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:1be857615e26a86d7363e8a163fade914595c81fec962b3d514a4b1e8760467b"}, - {file = "grpcio-1.71.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:a76d39b5fafd79ed604c4be0a869ec3581a172a707e2a8d7a4858cb05a5a7637"}, - {file = "grpcio-1.71.0-cp313-cp313-win32.whl", hash = "sha256:74258dce215cb1995083daa17b379a1a5a87d275387b7ffe137f1d5131e2cfbb"}, - {file = "grpcio-1.71.0-cp313-cp313-win_amd64.whl", hash = "sha256:22c3bc8d488c039a199f7a003a38cb7635db6656fa96437a8accde8322ce2366"}, - {file = 
"grpcio-1.71.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:c6a0a28450c16809f94e0b5bfe52cabff63e7e4b97b44123ebf77f448534d07d"}, - {file = "grpcio-1.71.0-cp39-cp39-macosx_10_14_universal2.whl", hash = "sha256:a371e6b6a5379d3692cc4ea1cb92754d2a47bdddeee755d3203d1f84ae08e03e"}, - {file = "grpcio-1.71.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:39983a9245d37394fd59de71e88c4b295eb510a3555e0a847d9965088cdbd033"}, - {file = "grpcio-1.71.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9182e0063112e55e74ee7584769ec5a0b4f18252c35787f48738627e23a62b97"}, - {file = "grpcio-1.71.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:693bc706c031aeb848849b9d1c6b63ae6bcc64057984bb91a542332b75aa4c3d"}, - {file = "grpcio-1.71.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:20e8f653abd5ec606be69540f57289274c9ca503ed38388481e98fa396ed0b41"}, - {file = "grpcio-1.71.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:8700a2a57771cc43ea295296330daaddc0d93c088f0a35cc969292b6db959bf3"}, - {file = "grpcio-1.71.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d35a95f05a8a2cbe8e02be137740138b3b2ea5f80bd004444e4f9a1ffc511e32"}, - {file = "grpcio-1.71.0-cp39-cp39-win32.whl", hash = "sha256:f9c30c464cb2ddfbc2ddf9400287701270fdc0f14be5f08a1e3939f1e749b455"}, - {file = "grpcio-1.71.0-cp39-cp39-win_amd64.whl", hash = "sha256:63e41b91032f298b3e973b3fa4093cbbc620c875e2da7b93e249d4728b54559a"}, - {file = "grpcio-1.71.0.tar.gz", hash = "sha256:2b85f7820475ad3edec209d3d89a7909ada16caab05d3f2e08a7e8ae3200a55c"}, -] - -[package.extras] -protobuf = ["grpcio-tools (>=1.71.0)"] - [[package]] name = "grpcio-status" version = "1.70.0" description = "Status proto mapping for gRPC" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "grpcio_status-1.70.0-py3-none-any.whl", hash = "sha256:fc5a2ae2b9b1c1969cc49f3262676e6854aa2398ec69cb5bd6c47cd501904a85"}, {file = "grpcio_status-1.70.0.tar.gz", hash = "sha256:0e7b42816512433b18b9d764285ff029bde059e9d41f8fe10a60631bd8348101"}, @@ -1213,31 +1215,17 @@ googleapis-common-protos = ">=1.5.5" grpcio = ">=1.70.0" protobuf = ">=5.26.1,<6.0dev" -[[package]] -name = "grpcio-status" -version = "1.71.0" -description = "Status proto mapping for gRPC" -optional = true -python-versions = ">=3.9" -files = [ - {file = "grpcio_status-1.71.0-py3-none-any.whl", hash = "sha256:843934ef8c09e3e858952887467f8256aac3910c55f077a359a65b2b3cde3e68"}, - {file = "grpcio_status-1.71.0.tar.gz", hash = "sha256:11405fed67b68f406b3f3c7c5ae5104a79d2d309666d10d61b152e91d28fb968"}, -] - -[package.dependencies] -googleapis-common-protos = ">=1.5.5" -grpcio = ">=1.71.0" -protobuf = ">=5.26.1,<6.0dev" - [[package]] name = "gunicorn" -version = "22.0.0" +version = "23.0.0" description = "WSGI HTTP Server for UNIX" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ - {file = "gunicorn-22.0.0-py3-none-any.whl", hash = "sha256:350679f91b24062c86e386e198a15438d53a7a8207235a78ba1b53df4c4378d9"}, - {file = "gunicorn-22.0.0.tar.gz", hash = "sha256:4a0b436239ff76fb33f11c07a16482c521a7e09c1ce3cc293c2330afe01bec63"}, + {file = "gunicorn-23.0.0-py3-none-any.whl", hash = "sha256:ec400d38950de4dfd418cff8328b2c8faed0edb0d517d3394e457c317908ca4d"}, + {file = "gunicorn-23.0.0.tar.gz", hash = "sha256:f014447a0101dc57e294f6c18ca6b40227a4c90e9bdb586042628030cba004ec"}, ] [package.dependencies] @@ -1256,6 +1244,7 @@ version = "0.14.0" description = "A pure-Python, 
bring-your-own-I/O implementation of HTTP/1.1" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, @@ -1267,6 +1256,7 @@ version = "1.0.7" description = "A minimal low-level HTTP client." optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "httpcore-1.0.7-py3-none-any.whl", hash = "sha256:a3fff8f43dc260d5bd363d9f9cf1830fa3a458b332856f34282de498ed420edd"}, {file = "httpcore-1.0.7.tar.gz", hash = "sha256:8551cb62a169ec7162ac7be8d4817d561f60e08eaa485234898414bb5a8a0b4c"}, @@ -1288,6 +1278,7 @@ version = "0.28.1" description = "The next generation HTTP client." optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad"}, {file = "httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc"}, @@ -1312,6 +1303,7 @@ version = "0.29.3" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" optional = false python-versions = ">=3.8.0" +groups = ["main"] files = [ {file = "huggingface_hub-0.29.3-py3-none-any.whl", hash = "sha256:0b25710932ac649c08cdbefa6c6ccb8e88eef82927cacdb048efb726429453aa"}, {file = "huggingface_hub-0.29.3.tar.gz", hash = "sha256:64519a25716e0ba382ba2d3fb3ca082e7c7eb4a2fc634d200e8380006e0760e5"}, @@ -1346,6 +1338,7 @@ version = "3.10" description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.6" +groups = ["main"] files = [ {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, @@ -1360,6 +1353,7 @@ version = "8.5.0" description = "Read metadata from Python packages" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "importlib_metadata-8.5.0-py3-none-any.whl", hash = "sha256:45e54197d28b7a7f1559e60b95e7c567032b602131fbd588f1497f47880aa68b"}, {file = "importlib_metadata-8.5.0.tar.gz", hash = "sha256:71522656f0abace1d072b9e5481a48f07c138e00f079c38c8f883823f9c26bd7"}, @@ -1383,6 +1377,8 @@ version = "6.4.5" description = "Read resources from Python packages" optional = false python-versions = ">=3.8" +groups = ["main"] +markers = "python_version < \"3.9\"" files = [ {file = "importlib_resources-6.4.5-py3-none-any.whl", hash = "sha256:ac29d5f956f01d5e4bb63102a5a19957f1b9175e45649977264a1416783bb717"}, {file = "importlib_resources-6.4.5.tar.gz", hash = "sha256:980862a1d16c9e147a59603677fa2aa5fd82b87f223b6cb870695bcfce830065"}, @@ -1405,6 +1401,7 @@ version = "2.0.0" description = "brain-dead simple config-ini parsing" optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, @@ -1416,6 +1413,8 @@ version = "0.7.2" description = "An ISO 8601 date/time/duration parser and formatter" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "extra == 
\"extra-proxy\"" files = [ {file = "isodate-0.7.2-py3-none-any.whl", hash = "sha256:28009937d8031054830160fce6d409ed342816b543597cece116d966c6d99e15"}, {file = "isodate-0.7.2.tar.gz", hash = "sha256:4cd1aa0f43ca76f4a6c6c0292a85f40b35ec2e43e315b59f06e6d32171a953e6"}, @@ -1427,6 +1426,7 @@ version = "3.1.6" description = "A very fast and expressive template engine." optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67"}, {file = "jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d"}, @@ -1444,6 +1444,7 @@ version = "0.9.0" description = "Fast iterable JSON parser." optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "jiter-0.9.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:816ec9b60fdfd1fec87da1d7ed46c66c44ffec37ab2ef7de5b147b2fce3fd5ad"}, {file = "jiter-0.9.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9b1d3086f8a3ee0194ecf2008cf81286a5c3e540d977fa038ff23576c023c0ea"}, @@ -1529,6 +1530,8 @@ version = "1.0.1" description = "JSON Matching Expressions" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, @@ -1540,6 +1543,7 @@ version = "4.23.0" description = "An implementation of JSON Schema validation for Python" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "jsonschema-4.23.0-py3-none-any.whl", hash = "sha256:fbadb6f8b144a8f8cf9f0b89ba94501d143e50411a1278633f56a7acf7fd5566"}, {file = "jsonschema-4.23.0.tar.gz", hash = "sha256:d71497fef26351a33265337fa77ffeb82423f3ea21283cd9467bb03999266bc4"}, @@ -1563,6 +1567,7 @@ version = "2023.12.1" description = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "jsonschema_specifications-2023.12.1-py3-none-any.whl", hash = "sha256:87e4fdf3a94858b8a2ba2778d9ba57d8a9cafca7c7489c46ba0d30a8bc6a9c3c"}, {file = "jsonschema_specifications-2023.12.1.tar.gz", hash = "sha256:48a76787b3e70f5ed53f1160d2b81f586e4ca6d1548c5de7085d1682674764cc"}, @@ -1578,6 +1583,7 @@ version = "2.1.5" description = "Safely add untrusted strings to HTML/XML markup." 
optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc"}, {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5"}, @@ -1647,6 +1653,7 @@ version = "0.7.0" description = "McCabe checker, plugin for flake8" optional = false python-versions = ">=3.6" +groups = ["dev"] files = [ {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, @@ -1658,6 +1665,8 @@ version = "1.32.0" description = "The Microsoft Authentication Library (MSAL) for Python library enables your app to access the Microsoft Cloud by supporting authentication of users with Microsoft Azure Active Directory accounts (AAD) and Microsoft Accounts (MSA) using industry standard OAuth2 and OpenID Connect." optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "msal-1.32.0-py3-none-any.whl", hash = "sha256:9dbac5384a10bbbf4dae5c7ea0d707d14e087b92c5aa4954b3feaa2d1aa0bcb7"}, {file = "msal-1.32.0.tar.gz", hash = "sha256:5445fe3af1da6be484991a7ab32eaa82461dc2347de105b76af92c610c3335c2"}, @@ -1677,6 +1686,8 @@ version = "1.3.0" description = "Microsoft Authentication Library extensions (MSAL EX) provides a persistence API that can save your data on disk, encrypted on Windows, macOS and Linux. Concurrent data access will be coordinated by a file lock mechanism." optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "msal_extensions-1.3.0-py3-none-any.whl", hash = "sha256:105328ddcbdd342016c9949d8f89e3917554740c8ab26669c0fa0e069e730a0e"}, {file = "msal_extensions-1.3.0.tar.gz", hash = "sha256:96918996642b38c78cd59b55efa0f06fd1373c90e0949be8615697c048fba62c"}, @@ -1694,6 +1705,7 @@ version = "6.1.0" description = "multidict implementation" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "multidict-6.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3380252550e372e8511d49481bd836264c009adb826b23fefcc5dd3c69692f60"}, {file = "multidict-6.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:99f826cbf970077383d7de805c0681799491cb939c25450b9b5b3ced03ca99f1"}, @@ -1798,6 +1810,7 @@ version = "1.14.1" description = "Optional static typing for Python" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "mypy-1.14.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:52686e37cf13d559f668aa398dd7ddf1f92c5d613e4f8cb262be2fb4fedb0fcb"}, {file = "mypy-1.14.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1fb545ca340537d4b45d3eecdb3def05e913299ca72c290326be19b3804b39c0"}, @@ -1857,6 +1870,7 @@ version = "1.0.0" description = "Type system extensions for programs checked with the mypy type checker." 
optional = false python-versions = ">=3.5" +groups = ["dev"] files = [ {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, @@ -1868,6 +1882,8 @@ version = "1.9.1" description = "Node.js virtual environment builder" optional = true python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9"}, {file = "nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f"}, @@ -1879,6 +1895,8 @@ version = "3.2.2" description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic" optional = true python-versions = ">=3.6" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "oauthlib-3.2.2-py3-none-any.whl", hash = "sha256:8139f29aac13e25d502680e9e19963e83f16838d48a0d71c287fe40e7067fbca"}, {file = "oauthlib-3.2.2.tar.gz", hash = "sha256:9859c40929662bec5d64f34d01c99e093149682a3f38915dc0655d5a633dd918"}, @@ -1895,6 +1913,7 @@ version = "1.66.3" description = "The official Python library for the openai API" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "openai-1.66.3-py3-none-any.whl", hash = "sha256:a427c920f727711877ab17c11b95f1230b27767ba7a01e5b66102945141ceca9"}, {file = "openai-1.66.3.tar.gz", hash = "sha256:8dde3aebe2d081258d4159c4cb27bdc13b5bb3f7ea2201d9bd940b9a89faf0c9"}, @@ -1920,6 +1939,8 @@ version = "3.10.15" description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "orjson-3.10.15-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:552c883d03ad185f720d0c09583ebde257e41b9521b74ff40e08b7dec4559c04"}, {file = "orjson-3.10.15-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:616e3e8d438d02e4854f70bfdc03a6bcdb697358dbaa6bcd19cbe24d24ece1f8"}, @@ -2008,6 +2029,7 @@ version = "24.2" description = "Core utilities for Python packages" optional = false python-versions = ">=3.8" +groups = ["main", "dev"] files = [ {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, @@ -2019,6 +2041,7 @@ version = "0.12.1" description = "Utility library for gitignore style pattern matching of file paths." optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"}, {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, @@ -2030,6 +2053,8 @@ version = "1.3.10" description = "Resolve a name to an object." 
optional = false python-versions = ">=3.6" +groups = ["main"] +markers = "python_version < \"3.9\"" files = [ {file = "pkgutil_resolve_name-1.3.10-py3-none-any.whl", hash = "sha256:ca27cc078d25c5ad71a9de0a7a330146c4e014c2462d9af19c6b828280649c5e"}, {file = "pkgutil_resolve_name-1.3.10.tar.gz", hash = "sha256:357d6c9e6a755653cfd78893817c0853af365dd51ec97f3d358a819373bbd174"}, @@ -2041,6 +2066,7 @@ version = "4.3.6" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb"}, {file = "platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907"}, @@ -2057,6 +2083,7 @@ version = "1.5.0" description = "plugin and hook calling mechanisms for python" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, @@ -2072,6 +2099,8 @@ version = "0.11.0" description = "Prisma Client Python is an auto-generated and fully type-safe database client" optional = true python-versions = ">=3.7.0" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "prisma-0.11.0-py3-none-any.whl", hash = "sha256:22bb869e59a2968b99f3483bb417717273ffbc569fd1e9ceed95e5614cbaf53a"}, {file = "prisma-0.11.0.tar.gz", hash = "sha256:3f2f2fd2361e1ec5ff655f2a04c7860c2f2a5bc4c91f78ca9c5c6349735bf693"}, @@ -2097,6 +2126,7 @@ version = "0.2.0" description = "Accelerated property cache" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "propcache-0.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:c5869b8fd70b81835a6f187c5fdbe67917a04d7e52b6e7cc4e5fe39d55c39d58"}, {file = "propcache-0.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:952e0d9d07609d9c5be361f33b0d6d650cd2bae393aabb11d9b719364521984b"}, @@ -2204,6 +2234,8 @@ version = "1.26.1" description = "Beautiful, Pythonic protocol buffers" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "proto_plus-1.26.1-py3-none-any.whl", hash = "sha256:13285478c2dcf2abb829db158e1047e2f1e8d63a077d94263c2b88b043c75a66"}, {file = "proto_plus-1.26.1.tar.gz", hash = "sha256:21a515a4c4c0088a773899e23c7bbade3d18f9c66c73edd4c7ee3816bc96a012"}, @@ -2221,6 +2253,8 @@ version = "5.29.3" description = "" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "protobuf-5.29.3-cp310-abi3-win32.whl", hash = "sha256:3ea51771449e1035f26069c4c7fd51fba990d07bc55ba80701c78f886bf9c888"}, {file = "protobuf-5.29.3-cp310-abi3-win_amd64.whl", hash = "sha256:a4fa6f80816a9a0678429e84973f2f98cbc218cca434abe8db2ad0bffc98503a"}, @@ -2241,6 +2275,8 @@ version = "0.6.1" description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "pyasn1-0.6.1-py3-none-any.whl", hash = "sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629"}, {file = "pyasn1-0.6.1.tar.gz", hash = 
"sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034"}, @@ -2252,6 +2288,8 @@ version = "0.4.1" description = "A collection of ASN.1-based protocols modules" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "pyasn1_modules-0.4.1-py3-none-any.whl", hash = "sha256:49bfa96b45a292b711e986f222502c1c9a5e1f4e568fc30e2574a6c7d07838fd"}, {file = "pyasn1_modules-0.4.1.tar.gz", hash = "sha256:c28e2dbf9c06ad61c71a075c7e0f9fd0f1b0bb2d2ad4377f240d33ac2ab60a7c"}, @@ -2266,6 +2304,7 @@ version = "2.11.1" description = "Python style guide checker" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "pycodestyle-2.11.1-py2.py3-none-any.whl", hash = "sha256:44fe31000b2d866f2e41841b18528a505fbd7fef9017b04eff4e2648a0fadc67"}, {file = "pycodestyle-2.11.1.tar.gz", hash = "sha256:41ba0e7afc9752dfb53ced5489e89f8186be00e599e712660695b7a75ff2663f"}, @@ -2277,6 +2316,8 @@ version = "2.22" description = "C parser in Python" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"proxy\" or extra == \"extra-proxy\" and platform_python_implementation != \"PyPy\"" files = [ {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"}, {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"}, @@ -2288,6 +2329,7 @@ version = "2.10.6" description = "Data validation using Python type hints" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "pydantic-2.10.6-py3-none-any.whl", hash = "sha256:427d664bf0b8a2b34ff5dd0f5a18df00591adcee7198fbd71981054cef37b584"}, {file = "pydantic-2.10.6.tar.gz", hash = "sha256:ca5daa827cce33de7a42be142548b0096bf05a7e7b365aebfa5f8eeec7128236"}, @@ -2309,6 +2351,7 @@ version = "2.27.2" description = "Core functionality for Pydantic validation and serialization" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "pydantic_core-2.27.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2d367ca20b2f14095a8f4fa1210f5a7b78b8a20009ecced6b12818f455b1e9fa"}, {file = "pydantic_core-2.27.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:491a2b73db93fab69731eaee494f320faa4e093dbed776be1a829c2eb222c34c"}, @@ -2421,6 +2464,7 @@ version = "3.1.0" description = "passive checker of Python programs" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "pyflakes-3.1.0-py2.py3-none-any.whl", hash = "sha256:4132f6d49cb4dae6819e5379898f2b8cce3c5f23994194c24b77d5da2e36f774"}, {file = "pyflakes-3.1.0.tar.gz", hash = "sha256:a0aae034c444db0071aa077972ba4768d40c830d9539fd45bf4cd3f8f6992efc"}, @@ -2432,6 +2476,8 @@ version = "2.9.0" description = "JSON Web Token implementation in Python" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"proxy\" or extra == \"extra-proxy\"" files = [ {file = "PyJWT-2.9.0-py3-none-any.whl", hash = "sha256:3b02fb0f44517787776cf48f2ae25d8e14f300e6d7545a4315cee571a415e850"}, {file = "pyjwt-2.9.0.tar.gz", hash = "sha256:7e1e5b56cc735432a7369cbfa0efe50fa113ebecdc04ae6922deba8b84582d0c"}, @@ -2452,6 +2498,8 @@ version = "1.5.0" description = "Python binding to the Networking and Cryptography (NaCl) library" optional = true python-versions = ">=3.6" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "PyNaCl-1.5.0-cp36-abi3-macosx_10_10_universal2.whl", hash = 
"sha256:401002a4aaa07c9414132aaed7f6836ff98f59277a234704ff66878c2ee4a0d1"}, {file = "PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:52cb72a79269189d4e0dc537556f4740f7f0a9ec41c1322598799b0bdad4ef92"}, @@ -2478,6 +2526,7 @@ version = "7.4.4" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"}, {file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"}, @@ -2500,6 +2549,7 @@ version = "3.14.0" description = "Thin-wrapper around the mock package for easier use with pytest" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "pytest-mock-3.14.0.tar.gz", hash = "sha256:2719255a1efeceadbc056d6bf3df3d1c5015530fb40cf347c0f9afac88410bd0"}, {file = "pytest_mock-3.14.0-py3-none-any.whl", hash = "sha256:0b72c38033392a5f4621342fe11e9219ac11ec9d375f8e2a0c164539e0d70f6f"}, @@ -2517,6 +2567,8 @@ version = "2.9.0.post0" description = "Extensions to the standard Python datetime module" optional = true python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, @@ -2531,6 +2583,7 @@ version = "1.0.1" description = "Read key-value pairs from a .env file and set them as environment variables" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"}, {file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"}, @@ -2545,6 +2598,8 @@ version = "0.0.18" description = "A streaming multipart parser for Python" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "python_multipart-0.0.18-py3-none-any.whl", hash = "sha256:efe91480f485f6a361427a541db4796f9e1591afc0fb8e7a4ba06bfbc6708996"}, {file = "python_multipart-0.0.18.tar.gz", hash = "sha256:7a68db60c8bfb82e460637fa4750727b45af1d5e2ed215593f917f64694d34fe"}, @@ -2556,6 +2611,7 @@ version = "6.0.2" description = "YAML parser and emitter for Python" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"}, {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"}, @@ -2618,6 +2674,8 @@ version = "5.2.1" description = "Python client for Redis database and key-value store" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "redis-5.2.1-py3-none-any.whl", hash = "sha256:ee7e1056b9aea0f04c6c2ed59452947f34c4940ee025f5dd83e6a6418b6989e4"}, {file = "redis-5.2.1.tar.gz", hash = "sha256:16f2e22dff21d5125e8481515e386711a34cbec50f0e44413dd7d9c060a54e0f"}, @@ -2636,6 +2694,7 @@ version = "0.35.1" description = "JSON Referencing + Python" optional = false python-versions = 
">=3.8" +groups = ["main"] files = [ {file = "referencing-0.35.1-py3-none-any.whl", hash = "sha256:eda6d3234d62814d1c64e305c1331c9a3a6132da475ab6382eaa997b21ee75de"}, {file = "referencing-0.35.1.tar.gz", hash = "sha256:25b42124a6c8b632a425174f24087783efb348a6f1e0008e63cd4466fedf703c"}, @@ -2651,6 +2710,7 @@ version = "2024.11.6" description = "Alternative regular expression module, to replace re." optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ff590880083d60acc0433f9c3f713c51f7ac6ebb9adf889c79a261ecf541aa91"}, {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:658f90550f38270639e83ce492f27d2c8d2cd63805c65a13a14d36ca126753f0"}, @@ -2754,6 +2814,7 @@ version = "2.31.0" description = "Python HTTP for Humans." optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"}, {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"}, @@ -2775,6 +2836,8 @@ version = "0.8.0" description = "Resend Python SDK" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "resend-0.8.0-py2.py3-none-any.whl", hash = "sha256:adc1515dadf4f4fc6b90db55a237f0f37fc56fd74287a986519a8a187fdb661d"}, {file = "resend-0.8.0.tar.gz", hash = "sha256:94142394701724dbcfcd8f760f675c662a1025013e741dd7cc773ca885526257"}, @@ -2789,6 +2852,7 @@ version = "0.20.1" description = "Python bindings to Rust's persistent data structures (rpds)" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "rpds_py-0.20.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:a649dfd735fff086e8a9d0503a9f0c7d01b7912a333c7ae77e1515c08c146dad"}, {file = "rpds_py-0.20.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f16bc1334853e91ddaaa1217045dd7be166170beec337576818461268a3de67f"}, @@ -2901,6 +2965,8 @@ version = "2.1.0" description = "RQ is a simple, lightweight, library for creating background jobs, and processing them." 
optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "rq-2.1.0-py3-none-any.whl", hash = "sha256:3c6892c6ca848e5fb47c1875399a66f13656bf0e123bf725d9aa9a12718e2fdf"}, {file = "rq-2.1.0.tar.gz", hash = "sha256:764585b6cab69ef1412f4aee523347e5aa7ece3ca175c118b1d92223dd8c2826"}, @@ -2916,6 +2982,8 @@ version = "4.9" description = "Pure-Python RSA implementation" optional = true python-versions = ">=3.6,<4" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "rsa-4.9-py3-none-any.whl", hash = "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7"}, {file = "rsa-4.9.tar.gz", hash = "sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21"}, @@ -2930,6 +2998,8 @@ version = "0.10.4" description = "An Amazon S3 Transfer Manager" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "s3transfer-0.10.4-py3-none-any.whl", hash = "sha256:244a76a24355363a68164241438de1b72f8781664920260c48465896b712a41e"}, {file = "s3transfer-0.10.4.tar.gz", hash = "sha256:29edc09801743c21eb5ecbc617a152df41d3c287f67b615f73e5f750583666a7"}, @@ -2947,6 +3017,8 @@ version = "1.17.0" description = "Python 2 and 3 compatibility utilities" optional = true python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +groups = ["main"] +markers = "extra == \"extra-proxy\" or extra == \"proxy\"" files = [ {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"}, {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, @@ -2958,6 +3030,7 @@ version = "1.3.1" description = "Sniff out which async library your code is running under" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, @@ -2969,6 +3042,8 @@ version = "0.44.0" description = "The little ASGI library that shines." 
optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "starlette-0.44.0-py3-none-any.whl", hash = "sha256:19edeb75844c16dcd4f9dd72f22f9108c1539f3fc9c4c88885654fef64f85aea"}, {file = "starlette-0.44.0.tar.gz", hash = "sha256:e35166950a3ccccc701962fe0711db0bc14f2ecd37c6f9fe5e3eae0cbaea8715"}, @@ -2987,6 +3062,7 @@ version = "0.7.0" description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "tiktoken-0.7.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:485f3cc6aba7c6b6ce388ba634fbba656d9ee27f766216f45146beb4ac18b25f"}, {file = "tiktoken-0.7.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e54be9a2cd2f6d6ffa3517b064983fb695c9a9d8aa7d574d1ef3c3f931a99225"}, @@ -3039,6 +3115,7 @@ version = "0.21.0" description = "" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "tokenizers-0.21.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:3c4c93eae637e7d2aaae3d376f06085164e1660f89304c0ab2b1d08a406636b2"}, {file = "tokenizers-0.21.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:f53ea537c925422a2e0e92a24cce96f6bc5046bbef24a1652a5edc8ba975f62e"}, @@ -3071,6 +3148,8 @@ version = "2.2.1" description = "A lil' TOML parser" optional = false python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version < \"3.11\"" files = [ {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"}, @@ -3112,6 +3191,8 @@ version = "0.13.2" description = "Style preserving TOML library" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"extra-proxy\"" files = [ {file = "tomlkit-0.13.2-py3-none-any.whl", hash = "sha256:7a974427f6e119197f670fbbbeae7bef749a6c14e793db934baefc1b5f03efde"}, {file = "tomlkit-0.13.2.tar.gz", hash = "sha256:fff5fe59a87295b278abd31bec92c15d9bc4a06885ab12bcea52c71119392e79"}, @@ -3123,6 +3204,7 @@ version = "4.67.1" description = "Fast, Extensible Progress Meter" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2"}, {file = "tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"}, @@ -3144,6 +3226,7 @@ version = "4.12.2" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" +groups = ["main", "dev"] files = [ {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, @@ -3155,6 +3238,8 @@ version = "2025.1" description = "Provider of IANA time zone data" optional = true python-versions = ">=2" +groups = ["main"] +markers = "extra == \"proxy\" and platform_system == \"Windows\"" files = [ {file = "tzdata-2025.1-py2.py3-none-any.whl", hash = "sha256:7e127113816800496f027041c570f50bcd464a020098a3b6b199517772303639"}, {file = "tzdata-2025.1.tar.gz", hash = "sha256:24894909e88cdb28bd1636c6887801df64cb485bd593f2fd83ef29075a81d694"}, @@ -3166,6 +3251,8 @@ version = "5.2" description = "tzinfo object for the local 
timezone" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "tzlocal-5.2-py3-none-any.whl", hash = "sha256:49816ef2fe65ea8ac19d19aa7a1ae0551c834303d5014c6d5a62e4cbda8047b8"}, {file = "tzlocal-5.2.tar.gz", hash = "sha256:8d399205578f1a9342816409cc1e46a93ebd5755e39ea2d85334bea911bf0e6e"}, @@ -3184,6 +3271,8 @@ version = "1.26.20" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" +groups = ["main"] +markers = "python_version < \"3.10\"" files = [ {file = "urllib3-1.26.20-py2.py3-none-any.whl", hash = "sha256:0ed14ccfbf1c30a9072c7ca157e4319b70d65f623e91e7b32fadb2853431016e"}, {file = "urllib3-1.26.20.tar.gz", hash = "sha256:40c2dc0c681e47eb8f90e7e27bf6ff7df2e677421fd46756da1161c39ca70d32"}, @@ -3200,6 +3289,8 @@ version = "2.2.3" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.8" +groups = ["main"] +markers = "python_version >= \"3.10\"" files = [ {file = "urllib3-2.2.3-py3-none-any.whl", hash = "sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac"}, {file = "urllib3-2.2.3.tar.gz", hash = "sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9"}, @@ -3217,6 +3308,8 @@ version = "0.29.0" description = "The lightning-fast ASGI server." optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "uvicorn-0.29.0-py3-none-any.whl", hash = "sha256:2c2aac7ff4f4365c206fd773a39bf4ebd1047c238f8b8268ad996829323473de"}, {file = "uvicorn-0.29.0.tar.gz", hash = "sha256:6a69214c0b6a087462412670b3ef21224fa48cae0e452b5883e8e8bdfdd11dd0"}, @@ -3236,6 +3329,8 @@ version = "0.21.0" description = "Fast implementation of asyncio event loop on top of libuv" optional = true python-versions = ">=3.8.0" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "uvloop-0.21.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ec7e6b09a6fdded42403182ab6b832b71f4edaf7f37a9a0e371a01db5f0cb45f"}, {file = "uvloop-0.21.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:196274f2adb9689a289ad7d65700d37df0c0930fd8e4e743fa4834e850d7719d"}, @@ -3287,6 +3382,8 @@ version = "13.1" description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"proxy\"" files = [ {file = "websockets-13.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f48c749857f8fb598fb890a75f540e3221d0976ed0bf879cf3c7eef34151acee"}, {file = "websockets-13.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c7e72ce6bda6fb9409cc1e8164dd41d7c91466fb599eb047cfda72fe758a34a7"}, @@ -3382,6 +3479,7 @@ version = "1.15.2" description = "Yet another URL library" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "yarl-1.15.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e4ee8b8639070ff246ad3649294336b06db37a94bdea0d09ea491603e0be73b8"}, {file = "yarl-1.15.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a7cf963a357c5f00cb55b1955df8bbe68d2f2f65de065160a1c26b85a1e44172"}, @@ -3494,6 +3592,7 @@ version = "3.20.2" description = "Backport of pathlib-compatible object wrapper for zip files" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "zipp-3.20.2-py3-none-any.whl", hash = 
"sha256:a817ac80d6cf4b23bf7f2828b7cabf326f15a001bea8b1f9b49631780ba28350"}, {file = "zipp-3.20.2.tar.gz", hash = "sha256:bc9eb26f4506fda01b81bcde0ca78103b6e62f991b381fec825435c836edbc29"}, @@ -3512,6 +3611,6 @@ extra-proxy = ["azure-identity", "azure-keyvault-secrets", "google-cloud-kms", " proxy = ["PyJWT", "apscheduler", "backoff", "boto3", "cryptography", "fastapi", "fastapi-sso", "gunicorn", "orjson", "pynacl", "python-multipart", "pyyaml", "rq", "uvicorn", "uvloop", "websockets"] [metadata] -lock-version = "2.0" +lock-version = "2.1" python-versions = ">=3.8.1,<4.0, !=3.9.7" -content-hash = "b9daad0a009079f7bf9c520525e2f9d0ea7ade51a1c598b88e23d6d590ef44be" +content-hash = "55078af47c1af79bd3ebadacb7ba92844d550a577bb0c49f5096693701ea4322" diff --git a/pyproject.toml b/pyproject.toml index 38d5687800..208804c562 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "1.63.12" +version = "1.63.14" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT" @@ -34,7 +34,7 @@ jsonschema = "^4.22.0" uvicorn = {version = "^0.29.0", optional = true} uvloop = {version = "^0.21.0", optional = true} -gunicorn = {version = "^22.0.0", optional = true} +gunicorn = {version = "^23.0.0", optional = true} fastapi = {version = "^0.115.5", optional = true} backoff = {version = "*", optional = true} pyyaml = {version = "^6.0.1", optional = true} @@ -100,7 +100,7 @@ requires = ["poetry-core", "wheel"] build-backend = "poetry.core.masonry.api" [tool.commitizen] -version = "1.63.12" +version = "1.63.14" version_files = [ "pyproject.toml:^version" ] diff --git a/requirements.txt b/requirements.txt index abe021c709..2e3715e55e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,12 +1,12 @@ # LITELLM PROXY DEPENDENCIES # anyio==4.5.0 # openai + http req. httpx==0.27.0 # Pin Httpx dependency -openai==1.66.1 # openai req. +openai==1.68.2 # openai req. 
fastapi==0.115.5 # server dep backoff==2.2.1 # server dep pyyaml==6.0.2 # server dep uvicorn==0.29.0 # server dep -gunicorn==22.0.0 # server dep +gunicorn==23.0.0 # server dep uvloop==0.21.0 # uvicorn dep, gives us much better performance under load boto3==1.34.34 # aws bedrock/sagemaker calls redis==5.0.0 # caching diff --git a/tests/litellm/caching/test_in_memory_cache.py b/tests/litellm/caching/test_in_memory_cache.py new file mode 100644 index 0000000000..d69899fec1 --- /dev/null +++ b/tests/litellm/caching/test_in_memory_cache.py @@ -0,0 +1,45 @@ +import asyncio +import json +import os +import sys +import time +from unittest.mock import MagicMock, patch + +import httpx +import pytest +import respx +from fastapi.testclient import TestClient + +sys.path.insert( + 0, os.path.abspath("../../..") +) # Adds the parent directory to the system path +from unittest.mock import AsyncMock + +from litellm.caching.in_memory_cache import InMemoryCache + + +def test_in_memory_openai_obj_cache(): + from openai import OpenAI + + openai_obj = OpenAI(api_key="my-fake-key") + + in_memory_cache = InMemoryCache() + + in_memory_cache.set_cache(key="my-fake-key", value=openai_obj) + + cached_obj = in_memory_cache.get_cache(key="my-fake-key") + + assert cached_obj is not None + + assert cached_obj == openai_obj + + +def test_in_memory_cache_max_size_per_item(): + """ + Test that the cache will not store items larger than the max size per item + """ + in_memory_cache = InMemoryCache(max_size_per_item=100) + + result = in_memory_cache.check_value_size("a" * 100000000) + + assert result is False diff --git a/tests/litellm/experimental_mcp_client/test_tools.py b/tests/litellm/experimental_mcp_client/test_tools.py new file mode 100644 index 0000000000..7089d83217 --- /dev/null +++ b/tests/litellm/experimental_mcp_client/test_tools.py @@ -0,0 +1,157 @@ +import json +import os +import sys +from unittest.mock import AsyncMock, MagicMock + +import pytest + +sys.path.insert( + 0, os.path.abspath("../../..") +) # Adds the parent directory to the system path + +from mcp.types import ( + CallToolRequestParams, + CallToolResult, + ListToolsResult, + TextContent, +) +from mcp.types import Tool as MCPTool + +from litellm.experimental_mcp_client.tools import ( + _get_function_arguments, + _transform_openai_tool_call_to_mcp_tool_call_request, + call_mcp_tool, + call_openai_tool, + load_mcp_tools, + transform_mcp_tool_to_openai_tool, +) + + +@pytest.fixture +def mock_mcp_tool(): + return MCPTool( + name="test_tool", + description="A test tool", + inputSchema={"type": "object", "properties": {"test": {"type": "string"}}}, + ) + + +@pytest.fixture +def mock_session(): + session = MagicMock() + session.list_tools = AsyncMock() + session.call_tool = AsyncMock() + return session + + +@pytest.fixture +def mock_list_tools_result(): + return ListToolsResult( + tools=[ + MCPTool( + name="test_tool", + description="A test tool", + inputSchema={ + "type": "object", + "properties": {"test": {"type": "string"}}, + }, + ) + ] + ) + + +@pytest.fixture +def mock_mcp_tool_call_result(): + return CallToolResult(content=[TextContent(type="text", text="test_output")]) + + +def test_transform_mcp_tool_to_openai_tool(mock_mcp_tool): + openai_tool = transform_mcp_tool_to_openai_tool(mock_mcp_tool) + assert openai_tool["type"] == "function" + assert openai_tool["function"]["name"] == "test_tool" + assert openai_tool["function"]["description"] == "A test tool" + assert openai_tool["function"]["parameters"] == { + "type": "object", + "properties": 
{"test": {"type": "string"}}, + } + + +def test_transform_openai_tool_call_to_mcp_tool_call_request(mock_mcp_tool): + openai_tool = { + "function": {"name": "test_tool", "arguments": json.dumps({"test": "value"})} + } + mcp_tool_call_request = _transform_openai_tool_call_to_mcp_tool_call_request( + openai_tool + ) + assert mcp_tool_call_request.name == "test_tool" + assert mcp_tool_call_request.arguments == {"test": "value"} + + +@pytest.mark.asyncio() +async def test_load_mcp_tools_mcp_format(mock_session, mock_list_tools_result): + mock_session.list_tools.return_value = mock_list_tools_result + result = await load_mcp_tools(mock_session, format="mcp") + assert len(result) == 1 + assert isinstance(result[0], MCPTool) + assert result[0].name == "test_tool" + mock_session.list_tools.assert_called_once() + + +@pytest.mark.asyncio() +async def test_load_mcp_tools_openai_format(mock_session, mock_list_tools_result): + mock_session.list_tools.return_value = mock_list_tools_result + result = await load_mcp_tools(mock_session, format="openai") + assert len(result) == 1 + assert result[0]["type"] == "function" + assert result[0]["function"]["name"] == "test_tool" + mock_session.list_tools.assert_called_once() + + +def test_get_function_arguments(): + # Test with string arguments + function = {"arguments": '{"test": "value"}'} + result = _get_function_arguments(function) + assert result == {"test": "value"} + + # Test with dict arguments + function = {"arguments": {"test": "value"}} + result = _get_function_arguments(function) + assert result == {"test": "value"} + + # Test with invalid JSON string + function = {"arguments": "invalid json"} + result = _get_function_arguments(function) + assert result == {} + + # Test with no arguments + function = {} + result = _get_function_arguments(function) + assert result == {} + + +@pytest.mark.asyncio() +async def test_call_openai_tool(mock_session, mock_mcp_tool_call_result): + mock_session.call_tool.return_value = mock_mcp_tool_call_result + openai_tool = { + "function": {"name": "test_tool", "arguments": json.dumps({"test": "value"})} + } + result = await call_openai_tool(mock_session, openai_tool) + print("result of call_openai_tool", result) + assert result.content[0].text == "test_output" + mock_session.call_tool.assert_called_once_with( + name="test_tool", arguments={"test": "value"} + ) + + +@pytest.mark.asyncio() +async def test_call_mcp_tool(mock_session, mock_mcp_tool_call_result): + mock_session.call_tool.return_value = mock_mcp_tool_call_result + request_params = CallToolRequestParams( + name="test_tool", arguments={"test": "value"} + ) + result = await call_mcp_tool(mock_session, request_params) + print("call_mcp_tool result", result) + assert result.content[0].text == "test_output" + mock_session.call_tool.assert_called_once_with( + name="test_tool", arguments={"test": "value"} + ) diff --git a/tests/litellm/litellm_core_utils/test_streaming_handler.py b/tests/litellm/litellm_core_utils/test_streaming_handler.py index 75c4fc1035..988d533670 100644 --- a/tests/litellm/litellm_core_utils/test_streaming_handler.py +++ b/tests/litellm/litellm_core_utils/test_streaming_handler.py @@ -136,6 +136,40 @@ def test_is_chunk_non_empty(initialized_custom_stream_wrapper: CustomStreamWrapp ) +def test_is_chunk_non_empty_with_annotations( + initialized_custom_stream_wrapper: CustomStreamWrapper, +): + """Unit test if non-empty when annotations are present""" + chunk = { + "id": "e89b6501-8ac2-464c-9550-7cd3daf94350", + "object": "chat.completion.chunk", + 
"created": 1741037890, + "model": "deepseek-reasoner", + "system_fingerprint": "fp_5417b77867_prod0225", + "choices": [ + { + "index": 0, + "delta": { + "content": None, + "annotations": [ + {"type": "url_citation", "url": "https://www.google.com"} + ], + }, + "logprobs": None, + "finish_reason": None, + } + ], + } + assert ( + initialized_custom_stream_wrapper.is_chunk_non_empty( + completion_obj=MagicMock(), + model_response=ModelResponseStream(**chunk), + response_obj=MagicMock(), + ) + is True + ) + + def test_optional_combine_thinking_block_in_choices( initialized_custom_stream_wrapper: CustomStreamWrapper, ): diff --git a/tests/litellm/llms/anthropic/chat/test_anthropic_chat_transformation.py b/tests/litellm/llms/anthropic/chat/test_anthropic_chat_transformation.py new file mode 100644 index 0000000000..04f2728284 --- /dev/null +++ b/tests/litellm/llms/anthropic/chat/test_anthropic_chat_transformation.py @@ -0,0 +1,35 @@ +import json +import os +import sys + +import pytest +from fastapi.testclient import TestClient + +sys.path.insert( + 0, os.path.abspath("../../../../..") +) # Adds the parent directory to the system path +from unittest.mock import MagicMock, patch + +from litellm.llms.anthropic.chat.transformation import AnthropicConfig + + +def test_response_format_transformation_unit_test(): + config = AnthropicConfig() + + response_format_json_schema = { + "description": 'Progress report for the thinking process\n\nThis model represents a snapshot of the agent\'s current progress during\nthe thinking process, providing a brief description of the current activity.\n\nAttributes:\n agent_doing: Brief description of what the agent is currently doing.\n Should be kept under 10 words. Example: "Learning about home automation"', + "properties": {"agent_doing": {"title": "Agent Doing", "type": "string"}}, + "required": ["agent_doing"], + "title": "ThinkingStep", + "type": "object", + "additionalProperties": False, + } + + result = config._create_json_tool_call_for_response_format( + json_schema=response_format_json_schema + ) + + assert result["input_schema"]["properties"] == { + "agent_doing": {"title": "Agent Doing", "type": "string"} + } + print(result) diff --git a/tests/litellm/llms/custom_httpx/test_llm_http_handler.py b/tests/litellm/llms/custom_httpx/test_llm_http_handler.py new file mode 100644 index 0000000000..26fc18de16 --- /dev/null +++ b/tests/litellm/llms/custom_httpx/test_llm_http_handler.py @@ -0,0 +1,77 @@ +import io +import os +import pathlib +import ssl +import sys +from unittest.mock import MagicMock + +import pytest + +sys.path.insert( + 0, os.path.abspath("../../../..") +) # Adds the parent directory to the system path +import litellm +from litellm.llms.custom_httpx.llm_http_handler import BaseLLMHTTPHandler + + +def test_prepare_fake_stream_request(): + # Initialize the BaseLLMHTTPHandler + handler = BaseLLMHTTPHandler() + + # Test case 1: fake_stream is True + stream = True + data = { + "stream": True, + "model": "gpt-4", + "messages": [{"role": "user", "content": "Hello"}], + } + fake_stream = True + + result_stream, result_data = handler._prepare_fake_stream_request( + stream=stream, data=data, fake_stream=fake_stream + ) + + # Verify that stream is set to False + assert result_stream is False + # Verify that "stream" key is removed from data + assert "stream" not in result_data + # Verify other data remains unchanged + assert result_data["model"] == "gpt-4" + assert result_data["messages"] == [{"role": "user", "content": "Hello"}] + + # Test case 2: 
fake_stream is False + stream = True + data = { + "stream": True, + "model": "gpt-4", + "messages": [{"role": "user", "content": "Hello"}], + } + fake_stream = False + + result_stream, result_data = handler._prepare_fake_stream_request( + stream=stream, data=data, fake_stream=fake_stream + ) + + # Verify that stream remains True + assert result_stream is True + # Verify that data remains unchanged + assert "stream" in result_data + assert result_data["stream"] is True + assert result_data["model"] == "gpt-4" + assert result_data["messages"] == [{"role": "user", "content": "Hello"}] + + # Test case 3: data doesn't have stream key but fake_stream is True + stream = True + data = {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]} + fake_stream = True + + result_stream, result_data = handler._prepare_fake_stream_request( + stream=stream, data=data, fake_stream=fake_stream + ) + + # Verify that stream is set to False + assert result_stream is False + # Verify that data remains unchanged (since there was no stream key to remove) + assert "stream" not in result_data + assert result_data["model"] == "gpt-4" + assert result_data["messages"] == [{"role": "user", "content": "Hello"}] diff --git a/tests/litellm/llms/vertex_ai/test_vertex_ai_common_utils.py b/tests/litellm/llms/vertex_ai/test_vertex_ai_common_utils.py new file mode 100644 index 0000000000..e89355443f --- /dev/null +++ b/tests/litellm/llms/vertex_ai/test_vertex_ai_common_utils.py @@ -0,0 +1,43 @@ +import os +import sys +from unittest.mock import MagicMock, call, patch + +import pytest + +sys.path.insert( + 0, os.path.abspath("../../..") +) # Adds the parent directory to the system path + +import litellm +from litellm.llms.vertex_ai.common_utils import ( + get_vertex_location_from_url, + get_vertex_project_id_from_url, +) + + +@pytest.mark.asyncio +async def test_get_vertex_project_id_from_url(): + """Test _get_vertex_project_id_from_url with various URLs""" + # Test with valid URL + url = "https://us-central1-aiplatform.googleapis.com/v1/projects/test-project/locations/us-central1/publishers/google/models/gemini-pro:streamGenerateContent" + project_id = get_vertex_project_id_from_url(url) + assert project_id == "test-project" + + # Test with invalid URL + url = "https://invalid-url.com" + project_id = get_vertex_project_id_from_url(url) + assert project_id is None + + +@pytest.mark.asyncio +async def test_get_vertex_location_from_url(): + """Test _get_vertex_location_from_url with various URLs""" + # Test with valid URL + url = "https://us-central1-aiplatform.googleapis.com/v1/projects/test-project/locations/us-central1/publishers/google/models/gemini-pro:streamGenerateContent" + location = get_vertex_location_from_url(url) + assert location == "us-central1" + + # Test with invalid URL + url = "https://invalid-url.com" + location = get_vertex_location_from_url(url) + assert location is None diff --git a/tests/litellm/proxy/management_endpoints/test_internal_user_endpoints.py b/tests/litellm/proxy/management_endpoints/test_internal_user_endpoints.py new file mode 100644 index 0000000000..697be8b3c9 --- /dev/null +++ b/tests/litellm/proxy/management_endpoints/test_internal_user_endpoints.py @@ -0,0 +1,57 @@ +import json +import os +import sys + +import pytest +from fastapi.testclient import TestClient + +sys.path.insert( + 0, os.path.abspath("../../../..") +) # Adds the parent directory to the system path + +from litellm.proxy._types import LiteLLM_UserTableFiltered, UserAPIKeyAuth +from 
litellm.proxy.management_endpoints.internal_user_endpoints import ui_view_users +from litellm.proxy.proxy_server import app + +client = TestClient(app) + + +@pytest.mark.asyncio +async def test_ui_view_users_with_null_email(mocker, caplog): + """ + Test that /user/filter/ui endpoint returns users even when they have null email fields + """ + # Mock the prisma client + mock_prisma_client = mocker.MagicMock() + + # Create mock user data with null email + mock_user = mocker.MagicMock() + mock_user.model_dump.return_value = { + "user_id": "test-user-null-email", + "user_email": None, + "user_role": "proxy_admin", + "created_at": "2024-01-01T00:00:00Z", + } + + # Setup the mock find_many response as an async function + async def mock_find_many(*args, **kwargs): + return [mock_user] + + mock_prisma_client.db.litellm_usertable.find_many = mock_find_many + + # Patch the prisma client import in the endpoint + mocker.patch("litellm.proxy.proxy_server.prisma_client", mock_prisma_client) + + # Call ui_view_users function directly + response = await ui_view_users( + user_api_key_dict=UserAPIKeyAuth(user_id="test_user"), + user_id="test_user", + user_email=None, + page=1, + page_size=50, + ) + + assert response == [ + LiteLLM_UserTableFiltered(user_id="test-user-null-email", user_email=None) + ] diff --git a/tests/litellm/proxy/pass_through_endpoints/test_llm_pass_through_endpoints.py b/tests/litellm/proxy/pass_through_endpoints/test_llm_pass_through_endpoints.py index 2f5ce85de7..da08dea605 100644 --- a/tests/litellm/proxy/pass_through_endpoints/test_llm_pass_through_endpoints.py +++ b/tests/litellm/proxy/pass_through_endpoints/test_llm_pass_through_endpoints.py @@ -1,7 +1,9 @@ import json import os import sys -from unittest.mock import MagicMock, patch +import traceback +from unittest import mock +from unittest.mock import AsyncMock, MagicMock, Mock, patch import httpx import pytest @@ -17,7 +19,9 @@ from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import ( BaseOpenAIPassThroughHandler, RouteChecks, create_pass_through_route, + vertex_proxy_route, ) +from litellm.types.passthrough_endpoints.vertex_ai import VertexPassThroughCredentials class TestBaseOpenAIPassThroughHandler: @@ -176,3 +180,279 @@ class TestBaseOpenAIPassThroughHandler: print(f"query_params: {call_kwargs['query_params']}") assert call_kwargs["stream"] is False assert call_kwargs["query_params"] == {"model": "gpt-4"} + + +class TestVertexAIPassThroughHandler: + """ + Case 1: User set passthrough credentials - confirm credentials used. + + Case 2: User set default credentials, no exact passthrough credentials - confirm default credentials used. + + Case 3: No default credentials, no mapped credentials - request passed through directly.
+ """ + + @pytest.mark.asyncio + async def test_vertex_passthrough_with_credentials(self, monkeypatch): + """ + Test that when passthrough credentials are set, they are correctly used in the request + """ + from litellm.proxy.pass_through_endpoints.passthrough_endpoint_router import ( + PassthroughEndpointRouter, + ) + + vertex_project = "test-project" + vertex_location = "us-central1" + vertex_credentials = "test-creds" + + pass_through_router = PassthroughEndpointRouter() + + pass_through_router.add_vertex_credentials( + project_id=vertex_project, + location=vertex_location, + vertex_credentials=vertex_credentials, + ) + + monkeypatch.setattr( + "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.passthrough_endpoint_router", + pass_through_router, + ) + + endpoint = f"/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/gemini-1.5-flash:generateContent" + + # Mock request + mock_request = Request( + scope={ + "type": "http", + "method": "POST", + "path": endpoint, + "headers": [ + (b"Authorization", b"Bearer test-creds"), + (b"Content-Type", b"application/json"), + ], + } + ) + + # Mock response + mock_response = Response() + + # Mock vertex credentials + test_project = vertex_project + test_location = vertex_location + test_token = vertex_credentials + + with mock.patch( + "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.vertex_llm_base._ensure_access_token_async" + ) as mock_ensure_token, mock.patch( + "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.vertex_llm_base._get_token_and_url" + ) as mock_get_token, mock.patch( + "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.create_pass_through_route" + ) as mock_create_route: + mock_ensure_token.return_value = ("test-auth-header", test_project) + mock_get_token.return_value = (test_token, "") + + # Call the route + try: + await vertex_proxy_route( + endpoint=endpoint, + request=mock_request, + fastapi_response=mock_response, + ) + except Exception as e: + print(f"Error: {e}") + + # Verify create_pass_through_route was called with correct arguments + mock_create_route.assert_called_once_with( + endpoint=endpoint, + target=f"https://{test_location}-aiplatform.googleapis.com/v1/projects/{test_project}/locations/{test_location}/publishers/google/models/gemini-1.5-flash:generateContent", + custom_headers={"Authorization": f"Bearer {test_token}"}, + ) + + @pytest.mark.parametrize( + "initial_endpoint", + [ + "publishers/google/models/gemini-1.5-flash:generateContent", + "v1/projects/bad-project/locations/bad-location/publishers/google/models/gemini-1.5-flash:generateContent", + ], + ) + @pytest.mark.asyncio + async def test_vertex_passthrough_with_default_credentials( + self, monkeypatch, initial_endpoint + ): + """ + Test that when no passthrough credentials are set, default credentials are used in the request + """ + from litellm.proxy.pass_through_endpoints.passthrough_endpoint_router import ( + PassthroughEndpointRouter, + ) + + # Setup default credentials + default_project = "default-project" + default_location = "us-central1" + default_credentials = "default-creds" + + pass_through_router = PassthroughEndpointRouter() + pass_through_router.default_vertex_config = VertexPassThroughCredentials( + vertex_project=default_project, + vertex_location=default_location, + vertex_credentials=default_credentials, + ) + + monkeypatch.setattr( + "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.passthrough_endpoint_router", + pass_through_router, + ) + 
+ # Use different project/location in request than the default + endpoint = initial_endpoint + + mock_request = Request( + scope={ + "type": "http", + "method": "POST", + "path": f"/vertex_ai/{endpoint}", + "headers": {}, + } + ) + mock_response = Response() + + with mock.patch( + "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.vertex_llm_base._ensure_access_token_async" + ) as mock_ensure_token, mock.patch( + "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.vertex_llm_base._get_token_and_url" + ) as mock_get_token, mock.patch( + "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.create_pass_through_route" + ) as mock_create_route: + mock_ensure_token.return_value = ("test-auth-header", default_project) + mock_get_token.return_value = (default_credentials, "") + + try: + await vertex_proxy_route( + endpoint=endpoint, + request=mock_request, + fastapi_response=mock_response, + ) + except Exception as e: + traceback.print_exc() + print(f"Error: {e}") + + # Verify default credentials were used + mock_create_route.assert_called_once_with( + endpoint=endpoint, + target=f"https://{default_location}-aiplatform.googleapis.com/v1/projects/{default_project}/locations/{default_location}/publishers/google/models/gemini-1.5-flash:generateContent", + custom_headers={"Authorization": f"Bearer {default_credentials}"}, + ) + + @pytest.mark.asyncio + async def test_vertex_passthrough_with_no_default_credentials(self, monkeypatch): + """ + Test that when no default credentials and no matching mapped credentials are set, + the request is passed through using the project/location from the request path. + """ + from litellm.proxy.pass_through_endpoints.passthrough_endpoint_router import ( + PassthroughEndpointRouter, + ) + + vertex_project = "my-project" + vertex_location = "us-central1" + vertex_credentials = "test-creds" + + test_project = "test-project" + test_location = "test-location" + test_token = "test-creds" + + pass_through_router = PassthroughEndpointRouter() + + pass_through_router.add_vertex_credentials( + project_id=vertex_project, + location=vertex_location, + vertex_credentials=vertex_credentials, + ) + + monkeypatch.setattr( + "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.passthrough_endpoint_router", + pass_through_router, + ) + + endpoint = f"/v1/projects/{test_project}/locations/{test_location}/publishers/google/models/gemini-1.5-flash:generateContent" + + # Mock request + mock_request = Request( + scope={ + "type": "http", + "method": "POST", + "path": endpoint, + "headers": [ + (b"authorization", b"Bearer test-creds"), + ], + } + ) + + # Mock response + mock_response = Response() + + with mock.patch( + "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.vertex_llm_base._ensure_access_token_async" + ) as mock_ensure_token, mock.patch( + "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.vertex_llm_base._get_token_and_url" + ) as mock_get_token, mock.patch( + "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.create_pass_through_route" + ) as mock_create_route: + mock_ensure_token.return_value = ("test-auth-header", test_project) + mock_get_token.return_value = (test_token, "") + + # Call the route + try: + await vertex_proxy_route( + endpoint=endpoint, + request=mock_request, + fastapi_response=mock_response, + ) + except Exception as e: + traceback.print_exc() + print(f"Error: {e}") + + # Verify create_pass_through_route was called with correct arguments +
mock_create_route.assert_called_once_with( + endpoint=endpoint, + target=f"https://{test_location}-aiplatform.googleapis.com/v1/projects/{test_project}/locations/{test_location}/publishers/google/models/gemini-1.5-flash:generateContent", + custom_headers={"authorization": f"Bearer {test_token}"}, + ) + + @pytest.mark.asyncio + async def test_async_vertex_proxy_route_api_key_auth(self): + """ + Critical + + This is how Vertex AI JS SDK will Auth to Litellm Proxy + """ + # Mock dependencies + mock_request = Mock() + mock_request.headers = {"x-litellm-api-key": "test-key-123"} + mock_request.method = "POST" + mock_response = Mock() + + with patch( + "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.user_api_key_auth" + ) as mock_auth: + mock_auth.return_value = {"api_key": "test-key-123"} + + with patch( + "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.create_pass_through_route" + ) as mock_pass_through: + mock_pass_through.return_value = AsyncMock( + return_value={"status": "success"} + ) + + # Call the function + result = await vertex_proxy_route( + endpoint="v1/projects/test-project/locations/us-central1/publishers/google/models/gemini-1.5-pro:generateContent", + request=mock_request, + fastapi_response=mock_response, + ) + + # Verify user_api_key_auth was called with the correct Bearer token + mock_auth.assert_called_once() + call_args = mock_auth.call_args[1] + assert call_args["api_key"] == "Bearer test-key-123" diff --git a/tests/litellm/proxy/pass_through_endpoints/test_passthrough_endpoints_common_utils.py b/tests/litellm/proxy/pass_through_endpoints/test_passthrough_endpoints_common_utils.py new file mode 100644 index 0000000000..bd8c5f5a99 --- /dev/null +++ b/tests/litellm/proxy/pass_through_endpoints/test_passthrough_endpoints_common_utils.py @@ -0,0 +1,44 @@ +import json +import os +import sys +import traceback +from unittest import mock +from unittest.mock import MagicMock, patch + +import httpx +import pytest +from fastapi import Request, Response +from fastapi.testclient import TestClient + +sys.path.insert( + 0, os.path.abspath("../../../..") +) # Adds the parent directory to the system path + +from unittest.mock import Mock + +from litellm.proxy.pass_through_endpoints.common_utils import get_litellm_virtual_key + + +@pytest.mark.asyncio +async def test_get_litellm_virtual_key(): + """ + Test that the get_litellm_virtual_key function correctly handles the API key authentication + """ + # Test with x-litellm-api-key + mock_request = Mock() + mock_request.headers = {"x-litellm-api-key": "test-key-123"} + result = get_litellm_virtual_key(mock_request) + assert result == "Bearer test-key-123" + + # Test with Authorization header + mock_request.headers = {"Authorization": "Bearer auth-key-456"} + result = get_litellm_virtual_key(mock_request) + assert result == "Bearer auth-key-456" + + # Test with both headers (x-litellm-api-key should take precedence) + mock_request.headers = { + "x-litellm-api-key": "test-key-123", + "Authorization": "Bearer auth-key-456", + } + result = get_litellm_virtual_key(mock_request) + assert result == "Bearer test-key-123" diff --git a/tests/llm_responses_api_testing/test_openai_responses_api.py b/tests/llm_responses_api_testing/test_openai_responses_api.py index 37674551fe..677e13b08a 100644 --- a/tests/llm_responses_api_testing/test_openai_responses_api.py +++ b/tests/llm_responses_api_testing/test_openai_responses_api.py @@ -94,7 +94,7 @@ def validate_responses_api_response(response, final_chunk: bool = False): 
@pytest.mark.asyncio async def test_basic_openai_responses_api(sync_mode): litellm._turn_on_debug() - + litellm.set_verbose = True if sync_mode: response = litellm.responses( model="gpt-4o", input="Basic ping", max_output_tokens=20 @@ -826,3 +826,219 @@ async def test_async_bad_request_bad_param_error(): print(f"Exception details: {e.__dict__}") except Exception as e: pytest.fail(f"Unexpected exception raised: {e}") + + +@pytest.mark.asyncio +@pytest.mark.parametrize("sync_mode", [True, False]) +async def test_openai_o1_pro_response_api(sync_mode): + """ + Test that LiteLLM correctly handles an incomplete response from OpenAI's o1-pro model + due to reaching max_output_tokens limit. + """ + # Mock response from o1-pro + mock_response = { + "id": "resp_67dc3dd77b388190822443a85252da5a0e13d8bdc0e28d88", + "object": "response", + "created_at": 1742486999, + "status": "incomplete", + "error": None, + "incomplete_details": {"reason": "max_output_tokens"}, + "instructions": None, + "max_output_tokens": 20, + "model": "o1-pro-2025-03-19", + "output": [ + { + "type": "reasoning", + "id": "rs_67dc3de50f64819097450ed50a33d5f90e13d8bdc0e28d88", + "summary": [], + } + ], + "parallel_tool_calls": True, + "previous_response_id": None, + "reasoning": {"effort": "medium", "generate_summary": None}, + "store": True, + "temperature": 1.0, + "text": {"format": {"type": "text"}}, + "tool_choice": "auto", + "tools": [], + "top_p": 1.0, + "truncation": "disabled", + "usage": { + "input_tokens": 73, + "input_tokens_details": {"cached_tokens": 0}, + "output_tokens": 20, + "output_tokens_details": {"reasoning_tokens": 0}, + "total_tokens": 93, + }, + "user": None, + "metadata": {}, + } + + class MockResponse: + def __init__(self, json_data, status_code): + self._json_data = json_data + self.status_code = status_code + self.text = json.dumps(json_data) + + def json(self): # Changed from async to sync + return self._json_data + + with patch( + "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post", + new_callable=AsyncMock, + ) as mock_post: + # Configure the mock to return our response + mock_post.return_value = MockResponse(mock_response, 200) + + litellm._turn_on_debug() + litellm.set_verbose = True + + # Call o1-pro with max_output_tokens=20 + response = await litellm.aresponses( + model="openai/o1-pro", + input="Write a detailed essay about artificial intelligence and its impact on society", + max_output_tokens=20, + ) + + # Verify the request was made correctly + mock_post.assert_called_once() + request_body = json.loads(mock_post.call_args.kwargs["data"]) + assert request_body["model"] == "o1-pro" + assert request_body["max_output_tokens"] == 20 + + # Validate the response + print("Response:", json.dumps(response, indent=4, default=str)) + + # Check that the response has the expected structure + assert response["id"] == mock_response["id"] + assert response["status"] == "incomplete" + assert response["incomplete_details"].reason == "max_output_tokens" + assert response["max_output_tokens"] == 20 + + # Validate usage information + assert response["usage"]["input_tokens"] == 73 + assert response["usage"]["output_tokens"] == 20 + assert response["usage"]["total_tokens"] == 93 + + # Validate that the response is properly identified as incomplete + validate_responses_api_response(response, final_chunk=True) + + +@pytest.mark.asyncio +@pytest.mark.parametrize("sync_mode", [True, False]) +async def test_openai_o1_pro_response_api_streaming(sync_mode): + """ + Test that LiteLLM correctly handles an incomplete 
response from OpenAI's o1-pro model + due to reaching max_output_tokens limit in both sync and async streaming modes. + """ + # Mock response from o1-pro + mock_response = { + "id": "resp_67dc3dd77b388190822443a85252da5a0e13d8bdc0e28d88", + "object": "response", + "created_at": 1742486999, + "status": "incomplete", + "error": None, + "incomplete_details": {"reason": "max_output_tokens"}, + "instructions": None, + "max_output_tokens": 20, + "model": "o1-pro-2025-03-19", + "output": [ + { + "type": "reasoning", + "id": "rs_67dc3de50f64819097450ed50a33d5f90e13d8bdc0e28d88", + "summary": [], + } + ], + "parallel_tool_calls": True, + "previous_response_id": None, + "reasoning": {"effort": "medium", "generate_summary": None}, + "store": True, + "temperature": 1.0, + "text": {"format": {"type": "text"}}, + "tool_choice": "auto", + "tools": [], + "top_p": 1.0, + "truncation": "disabled", + "usage": { + "input_tokens": 73, + "input_tokens_details": {"cached_tokens": 0}, + "output_tokens": 20, + "output_tokens_details": {"reasoning_tokens": 0}, + "total_tokens": 93, + }, + "user": None, + "metadata": {}, + } + + class MockResponse: + def __init__(self, json_data, status_code): + self._json_data = json_data + self.status_code = status_code + self.text = json.dumps(json_data) + + def json(self): + return self._json_data + + with patch( + "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post", + new_callable=AsyncMock, + ) as mock_post: + # Configure the mock to return our response + mock_post.return_value = MockResponse(mock_response, 200) + + litellm._turn_on_debug() + litellm.set_verbose = True + + # Verify the request was made correctly + if sync_mode: + # For sync mode, we need to patch the sync HTTP handler + with patch( + "litellm.llms.custom_httpx.http_handler.HTTPHandler.post", + return_value=MockResponse(mock_response, 200), + ) as mock_sync_post: + response = litellm.responses( + model="openai/o1-pro", + input="Write a detailed essay about artificial intelligence and its impact on society", + max_output_tokens=20, + stream=True, + ) + + # Process the sync stream + event_count = 0 + for event in response: + print( + f"Sync litellm response #{event_count}:", + json.dumps(event, indent=4, default=str), + ) + event_count += 1 + + # Verify the sync request was made correctly + mock_sync_post.assert_called_once() + request_body = json.loads(mock_sync_post.call_args.kwargs["data"]) + assert request_body["model"] == "o1-pro" + assert request_body["max_output_tokens"] == 20 + assert "stream" not in request_body + else: + # For async mode + response = await litellm.aresponses( + model="openai/o1-pro", + input="Write a detailed essay about artificial intelligence and its impact on society", + max_output_tokens=20, + stream=True, + ) + + # Process the async stream + event_count = 0 + async for event in response: + print( + f"Async litellm response #{event_count}:", + json.dumps(event, indent=4, default=str), + ) + event_count += 1 + + # Verify the async request was made correctly + mock_post.assert_called_once() + request_body = json.loads(mock_post.call_args.kwargs["data"]) + assert request_body["model"] == "o1-pro" + assert request_body["max_output_tokens"] == 20 + assert "stream" not in request_body diff --git a/tests/llm_translation/base_llm_unit_tests.py b/tests/llm_translation/base_llm_unit_tests.py index 32f631daad..82a1ef40fb 100644 --- a/tests/llm_translation/base_llm_unit_tests.py +++ b/tests/llm_translation/base_llm_unit_tests.py @@ -20,6 +20,7 @@ from litellm.utils import ( 
get_optional_params, ProviderConfigManager, ) +from litellm.main import stream_chunk_builder from typing import Union # test_example.py @@ -338,7 +339,7 @@ class BaseLLMChatTest(ABC): @pytest.mark.flaky(retries=6, delay=1) def test_json_response_pydantic_obj(self): - litellm.set_verbose = True + litellm._turn_on_debug() from pydantic import BaseModel from litellm.utils import supports_response_schema @@ -995,3 +996,73 @@ class BaseOSeriesModelsTest(ABC): # test across azure/openai ), "temperature should not be in the request body" except Exception as e: pytest.fail(f"Error occurred: {e}") + + +class BaseAnthropicChatTest(ABC): + """ + Ensures consistent result across anthropic model usage + """ + + @abstractmethod + def get_base_completion_call_args(self) -> dict: + """Must return the base completion call args""" + pass + + @property + def completion_function(self): + return litellm.completion + + def test_anthropic_response_format_streaming_vs_non_streaming(self): + litellm.set_verbose = True + args = { + "messages": [ + { + "content": "Your goal is to summarize the previous agent's thinking process into short descriptions to let user better understand the research progress. If no information is available, just say generic phrase like 'Doing some research...' with the given output format. Make sure to adhere to the output format no matter what, even if you don't have any information or you are not allowed to respond to the given input information (then just say generic phrase like 'Doing some research...').", + "role": "system", + }, + { + "role": "user", + "content": "Here is the input data (previous agent's output): \n\n Let's try to refine our search further, focusing more on the technical aspects of home automation and home energy system management:", + }, + ], + "response_format": { + "type": "json_schema", + "json_schema": { + "name": "final_output", + "strict": True, + "schema": { + "description": 'Progress report for the thinking process\n\nThis model represents a snapshot of the agent\'s current progress during\nthe thinking process, providing a brief description of the current activity.\n\nAttributes:\n agent_doing: Brief description of what the agent is currently doing.\n Should be kept under 10 words. 
Example: "Learning about home automation"', + "properties": { + "agent_doing": {"title": "Agent Doing", "type": "string"} + }, + "required": ["agent_doing"], + "title": "ThinkingStep", + "type": "object", + "additionalProperties": False, + }, + }, + }, + } + + base_completion_call_args = self.get_base_completion_call_args() + + response = self.completion_function( + **base_completion_call_args, **args, stream=True + ) + + chunks = [] + for chunk in response: + print(f"chunk: {chunk}") + chunks.append(chunk) + + print(f"chunks: {chunks}") + built_response = stream_chunk_builder(chunks=chunks) + + non_stream_response = self.completion_function( + **base_completion_call_args, **args, stream=False + ) + + assert ( + json.loads(built_response.choices[0].message.content).keys() + == json.loads(non_stream_response.choices[0].message.content).keys() + ), f"Got={json.loads(built_response.choices[0].message.content)}, Expected={json.loads(non_stream_response.choices[0].message.content)}" diff --git a/tests/llm_translation/test_anthropic_completion.py b/tests/llm_translation/test_anthropic_completion.py index da47e745e7..a83d1d69e9 100644 --- a/tests/llm_translation/test_anthropic_completion.py +++ b/tests/llm_translation/test_anthropic_completion.py @@ -36,7 +36,7 @@ from litellm.types.llms.openai import ChatCompletionToolCallFunctionChunk from litellm.llms.anthropic.common_utils import process_anthropic_headers from litellm.llms.anthropic.chat.handler import AnthropicChatCompletion from httpx import Headers -from base_llm_unit_tests import BaseLLMChatTest +from base_llm_unit_tests import BaseLLMChatTest, BaseAnthropicChatTest def streaming_format_tests(chunk: dict, idx: int): @@ -455,14 +455,15 @@ def test_create_json_tool_call_for_response_format(): _input_schema = tool.get("input_schema") assert _input_schema is not None assert _input_schema.get("type") == "object" - assert _input_schema.get("properties") == {"values": custom_schema} + assert _input_schema.get("name") == custom_schema["name"] + assert _input_schema.get("age") == custom_schema["age"] assert "additionalProperties" not in _input_schema from litellm import completion -class TestAnthropicCompletion(BaseLLMChatTest): +class TestAnthropicCompletion(BaseLLMChatTest, BaseAnthropicChatTest): def get_base_completion_call_args(self) -> dict: return {"model": "anthropic/claude-3-5-sonnet-20240620"} diff --git a/tests/llm_translation/test_openai.py b/tests/llm_translation/test_openai.py index 172c946636..633ff76467 100644 --- a/tests/llm_translation/test_openai.py +++ b/tests/llm_translation/test_openai.py @@ -3,6 +3,7 @@ import os import sys from datetime import datetime from unittest.mock import AsyncMock, patch +from typing import Optional sys.path.insert( 0, os.path.abspath("../..") @@ -17,6 +18,10 @@ import litellm from litellm import Choices, Message, ModelResponse from base_llm_unit_tests import BaseLLMChatTest import asyncio +from litellm.types.llms.openai import ( + ChatCompletionAnnotation, + ChatCompletionAnnotationURLCitation, +) def test_openai_prediction_param(): @@ -391,3 +396,65 @@ def test_openai_chat_completion_streaming_handler_reasoning_content(): ) assert response.choices[0].delta.reasoning_content == "." 
+ + +def validate_response_url_citation(url_citation: ChatCompletionAnnotationURLCitation): + assert "end_index" in url_citation + assert "start_index" in url_citation + assert "url" in url_citation + + +def validate_web_search_annotations(annotations: ChatCompletionAnnotation): + """validates litellm response contains web search annotations""" + print("annotations: ", annotations) + assert annotations is not None + assert isinstance(annotations, list) + for annotation in annotations: + assert annotation["type"] == "url_citation" + url_citation: ChatCompletionAnnotationURLCitation = annotation["url_citation"] + validate_response_url_citation(url_citation) + + +def test_openai_web_search(): + """Makes a simple web search request and validates the response contains web search annotations and all expected fields are present""" + litellm._turn_on_debug() + response = litellm.completion( + model="openai/gpt-4o-search-preview", + messages=[ + { + "role": "user", + "content": "What was a positive news story from today?", + } + ], + ) + print("litellm response: ", response.model_dump_json(indent=4)) + message = response.choices[0].message + annotations: ChatCompletionAnnotation = message.annotations + validate_web_search_annotations(annotations) + + +def test_openai_web_search_streaming(): + """Makes a simple web search request and validates the response contains web search annotations and all expected fields are present""" + # litellm._turn_on_debug() + test_openai_web_search: Optional[ChatCompletionAnnotation] = None + response = litellm.completion( + model="openai/gpt-4o-search-preview", + messages=[ + { + "role": "user", + "content": "What was a positive news story from today?", + } + ], + stream=True, + ) + for chunk in response: + print("litellm response chunk: ", chunk) + if ( + hasattr(chunk.choices[0].delta, "annotations") + and chunk.choices[0].delta.annotations is not None + ): + test_openai_web_search = chunk.choices[0].delta.annotations + + # Assert this request has at-least one web search annotation + assert test_openai_web_search is not None + validate_web_search_annotations(test_openai_web_search) diff --git a/tests/local_testing/test_get_model_info.py b/tests/local_testing/test_get_model_info.py index d71f3f7c24..1a0f6d7a8d 100644 --- a/tests/local_testing/test_get_model_info.py +++ b/tests/local_testing/test_get_model_info.py @@ -500,6 +500,7 @@ def test_aaamodel_prices_and_context_window_json_is_valid(): "supports_tool_choice": {"type": "boolean"}, "supports_video_input": {"type": "boolean"}, "supports_vision": {"type": "boolean"}, + "supports_web_search": {"type": "boolean"}, "tool_use_system_prompt_tokens": {"type": "number"}, "tpm": {"type": "number"}, "supported_endpoints": { @@ -518,6 +519,15 @@ def test_aaamodel_prices_and_context_window_json_is_valid(): ], }, }, + "search_context_cost_per_query": { + "type": "object", + "properties": { + "search_context_size_low": {"type": "number"}, + "search_context_size_medium": {"type": "number"}, + "search_context_size_high": {"type": "number"}, + }, + "additionalProperties": False, + }, "supported_modalities": { "type": "array", "items": { diff --git a/tests/local_testing/test_sagemaker.py b/tests/local_testing/test_sagemaker.py index ba1ab11596..9c7161e4ae 100644 --- a/tests/local_testing/test_sagemaker.py +++ b/tests/local_testing/test_sagemaker.py @@ -8,7 +8,7 @@ from dotenv import load_dotenv load_dotenv() import io import os - +import litellm from test_streaming import streaming_format_tests sys.path.insert( @@ -96,26 +96,57 @@ 
async def test_completion_sagemaker_messages_api(sync_mode): litellm.set_verbose = True verbose_logger.setLevel(logging.DEBUG) print("testing sagemaker") + from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler + if sync_mode is True: - resp = litellm.completion( - model="sagemaker_chat/huggingface-pytorch-tgi-inference-2024-08-23-15-48-59-245", - messages=[ - {"role": "user", "content": "hi"}, - ], - temperature=0.2, - max_tokens=80, - ) - print(resp) + client = HTTPHandler() + with patch.object(client, "post") as mock_post: + try: + resp = litellm.completion( + model="sagemaker_chat/huggingface-pytorch-tgi-inference-2024-08-23-15-48-59-245", + messages=[ + {"role": "user", "content": "hi"}, + ], + temperature=0.2, + max_tokens=80, + client=client, + ) + except Exception as e: + print(e) + mock_post.assert_called_once() + json_data = json.loads(mock_post.call_args.kwargs["data"]) + assert ( + json_data["model"] + == "huggingface-pytorch-tgi-inference-2024-08-23-15-48-59-245" + ) + assert json_data["messages"] == [{"role": "user", "content": "hi"}] + assert json_data["temperature"] == 0.2 + assert json_data["max_tokens"] == 80 + else: - resp = await litellm.acompletion( - model="sagemaker_chat/huggingface-pytorch-tgi-inference-2024-08-23-15-48-59-245", - messages=[ - {"role": "user", "content": "hi"}, - ], - temperature=0.2, - max_tokens=80, - ) - print(resp) + client = AsyncHTTPHandler() + with patch.object(client, "post") as mock_post: + try: + resp = await litellm.acompletion( + model="sagemaker_chat/huggingface-pytorch-tgi-inference-2024-08-23-15-48-59-245", + messages=[ + {"role": "user", "content": "hi"}, + ], + temperature=0.2, + max_tokens=80, + client=client, + ) + except Exception as e: + print(e) + mock_post.assert_called_once() + json_data = json.loads(mock_post.call_args.kwargs["data"]) + assert ( + json_data["model"] + == "huggingface-pytorch-tgi-inference-2024-08-23-15-48-59-245" + ) + assert json_data["messages"] == [{"role": "user", "content": "hi"}] + assert json_data["temperature"] == 0.2 + assert json_data["max_tokens"] == 80 except Exception as e: pytest.fail(f"Error occurred: {e}") @@ -125,7 +156,7 @@ async def test_completion_sagemaker_messages_api(sync_mode): @pytest.mark.parametrize( "model", [ - "sagemaker_chat/huggingface-pytorch-tgi-inference-2024-08-23-15-48-59-245", + # "sagemaker_chat/huggingface-pytorch-tgi-inference-2024-08-23-15-48-59-245", "sagemaker/jumpstart-dft-hf-textgeneration1-mp-20240815-185614", ], ) @@ -185,7 +216,7 @@ async def test_completion_sagemaker_stream(sync_mode, model): @pytest.mark.parametrize( "model", [ - "sagemaker_chat/huggingface-pytorch-tgi-inference-2024-08-23-15-48-59-245", + # "sagemaker_chat/huggingface-pytorch-tgi-inference-2024-08-23-15-48-59-245", "sagemaker/jumpstart-dft-hf-textgeneration1-mp-20240815-185614", ], ) diff --git a/tests/mcp_tests/mcp_server.py b/tests/mcp_tests/mcp_server.py new file mode 100644 index 0000000000..99a67edd02 --- /dev/null +++ b/tests/mcp_tests/mcp_server.py @@ -0,0 +1,20 @@ +# math_server.py +from mcp.server.fastmcp import FastMCP + +mcp = FastMCP("Math") + + +@mcp.tool() +def add(a: int, b: int) -> int: + """Add two numbers""" + return a + b + + +@mcp.tool() +def multiply(a: int, b: int) -> int: + """Multiply two numbers""" + return a * b + + +if __name__ == "__main__": + mcp.run(transport="stdio") diff --git a/tests/mcp_tests/test_mcp_litellm_client.py b/tests/mcp_tests/test_mcp_litellm_client.py new file mode 100644 index 0000000000..0f8fb7994a --- /dev/null +++ 
b/tests/mcp_tests/test_mcp_litellm_client.py @@ -0,0 +1,86 @@ +# Create server parameters for stdio connection +import os +import sys +import pytest + +sys.path.insert( + 0, os.path.abspath("../../..") +) # Adds the parent directory to the system path + +from mcp import ClientSession, StdioServerParameters +from mcp.client.stdio import stdio_client +import os +from litellm import experimental_mcp_client +import litellm +import pytest +import json + + +@pytest.mark.asyncio +async def test_mcp_agent(): + local_server_path = "./mcp_server.py" + ci_cd_server_path = "tests/mcp_tests/mcp_server.py" + server_params = StdioServerParameters( + command="python3", + # Make sure to update to the full absolute path to your math_server.py file + args=[ci_cd_server_path], + ) + + async with stdio_client(server_params) as (read, write): + async with ClientSession(read, write) as session: + # Initialize the connection + await session.initialize() + + # Get tools + tools = await experimental_mcp_client.load_mcp_tools( + session=session, format="openai" + ) + print("MCP TOOLS: ", tools) + + # Create and run the agent + messages = [{"role": "user", "content": "what's (3 + 5)"}] + llm_response = await litellm.acompletion( + model="gpt-4o", + api_key=os.getenv("OPENAI_API_KEY"), + messages=messages, + tools=tools, + tool_choice="required", + ) + print("LLM RESPONSE: ", json.dumps(llm_response, indent=4, default=str)) + # Add assertions to verify the response + assert llm_response["choices"][0]["message"]["tool_calls"] is not None + + assert ( + llm_response["choices"][0]["message"]["tool_calls"][0]["function"][ + "name" + ] + == "add" + ) + openai_tool = llm_response["choices"][0]["message"]["tool_calls"][0] + + # Call the tool using MCP client + call_result = await experimental_mcp_client.call_openai_tool( + session=session, + openai_tool=openai_tool, + ) + print("CALL RESULT: ", call_result) + + # send the tool result to the LLM + messages.append(llm_response["choices"][0]["message"]) + messages.append( + { + "role": "tool", + "content": str(call_result.content[0].text), + "tool_call_id": openai_tool["id"], + } + ) + print("final messages: ", messages) + llm_response = await litellm.acompletion( + model="gpt-4o", + api_key=os.getenv("OPENAI_API_KEY"), + messages=messages, + tools=tools, + ) + print( + "FINAL LLM RESPONSE: ", json.dumps(llm_response, indent=4, default=str) + ) diff --git a/tests/pass_through_unit_tests/test_pass_through_unit_tests.py b/tests/pass_through_unit_tests/test_pass_through_unit_tests.py index db0a647e41..cb9db00324 100644 --- a/tests/pass_through_unit_tests/test_pass_through_unit_tests.py +++ b/tests/pass_through_unit_tests/test_pass_through_unit_tests.py @@ -339,9 +339,6 @@ def test_pass_through_routes_support_all_methods(): from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import ( router as llm_router, ) - from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import ( - router as vertex_router, - ) # Expected HTTP methods expected_methods = {"GET", "POST", "PUT", "DELETE", "PATCH"} @@ -361,7 +358,6 @@ def test_pass_through_routes_support_all_methods(): # Check both routers check_router_methods(llm_router) - check_router_methods(vertex_router) def test_is_bedrock_agent_runtime_route(): diff --git a/tests/pass_through_unit_tests/test_unit_test_passthrough_router.py b/tests/pass_through_unit_tests/test_unit_test_passthrough_router.py index 6e8296876a..8e016b68d0 100644 --- a/tests/pass_through_unit_tests/test_unit_test_passthrough_router.py +++ 
b/tests/pass_through_unit_tests/test_unit_test_passthrough_router.py @@ -11,6 +11,7 @@ from unittest.mock import patch from litellm.proxy.pass_through_endpoints.passthrough_endpoint_router import ( PassthroughEndpointRouter, ) +from litellm.types.passthrough_endpoints.vertex_ai import VertexPassThroughCredentials passthrough_endpoint_router = PassthroughEndpointRouter() @@ -132,3 +133,185 @@ class TestPassthroughEndpointRouter(unittest.TestCase): ), "COHERE_API_KEY", ) + + def test_get_deployment_key(self): + """Test _get_deployment_key with various inputs""" + router = PassthroughEndpointRouter() + + # Test with valid inputs + key = router._get_deployment_key("test-project", "us-central1") + assert key == "test-project-us-central1" + + # Test with None values + key = router._get_deployment_key(None, "us-central1") + assert key is None + + key = router._get_deployment_key("test-project", None) + assert key is None + + key = router._get_deployment_key(None, None) + assert key is None + + def test_add_vertex_credentials(self): + """Test add_vertex_credentials functionality""" + router = PassthroughEndpointRouter() + + # Test adding valid credentials + router.add_vertex_credentials( + project_id="test-project", + location="us-central1", + vertex_credentials='{"credentials": "test-creds"}', + ) + + assert "test-project-us-central1" in router.deployment_key_to_vertex_credentials + creds = router.deployment_key_to_vertex_credentials["test-project-us-central1"] + assert creds.vertex_project == "test-project" + assert creds.vertex_location == "us-central1" + assert creds.vertex_credentials == '{"credentials": "test-creds"}' + + # Test adding with None values + router.add_vertex_credentials( + project_id=None, + location=None, + vertex_credentials='{"credentials": "test-creds"}', + ) + # Should not add None values + assert len(router.deployment_key_to_vertex_credentials) == 1 + + def test_default_credentials(self): + """ + Test get_vertex_credentials with stored credentials. + + Tests if default credentials are used if set. 
+ + Tests if no default credentials are used, if no default set + """ + router = PassthroughEndpointRouter() + router.add_vertex_credentials( + project_id="test-project", + location="us-central1", + vertex_credentials='{"credentials": "test-creds"}', + ) + + creds = router.get_vertex_credentials( + project_id="test-project", location="us-central2" + ) + + assert creds is None + + def test_get_vertex_env_vars(self): + """Test that _get_vertex_env_vars correctly reads environment variables""" + # Set environment variables for the test + os.environ["DEFAULT_VERTEXAI_PROJECT"] = "test-project-123" + os.environ["DEFAULT_VERTEXAI_LOCATION"] = "us-central1" + os.environ["DEFAULT_GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/creds" + + try: + result = self.router._get_vertex_env_vars() + print(result) + + # Verify the result + assert isinstance(result, VertexPassThroughCredentials) + assert result.vertex_project == "test-project-123" + assert result.vertex_location == "us-central1" + assert result.vertex_credentials == "/path/to/creds" + + finally: + # Clean up environment variables + del os.environ["DEFAULT_VERTEXAI_PROJECT"] + del os.environ["DEFAULT_VERTEXAI_LOCATION"] + del os.environ["DEFAULT_GOOGLE_APPLICATION_CREDENTIALS"] + + def test_set_default_vertex_config(self): + """Test set_default_vertex_config with various inputs""" + # Test with None config - set environment variables first + os.environ["DEFAULT_VERTEXAI_PROJECT"] = "env-project" + os.environ["DEFAULT_VERTEXAI_LOCATION"] = "env-location" + os.environ["DEFAULT_GOOGLE_APPLICATION_CREDENTIALS"] = "env-creds" + os.environ["GOOGLE_CREDS"] = "secret-creds" + + try: + # Test with None config + self.router.set_default_vertex_config() + + assert self.router.default_vertex_config.vertex_project == "env-project" + assert self.router.default_vertex_config.vertex_location == "env-location" + assert self.router.default_vertex_config.vertex_credentials == "env-creds" + + # Test with valid config.yaml settings on vertex_config + test_config = { + "vertex_project": "my-project-123", + "vertex_location": "us-central1", + "vertex_credentials": "path/to/creds", + } + self.router.set_default_vertex_config(test_config) + + assert self.router.default_vertex_config.vertex_project == "my-project-123" + assert self.router.default_vertex_config.vertex_location == "us-central1" + assert ( + self.router.default_vertex_config.vertex_credentials == "path/to/creds" + ) + + # Test with environment variable reference + test_config = { + "vertex_project": "my-project-123", + "vertex_location": "us-central1", + "vertex_credentials": "os.environ/GOOGLE_CREDS", + } + self.router.set_default_vertex_config(test_config) + + assert ( + self.router.default_vertex_config.vertex_credentials == "secret-creds" + ) + + finally: + # Clean up environment variables + del os.environ["DEFAULT_VERTEXAI_PROJECT"] + del os.environ["DEFAULT_VERTEXAI_LOCATION"] + del os.environ["DEFAULT_GOOGLE_APPLICATION_CREDENTIALS"] + del os.environ["GOOGLE_CREDS"] + + def test_vertex_passthrough_router_init(self): + """Test VertexPassThroughRouter initialization""" + router = PassthroughEndpointRouter() + assert isinstance(router.deployment_key_to_vertex_credentials, dict) + assert len(router.deployment_key_to_vertex_credentials) == 0 + + def test_get_vertex_credentials_none(self): + """Test get_vertex_credentials with various inputs""" + router = PassthroughEndpointRouter() + + router.set_default_vertex_config( + config={ + "vertex_project": None, + "vertex_location": None, + "vertex_credentials": 
None, + } + ) + + # Test with None project_id and location - should return default config + creds = router.get_vertex_credentials(None, None) + assert isinstance(creds, VertexPassThroughCredentials) + + # Test with valid project_id and location but no stored credentials + creds = router.get_vertex_credentials("test-project", "us-central1") + assert isinstance(creds, VertexPassThroughCredentials) + assert creds.vertex_project is None + assert creds.vertex_location is None + assert creds.vertex_credentials is None + + def test_get_vertex_credentials_stored(self): + """Test get_vertex_credentials with stored credentials""" + router = PassthroughEndpointRouter() + router.add_vertex_credentials( + project_id="test-project", + location="us-central1", + vertex_credentials='{"credentials": "test-creds"}', + ) + + creds = router.get_vertex_credentials( + project_id="test-project", location="us-central1" + ) + assert creds.vertex_project == "test-project" + assert creds.vertex_location == "us-central1" + assert creds.vertex_credentials == '{"credentials": "test-creds"}' diff --git a/tests/pass_through_unit_tests/test_unit_test_vertex_pass_through.py b/tests/pass_through_unit_tests/test_unit_test_vertex_pass_through.py deleted file mode 100644 index ba5dfa33a8..0000000000 --- a/tests/pass_through_unit_tests/test_unit_test_vertex_pass_through.py +++ /dev/null @@ -1,294 +0,0 @@ -import json -import os -import sys -from datetime import datetime -from unittest.mock import AsyncMock, Mock, patch - -sys.path.insert( - 0, os.path.abspath("../..") -) # Adds the parent directory to the system-path - - -import httpx -import pytest -import litellm -from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj - - -from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import ( - get_litellm_virtual_key, - vertex_proxy_route, - _get_vertex_env_vars, - set_default_vertex_config, - VertexPassThroughCredentials, - default_vertex_config, -) -from litellm.proxy.vertex_ai_endpoints.vertex_passthrough_router import ( - VertexPassThroughRouter, -) - - -@pytest.mark.asyncio -async def test_get_litellm_virtual_key(): - """ - Test that the get_litellm_virtual_key function correctly handles the API key authentication - """ - # Test with x-litellm-api-key - mock_request = Mock() - mock_request.headers = {"x-litellm-api-key": "test-key-123"} - result = get_litellm_virtual_key(mock_request) - assert result == "Bearer test-key-123" - - # Test with Authorization header - mock_request.headers = {"Authorization": "Bearer auth-key-456"} - result = get_litellm_virtual_key(mock_request) - assert result == "Bearer auth-key-456" - - # Test with both headers (x-litellm-api-key should take precedence) - mock_request.headers = { - "x-litellm-api-key": "test-key-123", - "Authorization": "Bearer auth-key-456", - } - result = get_litellm_virtual_key(mock_request) - assert result == "Bearer test-key-123" - - -@pytest.mark.asyncio -async def test_async_vertex_proxy_route_api_key_auth(): - """ - Critical - - This is how Vertex AI JS SDK will Auth to Litellm Proxy - """ - # Mock dependencies - mock_request = Mock() - mock_request.headers = {"x-litellm-api-key": "test-key-123"} - mock_request.method = "POST" - mock_response = Mock() - - with patch( - "litellm.proxy.vertex_ai_endpoints.vertex_endpoints.user_api_key_auth" - ) as mock_auth: - mock_auth.return_value = {"api_key": "test-key-123"} - - with patch( - "litellm.proxy.vertex_ai_endpoints.vertex_endpoints.create_pass_through_route" - ) as mock_pass_through: - 
mock_pass_through.return_value = AsyncMock( - return_value={"status": "success"} - ) - - # Call the function - result = await vertex_proxy_route( - endpoint="v1/projects/test-project/locations/us-central1/publishers/google/models/gemini-1.5-pro:generateContent", - request=mock_request, - fastapi_response=mock_response, - ) - - # Verify user_api_key_auth was called with the correct Bearer token - mock_auth.assert_called_once() - call_args = mock_auth.call_args[1] - assert call_args["api_key"] == "Bearer test-key-123" - - -@pytest.mark.asyncio -async def test_get_vertex_env_vars(): - """Test that _get_vertex_env_vars correctly reads environment variables""" - # Set environment variables for the test - os.environ["DEFAULT_VERTEXAI_PROJECT"] = "test-project-123" - os.environ["DEFAULT_VERTEXAI_LOCATION"] = "us-central1" - os.environ["DEFAULT_GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/creds" - - try: - result = _get_vertex_env_vars() - print(result) - - # Verify the result - assert isinstance(result, VertexPassThroughCredentials) - assert result.vertex_project == "test-project-123" - assert result.vertex_location == "us-central1" - assert result.vertex_credentials == "/path/to/creds" - - finally: - # Clean up environment variables - del os.environ["DEFAULT_VERTEXAI_PROJECT"] - del os.environ["DEFAULT_VERTEXAI_LOCATION"] - del os.environ["DEFAULT_GOOGLE_APPLICATION_CREDENTIALS"] - - -@pytest.mark.asyncio -async def test_set_default_vertex_config(): - """Test set_default_vertex_config with various inputs""" - # Test with None config - set environment variables first - os.environ["DEFAULT_VERTEXAI_PROJECT"] = "env-project" - os.environ["DEFAULT_VERTEXAI_LOCATION"] = "env-location" - os.environ["DEFAULT_GOOGLE_APPLICATION_CREDENTIALS"] = "env-creds" - os.environ["GOOGLE_CREDS"] = "secret-creds" - - try: - # Test with None config - set_default_vertex_config() - from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import ( - default_vertex_config, - ) - - assert default_vertex_config.vertex_project == "env-project" - assert default_vertex_config.vertex_location == "env-location" - assert default_vertex_config.vertex_credentials == "env-creds" - - # Test with valid config.yaml settings on vertex_config - test_config = { - "vertex_project": "my-project-123", - "vertex_location": "us-central1", - "vertex_credentials": "path/to/creds", - } - set_default_vertex_config(test_config) - from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import ( - default_vertex_config, - ) - - assert default_vertex_config.vertex_project == "my-project-123" - assert default_vertex_config.vertex_location == "us-central1" - assert default_vertex_config.vertex_credentials == "path/to/creds" - - # Test with environment variable reference - test_config = { - "vertex_project": "my-project-123", - "vertex_location": "us-central1", - "vertex_credentials": "os.environ/GOOGLE_CREDS", - } - set_default_vertex_config(test_config) - from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import ( - default_vertex_config, - ) - - assert default_vertex_config.vertex_credentials == "secret-creds" - - finally: - # Clean up environment variables - del os.environ["DEFAULT_VERTEXAI_PROJECT"] - del os.environ["DEFAULT_VERTEXAI_LOCATION"] - del os.environ["DEFAULT_GOOGLE_APPLICATION_CREDENTIALS"] - del os.environ["GOOGLE_CREDS"] - - -@pytest.mark.asyncio -async def test_vertex_passthrough_router_init(): - """Test VertexPassThroughRouter initialization""" - router = VertexPassThroughRouter() - assert 
isinstance(router.deployment_key_to_vertex_credentials, dict) - assert len(router.deployment_key_to_vertex_credentials) == 0 - - -@pytest.mark.asyncio -async def test_get_vertex_credentials_none(): - """Test get_vertex_credentials with various inputs""" - from litellm.proxy.vertex_ai_endpoints import vertex_endpoints - - setattr(vertex_endpoints, "default_vertex_config", VertexPassThroughCredentials()) - router = VertexPassThroughRouter() - - # Test with None project_id and location - should return default config - creds = router.get_vertex_credentials(None, None) - assert isinstance(creds, VertexPassThroughCredentials) - - # Test with valid project_id and location but no stored credentials - creds = router.get_vertex_credentials("test-project", "us-central1") - assert isinstance(creds, VertexPassThroughCredentials) - assert creds.vertex_project is None - assert creds.vertex_location is None - assert creds.vertex_credentials is None - - -@pytest.mark.asyncio -async def test_get_vertex_credentials_stored(): - """Test get_vertex_credentials with stored credentials""" - router = VertexPassThroughRouter() - router.add_vertex_credentials( - project_id="test-project", - location="us-central1", - vertex_credentials='{"credentials": "test-creds"}', - ) - - creds = router.get_vertex_credentials( - project_id="test-project", location="us-central1" - ) - assert creds.vertex_project == "test-project" - assert creds.vertex_location == "us-central1" - assert creds.vertex_credentials == '{"credentials": "test-creds"}' - - -@pytest.mark.asyncio -async def test_add_vertex_credentials(): - """Test add_vertex_credentials functionality""" - router = VertexPassThroughRouter() - - # Test adding valid credentials - router.add_vertex_credentials( - project_id="test-project", - location="us-central1", - vertex_credentials='{"credentials": "test-creds"}', - ) - - assert "test-project-us-central1" in router.deployment_key_to_vertex_credentials - creds = router.deployment_key_to_vertex_credentials["test-project-us-central1"] - assert creds.vertex_project == "test-project" - assert creds.vertex_location == "us-central1" - assert creds.vertex_credentials == '{"credentials": "test-creds"}' - - # Test adding with None values - router.add_vertex_credentials( - project_id=None, - location=None, - vertex_credentials='{"credentials": "test-creds"}', - ) - # Should not add None values - assert len(router.deployment_key_to_vertex_credentials) == 1 - - -@pytest.mark.asyncio -async def test_get_deployment_key(): - """Test _get_deployment_key with various inputs""" - router = VertexPassThroughRouter() - - # Test with valid inputs - key = router._get_deployment_key("test-project", "us-central1") - assert key == "test-project-us-central1" - - # Test with None values - key = router._get_deployment_key(None, "us-central1") - assert key is None - - key = router._get_deployment_key("test-project", None) - assert key is None - - key = router._get_deployment_key(None, None) - assert key is None - - -@pytest.mark.asyncio -async def test_get_vertex_project_id_from_url(): - """Test _get_vertex_project_id_from_url with various URLs""" - # Test with valid URL - url = "https://us-central1-aiplatform.googleapis.com/v1/projects/test-project/locations/us-central1/publishers/google/models/gemini-pro:streamGenerateContent" - project_id = VertexPassThroughRouter._get_vertex_project_id_from_url(url) - assert project_id == "test-project" - - # Test with invalid URL - url = "https://invalid-url.com" - project_id = 
VertexPassThroughRouter._get_vertex_project_id_from_url(url) - assert project_id is None - - -@pytest.mark.asyncio -async def test_get_vertex_location_from_url(): - """Test _get_vertex_location_from_url with various URLs""" - # Test with valid URL - url = "https://us-central1-aiplatform.googleapis.com/v1/projects/test-project/locations/us-central1/publishers/google/models/gemini-pro:streamGenerateContent" - location = VertexPassThroughRouter._get_vertex_location_from_url(url) - assert location == "us-central1" - - # Test with invalid URL - url = "https://invalid-url.com" - location = VertexPassThroughRouter._get_vertex_location_from_url(url) - assert location is None diff --git a/tests/proxy_admin_ui_tests/test_route_check_unit_tests.py b/tests/proxy_admin_ui_tests/test_route_check_unit_tests.py index 718f707755..937eb6f298 100644 --- a/tests/proxy_admin_ui_tests/test_route_check_unit_tests.py +++ b/tests/proxy_admin_ui_tests/test_route_check_unit_tests.py @@ -30,9 +30,6 @@ from litellm.proxy._types import LiteLLM_UserTable, LitellmUserRoles, UserAPIKey from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import ( router as llm_passthrough_router, ) -from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import ( - router as vertex_router, -) # Replace the actual hash_token function with our mock import litellm.proxy.auth.route_checks @@ -96,7 +93,7 @@ def test_is_llm_api_route(): assert RouteChecks.is_llm_api_route("/key/regenerate/82akk800000000jjsk") is False assert RouteChecks.is_llm_api_route("/key/82akk800000000jjsk/delete") is False - all_llm_api_routes = vertex_router.routes + llm_passthrough_router.routes + all_llm_api_routes = llm_passthrough_router.routes # check all routes in llm_passthrough_router, ensure they are considered llm api routes for route in all_llm_api_routes: @@ -165,7 +162,6 @@ def test_llm_api_route(route_checks): route="/v1/chat/completions", request=MockRequest(), valid_token=UserAPIKeyAuth(api_key="test_key"), - api_key="test_key", request_data={}, ) is None @@ -183,7 +179,6 @@ def test_key_info_route_allowed(route_checks): route="/key/info", request=MockRequest(query_params={"key": "test_key"}), valid_token=UserAPIKeyAuth(api_key="test_key"), - api_key="test_key", request_data={}, ) is None @@ -201,7 +196,6 @@ def test_user_info_route_allowed(route_checks): route="/user/info", request=MockRequest(query_params={"user_id": "test_user"}), valid_token=UserAPIKeyAuth(api_key="test_key", user_id="test_user"), - api_key="test_key", request_data={}, ) is None @@ -219,7 +213,6 @@ def test_user_info_route_forbidden(route_checks): route="/user/info", request=MockRequest(query_params={"user_id": "wrong_user"}), valid_token=UserAPIKeyAuth(api_key="test_key", user_id="test_user"), - api_key="test_key", request_data={}, ) assert exc_info.value.status_code == 403 diff --git a/tests/proxy_unit_tests/test_user_api_key_auth.py b/tests/proxy_unit_tests/test_user_api_key_auth.py index e956a22282..f0ca27c946 100644 --- a/tests/proxy_unit_tests/test_user_api_key_auth.py +++ b/tests/proxy_unit_tests/test_user_api_key_auth.py @@ -4,6 +4,9 @@ import os import sys +import litellm.proxy +import litellm.proxy.proxy_server + sys.path.insert( 0, os.path.abspath("../..") ) # Adds the parent directory to the system path @@ -329,7 +332,7 @@ async def test_auth_with_allowed_routes(route, should_raise_error): ], ) def test_is_ui_route_allowed(route, user_role, expected_result): - from litellm.proxy.auth.user_api_key_auth import _is_ui_route + from litellm.proxy.auth.auth_checks 
import _is_ui_route from litellm.proxy._types import LiteLLM_UserTable user_obj = LiteLLM_UserTable( @@ -367,7 +370,7 @@ def test_is_ui_route_allowed(route, user_role, expected_result): ], ) def test_is_api_route_allowed(route, user_role, expected_result): - from litellm.proxy.auth.user_api_key_auth import _is_api_route_allowed + from litellm.proxy.auth.auth_checks import _is_api_route_allowed from litellm.proxy._types import LiteLLM_UserTable user_obj = LiteLLM_UserTable( @@ -635,7 +638,7 @@ async def test_soft_budget_alert(): def test_is_allowed_route(): - from litellm.proxy.auth.user_api_key_auth import _is_allowed_route + from litellm.proxy.auth.auth_checks import _is_allowed_route from litellm.proxy._types import UserAPIKeyAuth import datetime @@ -646,7 +649,6 @@ def test_is_allowed_route(): "token_type": "api", "request": request, "request_data": {"input": ["hello world"], "model": "embedding-small"}, - "api_key": "9644159bc181998825c44c788b1526341ed2e825d1b6f562e23173759e14bb86", "valid_token": UserAPIKeyAuth( token="9644159bc181998825c44c788b1526341ed2e825d1b6f562e23173759e14bb86", key_name="sk-...CJjQ", @@ -734,7 +736,7 @@ def test_is_allowed_route(): ], ) def test_is_user_proxy_admin(user_obj, expected_result): - from litellm.proxy.auth.user_api_key_auth import _is_user_proxy_admin + from litellm.proxy.auth.auth_checks import _is_user_proxy_admin assert _is_user_proxy_admin(user_obj) == expected_result @@ -947,3 +949,53 @@ def test_get_model_from_request(route, request_data, expected_model): from litellm.proxy.auth.user_api_key_auth import get_model_from_request assert get_model_from_request(request_data, route) == expected_model + + +@pytest.mark.asyncio +async def test_jwt_non_admin_team_route_access(monkeypatch): + """ + Test that a non-admin JWT user cannot access team management routes + """ + from fastapi import Request, HTTPException + from starlette.datastructures import URL + from unittest.mock import patch + from litellm.proxy.auth.user_api_key_auth import user_api_key_auth + import json + from litellm.proxy._types import ProxyException + + mock_jwt_response = { + "is_proxy_admin": False, + "team_id": None, + "team_object": None, + "user_id": None, + "user_object": None, + "org_id": None, + "org_object": None, + "end_user_id": None, + "end_user_object": None, + "token": 
"eyJhbGciOiJSUzI1NiIsInR5cCIgOiAiSldUIiwia2lkIiA6ICJmR09YQTNhbHFObjByRzJ6OHJQT1FLZVVMSWxCNDFnVWl4VDJ5WE1QVG1ZIn0.eyJleHAiOjE3NDI2MDAzODIsImlhdCI6MTc0MjYwMDA4MiwianRpIjoiODRhNjZmZjAtMTE5OC00YmRkLTk1NzAtNWZhMjNhZjYxMmQyIiwiaXNzIjoiaHR0cDovL2xvY2FsaG9zdDo4MDgwL3JlYWxtcy9saXRlbGxtLXJlYWxtIiwiYXVkIjoiYWNjb3VudCIsInN1YiI6ImZmMGZjOGNiLWUyMjktNDkyYy05NzYwLWNlYzVhMDYxNmI2MyIsInR5cCI6IkJlYXJlciIsImF6cCI6ImxpdGVsbG0tdGVzdC1jbGllbnQtaWQiLCJzaWQiOiI4MTYwNjIxOC0yNmZmLTQwMjAtOWQxNy05Zjc0YmFlNTBkODUiLCJhY3IiOiIxIiwiYWxsb3dlZC1vcmlnaW5zIjpbImh0dHA6Ly9sb2NhbGhvc3Q6NDAwMC8qIl0sInJlYWxtX2FjY2VzcyI6eyJyb2xlcyI6WyJvZmZsaW5lX2FjY2VzcyIsImRlZmF1bHQtcm9sZXMtbGl0ZWxsbS1yZWFsbSIsInVtYV9hdXRob3JpemF0aW9uIl19LCJyZXNvdXJjZV9hY2Nlc3MiOnsiYWNjb3VudCI6eyJyb2xlcyI6WyJtYW5hZ2UtYWNjb3VudCIsIm1hbmFnZS1hY2NvdW50LWxpbmtzIiwidmlldy1wcm9maWxlIl19fSwic2NvcGUiOiJwcm9maWxlIGdyb3Vwcy1zY29wZSBlbWFpbCBsaXRlbGxtLmFwaS5jb25zdW1lciIsImVtYWlsX3ZlcmlmaWVkIjp0cnVlLCJuYW1lIjoiS3Jpc2ggRGhvbGFraWEiLCJncm91cHMiOlsiL28zX21pbmlfYWNjZXNzIl0sInByZWZlcnJlZF91c2VybmFtZSI6ImtycmlzaGRoMiIsImdpdmVuX25hbWUiOiJLcmlzaCIsImZhbWlseV9uYW1lIjoiRGhvbGFraWEiLCJlbWFpbCI6ImtycmlzaGRob2xha2lhMkBnbWFpbC5jb20ifQ.Fu2ErZhnfez-bhn_XmjkywcFdZHcFUSvzIzfdNiEowdA0soLmCyqf9731amP6m68shd9qk11e0mQhxFIAIxZPojViC1Csc9TBXLRRQ8ESMd6gPIj-DBkKVkQSZLJ1uibsh4Oo2RViGtqWVcEt32T8U_xhGdtdzNkJ8qy_e0fdNDsUnhmSaTQvmZJYarW0roIrkC-zYZrX3fftzbQfavSu9eqdfPf6wUttIrkaWThWUuORy-xaeZfSmvsGbEg027hh6QwlChiZTSF8R6bRxoqfPN3ZaGFFgbBXNRYZA_eYi2IevhIwJHi_r4o1UvtKAJyfPefm-M6hCfkN_6da4zsog", + } + + # Create request + request = Request( + scope={"type": "http", "headers": [("Authorization", "Bearer fake.jwt.token")]} + ) + request._url = URL(url="/team/new") + + monkeypatch.setattr( + litellm.proxy.proxy_server, "general_settings", {"enable_jwt_auth": True} + ) + + # Mock JWTAuthManager.auth_builder + with patch( + "litellm.proxy.auth.handle_jwt.JWTAuthManager.auth_builder", + return_value=mock_jwt_response, + ): + try: + await user_api_key_auth(request=request, api_key="Bearer fake.jwt.token") + pytest.fail( + "Expected this call to fail. Non-admin user should not access team routes." 
+ ) + except ProxyException as e: + print("e", e) + assert "Only proxy admin can be used to generate" in str(e.message) diff --git a/tests/router_unit_tests/test_router_adding_deployments.py b/tests/router_unit_tests/test_router_adding_deployments.py index fca3f147e5..53fe7347d3 100644 --- a/tests/router_unit_tests/test_router_adding_deployments.py +++ b/tests/router_unit_tests/test_router_adding_deployments.py @@ -36,11 +36,11 @@ def test_initialize_deployment_for_pass_through_success(): ) # Verify the credentials were properly set - from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import ( - vertex_pass_through_router, + from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import ( + passthrough_endpoint_router, ) - vertex_creds = vertex_pass_through_router.get_vertex_credentials( + vertex_creds = passthrough_endpoint_router.get_vertex_credentials( project_id="test-project", location="us-central1" ) assert vertex_creds.vertex_project == "test-project" @@ -123,21 +123,21 @@ def test_add_vertex_pass_through_deployment(): router.add_deployment(deployment) # Get the vertex credentials from the router - from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import ( - vertex_pass_through_router, + from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import ( + passthrough_endpoint_router, ) # current state of pass-through vertex router print("\n vertex_pass_through_router.deployment_key_to_vertex_credentials\n\n") print( json.dumps( - vertex_pass_through_router.deployment_key_to_vertex_credentials, + passthrough_endpoint_router.deployment_key_to_vertex_credentials, indent=4, default=str, ) ) - vertex_creds = vertex_pass_through_router.get_vertex_credentials( + vertex_creds = passthrough_endpoint_router.get_vertex_credentials( project_id="test-project", location="us-central1" ) diff --git a/ui/litellm-dashboard/package-lock.json b/ui/litellm-dashboard/package-lock.json index 307e95217f..39ab75d8c7 100644 --- a/ui/litellm-dashboard/package-lock.json +++ b/ui/litellm-dashboard/package-lock.json @@ -21,7 +21,7 @@ "jsonwebtoken": "^9.0.2", "jwt-decode": "^4.0.0", "moment": "^2.30.1", - "next": "^14.2.15", + "next": "^14.2.25", "openai": "^4.28.0", "papaparse": "^5.5.2", "react": "^18", @@ -418,9 +418,10 @@ } }, "node_modules/@next/env": { - "version": "14.2.21", - "resolved": "https://registry.npmjs.org/@next/env/-/env-14.2.21.tgz", - "integrity": "sha512-lXcwcJd5oR01tggjWJ6SrNNYFGuOOMB9c251wUNkjCpkoXOPkDeF/15c3mnVlBqrW4JJXb2kVxDFhC4GduJt2A==" + "version": "14.2.25", + "resolved": "https://registry.npmjs.org/@next/env/-/env-14.2.25.tgz", + "integrity": "sha512-JnzQ2cExDeG7FxJwqAksZ3aqVJrHjFwZQAEJ9gQZSoEhIow7SNoKZzju/AwQ+PLIR4NY8V0rhcVozx/2izDO0w==", + "license": "MIT" }, "node_modules/@next/eslint-plugin-next": { "version": "14.1.0", @@ -432,12 +433,13 @@ } }, "node_modules/@next/swc-darwin-arm64": { - "version": "14.2.21", - "resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-14.2.21.tgz", - "integrity": "sha512-HwEjcKsXtvszXz5q5Z7wCtrHeTTDSTgAbocz45PHMUjU3fBYInfvhR+ZhavDRUYLonm53aHZbB09QtJVJj8T7g==", + "version": "14.2.25", + "resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-14.2.25.tgz", + "integrity": "sha512-09clWInF1YRd6le00vt750s3m7SEYNehz9C4PUcSu3bAdCTpjIV4aTYQZ25Ehrr83VR1rZeqtKUPWSI7GfuKZQ==", "cpu": [ "arm64" ], + "license": "MIT", "optional": true, "os": [ "darwin" @@ -447,12 +449,13 @@ } }, "node_modules/@next/swc-darwin-x64": { - "version": "14.2.21", - "resolved": 
"https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-14.2.21.tgz", - "integrity": "sha512-TSAA2ROgNzm4FhKbTbyJOBrsREOMVdDIltZ6aZiKvCi/v0UwFmwigBGeqXDA97TFMpR3LNNpw52CbVelkoQBxA==", + "version": "14.2.25", + "resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-14.2.25.tgz", + "integrity": "sha512-V+iYM/QR+aYeJl3/FWWU/7Ix4b07ovsQ5IbkwgUK29pTHmq+5UxeDr7/dphvtXEq5pLB/PucfcBNh9KZ8vWbug==", "cpu": [ "x64" ], + "license": "MIT", "optional": true, "os": [ "darwin" @@ -462,12 +465,13 @@ } }, "node_modules/@next/swc-linux-arm64-gnu": { - "version": "14.2.21", - "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-14.2.21.tgz", - "integrity": "sha512-0Dqjn0pEUz3JG+AImpnMMW/m8hRtl1GQCNbO66V1yp6RswSTiKmnHf3pTX6xMdJYSemf3O4Q9ykiL0jymu0TuA==", + "version": "14.2.25", + "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-14.2.25.tgz", + "integrity": "sha512-LFnV2899PJZAIEHQ4IMmZIgL0FBieh5keMnriMY1cK7ompR+JUd24xeTtKkcaw8QmxmEdhoE5Mu9dPSuDBgtTg==", "cpu": [ "arm64" ], + "license": "MIT", "optional": true, "os": [ "linux" @@ -477,12 +481,13 @@ } }, "node_modules/@next/swc-linux-arm64-musl": { - "version": "14.2.21", - "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-14.2.21.tgz", - "integrity": "sha512-Ggfw5qnMXldscVntwnjfaQs5GbBbjioV4B4loP+bjqNEb42fzZlAaK+ldL0jm2CTJga9LynBMhekNfV8W4+HBw==", + "version": "14.2.25", + "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-14.2.25.tgz", + "integrity": "sha512-QC5y5PPTmtqFExcKWKYgUNkHeHE/z3lUsu83di488nyP0ZzQ3Yse2G6TCxz6nNsQwgAx1BehAJTZez+UQxzLfw==", "cpu": [ "arm64" ], + "license": "MIT", "optional": true, "os": [ "linux" @@ -492,12 +497,13 @@ } }, "node_modules/@next/swc-linux-x64-gnu": { - "version": "14.2.21", - "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-14.2.21.tgz", - "integrity": "sha512-uokj0lubN1WoSa5KKdThVPRffGyiWlm/vCc/cMkWOQHw69Qt0X1o3b2PyLLx8ANqlefILZh1EdfLRz9gVpG6tg==", + "version": "14.2.25", + "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-14.2.25.tgz", + "integrity": "sha512-y6/ML4b9eQ2D/56wqatTJN5/JR8/xdObU2Fb1RBidnrr450HLCKr6IJZbPqbv7NXmje61UyxjF5kvSajvjye5w==", "cpu": [ "x64" ], + "license": "MIT", "optional": true, "os": [ "linux" @@ -507,12 +513,13 @@ } }, "node_modules/@next/swc-linux-x64-musl": { - "version": "14.2.21", - "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-14.2.21.tgz", - "integrity": "sha512-iAEBPzWNbciah4+0yI4s7Pce6BIoxTQ0AGCkxn/UBuzJFkYyJt71MadYQkjPqCQCJAFQ26sYh7MOKdU+VQFgPg==", + "version": "14.2.25", + "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-14.2.25.tgz", + "integrity": "sha512-sPX0TSXHGUOZFvv96GoBXpB3w4emMqKeMgemrSxI7A6l55VBJp/RKYLwZIB9JxSqYPApqiREaIIap+wWq0RU8w==", "cpu": [ "x64" ], + "license": "MIT", "optional": true, "os": [ "linux" @@ -522,12 +529,13 @@ } }, "node_modules/@next/swc-win32-arm64-msvc": { - "version": "14.2.21", - "resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-14.2.21.tgz", - "integrity": "sha512-plykgB3vL2hB4Z32W3ktsfqyuyGAPxqwiyrAi2Mr8LlEUhNn9VgkiAl5hODSBpzIfWweX3er1f5uNpGDygfQVQ==", + "version": "14.2.25", + "resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-14.2.25.tgz", + "integrity": 
"sha512-ReO9S5hkA1DU2cFCsGoOEp7WJkhFzNbU/3VUF6XxNGUCQChyug6hZdYL/istQgfT/GWE6PNIg9cm784OI4ddxQ==", "cpu": [ "arm64" ], + "license": "MIT", "optional": true, "os": [ "win32" @@ -537,12 +545,13 @@ } }, "node_modules/@next/swc-win32-ia32-msvc": { - "version": "14.2.21", - "resolved": "https://registry.npmjs.org/@next/swc-win32-ia32-msvc/-/swc-win32-ia32-msvc-14.2.21.tgz", - "integrity": "sha512-w5bacz4Vxqrh06BjWgua3Yf7EMDb8iMcVhNrNx8KnJXt8t+Uu0Zg4JHLDL/T7DkTCEEfKXO/Er1fcfWxn2xfPA==", + "version": "14.2.25", + "resolved": "https://registry.npmjs.org/@next/swc-win32-ia32-msvc/-/swc-win32-ia32-msvc-14.2.25.tgz", + "integrity": "sha512-DZ/gc0o9neuCDyD5IumyTGHVun2dCox5TfPQI/BJTYwpSNYM3CZDI4i6TOdjeq1JMo+Ug4kPSMuZdwsycwFbAw==", "cpu": [ "ia32" ], + "license": "MIT", "optional": true, "os": [ "win32" @@ -552,12 +561,13 @@ } }, "node_modules/@next/swc-win32-x64-msvc": { - "version": "14.2.21", - "resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-14.2.21.tgz", - "integrity": "sha512-sT6+llIkzpsexGYZq8cjjthRyRGe5cJVhqh12FmlbxHqna6zsDDK8UNaV7g41T6atFHCJUPeLb3uyAwrBwy0NA==", + "version": "14.2.25", + "resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-14.2.25.tgz", + "integrity": "sha512-KSznmS6eFjQ9RJ1nEc66kJvtGIL1iZMYmGEXsZPh2YtnLtqrgdVvKXJY2ScjjoFnG6nGLyPFR0UiEvDwVah4Tw==", "cpu": [ "x64" ], + "license": "MIT", "optional": true, "os": [ "win32" @@ -5001,11 +5011,12 @@ "dev": true }, "node_modules/next": { - "version": "14.2.21", - "resolved": "https://registry.npmjs.org/next/-/next-14.2.21.tgz", - "integrity": "sha512-rZmLwucLHr3/zfDMYbJXbw0ZeoBpirxkXuvsJbk7UPorvPYZhP7vq7aHbKnU7dQNCYIimRrbB2pp3xmf+wsYUg==", + "version": "14.2.25", + "resolved": "https://registry.npmjs.org/next/-/next-14.2.25.tgz", + "integrity": "sha512-N5M7xMc4wSb4IkPvEV5X2BRRXUmhVHNyaXwEM86+voXthSZz8ZiRyQW4p9mwAoAPIm6OzuVZtn7idgEJeAJN3Q==", + "license": "MIT", "dependencies": { - "@next/env": "14.2.21", + "@next/env": "14.2.25", "@swc/helpers": "0.5.5", "busboy": "1.6.0", "caniuse-lite": "^1.0.30001579", @@ -5020,15 +5031,15 @@ "node": ">=18.17.0" }, "optionalDependencies": { - "@next/swc-darwin-arm64": "14.2.21", - "@next/swc-darwin-x64": "14.2.21", - "@next/swc-linux-arm64-gnu": "14.2.21", - "@next/swc-linux-arm64-musl": "14.2.21", - "@next/swc-linux-x64-gnu": "14.2.21", - "@next/swc-linux-x64-musl": "14.2.21", - "@next/swc-win32-arm64-msvc": "14.2.21", - "@next/swc-win32-ia32-msvc": "14.2.21", - "@next/swc-win32-x64-msvc": "14.2.21" + "@next/swc-darwin-arm64": "14.2.25", + "@next/swc-darwin-x64": "14.2.25", + "@next/swc-linux-arm64-gnu": "14.2.25", + "@next/swc-linux-arm64-musl": "14.2.25", + "@next/swc-linux-x64-gnu": "14.2.25", + "@next/swc-linux-x64-musl": "14.2.25", + "@next/swc-win32-arm64-msvc": "14.2.25", + "@next/swc-win32-ia32-msvc": "14.2.25", + "@next/swc-win32-x64-msvc": "14.2.25" }, "peerDependencies": { "@opentelemetry/api": "^1.1.0", diff --git a/ui/litellm-dashboard/package.json b/ui/litellm-dashboard/package.json index 79f096106d..895e2576cc 100644 --- a/ui/litellm-dashboard/package.json +++ b/ui/litellm-dashboard/package.json @@ -22,7 +22,7 @@ "jsonwebtoken": "^9.0.2", "jwt-decode": "^4.0.0", "moment": "^2.30.1", - "next": "^14.2.15", + "next": "^14.2.25", "openai": "^4.28.0", "papaparse": "^5.5.2", "react": "^18",