diff --git a/.circleci/config.yml b/.circleci/config.yml
index 14a22a5995..32414e8c4e 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -1127,6 +1127,7 @@ jobs:
           name: Install Dependencies
           command: |
             python -m pip install --upgrade pip
+            python -m pip install wheel setuptools
             python -m pip install -r requirements.txt
             pip install "pytest==7.3.1"
             pip install "pytest-retry==1.6.3"
diff --git a/deploy/charts/litellm-helm/templates/service.yaml b/deploy/charts/litellm-helm/templates/service.yaml
index 40e7f27f16..d8d81e78c8 100644
--- a/deploy/charts/litellm-helm/templates/service.yaml
+++ b/deploy/charts/litellm-helm/templates/service.yaml
@@ -2,6 +2,10 @@ apiVersion: v1
 kind: Service
 metadata:
   name: {{ include "litellm.fullname" . }}
+  {{- with .Values.service.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
   labels:
     {{- include "litellm.labels" . | nindent 4 }}
 spec:
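The annotations block is rendered only when `service.annotations` is set in the chart values, so existing installs are unaffected. A minimal sketch of a `values.yaml` override that exercises it (the annotation key below is purely illustrative):

```yaml
# values.yaml (illustrative): any key/value pairs here are copied onto the Service's metadata.annotations
service:
  annotations:
    service.beta.kubernetes.io/aws-load-balancer-type: "external"
```

Because the template pipes the map through `toYaml | nindent 4`, each entry lands under `metadata.annotations` at the correct indentation.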
diff --git a/docs/my-website/docs/providers/vertex.md b/docs/my-website/docs/providers/vertex.md
index ab13a51137..cdd3fce6c6 100644
--- a/docs/my-website/docs/providers/vertex.md
+++ b/docs/my-website/docs/providers/vertex.md
@@ -398,6 +398,8 @@ curl http://localhost:4000/v1/chat/completions \
 
 
 
+You can also use the `enterpriseWebSearch` tool for an [enterprise-compliant search](https://cloud.google.com/vertex-ai/generative-ai/docs/grounding/web-grounding-enterprise).
+
 #### **Moving from Vertex AI SDK to LiteLLM (GROUNDING)**
 
 
diff --git a/docs/my-website/docs/proxy/config_settings.md b/docs/my-website/docs/proxy/config_settings.md
index 455bdda938..863349a8fe 100644
--- a/docs/my-website/docs/proxy/config_settings.md
+++ b/docs/my-website/docs/proxy/config_settings.md
@@ -449,6 +449,7 @@ router_settings:
 | MICROSOFT_CLIENT_ID | Client ID for Microsoft services
 | MICROSOFT_CLIENT_SECRET | Client secret for Microsoft services
 | MICROSOFT_TENANT | Tenant ID for Microsoft Azure
+| MICROSOFT_SERVICE_PRINCIPAL_ID | Service Principal ID for the Microsoft Enterprise Application. (This is an advanced feature: set it if you want LiteLLM to auto-assign members to LiteLLM Teams based on their Microsoft Entra ID groups)
 | NO_DOCS | Flag to disable documentation generation
 | NO_PROXY | List of addresses to bypass proxy
 | OAUTH_TOKEN_INFO_ENDPOINT | Endpoint for OAuth token info retrieval
diff --git a/docs/my-website/docs/proxy/self_serve.md b/docs/my-website/docs/proxy/self_serve.md
index 604ceee3e5..d630c8e7f3 100644
--- a/docs/my-website/docs/proxy/self_serve.md
+++ b/docs/my-website/docs/proxy/self_serve.md
@@ -161,6 +161,89 @@ Here's the available UI roles for a LiteLLM Internal User:
 - `internal_user`: can login, view/create/delete their own keys, view their spend. **Cannot** add new users.
 - `internal_user_viewer`: can login, view their own keys, view their own spend. **Cannot** create/delete keys, add new users.
 
+## Auto-add SSO users to teams
+
+This walks through setting up SSO auto-add for **Okta and Google SSO**.
+
+### Okta, Google SSO
+
+1. Specify the JWT field that contains the team IDs the user belongs to.
+
+```yaml
+general_settings:
+  master_key: sk-1234
+  litellm_jwtauth:
+    team_ids_jwt_field: "groups" # 👈 CAN BE ANY FIELD
+```
+
+This assumes your SSO token looks like the example below. **If you need to inspect the JWT fields LiteLLM receives from your SSO provider, follow the instructions [here](#debugging-sso-jwt-fields).**
+
+```
+{
+  ...,
+  "groups": ["team_id_1", "team_id_2"]
+}
+```
+
+2. Create the teams on LiteLLM
+
+```bash
+curl -X POST '<PROXY_BASE_URL>/team/new' \
+-H 'Authorization: Bearer <PROXY_MASTER_KEY>' \
+-H 'Content-Type: application/json' \
+-d '{
+    "team_alias": "team_1",
+    "team_id": "team_id_1" # 👈 MUST BE THE SAME AS THE SSO GROUP ID
+}'
+```
+
+3. Test the SSO flow
+
+Here's a walkthrough of [how it works](https://www.loom.com/share/8959be458edf41fd85937452c29a33f3?sid=7ebd6d37-569a-4023-866e-e0cde67cb23e)
+
+### Microsoft Entra ID SSO group assignment
+
+This walks through setting up SSO auto-add for **Microsoft Entra ID**.
+
+Follow along with this video for a walkthrough of how to set this up with Microsoft Entra ID
+
+
+
+### Debugging SSO JWT fields
+
+If you need to inspect the JWT fields received from your SSO provider by LiteLLM, follow these instructions. This guide walks you through setting up a debug callback to view the JWT data during the SSO process.
+
+![SSO Debug Information](../../img/debug_sso.png)
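For orientation, the debug page simply shows the decoded claims received from your provider's token. Illustratively (field names vary by provider; the `groups` claim is the one the `team_ids_jwt_field` example above keys on), you might see something like:

```
{
  "sub": "user-123",
  "email": "user@example.com",
  "groups": ["team_id_1", "team_id_2"]
}
```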
+ +1. Add `/sso/debug/callback` as a redirect URL in your SSO provider + + In your SSO provider's settings, add the following URL as a new redirect (callback) URL: + + ```bash showLineNumbers title="Redirect URL" + http:///sso/debug/callback + ``` + + +2. Navigate to the debug login page on your browser + + Navigate to the following URL on your browser: + + ```bash showLineNumbers title="URL to navigate to" + https:///sso/debug/login + ``` + + This will initiate the standard SSO flow. You will be redirected to your SSO provider's login screen, and after successful authentication, you will be redirected back to LiteLLM's debug callback route. + + +3. View the JWT fields + +Once redirected, you should see a page called "SSO Debug Information". This page displays the JWT fields received from your SSO provider (as shown in the image above) + + ## Advanced ### Setting custom logout URLs @@ -196,40 +279,6 @@ This budget does not apply to keys created under non-default teams. [**Go Here**](./team_budgets.md) -### Auto-add SSO users to teams - -1. Specify the JWT field that contains the team ids, that the user belongs to. - -```yaml -general_settings: - master_key: sk-1234 - litellm_jwtauth: - team_ids_jwt_field: "groups" # 👈 CAN BE ANY FIELD -``` - -This is assuming your SSO token looks like this: -``` -{ - ..., - "groups": ["team_id_1", "team_id_2"] -} -``` - -2. Create the teams on LiteLLM - -```bash -curl -X POST '/team/new' \ --H 'Authorization: Bearer ' \ --H 'Content-Type: application/json' \ --D '{ - "team_alias": "team_1", - "team_id": "team_id_1" # 👈 MUST BE THE SAME AS THE SSO GROUP ID -}' -``` - -3. Test the SSO flow - -Here's a walkthrough of [how it works](https://www.loom.com/share/8959be458edf41fd85937452c29a33f3?sid=7ebd6d37-569a-4023-866e-e0cde67cb23e) ### Restrict Users from creating personal keys diff --git a/docs/my-website/img/debug_sso.png b/docs/my-website/img/debug_sso.png new file mode 100644 index 0000000000..d7dde36892 Binary files /dev/null and b/docs/my-website/img/debug_sso.png differ diff --git a/docs/my-website/src/components/TransformRequestPlayground.tsx b/docs/my-website/src/components/TransformRequestPlayground.tsx new file mode 100644 index 0000000000..8f22e5e198 --- /dev/null +++ b/docs/my-website/src/components/TransformRequestPlayground.tsx @@ -0,0 +1,161 @@ +import React, { useState } from 'react'; +import styles from './transform_request.module.css'; + +const DEFAULT_REQUEST = { + "model": "bedrock/gpt-4", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant." + }, + { + "role": "user", + "content": "Explain quantum computing in simple terms" + } + ], + "temperature": 0.7, + "max_tokens": 500, + "stream": true +}; + +type ViewMode = 'split' | 'request' | 'transformed'; + +const TransformRequestPlayground: React.FC = () => { + const [request, setRequest] = useState(JSON.stringify(DEFAULT_REQUEST, null, 2)); + const [transformedRequest, setTransformedRequest] = useState(''); + const [viewMode, setViewMode] = useState('split'); + + const handleTransform = async () => { + try { + // Here you would make the actual API call to transform the request + // For now, we'll just set a sample response + const sampleResponse = `curl -X POST \\ + https://api.openai.com/v1/chat/completions \\ + -H 'Authorization: Bearer sk-xxx' \\ + -H 'Content-Type: application/json' \\ + -d '{ + "model": "gpt-4", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant." 
+ } + ], + "temperature": 0.7 + }'`; + setTransformedRequest(sampleResponse); + } catch (error) { + console.error('Error transforming request:', error); + } + }; + + const handleCopy = () => { + navigator.clipboard.writeText(transformedRequest); + }; + + const renderContent = () => { + switch (viewMode) { + case 'request': + return ( +
+          <div className={styles.panel}>
+            <div className={styles['panel-header']}>
+              <h2>Original Request</h2>
+            </div>
+            <p>The request you would send to LiteLLM /chat/completions endpoint.</p>