diff --git a/docs/my-website/docs/proxy/reliability.md b/docs/my-website/docs/proxy/reliability.md
index 489f4e2ef1..654c2618c2 100644
--- a/docs/my-website/docs/proxy/reliability.md
+++ b/docs/my-website/docs/proxy/reliability.md
@@ -1007,7 +1007,34 @@ curl -L -X POST 'http://0.0.0.0:4000/v1/chat/completions' \
 }'
 ```
-### Disable Fallbacks per key
+### Disable Fallbacks (Per Request/Key)
+
+<Tabs>
+
+<TabItem value="request" label="Per Request">
+
+You can disable fallbacks per request by setting `disable_fallbacks: true` in your request body.
+
+```bash
+curl -L -X POST 'http://0.0.0.0:4000/v1/chat/completions' \
+-H 'Content-Type: application/json' \
+-H 'Authorization: Bearer sk-1234' \
+-d '{
+    "messages": [
+        {
+            "role": "user",
+            "content": "List 5 important events in the XIX century"
+        }
+    ],
+    "model": "gpt-3.5-turbo",
+    "disable_fallbacks": true # 👈 DISABLE FALLBACKS
+}'
+```
+
+</TabItem>
+
+<TabItem value="key" label="Per Key">
 
 You can disable fallbacks per key by setting `disable_fallbacks: true` in your key metadata.
 
@@ -1020,4 +1047,7 @@ curl -L -X POST 'http://0.0.0.0:4000/key/generate' \
 -H 'Authorization: Bearer sk-1234' \
 -H 'Content-Type: application/json' \
 -d '{
     "key_alias": "ishaan-disable-fallbacks-key",
     "metadata": {
         "disable_fallbacks": true
     }
 }'
-```
\ No newline at end of file
+```
+
+</TabItem>
+</Tabs>
\ No newline at end of file
diff --git a/docs/my-website/docs/proxy/request_headers.md b/docs/my-website/docs/proxy/request_headers.md
new file mode 100644
index 0000000000..d3ccb54435
--- /dev/null
+++ b/docs/my-website/docs/proxy/request_headers.md
@@ -0,0 +1,12 @@
+# Request Headers
+
+Special headers that are supported by LiteLLM.
+
+## LiteLLM Headers
+
+`x-litellm-timeout` Optional[float]: The timeout for the request in seconds.
+
+## Anthropic Headers
+
+`anthropic-version` Optional[str]: The version of the Anthropic API to use.
+`anthropic-beta` Optional[str]: The beta version of the Anthropic API to use.
\ No newline at end of file
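
The new `x-litellm-timeout` header documented above can be set from any OpenAI-compatible client. A minimal sketch using the openai Python SDK — the proxy URL and key are placeholder assumptions, not part of this patch:

```python
from openai import OpenAI

client = OpenAI(
    base_url="http://0.0.0.0:4000",  # assumed LiteLLM proxy address
    api_key="sk-1234",               # assumed virtual key
)

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello"}],
    # Per-request proxy timeout in seconds; the proxy parses this as a float.
    extra_headers={"x-litellm-timeout": "30"},
)
print(response.choices[0].message.content)
```
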
diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js
index d20f2a73e4..b4c9b13b31 100644
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@@ -66,6 +66,7 @@ const sidebars = {
         "proxy/user_keys",
         "proxy/clientside_auth",
         "proxy/response_headers",
+        "proxy/request_headers",
       ],
     },
     {
diff --git a/litellm/main.py b/litellm/main.py
index 0056f4751d..ec4e43fd94 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -75,6 +75,7 @@ from litellm.utils import (
     CustomStreamWrapper,
     ProviderConfigManager,
     Usage,
+    add_openai_metadata,
     async_mock_completion_streaming_obj,
     convert_to_model_response_object,
     create_pretrained_tokenizer,
@@ -1617,6 +1618,11 @@ def completion(  # type: ignore # noqa: PLR0915
             if extra_headers is not None:
                 optional_params["extra_headers"] = extra_headers
 
+            if (
+                litellm.enable_preview_features and metadata is not None
+            ):  # [PREVIEW] allow metadata to be passed to OPENAI
+                optional_params["metadata"] = add_openai_metadata(metadata)
+
             ## LOAD CONFIG - if set
             config = litellm.OpenAIConfig.get_config()
             for k, v in config.items():
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 423032ac86..321e8b676f 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -13,4 +13,4 @@ model_list:
   - model_name: deepseek/*
     litellm_params:
       model: deepseek/*
-      api_key: os.environ/DEEPSEEK_API_KEY
+      api_key: os.environ/DEEPSEEK_API_KEY
\ No newline at end of file
diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py
index bf3f6b6543..bf13d178d4 100644
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@@ -2204,6 +2204,7 @@ class SpecialHeaders(enum.Enum):
 class LitellmDataForBackendLLMCall(TypedDict, total=False):
     headers: dict
     organization: str
+    timeout: Optional[float]
 
 
 class JWTKeyItem(TypedDict, total=False):
diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py
index 1933bfb016..b913c238db 100644
--- a/litellm/proxy/litellm_pre_call_utils.py
+++ b/litellm/proxy/litellm_pre_call_utils.py
@@ -181,6 +181,31 @@ def clean_headers(
 
 
 class LiteLLMProxyRequestSetup:
+    @staticmethod
+    def _get_timeout_from_request(headers: dict) -> Optional[float]:
+        """
+        Workaround for client requests from Vercel's AI SDK.
+
+        Allows the user to set a timeout in the request headers.
+
+        Example:
+
+        ```js
+        const openaiProvider = createOpenAI({
+            baseURL: liteLLM.baseURL,
+            apiKey: liteLLM.apiKey,
+            compatibility: "compatible",
+            headers: {
+                "x-litellm-timeout": "90"
+            },
+        });
+        ```
+        """
+        timeout_header = headers.get("x-litellm-timeout", None)
+        if timeout_header is not None:
+            return float(timeout_header)
+        return None
+
     @staticmethod
     def _get_forwardable_headers(
         headers: Union[Headers, dict],
@@ -267,6 +292,11 @@ class LiteLLMProxyRequestSetup:
         )
         if _organization is not None:
             data["organization"] = _organization
+
+        timeout = LiteLLMProxyRequestSetup._get_timeout_from_request(headers)
+        if timeout is not None:
+            data["timeout"] = timeout
+
         return data
 
     @staticmethod
diff --git a/litellm/utils.py b/litellm/utils.py
index 92d6dc37db..c9a3b77cc2 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -6206,3 +6206,21 @@ def get_non_default_completion_params(kwargs: dict) -> dict:
         k: v for k, v in kwargs.items() if k not in default_params
     }  # model-specific params - pass them straight to the model/provider
     return non_default_params
+
+
+def add_openai_metadata(metadata: dict) -> Optional[dict]:
+    """
+    Add metadata to openai optional parameters, excluding hidden params.
+
+    Args:
+        metadata (dict): Metadata to include in the request;
+            the internal "hidden_params" key is stripped out
+
+    Returns:
+        Optional[dict]: Copy of the metadata with visible keys only, or None
+    """
+    if metadata is None:
+        return None
+    # Only include non-hidden parameters
+    visible_metadata = {k: v for k, v in metadata.items() if k != "hidden_params"}
+    return visible_metadata.copy()
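
Taken together, the `main.py` and `utils.py` changes above gate metadata forwarding behind `litellm.enable_preview_features` and strip LiteLLM's internal `hidden_params` key before anything reaches OpenAI. A small sketch of the intended behavior — the `trace_id` key and the `hidden_params` contents are illustrative placeholders:

```python
import litellm
from litellm.utils import add_openai_metadata

# Internal bookkeeping under "hidden_params" is stripped; other keys pass through.
metadata = {"trace_id": "abc-123", "hidden_params": {"model_id": "internal"}}
assert add_openai_metadata(metadata) == {"trace_id": "abc-123"}

# completion() only forwards metadata to OpenAI when preview features are enabled.
litellm.enable_preview_features = True
# litellm.completion(model="openai/gpt-3.5-turbo", messages=[...], metadata=metadata)
# would now send {"trace_id": "abc-123"} as the OpenAI `metadata` parameter.
```
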
diff --git a/tests/local_testing/test_completion.py b/tests/local_testing/test_completion.py
index b1aeeb98a2..dbee6e5194 100644
--- a/tests/local_testing/test_completion.py
+++ b/tests/local_testing/test_completion.py
@@ -4582,3 +4582,37 @@ def test_provider_specific_header(custom_llm_provider, expected_result):
     mock_post.assert_called_once()
     print(mock_post.call_args.kwargs["headers"])
     assert "anthropic-beta" in mock_post.call_args.kwargs["headers"]
+
+
+@pytest.mark.parametrize(
+    "enable_preview_features",
+    [True, False],
+)
+def test_completion_openai_metadata(monkeypatch, enable_preview_features):
+    from openai import OpenAI
+
+    client = OpenAI()
+
+    litellm.set_verbose = True
+
+    monkeypatch.setattr(litellm, "enable_preview_features", enable_preview_features)
+    with patch.object(
+        client.chat.completions.with_raw_response, "create", return_value=MagicMock()
+    ) as mock_completion:
+        try:
+            resp = litellm.completion(
+                model="openai/gpt-3.5-turbo",
+                messages=[{"role": "user", "content": "Hello world"}],
+                metadata={"my-test-key": "my-test-value"},
+                client=client,
+            )
+        except Exception as e:
+            print(f"Error: {e}")
+
+        mock_completion.assert_called_once()
+        if enable_preview_features:
+            assert mock_completion.call_args.kwargs["metadata"] == {
+                "my-test-key": "my-test-value"
+            }
+        else:
+            assert "metadata" not in mock_completion.call_args.kwargs
diff --git a/tests/proxy_unit_tests/test_proxy_server.py b/tests/proxy_unit_tests/test_proxy_server.py
index 4a9320c2ad..7f9d3b9081 100644
--- a/tests/proxy_unit_tests/test_proxy_server.py
+++ b/tests/proxy_unit_tests/test_proxy_server.py
@@ -2190,3 +2190,19 @@ async def test_get_ui_settings_spend_logs_threshold():
 
     # Clean up
     proxy_state.set_proxy_state_variable("spend_logs_row_count", 0)
+
+
+def test_get_timeout_from_request():
+    from litellm.proxy.litellm_pre_call_utils import LiteLLMProxyRequestSetup
+
+    headers = {
+        "x-litellm-timeout": "90",
+    }
+    timeout = LiteLLMProxyRequestSetup._get_timeout_from_request(headers)
+    assert timeout == 90
+
+    headers = {
+        "x-litellm-timeout": "90.5",
+    }
+    timeout = LiteLLMProxyRequestSetup._get_timeout_from_request(headers)
+    assert timeout == 90.5
diff --git a/tests/test_fallbacks.py b/tests/test_fallbacks.py
index 91c90448b3..b891eb3062 100644
--- a/tests/test_fallbacks.py
+++ b/tests/test_fallbacks.py
@@ -5,6 +5,7 @@ import asyncio
 import aiohttp
 from large_text import text
 import time
+from typing import Optional
 
 
 async def generate_key(
@@ -44,6 +45,7 @@ async def chat_completion(
     model: str,
     messages: list,
     return_headers: bool = False,
+    extra_headers: Optional[dict] = None,
     **kwargs,
 ):
     url = "http://0.0.0.0:4000/chat/completions"
@@ -51,6 +53,8 @@ async def chat_completion(
         "Authorization": f"Bearer {key}",
         "Content-Type": "application/json",
     }
+    if extra_headers is not None:
+        headers.update(extra_headers)
     data = {"model": model, "messages": messages, **kwargs}
 
     async with session.post(url, headers=headers, json=data) as response:
@@ -180,6 +184,38 @@ async def test_chat_completion_with_timeout():
     )  # assert model-specific timeout used
 
 
+@pytest.mark.asyncio
+async def test_chat_completion_with_timeout_from_request():
+    """
+    Make a chat completion call with a low `x-litellm-timeout` request header and `mock_timeout: true`. Expect it to fail and the request-level timeout to be set in the response headers.
+    """
+    async with aiohttp.ClientSession() as session:
+        model = "fake-openai-endpoint-5"
+        messages = [
+            {"role": "system", "content": text},
+            {"role": "user", "content": "Who was Alexander?"},
+        ]
+        extra_headers = {
+            "x-litellm-timeout": "0.001",
+        }
+        start_time = time.time()
+        response, headers = await chat_completion(
+            session=session,
+            key="sk-1234",
+            model=model,
+            messages=messages,
+            num_retries=0,
+            mock_timeout=True,
+            extra_headers=extra_headers,
+            return_headers=True,
+        )
+        end_time = time.time()
+        print(f"headers: {headers}")
+        assert (
+            headers["x-litellm-timeout"] == "0.001"
+        )  # assert request-level timeout used
+
+
 @pytest.mark.parametrize("has_access", [True, False])
 @pytest.mark.asyncio
 async def test_chat_completion_client_fallbacks_with_custom_message(has_access):
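
For completeness, the per-request `disable_fallbacks` flag from the reliability.md change above can also be exercised from the openai Python SDK via `extra_body`, which the proxy reads out of the JSON request body. A sketch under the same placeholder proxy URL and key assumptions as earlier:

```python
from openai import OpenAI

# Placeholder proxy address and virtual key, as in the docs examples above.
client = OpenAI(base_url="http://0.0.0.0:4000", api_key="sk-1234")

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "List 5 important events in the XIX century"}],
    # Sent as a top-level body field; the proxy treats it as disable_fallbacks.
    extra_body={"disable_fallbacks": True},
)
```
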