Litellm dev 01 27 2025 p3 (#8047)

* docs(reliability.md): add doc on disabling fallbacks per request
* feat(litellm_pre_call_utils.py): support reading request timeout from request headers - new `x-litellm-timeout` param
  Allows setting dynamic model timeouts from Vercel's AI SDK
* test(test_proxy_server.py): add simple unit test for reading request timeout
* test(test_fallbacks.py): add e2e test to confirm timeout passed in request headers is correctly read
* feat(main.py): support passing metadata to openai in preview
  Resolves https://github.com/BerriAI/litellm/issues/6022#issuecomment-2616119371
* fix(main.py): fix passing openai metadata
* docs(request_headers.md): document new request headers
* build: Merge branch 'main' into litellm_dev_01_27_2025_p3
* test: loosen test
2025-04-27 03:34:10 +00:00 · 2025-01-28 18:01:27 -08:00 · 2025-01-28 18:01:27 -08:00 · d9eb8f42ff
commit d9eb8f42ff
parent 9c20c69915
11 changed files with 187 additions and 3 deletions
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@ -13,4 +13,4 @@ model_list:
  - model_name: deepseek/*
    litellm_params:
      model: deepseek/*
-      api_key: os.environ/DEEPSEEK_API_KEY
+      api_key: os.environ/DEEPSEEK_API_KEY
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@ -2204,6 +2204,7 @@ class SpecialHeaders(enum.Enum):
 class LitellmDataForBackendLLMCall(TypedDict, total=False):
    headers: dict
    organization: str
+    timeout: Optional[float]


 class JWTKeyItem(TypedDict, total=False):
--- a/litellm/proxy/litellm_pre_call_utils.py
+++ b/litellm/proxy/litellm_pre_call_utils.py
@ -181,6 +181,31 @@ def clean_headers(


 class LiteLLMProxyRequestSetup:
+    @staticmethod
+    def _get_timeout_from_request(headers: dict) -> Optional[float]:
+        """
+        Workaround for client request from Vercel's AI SDK.
+
+        Allows the user to set a timeout in the request headers.
+
+        Example:
+
+        ```js
+        const openaiProvider = createOpenAI({
+            baseURL: liteLLM.baseURL,
+            apiKey: liteLLM.apiKey,
+            compatibility: "compatible",
+            headers: {
+                "x-litellm-timeout": "90"
+            },
+        });
+        ```
+        """
+        timeout_header = headers.get("x-litellm-timeout", None)
+        if timeout_header is not None:
+            return float(timeout_header)
+        return None
+
    @staticmethod
    def _get_forwardable_headers(
        headers: Union[Headers, dict],
@ -267,6 +292,11 @@ class LiteLLMProxyRequestSetup:
        )
        if _organization is not None:
            data["organization"] = _organization
+
+        timeout = LiteLLMProxyRequestSetup._get_timeout_from_request(headers)
+        if timeout is not None:
+            data["timeout"] = timeout
+
        return data

    @staticmethod