mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 10:44:24 +00:00
Litellm dev 01 27 2025 p3 (#8047)
* docs(reliability.md): add doc on disabling fallbacks per request
* feat(litellm_pre_call_utils.py): support reading request timeout from request headers - new `x-litellm-timeout` param. Allows setting dynamic model timeouts from Vercel's AI SDK
* test(test_proxy_server.py): add simple unit test for reading request timeout
* test(test_fallbacks.py): add e2e test to confirm timeout passed in request headers is correctly read
* feat(main.py): support passing metadata to openai in preview. Resolves https://github.com/BerriAI/litellm/issues/6022#issuecomment-2616119371
* fix(main.py): fix passing openai metadata
* docs(request_headers.md): document new request headers
* build: Merge branch 'main' into litellm_dev_01_27_2025_p3
* test: loosen test
This commit is contained in:
parent
9c20c69915
commit
d9eb8f42ff
11 changed files with 187 additions and 3 deletions
|
@ -181,6 +181,31 @@ def clean_headers(
|
|||
|
||||
|
||||
class LiteLLMProxyRequestSetup:
|
||||
@staticmethod
|
||||
def _get_timeout_from_request(headers: dict) -> Optional[float]:
|
||||
"""
|
||||
Workaround for client request from Vercel's AI SDK.
|
||||
|
||||
Allow's user to set a timeout in the request headers.
|
||||
|
||||
Example:
|
||||
|
||||
```js
|
||||
const openaiProvider = createOpenAI({
|
||||
baseURL: liteLLM.baseURL,
|
||||
apiKey: liteLLM.apiKey,
|
||||
compatibility: "compatible",
|
||||
headers: {
|
||||
"x-litellm-timeout": "90"
|
||||
},
|
||||
});
|
||||
```
|
||||
"""
|
||||
timeout_header = headers.get("x-litellm-timeout", None)
|
||||
if timeout_header is not None:
|
||||
return float(timeout_header)
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _get_forwardable_headers(
|
||||
headers: Union[Headers, dict],
|
||||
|
@ -267,6 +292,11 @@ class LiteLLMProxyRequestSetup:
|
|||
)
|
||||
if _organization is not None:
|
||||
data["organization"] = _organization
|
||||
|
||||
timeout = LiteLLMProxyRequestSetup._get_timeout_from_request(headers)
|
||||
if timeout is not None:
|
||||
data["timeout"] = timeout
|
||||
|
||||
return data
|
||||
|
||||
@staticmethod
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue