Litellm dev 01 27 2025 p3 (#8047)

* docs(reliability.md): add doc on disabling fallbacks per request

* feat(litellm_pre_call_utils.py): support reading request timeout from request headers - new `x-litellm-timeout` param

Allows setting dynamic model timeouts from Vercel's AI SDK

* test(test_proxy_server.py): add simple unit test for reading request timeout

* test(test_fallbacks.py): add e2e test to confirm timeout passed in request headers is correctly read

* feat(main.py): support passing metadata to openai in preview

Resolves https://github.com/BerriAI/litellm/issues/6022#issuecomment-2616119371

* fix(main.py): fix passing openai metadata

* docs(request_headers.md): document new request headers

* build: Merge branch 'main' into litellm_dev_01_27_2025_p3

* test: loosen test
This commit is contained in:
Krish Dholakia 2025-01-28 18:01:27 -08:00 committed by GitHub
parent 9c20c69915
commit d9eb8f42ff
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 187 additions and 3 deletions

View file

@ -13,4 +13,4 @@ model_list:
- model_name: deepseek/*
litellm_params:
model: deepseek/*
api_key: os.environ/DEEPSEEK_API_KEY
api_key: os.environ/DEEPSEEK_API_KEY

View file

@ -2204,6 +2204,7 @@ class SpecialHeaders(enum.Enum):
# Typed dictionary of per-request values, judging by the name intended for the
# backend LLM call. `total=False` makes every key optional, so a key is only
# present when a value was actually set.
class LitellmDataForBackendLLMCall(TypedDict, total=False):
# Header mapping (plain dict; key/value types are not constrained here).
headers: dict
# Organization identifier, as a string.
organization: str
# Request timeout as a float; the annotation also permits None.
# NOTE(review): presumably filled from the `x-litellm-timeout` request header - confirm against the code that builds this dict.
timeout: Optional[float]
class JWTKeyItem(TypedDict, total=False):

View file

@ -181,6 +181,31 @@ def clean_headers(
class LiteLLMProxyRequestSetup:
@staticmethod
def _get_timeout_from_request(headers: dict) -> Optional[float]:
"""
Workaround for client request from Vercel's AI SDK.
Allow's user to set a timeout in the request headers.
Example:
```js
const openaiProvider = createOpenAI({
baseURL: liteLLM.baseURL,
apiKey: liteLLM.apiKey,
compatibility: "compatible",
headers: {
"x-litellm-timeout": "90"
},
});
```
"""
timeout_header = headers.get("x-litellm-timeout", None)
if timeout_header is not None:
return float(timeout_header)
return None
@staticmethod
def _get_forwardable_headers(
headers: Union[Headers, dict],
@ -267,6 +292,11 @@ class LiteLLMProxyRequestSetup:
)
if _organization is not None:
data["organization"] = _organization
timeout = LiteLLMProxyRequestSetup._get_timeout_from_request(headers)
if timeout is not None:
data["timeout"] = timeout
return data
@staticmethod