Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 02:34:29 +00:00)
Litellm dev 01 27 2025 p3 (#8047)
* docs(reliability.md): add doc on disabling fallbacks per request
* feat(litellm_pre_call_utils.py): support reading request timeout from request headers - new `x-litellm-timeout` param. Allows setting dynamic model timeouts from Vercel's AI SDK
* test(test_proxy_server.py): add simple unit test for reading request timeout
* test(test_fallbacks.py): add e2e test to confirm timeout passed in request headers is correctly read
* feat(main.py): support passing metadata to openai in preview. Resolves https://github.com/BerriAI/litellm/issues/6022#issuecomment-2616119371
* fix(main.py): fix passing openai metadata
* docs(request_headers.md): document new request headers
* build: Merge branch 'main' into litellm_dev_01_27_2025_p3
* test: loosen test
Parent: 9c20c69915
Commit: d9eb8f42ff
11 changed files with 187 additions and 3 deletions
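The headline change is the new `x-litellm-timeout` request header, which lets a caller set a dynamic, per-request model timeout through the proxy. Below is a minimal client-side sketch, assuming a LiteLLM proxy listening on `http://0.0.0.0:4000` with the key `sk-1234` and the model alias `fake-openai-endpoint-5` (all taken from the e2e test in the diff that follows; substitute your own deployment's values):

```python
# Sketch: set a per-request timeout via the new `x-litellm-timeout` header.
# The proxy URL, key, and model alias mirror the e2e test below and are
# assumptions, not production values.
import openai

client = openai.OpenAI(
    base_url="http://0.0.0.0:4000",  # LiteLLM proxy
    api_key="sk-1234",
)

response = client.chat.completions.create(
    model="fake-openai-endpoint-5",
    messages=[{"role": "user", "content": "Who was Alexander?"}],
    # Timeout in seconds; per this commit, the proxy reads this header
    # and applies it to the upstream model call.
    extra_headers={"x-litellm-timeout": "2"},
)
print(response.choices[0].message.content)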
Changes to test_fallbacks.py (1 of the 11 changed files):

```diff
@@ -5,6 +5,7 @@ import asyncio
 import aiohttp
 from large_text import text
 import time
+from typing import Optional
 
 
 async def generate_key(
@@ -44,6 +45,7 @@ async def chat_completion(
     model: str,
     messages: list,
     return_headers: bool = False,
+    extra_headers: Optional[dict] = None,
     **kwargs,
 ):
     url = "http://0.0.0.0:4000/chat/completions"
@@ -51,6 +53,8 @@
         "Authorization": f"Bearer {key}",
         "Content-Type": "application/json",
     }
+    if extra_headers is not None:
+        headers.update(extra_headers)
     data = {"model": model, "messages": messages, **kwargs}
 
     async with session.post(url, headers=headers, json=data) as response:
@@ -180,6 +184,38 @@ async def test_chat_completion_with_timeout():
         ) # assert model-specific timeout used
 
 
+@pytest.mark.asyncio
+async def test_chat_completion_with_timeout_from_request():
+    """
+    make chat completion call with low timeout and `mock_timeout`: true. Expect it to fail and correct timeout to be set in headers.
+    """
+    async with aiohttp.ClientSession() as session:
+        model = "fake-openai-endpoint-5"
+        messages = [
+            {"role": "system", "content": text},
+            {"role": "user", "content": "Who was Alexander?"},
+        ]
+        extra_headers = {
+            "x-litellm-timeout": "0.001",
+        }
+        start_time = time.time()
+        response, headers = await chat_completion(
+            session=session,
+            key="sk-1234",
+            model=model,
+            messages=messages,
+            num_retries=0,
+            mock_timeout=True,
+            extra_headers=extra_headers,
+            return_headers=True,
+        )
+        end_time = time.time()
+        print(f"headers: {headers}")
+        assert (
+            headers["x-litellm-timeout"] == "0.001"
+        ) # assert model-specific timeout used
+
+
 @pytest.mark.parametrize("has_access", [True, False])
 @pytest.mark.asyncio
 async def test_chat_completion_client_fallbacks_with_custom_message(has_access):
```
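The commit message also mentions passing `metadata` through to OpenAI (in preview). A hedged sketch of what that call shape might look like through the Python SDK, assuming `litellm.completion` forwards its existing `metadata` parameter upstream under this change; the model name and metadata values are illustrative only:

```python
# Sketch: pass request metadata through litellm to OpenAI.
# Assumes the preview behavior from this commit forwards `metadata`
# to OpenAI's API; the key/value pair is made up for illustration.
import litellm

response = litellm.completion(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hello"}],
    metadata={"run_id": "abc-123"},  # forwarded to OpenAI (preview)
)
print(response.choices[0].message.content)
```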