mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 18:54:30 +00:00)
* feat(router.py): add retry headers to response; makes it easy to add testing to ensure model-specific retries are respected
* fix(add_retry_headers.py): clarify attempted retries vs. max retries
* test(test_fallbacks.py): add test for checking if max retries set for model is respected
* test(test_fallbacks.py): assert values for attempted retries and max retries are as expected
* fix(utils.py): return timeout in litellm proxy response headers
* test(test_fallbacks.py): add test to assert model-specific timeout used on timeout error
* test: add bad model with timeout to proxy
* fix: fix linting error
* fix(router.py): fix get model list from model alias
* test: loosen test restriction - account for other events on proxy
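
The retry and timeout headers these changes add can be read client-side off the raw HTTP response. A minimal sketch (assumptions: a LiteLLM proxy running locally on port 4000, a placeholder key and model name taken from the tests below, and the OpenAI Python SDK's raw-response interface):

import openai

client = openai.OpenAI(base_url="http://0.0.0.0:4000", api_key="sk-1234")

# with_raw_response exposes the HTTP headers alongside the parsed body
raw = client.chat.completions.with_raw_response.create(
    model="fake-openai-endpoint-4",  # placeholder model from the tests below
    messages=[{"role": "user", "content": "Hey!"}],
)
print(raw.headers.get("x-litellm-attempted-retries"))  # retries actually attempted
print(raw.headers.get("x-litellm-max-retries"))        # model-specific max retries
print(raw.headers.get("x-litellm-timeout"))            # model-specific timeout, if set
completion = raw.parse()  # the usual ChatCompletion object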
# What is this?
## This tests if the proxy fallbacks work as expected

import pytest
import asyncio
import aiohttp
from large_text import text
import time


async def generate_key(
    session,
    i,
    models: list,
    calling_key="sk-1234",
):
    """Create a proxy key with access to the given models; returns the JSON response."""
    url = "http://0.0.0.0:4000/key/generate"
    headers = {
        "Authorization": f"Bearer {calling_key}",
        "Content-Type": "application/json",
    }
    data = {
        "models": models,
    }

    print(f"data: {data}")

    async with session.post(url, headers=headers, json=data) as response:
        status = response.status
        response_text = await response.text()

        print(f"Response {i} (Status code: {status}):")
        print(response_text)
        print()

        if status != 200:
            raise Exception(f"Request {i} did not return a 200 status code: {status}")

        return await response.json()


async def chat_completion(
    session,
    key: str,
    model: str,
    messages: list,
    return_headers: bool = False,
    **kwargs,
):
    """
    Call the proxy's /chat/completions endpoint.

    Returns the parsed JSON response; with `return_headers=True`, returns a
    (json, headers) tuple instead, and (None, headers) on a non-200 status
    rather than raising.
    """
    url = "http://0.0.0.0:4000/chat/completions"
    headers = {
        "Authorization": f"Bearer {key}",
        "Content-Type": "application/json",
    }
    data = {"model": model, "messages": messages, **kwargs}

    async with session.post(url, headers=headers, json=data) as response:
        status = response.status
        response_text = await response.text()

        print(response_text)
        print()

        if status != 200:
            if return_headers:
                return None, response.headers
            else:
                raise Exception(f"Request did not return a 200 status code: {status}")

        if return_headers:
            return await response.json(), response.headers
        else:
            return await response.json()
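

# NOTE: this test assumes the proxy config routes oversized prompts to a
# larger-context deployment (e.g. via the router's `context_window_fallbacks`
# setting); without such a fallback, the oversized system prompt below would
# fail with a context-window error.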
@pytest.mark.asyncio
async def test_chat_completion():
    """
    Make a chat completion call with a prompt larger than the model's context
    window. Expect it to succeed via fallback.
    """
    async with aiohttp.ClientSession() as session:
        model = "gpt-3.5-turbo"
        messages = [
            {"role": "system", "content": text},
            {"role": "user", "content": "Who was Alexander?"},
        ]
        await chat_completion(
            session=session, key="sk-1234", model=model, messages=messages
        )


@pytest.mark.parametrize("has_access", [True, False])
@pytest.mark.asyncio
async def test_chat_completion_client_fallbacks(has_access):
    """
    Make a chat completion call with client-side fallbacks. Expect the fallback
    to be used when the key has access to it, and the call to fail when it does
    not.
    """
    async with aiohttp.ClientSession() as session:
        models = ["gpt-3.5-turbo"]

        if has_access:
            models.append("gpt-instruct")

        ## CREATE KEY WITH MODELS
        generated_key = await generate_key(session=session, i=0, models=models)
        calling_key = generated_key["key"]
        model = "gpt-3.5-turbo"
        messages = [
            {"role": "user", "content": "Who was Alexander?"},
        ]

        ## CALL PROXY
        try:
            await chat_completion(
                session=session,
                key=calling_key,
                model=model,
                messages=messages,
                mock_testing_fallbacks=True,
                fallbacks=["gpt-instruct"],
            )
            if not has_access:
                pytest.fail(
                    "Expected this to fail, submitted fallback model that key did not have access to"
                )
        except Exception as e:
            if has_access:
                pytest.fail("Expected this to work: {}".format(str(e)))
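

# NOTE: the two tests below assume model-specific settings in the proxy config.
# A hedged sketch of the relevant entries (placeholders, not the repo's actual
# config file):
#
#   model_list:
#     - model_name: fake-openai-endpoint-4
#       litellm_params:
#         ...
#         max_retries: 50   # surfaced as the x-litellm-max-retries header
#     - model_name: fake-openai-endpoint-5
#       litellm_params:
#         ...
#         timeout: 1.0      # surfaced as the x-litellm-timeout header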
@pytest.mark.asyncio
async def test_chat_completion_with_retries():
    """
    Make a chat completion call that triggers a mocked rate-limit error.
    Expect the attempted and max retry counts to be surfaced in the response
    headers.
    """
    async with aiohttp.ClientSession() as session:
        model = "fake-openai-endpoint-4"
        messages = [
            {"role": "system", "content": text},
            {"role": "user", "content": "Who was Alexander?"},
        ]
        response, headers = await chat_completion(
            session=session,
            key="sk-1234",
            model=model,
            messages=messages,
            mock_testing_rate_limit_error=True,
            return_headers=True,
        )
        print(f"headers: {headers}")
        assert headers["x-litellm-attempted-retries"] == "1"
        assert headers["x-litellm-max-retries"] == "50"


@pytest.mark.asyncio
async def test_chat_completion_with_timeout():
    """
    Make a chat completion call with a low timeout and `mock_timeout: true`.
    Expect it to fail, with the model-specific timeout reported in the response
    headers.
    """
    async with aiohttp.ClientSession() as session:
        model = "fake-openai-endpoint-5"
        messages = [
            {"role": "system", "content": text},
            {"role": "user", "content": "Who was Alexander?"},
        ]
        start_time = time.time()
        response, headers = await chat_completion(
            session=session,
            key="sk-1234",
            model=model,
            messages=messages,
            num_retries=0,
            mock_timeout=True,
            return_headers=True,
        )
        end_time = time.time()
        print(f"headers: {headers}")
        assert (
            headers["x-litellm-timeout"] == "1.0"
        )  # assert model-specific timeout used


@pytest.mark.parametrize("has_access", [True, False])
@pytest.mark.asyncio
async def test_chat_completion_client_fallbacks_with_custom_message(has_access):
    """
    Make a chat completion call with a client-side fallback that injects custom
    messages. Expect it to work when the key has access to the fallback model,
    and to fail when it does not.
    """
    async with aiohttp.ClientSession() as session:
        models = ["gpt-3.5-turbo"]

        if has_access:
            models.append("gpt-instruct")

        ## CREATE KEY WITH MODELS
        generated_key = await generate_key(session=session, i=0, models=models)
        calling_key = generated_key["key"]
        model = "gpt-3.5-turbo"
        messages = [
            {"role": "user", "content": "Who was Alexander?"},
        ]

        ## CALL PROXY
        try:
            await chat_completion(
                session=session,
                key=calling_key,
                model=model,
                messages=messages,
                mock_testing_fallbacks=True,
                fallbacks=[
                    {
                        "model": "gpt-instruct",
                        "messages": [
                            {
                                "role": "assistant",
                                "content": "This is a custom message",
                            }
                        ],
                    }
                ],
            )
            if not has_access:
                pytest.fail(
                    "Expected this to fail, submitted fallback model that key did not have access to"
                )
        except Exception as e:
            if has_access:
                pytest.fail("Expected this to work: {}".format(str(e)))