mirror of https://github.com/BerriAI/litellm.git
synced 2025-04-26 03:04:13 +00:00
refactor: add black formatting
This commit is contained in:
parent b87d630b0a
commit 4905929de3
156 changed files with 19723 additions and 10869 deletions
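For context on the tooling: black is an opinionated Python code formatter, and a repo-wide pass like this one is typically produced with a single invocation. A minimal sketch, assuming the stock black CLI (the exact command used for this commit is not recorded on this page):

    pip install black
    black .            # rewrites all Python files in place
    black --check .    # CI-friendly: exits non-zero if any file would be reformatted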
@@ -3,164 +3,184 @@
Before black formatting:

import sys, os, time
import traceback, asyncio
import pytest

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import litellm
from litellm import Router

## Scenarios
## 1. 2 models - openai + azure - 1 model group "gpt-3.5-turbo",
## 2. 2 models - openai, azure - 2 diff model groups, 1 caching group

@pytest.mark.asyncio
async def test_acompletion_caching_on_router():
    # tests acompletion + caching on router
    try:
        litellm.set_verbose = True
        model_list = [
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": "gpt-3.5-turbo-0613",
                    "api_key": os.getenv("OPENAI_API_KEY"),
                },
                "tpm": 100000,
                "rpm": 10000,
            },
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": "azure/chatgpt-v-2",
                    "api_key": os.getenv("AZURE_API_KEY"),
                    "api_base": os.getenv("AZURE_API_BASE"),
                    "api_version": os.getenv("AZURE_API_VERSION")
                },
                "tpm": 100000,
                "rpm": 10000,
            }
        ]

        messages = [
            {"role": "user", "content": f"write a one sentence poem {time.time()}?"}
        ]
        start_time = time.time()
        router = Router(model_list=model_list,
                        redis_host=os.environ["REDIS_HOST"],
                        redis_password=os.environ["REDIS_PASSWORD"],
                        redis_port=os.environ["REDIS_PORT"],
                        cache_responses=True,
                        timeout=30,
                        routing_strategy="simple-shuffle")
        response1 = await router.acompletion(model="gpt-3.5-turbo", messages=messages, temperature=1)
        print(f"response1: {response1}")
        await asyncio.sleep(1)  # add cache is async, async sleep for cache to get set
        response2 = await router.acompletion(model="gpt-3.5-turbo", messages=messages, temperature=1)
        print(f"response2: {response2}")
        assert response1.id == response2.id
        assert len(response1.choices[0].message.content) > 0
        assert response1.choices[0].message.content == response2.choices[0].message.content
        router.reset()
    except litellm.Timeout as e:
        end_time = time.time()
        print(f"timeout error occurred: {end_time - start_time}")
        pass
    except Exception as e:
        traceback.print_exc()
        pytest.fail(f"Error occurred: {e}")

@pytest.mark.asyncio
async def test_acompletion_caching_on_router_caching_groups():
    # tests acompletion + caching on router
    try:
        litellm.set_verbose = True
        model_list = [
            {
                "model_name": "openai-gpt-3.5-turbo",
                "litellm_params": {
                    "model": "gpt-3.5-turbo-0613",
                    "api_key": os.getenv("OPENAI_API_KEY"),
                },
                "tpm": 100000,
                "rpm": 10000,
            },
            {
                "model_name": "azure-gpt-3.5-turbo",
                "litellm_params": {
                    "model": "azure/chatgpt-v-2",
                    "api_key": os.getenv("AZURE_API_KEY"),
                    "api_base": os.getenv("AZURE_API_BASE"),
                    "api_version": os.getenv("AZURE_API_VERSION")
                },
                "tpm": 100000,
                "rpm": 10000,
            }
        ]

        messages = [
            {"role": "user", "content": f"write a one sentence poem {time.time()}?"}
        ]
        start_time = time.time()
        router = Router(model_list=model_list,
                        redis_host=os.environ["REDIS_HOST"],
                        redis_password=os.environ["REDIS_PASSWORD"],
                        redis_port=os.environ["REDIS_PORT"],
                        cache_responses=True,
                        timeout=30,
                        routing_strategy="simple-shuffle",
                        caching_groups=[("openai-gpt-3.5-turbo", "azure-gpt-3.5-turbo")])
        response1 = await router.acompletion(model="openai-gpt-3.5-turbo", messages=messages, temperature=1)
        print(f"response1: {response1}")
        await asyncio.sleep(1)  # add cache is async, async sleep for cache to get set
        response2 = await router.acompletion(model="azure-gpt-3.5-turbo", messages=messages, temperature=1)
        print(f"response2: {response2}")
        assert response1.id == response2.id
        assert len(response1.choices[0].message.content) > 0
        assert response1.choices[0].message.content == response2.choices[0].message.content
        router.reset()
    except litellm.Timeout as e:
        end_time = time.time()
        print(f"timeout error occurred: {end_time - start_time}")
        pass
    except Exception as e:
        traceback.print_exc()
        pytest.fail(f"Error occurred: {e}")

def test_usage_based_routing_completion():
    model_list = [
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
                "model": "gpt-3.5-turbo-0301",
                "api_key": os.getenv("OPENAI_API_KEY"),
                "custom_llm_provider": "Custom-LLM"
            },
            "tpm": 10000,
            "rpm": 5
        },
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
                "model": "gpt-3.5-turbo-0301",
                "api_key": os.getenv("OPENAI_API_KEY"),
            },
            "tpm": 10000,
            "rpm": 5
        }
    ]
    router = Router(model_list= model_list,
                    routing_strategy= "usage-based-routing",
                    set_verbose= False)
    max_requests = 5
    while max_requests > 0:
        try:
            router.completion(
                model='gpt-3.5-turbo',
                messages=[{"content": "write a one sentence poem.", "role": "user"}],
            )
        except ValueError as e:
            traceback.print_exc()
            pytest.fail(f"Error occurred: {e}")
        finally:
            max_requests -= 1
    router.reset()
After black formatting (imports and scenario comments are unchanged context):

@pytest.mark.asyncio
async def test_acompletion_caching_on_router():
    # tests acompletion + caching on router
    try:
        litellm.set_verbose = True
        model_list = [
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": "gpt-3.5-turbo-0613",
                    "api_key": os.getenv("OPENAI_API_KEY"),
                },
                "tpm": 100000,
                "rpm": 10000,
            },
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": "azure/chatgpt-v-2",
                    "api_key": os.getenv("AZURE_API_KEY"),
                    "api_base": os.getenv("AZURE_API_BASE"),
                    "api_version": os.getenv("AZURE_API_VERSION"),
                },
                "tpm": 100000,
                "rpm": 10000,
            },
        ]

        messages = [
            {"role": "user", "content": f"write a one sentence poem {time.time()}?"}
        ]
        start_time = time.time()
        router = Router(
            model_list=model_list,
            redis_host=os.environ["REDIS_HOST"],
            redis_password=os.environ["REDIS_PASSWORD"],
            redis_port=os.environ["REDIS_PORT"],
            cache_responses=True,
            timeout=30,
            routing_strategy="simple-shuffle",
        )
        response1 = await router.acompletion(
            model="gpt-3.5-turbo", messages=messages, temperature=1
        )
        print(f"response1: {response1}")
        await asyncio.sleep(1)  # add cache is async, async sleep for cache to get set
        response2 = await router.acompletion(
            model="gpt-3.5-turbo", messages=messages, temperature=1
        )
        print(f"response2: {response2}")
        assert response1.id == response2.id
        assert len(response1.choices[0].message.content) > 0
        assert (
            response1.choices[0].message.content == response2.choices[0].message.content
        )
        router.reset()
    except litellm.Timeout as e:
        end_time = time.time()
        print(f"timeout error occurred: {end_time - start_time}")
        pass
    except Exception as e:
        traceback.print_exc()
        pytest.fail(f"Error occurred: {e}")


@pytest.mark.asyncio
async def test_acompletion_caching_on_router_caching_groups():
    # tests acompletion + caching on router
    try:
        litellm.set_verbose = True
        model_list = [
            {
                "model_name": "openai-gpt-3.5-turbo",
                "litellm_params": {
                    "model": "gpt-3.5-turbo-0613",
                    "api_key": os.getenv("OPENAI_API_KEY"),
                },
                "tpm": 100000,
                "rpm": 10000,
            },
            {
                "model_name": "azure-gpt-3.5-turbo",
                "litellm_params": {
                    "model": "azure/chatgpt-v-2",
                    "api_key": os.getenv("AZURE_API_KEY"),
                    "api_base": os.getenv("AZURE_API_BASE"),
                    "api_version": os.getenv("AZURE_API_VERSION"),
                },
                "tpm": 100000,
                "rpm": 10000,
            },
        ]

        messages = [
            {"role": "user", "content": f"write a one sentence poem {time.time()}?"}
        ]
        start_time = time.time()
        router = Router(
            model_list=model_list,
            redis_host=os.environ["REDIS_HOST"],
            redis_password=os.environ["REDIS_PASSWORD"],
            redis_port=os.environ["REDIS_PORT"],
            cache_responses=True,
            timeout=30,
            routing_strategy="simple-shuffle",
            caching_groups=[("openai-gpt-3.5-turbo", "azure-gpt-3.5-turbo")],
        )
        response1 = await router.acompletion(
            model="openai-gpt-3.5-turbo", messages=messages, temperature=1
        )
        print(f"response1: {response1}")
        await asyncio.sleep(1)  # add cache is async, async sleep for cache to get set
        response2 = await router.acompletion(
            model="azure-gpt-3.5-turbo", messages=messages, temperature=1
        )
        print(f"response2: {response2}")
        assert response1.id == response2.id
        assert len(response1.choices[0].message.content) > 0
        assert (
            response1.choices[0].message.content == response2.choices[0].message.content
        )
        router.reset()
    except litellm.Timeout as e:
        end_time = time.time()
        print(f"timeout error occurred: {end_time - start_time}")
        pass
    except Exception as e:
        traceback.print_exc()
        pytest.fail(f"Error occurred: {e}")


def test_usage_based_routing_completion():
    model_list = [
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
                "model": "gpt-3.5-turbo-0301",
                "api_key": os.getenv("OPENAI_API_KEY"),
                "custom_llm_provider": "Custom-LLM",
            },
            "tpm": 10000,
            "rpm": 5,
        },
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
                "model": "gpt-3.5-turbo-0301",
                "api_key": os.getenv("OPENAI_API_KEY"),
            },
            "tpm": 10000,
            "rpm": 5,
        },
    ]
    router = Router(
        model_list=model_list, routing_strategy="usage-based-routing", set_verbose=False
    )
    max_requests = 5
    while max_requests > 0:
        try:
            router.completion(
                model="gpt-3.5-turbo",
                messages=[{"content": "write a one sentence poem.", "role": "user"}],
            )
        except ValueError as e:
            traceback.print_exc()
            pytest.fail(f"Error occurred: {e}")
        finally:
            max_requests -= 1
    router.reset()
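Beyond the one-time reformat, projects usually pin the formatter's settings in pyproject.toml so later contributions stay consistent. A minimal sketch, assuming black's standard configuration keys (whether this commit added such a section is not visible in this hunk):

    [tool.black]
    # 88 is black's default line length; stated explicitly only for illustration
    line-length = 88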