#### What this tests ####
# This tests litellm router

import sys, os, time, openai
import traceback, asyncio
import pytest

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import litellm
from litellm import Router
from litellm.router import Deployment, LiteLLM_Params, ModelInfo
from concurrent.futures import ThreadPoolExecutor
from collections import defaultdict
from dotenv import load_dotenv

load_dotenv()


@pytest.mark.parametrize("num_retries", [None, 2])
|
|
@pytest.mark.parametrize("max_retries", [None, 4])
|
|
def test_router_num_retries_init(num_retries, max_retries):
|
|
"""
|
|
- test when num_retries set v/s not
|
|
- test client value when max retries set v/s not
|
|
"""
|
|
router = Router(
|
|
model_list=[
|
|
{
|
|
"model_name": "gpt-3.5-turbo", # openai model name
|
|
"litellm_params": { # params for litellm completion/embedding call
|
|
"model": "azure/chatgpt-v-2",
|
|
"api_key": "bad-key",
|
|
"api_version": os.getenv("AZURE_API_VERSION"),
|
|
"api_base": os.getenv("AZURE_API_BASE"),
|
|
"max_retries": max_retries,
|
|
},
|
|
"model_info": {"id": 12345},
|
|
},
|
|
],
|
|
num_retries=num_retries,
|
|
)
|
|
|
|
if num_retries is not None:
|
|
assert router.num_retries == num_retries
|
|
else:
|
|
assert router.num_retries == openai.DEFAULT_MAX_RETRIES
|
|
|
|
model_client = router._get_client(
|
|
{"model_info": {"id": 12345}}, client_type="async", kwargs={}
|
|
)
|
|
|
|
if max_retries is not None:
|
|
assert getattr(model_client, "max_retries") == max_retries
|
|
else:
|
|
assert getattr(model_client, "max_retries") == 0
|
|
|
|
|
|
def test_exception_raising():
    # this tests if the router raises an exception when invalid params are set
    # in this test both deployments have bad keys - Keep this test. It validates if the router raises the most recent exception
    litellm.set_verbose = True
    import openai

    try:
        print("testing if router raises an exception")
        old_api_key = os.environ["AZURE_API_KEY"]
        os.environ["AZURE_API_KEY"] = ""
        model_list = [
            {
                "model_name": "gpt-3.5-turbo",  # openai model name
                "litellm_params": {  # params for litellm completion/embedding call
                    "model": "azure/chatgpt-v-2",
                    "api_key": "bad-key",
                    "api_version": os.getenv("AZURE_API_VERSION"),
                    "api_base": os.getenv("AZURE_API_BASE"),
                },
                "tpm": 240000,
                "rpm": 1800,
            },
            {
                "model_name": "gpt-3.5-turbo",  # openai model name
                "litellm_params": {  #
                    "model": "gpt-3.5-turbo",
                    "api_key": "bad-key",
                },
                "tpm": 240000,
                "rpm": 1800,
            },
        ]
        router = Router(
            model_list=model_list,
            redis_host=os.getenv("REDIS_HOST"),
            redis_password=os.getenv("REDIS_PASSWORD"),
            redis_port=int(os.getenv("REDIS_PORT")),
            routing_strategy="simple-shuffle",
            set_verbose=False,
            num_retries=1,
        )  # type: ignore
        response = router.completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "hello this request will fail"}],
        )
        os.environ["AZURE_API_KEY"] = old_api_key
        pytest.fail("Should have raised an Auth Error")
    except openai.AuthenticationError:
        print(
            "Test Passed: Caught an OPENAI AUTH Error, Good job. This is what we needed!"
        )
        os.environ["AZURE_API_KEY"] = old_api_key
        router.reset()
    except Exception as e:
        os.environ["AZURE_API_KEY"] = old_api_key
        print("Got unexpected exception on router!", e)


# test_exception_raising()


def test_reading_key_from_model_list():
    # [PROD TEST CASE]
    # this tests if the router can read a key from the model list and make a completion call, and a completion + stream call. This is 90% of the router use case
    # DO NOT REMOVE THIS TEST. It's an IMP ONE. Speak to Ishaan, if you are trying to remove this
    litellm.set_verbose = False
    import openai

    try:
        print("testing if router raises an exception")
        old_api_key = os.environ["AZURE_API_KEY"]
        os.environ.pop("AZURE_API_KEY", None)
        model_list = [
            {
                "model_name": "gpt-3.5-turbo",  # openai model name
                "litellm_params": {  # params for litellm completion/embedding call
                    "model": "azure/chatgpt-v-2",
                    "api_key": old_api_key,
                    "api_version": os.getenv("AZURE_API_VERSION"),
                    "api_base": os.getenv("AZURE_API_BASE"),
                },
                "tpm": 240000,
                "rpm": 1800,
            }
        ]

        router = Router(
            model_list=model_list,
            redis_host=os.getenv("REDIS_HOST"),
            redis_password=os.getenv("REDIS_PASSWORD"),
            redis_port=int(os.getenv("REDIS_PORT")),
            routing_strategy="simple-shuffle",
            set_verbose=True,
            num_retries=1,
        )  # type: ignore
        response = router.completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "hello this request will fail"}],
        )
        print("\n response", response)
        str_response = response.choices[0].message.content
        print("\n str_response", str_response)
        assert len(str_response) > 0

        print("\n Testing streaming response")
        response = router.completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "hello this request will fail"}],
            stream=True,
        )
        completed_response = ""
        for chunk in response:
            if chunk is not None:
                print(chunk)
                completed_response += chunk.choices[0].delta.content or ""
        print("\n completed_response", completed_response)
        assert len(completed_response) > 0
        print("\n Passed Streaming")
        os.environ["AZURE_API_KEY"] = old_api_key
        router.reset()
    except Exception as e:
        os.environ["AZURE_API_KEY"] = old_api_key
        print("FAILED TEST")
        pytest.fail(f"Got unexpected exception on router! - {e}")


# test_reading_key_from_model_list()


def test_call_one_endpoint():
    # [PROD TEST CASE]
    # user passes one deployment they want to call on the router, we call the specified one
    # this test makes a completion call to azure/chatgpt-v-2, it should work
    try:
        print("Testing calling a specific deployment")
        old_api_key = os.environ["AZURE_API_KEY"]

        model_list = [
            {
                "model_name": "gpt-3.5-turbo",  # openai model name
                "litellm_params": {  # params for litellm completion/embedding call
                    "model": "azure/chatgpt-v-2",
                    "api_key": old_api_key,
                    "api_version": os.getenv("AZURE_API_VERSION"),
                    "api_base": os.getenv("AZURE_API_BASE"),
                },
                "tpm": 240000,
                "rpm": 1800,
            },
            {
                "model_name": "text-embedding-ada-002",
                "litellm_params": {
                    "model": "azure/azure-embedding-model",
                    "api_key": os.environ["AZURE_API_KEY"],
                    "api_base": os.environ["AZURE_API_BASE"],
                },
                "tpm": 100000,
                "rpm": 10000,
            },
        ]
        litellm.set_verbose = True
        router = Router(
            model_list=model_list,
            routing_strategy="simple-shuffle",
            set_verbose=True,
            num_retries=1,
        )  # type: ignore
        old_api_base = os.environ.pop("AZURE_API_BASE", None)

        async def call_azure_completion():
            response = await router.acompletion(
                model="azure/chatgpt-v-2",
                messages=[{"role": "user", "content": "hello this request will pass"}],
                specific_deployment=True,
            )
            print("\n response", response)

        async def call_azure_embedding():
            response = await router.aembedding(
                model="azure/azure-embedding-model",
                input=["good morning from litellm"],
                specific_deployment=True,
            )

            print("\n response", response)

        asyncio.run(call_azure_completion())
        asyncio.run(call_azure_embedding())

        os.environ["AZURE_API_BASE"] = old_api_base
        os.environ["AZURE_API_KEY"] = old_api_key
    except Exception as e:
        print("FAILED TEST")
        pytest.fail(f"Got unexpected exception on router! - {e}")


# test_call_one_endpoint()


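# Hedged companion sketch (not part of the original suite). Assumption: the sync
# `router.completion` honors `specific_deployment=True` the same way the async calls
# above do; the single Azure deployment below is reused from the test above.
def test_call_one_endpoint_sync():
    model_list = [
        {
            "model_name": "gpt-3.5-turbo",  # openai model name
            "litellm_params": {
                "model": "azure/chatgpt-v-2",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),
            },
        },
    ]
    router = Router(model_list=model_list, num_retries=1)  # type: ignore
    response = router.completion(
        model="azure/chatgpt-v-2",
        messages=[{"role": "user", "content": "hello this request will pass"}],
        specific_deployment=True,
    )
    print("\n response", response)
    router.reset()

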
def test_router_azure_acompletion():
    # [PROD TEST CASE]
    # This is 90% of the router use case, makes an acompletion call, acompletion + stream call and verifies it got a response
    # DO NOT REMOVE THIS TEST. It's an IMP ONE. Speak to Ishaan, if you are trying to remove this
    litellm.set_verbose = False
    import openai

    try:
        print("Router Test Azure - Acompletion, Acompletion with stream")

        # remove api key from env to repro how proxy passes key to router
        old_api_key = os.environ["AZURE_API_KEY"]
        os.environ.pop("AZURE_API_KEY", None)

        model_list = [
            {
                "model_name": "gpt-3.5-turbo",  # openai model name
                "litellm_params": {  # params for litellm completion/embedding call
                    "model": "azure/chatgpt-v-2",
                    "api_key": old_api_key,
                    "api_version": os.getenv("AZURE_API_VERSION"),
                    "api_base": os.getenv("AZURE_API_BASE"),
                },
                "rpm": 1800,
            },
            {
                "model_name": "gpt-3.5-turbo",  # openai model name
                "litellm_params": {  # params for litellm completion/embedding call
                    "model": "azure/gpt-turbo",
                    "api_key": os.getenv("AZURE_FRANCE_API_KEY"),
                    "api_version": os.getenv("AZURE_API_VERSION"),
                    "api_base": "https://openai-france-1234.openai.azure.com",
                },
                "rpm": 1800,
            },
        ]

        router = Router(
            model_list=model_list, routing_strategy="simple-shuffle", set_verbose=True
        )  # type: ignore

        async def test1():
            response = await router.acompletion(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": "hello this request will pass"}],
            )
            str_response = response.choices[0].message.content
            print("\n str_response", str_response)
            assert len(str_response) > 0
            print("\n response", response)

        asyncio.run(test1())

        print("\n Testing streaming response")

        async def test2():
            response = await router.acompletion(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": "hello this request will fail"}],
                stream=True,
            )
            completed_response = ""
            async for chunk in response:
                if chunk is not None:
                    print(chunk)
                    completed_response += chunk.choices[0].delta.content or ""
            print("\n completed_response", completed_response)
            assert len(completed_response) > 0

        asyncio.run(test2())
        print("\n Passed Streaming")
        os.environ["AZURE_API_KEY"] = old_api_key
        router.reset()
    except Exception as e:
        os.environ["AZURE_API_KEY"] = old_api_key
        print("FAILED TEST")
        pytest.fail(f"Got unexpected exception on router! - {e}")


# test_router_azure_acompletion()


def test_router_context_window_fallback():
    """
    - Give a gpt-3.5-turbo model group (4k context window) with a gpt-3.5-turbo-large fallback (16k context window)
    - Send a ~5k token prompt
    - Assert the request succeeds via the fallback
    """
    from large_text import text
    import os

    litellm.set_verbose = False

    print(f"len(text): {len(text)}")
    try:
        model_list = [
            {
                "model_name": "gpt-3.5-turbo",  # openai model name
                "litellm_params": {  # params for litellm completion/embedding call
                    "model": "azure/chatgpt-v-2",
                    "api_key": os.getenv("AZURE_API_KEY"),
                    "api_version": os.getenv("AZURE_API_VERSION"),
                    "api_base": os.getenv("AZURE_API_BASE"),
                    "base_model": "azure/gpt-35-turbo",
                },
            },
            {
                "model_name": "gpt-3.5-turbo-large",  # openai model name
                "litellm_params": {  # params for litellm completion/embedding call
                    "model": "gpt-3.5-turbo-1106",
                    "api_key": os.getenv("OPENAI_API_KEY"),
                },
            },
        ]

        router = Router(model_list=model_list, set_verbose=True, context_window_fallbacks=[{"gpt-3.5-turbo": ["gpt-3.5-turbo-large"]}], num_retries=0)  # type: ignore

        response = router.completion(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": text},
                {"role": "user", "content": "Who was Alexander?"},
            ],
        )

        print(f"response: {response}")
        assert response.model == "gpt-3.5-turbo-1106"
    except Exception as e:
        pytest.fail(f"Got unexpected exception on router! - {str(e)}")


@pytest.mark.asyncio
async def test_async_router_context_window_fallback():
    """
    - Give a gpt-3.5-turbo model group (4k context window) with a gpt-3.5-turbo-large fallback (16k context window)
    - Send a ~5k token prompt
    - Assert the request succeeds via the fallback
    """
    from large_text import text
    import os

    litellm.set_verbose = False

    print(f"len(text): {len(text)}")
    try:
        model_list = [
            {
                "model_name": "gpt-3.5-turbo",  # openai model name
                "litellm_params": {  # params for litellm completion/embedding call
                    "model": "azure/chatgpt-v-2",
                    "api_key": os.getenv("AZURE_API_KEY"),
                    "api_version": os.getenv("AZURE_API_VERSION"),
                    "api_base": os.getenv("AZURE_API_BASE"),
                    "base_model": "azure/gpt-35-turbo",
                },
            },
            {
                "model_name": "gpt-3.5-turbo-large",  # openai model name
                "litellm_params": {  # params for litellm completion/embedding call
                    "model": "gpt-3.5-turbo-1106",
                    "api_key": os.getenv("OPENAI_API_KEY"),
                },
            },
        ]

        router = Router(model_list=model_list, set_verbose=True, context_window_fallbacks=[{"gpt-3.5-turbo": ["gpt-3.5-turbo-large"]}], num_retries=0)  # type: ignore

        response = await router.acompletion(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": text},
                {"role": "user", "content": "Who was Alexander?"},
            ],
        )

        print(f"response: {response}")
        assert response.model == "gpt-3.5-turbo-1106"
    except Exception as e:
        pytest.fail(f"Got unexpected exception on router! - {str(e)}")


def test_router_rpm_pre_call_check():
    """
    - for a given model not in the model cost map
    - with rpm set
    - check if the rpm check is run
    """
    try:
        model_list = [
            {
                "model_name": "fake-openai-endpoint",  # openai model name
                "litellm_params": {  # params for litellm completion/embedding call
                    "model": "openai/my-fake-model",
                    "api_key": "my-fake-key",
                    "api_base": "https://openai-function-calling-workers.tasslexyz.workers.dev/",
                    "rpm": 0,
                },
            },
        ]

        router = Router(model_list=model_list, set_verbose=True, enable_pre_call_checks=True, num_retries=0)  # type: ignore

        try:
            router._pre_call_checks(
                model="fake-openai-endpoint",
                healthy_deployments=model_list,
                messages=[{"role": "user", "content": "Hey, how's it going?"}],
            )
            pytest.fail("Expected this to fail")
        except Exception:
            pass
    except Exception as e:
        pytest.fail(f"Got unexpected exception on router! - {str(e)}")


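# Companion sketch for the rpm pre-call check (not in the original suite). Assumption: with a
# non-zero rpm budget and no prior usage, the same check lets the deployment through. The rpm
# value of 100 is a made-up illustrative number; the fake endpoint is reused from the test above.
def test_router_rpm_pre_call_check_passes_with_budget():
    model_list = [
        {
            "model_name": "fake-openai-endpoint",
            "litellm_params": {
                "model": "openai/my-fake-model",
                "api_key": "my-fake-key",
                "api_base": "https://openai-function-calling-workers.tasslexyz.workers.dev/",
                "rpm": 100,  # assumed to be comfortably above current usage
            },
        },
    ]

    router = Router(model_list=model_list, set_verbose=True, enable_pre_call_checks=True, num_retries=0)  # type: ignore

    # should not raise - mirrors the rpm=0 case above, but with headroom
    router._pre_call_checks(
        model="fake-openai-endpoint",
        healthy_deployments=model_list,
        messages=[{"role": "user", "content": "Hey, how's it going?"}],
    )

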
def test_router_context_window_check_pre_call_check_in_group():
    """
    - Give a gpt-3.5-turbo model group with different context windows (4k vs. 16k)
    - Send a 5k prompt
    - Assert it works
    """
    from large_text import text
    import os

    litellm.set_verbose = False

    print(f"len(text): {len(text)}")
    try:
        model_list = [
            {
                "model_name": "gpt-3.5-turbo",  # openai model name
                "litellm_params": {  # params for litellm completion/embedding call
                    "model": "azure/chatgpt-v-2",
                    "api_key": os.getenv("AZURE_API_KEY"),
                    "api_version": os.getenv("AZURE_API_VERSION"),
                    "api_base": os.getenv("AZURE_API_BASE"),
                    "base_model": "azure/gpt-35-turbo",
                },
            },
            {
                "model_name": "gpt-3.5-turbo",  # openai model name
                "litellm_params": {  # params for litellm completion/embedding call
                    "model": "gpt-3.5-turbo-1106",
                    "api_key": os.getenv("OPENAI_API_KEY"),
                },
            },
        ]

        router = Router(model_list=model_list, set_verbose=True, enable_pre_call_checks=True, num_retries=0)  # type: ignore

        response = router.completion(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": text},
                {"role": "user", "content": "Who was Alexander?"},
            ],
        )

        print(f"response: {response}")
    except Exception as e:
        pytest.fail(f"Got unexpected exception on router! - {str(e)}")


def test_router_context_window_check_pre_call_check_out_group():
    """
    - Give 2 gpt-3.5-turbo model groups with different context windows (4k vs. 16k)
    - Send a 5k prompt
    - Assert it works
    """
    from large_text import text
    import os

    litellm.set_verbose = False

    print(f"len(text): {len(text)}")
    try:
        model_list = [
            {
                "model_name": "gpt-3.5-turbo-small",  # openai model name
                "litellm_params": {  # params for litellm completion/embedding call
                    "model": "azure/chatgpt-v-2",
                    "api_key": os.getenv("AZURE_API_KEY"),
                    "api_version": os.getenv("AZURE_API_VERSION"),
                    "api_base": os.getenv("AZURE_API_BASE"),
                    "base_model": "azure/gpt-35-turbo",
                },
            },
            {
                "model_name": "gpt-3.5-turbo-large",  # openai model name
                "litellm_params": {  # params for litellm completion/embedding call
                    "model": "gpt-3.5-turbo-1106",
                    "api_key": os.getenv("OPENAI_API_KEY"),
                },
            },
        ]

        router = Router(model_list=model_list, set_verbose=True, enable_pre_call_checks=True, num_retries=0, context_window_fallbacks=[{"gpt-3.5-turbo-small": ["gpt-3.5-turbo-large"]}])  # type: ignore

        response = router.completion(
            model="gpt-3.5-turbo-small",
            messages=[
                {"role": "system", "content": text},
                {"role": "user", "content": "Who was Alexander?"},
            ],
        )

        print(f"response: {response}")
    except Exception as e:
        pytest.fail(f"Got unexpected exception on router! - {str(e)}")


### FUNCTION CALLING


def test_function_calling():
    model_list = [
        {
            "model_name": "gpt-3.5-turbo-0613",
            "litellm_params": {
                "model": "gpt-3.5-turbo-0613",
                "api_key": os.getenv("OPENAI_API_KEY"),
            },
            "tpm": 100000,
            "rpm": 10000,
        },
    ]

    messages = [{"role": "user", "content": "What is the weather like in Boston?"}]
    functions = [
        {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                "required": ["location"],
            },
        }
    ]

    router = Router(model_list=model_list)
    response = router.completion(
        model="gpt-3.5-turbo-0613", messages=messages, functions=functions
    )
    router.reset()
    print(response)


# test_acompletion_on_router()


def test_function_calling_on_router():
    try:
        litellm.set_verbose = True
        model_list = [
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": "gpt-3.5-turbo-0613",
                    "api_key": os.getenv("OPENAI_API_KEY"),
                },
            },
        ]
        function1 = [
            {
                "name": "get_current_weather",
                "description": "Get the current weather in a given location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city and state, e.g. San Francisco, CA",
                        },
                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                    },
                    "required": ["location"],
                },
            }
        ]
        router = Router(
            model_list=model_list,
            redis_host=os.getenv("REDIS_HOST"),
            redis_password=os.getenv("REDIS_PASSWORD"),
            redis_port=os.getenv("REDIS_PORT"),
        )
        messages = [{"role": "user", "content": "what's the weather in boston"}]
        response = router.completion(
            model="gpt-3.5-turbo", messages=messages, functions=function1
        )
        print(f"final returned response: {response}")
        router.reset()
        assert isinstance(response["choices"][0]["message"]["function_call"], dict)
    except Exception as e:
        print(f"An exception occurred: {e}")


# test_function_calling_on_router()


### IMAGE GENERATION
@pytest.mark.asyncio
async def test_aimg_gen_on_router():
    litellm.set_verbose = True
    try:
        model_list = [
            {
                "model_name": "dall-e-3",
                "litellm_params": {
                    "model": "dall-e-3",
                },
            },
            {
                "model_name": "dall-e-3",
                "litellm_params": {
                    "model": "azure/dall-e-3-test",
                    "api_version": "2023-12-01-preview",
                    "api_base": os.getenv("AZURE_SWEDEN_API_BASE"),
                    "api_key": os.getenv("AZURE_SWEDEN_API_KEY"),
                },
            },
            {
                "model_name": "dall-e-2",
                "litellm_params": {
                    "model": "azure/",
                    "api_version": "2023-06-01-preview",
                    "api_base": os.getenv("AZURE_API_BASE"),
                    "api_key": os.getenv("AZURE_API_KEY"),
                },
            },
        ]
        router = Router(model_list=model_list, num_retries=3)
        response = await router.aimage_generation(
            model="dall-e-3", prompt="A cute baby sea otter"
        )
        print(response)
        assert len(response.data) > 0

        response = await router.aimage_generation(
            model="dall-e-2", prompt="A cute baby sea otter"
        )
        print(response)
        assert len(response.data) > 0

        router.reset()
    except Exception as e:
        if "Your task failed as a result of our safety system." in str(e):
            pass
        elif "Operation polling timed out" in str(e):
            pass
        elif "Connection error" in str(e):
            pass
        else:
            traceback.print_exc()
            pytest.fail(f"Error occurred: {e}")


# asyncio.run(test_aimg_gen_on_router())


def test_img_gen_on_router():
    litellm.set_verbose = True
    try:
        model_list = [
            {
                "model_name": "dall-e-3",
                "litellm_params": {
                    "model": "dall-e-3",
                },
            },
            {
                "model_name": "dall-e-3",
                "litellm_params": {
                    "model": "azure/dall-e-3-test",
                    "api_version": "2023-12-01-preview",
                    "api_base": os.getenv("AZURE_SWEDEN_API_BASE"),
                    "api_key": os.getenv("AZURE_SWEDEN_API_KEY"),
                },
            },
        ]
        router = Router(model_list=model_list)
        response = router.image_generation(
            model="dall-e-3", prompt="A cute baby sea otter"
        )
        print(response)
        assert len(response.data) > 0
        router.reset()
    except litellm.RateLimitError as e:
        pass
    except Exception as e:
        traceback.print_exc()
        pytest.fail(f"Error occurred: {e}")


# test_img_gen_on_router()
###


def test_aembedding_on_router():
    litellm.set_verbose = True
    try:
        model_list = [
            {
                "model_name": "text-embedding-ada-002",
                "litellm_params": {
                    "model": "text-embedding-ada-002",
                },
                "tpm": 100000,
                "rpm": 10000,
            },
        ]
        router = Router(model_list=model_list)

        async def embedding_call():
            response = await router.aembedding(
                model="text-embedding-ada-002",
                input=["good morning from litellm", "this is another item"],
            )
            print(response)

        asyncio.run(embedding_call())

        print("\n Making sync Embedding call\n")
        response = router.embedding(
            model="text-embedding-ada-002",
            input=["good morning from litellm 2"],
        )
        router.reset()
    except Exception as e:
        if "Your task failed as a result of our safety system." in str(e):
            pass
        elif "Operation polling timed out" in str(e):
            pass
        elif "Connection error" in str(e):
            pass
        else:
            traceback.print_exc()
            pytest.fail(f"Error occurred: {e}")


# test_aembedding_on_router()


def test_azure_embedding_on_router():
    """
    [PROD Use Case] - Makes an aembedding call + embedding call
    """
    litellm.set_verbose = True
    try:
        model_list = [
            {
                "model_name": "text-embedding-ada-002",
                "litellm_params": {
                    "model": "azure/azure-embedding-model",
                    "api_key": os.environ["AZURE_API_KEY"],
                    "api_base": os.environ["AZURE_API_BASE"],
                },
                "tpm": 100000,
                "rpm": 10000,
            },
        ]
        router = Router(model_list=model_list)

        async def embedding_call():
            response = await router.aembedding(
                model="text-embedding-ada-002", input=["good morning from litellm"]
            )
            print(response)

        asyncio.run(embedding_call())

        print("\n Making sync Azure Embedding call\n")

        response = router.embedding(
            model="text-embedding-ada-002",
            input=["test 2 from litellm. async embedding"],
        )
        print(response)
        router.reset()
    except Exception as e:
        traceback.print_exc()
        pytest.fail(f"Error occurred: {e}")


# test_azure_embedding_on_router()


def test_bedrock_on_router():
    litellm.set_verbose = True
    print("\n Testing bedrock on router\n")
    try:
        model_list = [
            {
                "model_name": "claude-v1",
                "litellm_params": {
                    "model": "bedrock/anthropic.claude-instant-v1",
                },
                "tpm": 100000,
                "rpm": 10000,
            },
        ]

        async def test():
            router = Router(model_list=model_list)
            response = await router.acompletion(
                model="claude-v1",
                messages=[
                    {
                        "role": "user",
                        "content": "hello from litellm test",
                    }
                ],
            )
            print(response)
            router.reset()

        asyncio.run(test())
    except Exception as e:
        traceback.print_exc()
        pytest.fail(f"Error occurred: {e}")


# test_bedrock_on_router()


# test openai-compatible endpoint
@pytest.mark.asyncio
async def test_mistral_on_router():
    litellm.set_verbose = True
    model_list = [
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
                "model": "mistral/mistral-medium",
            },
        },
    ]
    router = Router(model_list=model_list)
    response = await router.acompletion(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "user",
                "content": "hello from litellm test",
            }
        ],
    )
    print(response)


# asyncio.run(test_mistral_on_router())


def test_openai_completion_on_router():
    # [PROD Use Case] - Makes an acompletion call, an acompletion + stream call, a sync completion call, and a sync completion + stream call
    # 4 LLM API calls made here. If it fails, add retries. Do not remove this test.
    litellm.set_verbose = True
    print("\n Testing OpenAI on router\n")
    try:
        model_list = [
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": "gpt-3.5-turbo",
                },
            },
        ]
        router = Router(model_list=model_list)

        async def test():
            response = await router.acompletion(
                model="gpt-3.5-turbo",
                messages=[
                    {
                        "role": "user",
                        "content": "hello from litellm test",
                    }
                ],
            )
            print(response)
            assert len(response.choices[0].message.content) > 0

            print("\n streaming + acompletion test")
            response = await router.acompletion(
                model="gpt-3.5-turbo",
                messages=[
                    {
                        "role": "user",
                        "content": f"hello from litellm test {time.time()}",
                    }
                ],
                stream=True,
            )
            complete_response = ""
            print(response)
            # if you want to see all the attributes and methods
            async for chunk in response:
                print(chunk)
                complete_response += chunk.choices[0].delta.content or ""
            print("\n complete response: ", complete_response)
            assert len(complete_response) > 0

        asyncio.run(test())
        print("\n Testing Sync completion calls \n")
        response = router.completion(
            model="gpt-3.5-turbo",
            messages=[
                {
                    "role": "user",
                    "content": "hello from litellm test2",
                }
            ],
        )
        print(response)
        assert len(response.choices[0].message.content) > 0

        print("\n streaming + completion test")
        response = router.completion(
            model="gpt-3.5-turbo",
            messages=[
                {
                    "role": "user",
                    "content": "hello from litellm test3",
                }
            ],
            stream=True,
        )
        complete_response = ""
        print(response)
        for chunk in response:
            print(chunk)
            complete_response += chunk.choices[0].delta.content or ""
        print("\n complete response: ", complete_response)
        assert len(complete_response) > 0
        router.reset()
    except Exception as e:
        traceback.print_exc()
        pytest.fail(f"Error occurred: {e}")


# test_openai_completion_on_router()


def test_consistent_model_id():
    """
    - For a given model group + litellm params, assert the model id is always the same

    Test on `_generate_model_id`

    Test on `set_model_list`

    Test on `_add_deployment`
    """
    model_group = "gpt-3.5-turbo"
    litellm_params = {
        "model": "openai/my-fake-model",
        "api_key": "my-fake-key",
        "api_base": "https://openai-function-calling-workers.tasslexyz.workers.dev/",
        "stream_timeout": 0.001,
    }

    id1 = Router()._generate_model_id(
        model_group=model_group, litellm_params=litellm_params
    )

    id2 = Router()._generate_model_id(
        model_group=model_group, litellm_params=litellm_params
    )

    assert id1 == id2


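# Hedged follow-up sketch (not part of the original suite). Assumption: `_generate_model_id`
# derives the id from the model group + litellm params, so changing any param (here, api_base)
# should yield a different id. The alternate api_base below is a made-up illustrative value.
def test_model_id_changes_with_litellm_params():
    model_group = "gpt-3.5-turbo"
    base_params = {
        "model": "openai/my-fake-model",
        "api_key": "my-fake-key",
        "api_base": "https://openai-function-calling-workers.tasslexyz.workers.dev/",
        "stream_timeout": 0.001,
    }
    changed_params = {**base_params, "api_base": "https://some-other-fake-endpoint.example.com/"}

    id1 = Router()._generate_model_id(model_group=model_group, litellm_params=base_params)
    id2 = Router()._generate_model_id(model_group=model_group, litellm_params=changed_params)

    assert id1 != id2

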
def test_reading_keys_os_environ():
    import openai

    try:
        model_list = [
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": "gpt-3.5-turbo",
                    "api_key": "os.environ/AZURE_API_KEY",
                    "api_base": "os.environ/AZURE_API_BASE",
                    "api_version": "os.environ/AZURE_API_VERSION",
                    "timeout": "os.environ/AZURE_TIMEOUT",
                    "stream_timeout": "os.environ/AZURE_STREAM_TIMEOUT",
                    "max_retries": "os.environ/AZURE_MAX_RETRIES",
                },
            },
        ]

        router = Router(model_list=model_list)
        for model in router.model_list:
            assert (
                model["litellm_params"]["api_key"] == os.environ["AZURE_API_KEY"]
            ), f"{model['litellm_params']['api_key']} vs {os.environ['AZURE_API_KEY']}"
            assert (
                model["litellm_params"]["api_base"] == os.environ["AZURE_API_BASE"]
            ), f"{model['litellm_params']['api_base']} vs {os.environ['AZURE_API_BASE']}"
            assert (
                model["litellm_params"]["api_version"]
                == os.environ["AZURE_API_VERSION"]
            ), f"{model['litellm_params']['api_version']} vs {os.environ['AZURE_API_VERSION']}"
            assert float(model["litellm_params"]["timeout"]) == float(
                os.environ["AZURE_TIMEOUT"]
            ), f"{model['litellm_params']['timeout']} vs {os.environ['AZURE_TIMEOUT']}"
            assert float(model["litellm_params"]["stream_timeout"]) == float(
                os.environ["AZURE_STREAM_TIMEOUT"]
            ), f"{model['litellm_params']['stream_timeout']} vs {os.environ['AZURE_STREAM_TIMEOUT']}"
            assert int(model["litellm_params"]["max_retries"]) == int(
                os.environ["AZURE_MAX_RETRIES"]
            ), f"{model['litellm_params']['max_retries']} vs {os.environ['AZURE_MAX_RETRIES']}"
            print("passed testing of reading keys from os.environ")
            model_id = model["model_info"]["id"]
            async_client: openai.AsyncAzureOpenAI = router.cache.get_cache(f"{model_id}_async_client")  # type: ignore
            assert async_client.api_key == os.environ["AZURE_API_KEY"]
            assert async_client.base_url == os.environ["AZURE_API_BASE"]
            assert async_client.max_retries == int(
                os.environ["AZURE_MAX_RETRIES"]
            ), f"{async_client.max_retries} vs {os.environ['AZURE_MAX_RETRIES']}"
            assert async_client.timeout == int(
                os.environ["AZURE_TIMEOUT"]
            ), f"{async_client.timeout} vs {os.environ['AZURE_TIMEOUT']}"
            print("async client set correctly!")

            print("\n Testing async streaming client")

            stream_async_client: openai.AsyncAzureOpenAI = router.cache.get_cache(f"{model_id}_stream_async_client")  # type: ignore
            assert stream_async_client.api_key == os.environ["AZURE_API_KEY"]
            assert stream_async_client.base_url == os.environ["AZURE_API_BASE"]
            assert stream_async_client.max_retries == int(
                os.environ["AZURE_MAX_RETRIES"]
            ), f"{stream_async_client.max_retries} vs {os.environ['AZURE_MAX_RETRIES']}"
            assert stream_async_client.timeout == int(
                os.environ["AZURE_STREAM_TIMEOUT"]
            ), f"{stream_async_client.timeout} vs {os.environ['AZURE_STREAM_TIMEOUT']}"
            print("async stream client set correctly!")

            print("\n Testing sync client")
            client: openai.AzureOpenAI = router.cache.get_cache(f"{model_id}_client")  # type: ignore
            assert client.api_key == os.environ["AZURE_API_KEY"]
            assert client.base_url == os.environ["AZURE_API_BASE"]
            assert client.max_retries == int(
                os.environ["AZURE_MAX_RETRIES"]
            ), f"{client.max_retries} vs {os.environ['AZURE_MAX_RETRIES']}"
            assert client.timeout == int(
                os.environ["AZURE_TIMEOUT"]
            ), f"{client.timeout} vs {os.environ['AZURE_TIMEOUT']}"
            print("sync client set correctly!")

            print("\n Testing sync stream client")
            stream_client: openai.AzureOpenAI = router.cache.get_cache(f"{model_id}_stream_client")  # type: ignore
            assert stream_client.api_key == os.environ["AZURE_API_KEY"]
            assert stream_client.base_url == os.environ["AZURE_API_BASE"]
            assert stream_client.max_retries == int(
                os.environ["AZURE_MAX_RETRIES"]
            ), f"{stream_client.max_retries} vs {os.environ['AZURE_MAX_RETRIES']}"
            assert stream_client.timeout == int(
                os.environ["AZURE_STREAM_TIMEOUT"]
            ), f"{stream_client.timeout} vs {os.environ['AZURE_STREAM_TIMEOUT']}"
            print("sync stream client set correctly!")

        router.reset()
    except Exception as e:
        traceback.print_exc()
        pytest.fail(f"Error occurred: {e}")


# test_reading_keys_os_environ()


def test_reading_openai_keys_os_environ():
    import openai

    try:
        model_list = [
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": "gpt-3.5-turbo",
                    "api_key": "os.environ/OPENAI_API_KEY",
                    "timeout": "os.environ/AZURE_TIMEOUT",
                    "stream_timeout": "os.environ/AZURE_STREAM_TIMEOUT",
                    "max_retries": "os.environ/AZURE_MAX_RETRIES",
                },
            },
            {
                "model_name": "text-embedding-ada-002",
                "litellm_params": {
                    "model": "text-embedding-ada-002",
                    "api_key": "os.environ/OPENAI_API_KEY",
                    "timeout": "os.environ/AZURE_TIMEOUT",
                    "stream_timeout": "os.environ/AZURE_STREAM_TIMEOUT",
                    "max_retries": "os.environ/AZURE_MAX_RETRIES",
                },
            },
        ]

        router = Router(model_list=model_list)
        for model in router.model_list:
            assert (
                model["litellm_params"]["api_key"] == os.environ["OPENAI_API_KEY"]
            ), f"{model['litellm_params']['api_key']} vs {os.environ['OPENAI_API_KEY']}"
            assert float(model["litellm_params"]["timeout"]) == float(
                os.environ["AZURE_TIMEOUT"]
            ), f"{model['litellm_params']['timeout']} vs {os.environ['AZURE_TIMEOUT']}"
            assert float(model["litellm_params"]["stream_timeout"]) == float(
                os.environ["AZURE_STREAM_TIMEOUT"]
            ), f"{model['litellm_params']['stream_timeout']} vs {os.environ['AZURE_STREAM_TIMEOUT']}"
            assert int(model["litellm_params"]["max_retries"]) == int(
                os.environ["AZURE_MAX_RETRIES"]
            ), f"{model['litellm_params']['max_retries']} vs {os.environ['AZURE_MAX_RETRIES']}"
            print("passed testing of reading keys from os.environ")
            model_id = model["model_info"]["id"]
            async_client: openai.AsyncOpenAI = router.cache.get_cache(key=f"{model_id}_async_client")  # type: ignore
            assert async_client.api_key == os.environ["OPENAI_API_KEY"]
            assert async_client.max_retries == int(
                os.environ["AZURE_MAX_RETRIES"]
            ), f"{async_client.max_retries} vs {os.environ['AZURE_MAX_RETRIES']}"
            assert async_client.timeout == int(
                os.environ["AZURE_TIMEOUT"]
            ), f"{async_client.timeout} vs {os.environ['AZURE_TIMEOUT']}"
            print("async client set correctly!")

            print("\n Testing async streaming client")

            stream_async_client: openai.AsyncOpenAI = router.cache.get_cache(key=f"{model_id}_stream_async_client")  # type: ignore
            assert stream_async_client.api_key == os.environ["OPENAI_API_KEY"]
            assert stream_async_client.max_retries == int(
                os.environ["AZURE_MAX_RETRIES"]
            ), f"{stream_async_client.max_retries} vs {os.environ['AZURE_MAX_RETRIES']}"
            assert stream_async_client.timeout == int(
                os.environ["AZURE_STREAM_TIMEOUT"]
            ), f"{stream_async_client.timeout} vs {os.environ['AZURE_STREAM_TIMEOUT']}"
            print("async stream client set correctly!")

            print("\n Testing sync client")
            client: openai.OpenAI = router.cache.get_cache(key=f"{model_id}_client")  # type: ignore
            assert client.api_key == os.environ["OPENAI_API_KEY"]
            assert client.max_retries == int(
                os.environ["AZURE_MAX_RETRIES"]
            ), f"{client.max_retries} vs {os.environ['AZURE_MAX_RETRIES']}"
            assert client.timeout == int(
                os.environ["AZURE_TIMEOUT"]
            ), f"{client.timeout} vs {os.environ['AZURE_TIMEOUT']}"
            print("sync client set correctly!")

            print("\n Testing sync stream client")
            stream_client: openai.OpenAI = router.cache.get_cache(key=f"{model_id}_stream_client")  # type: ignore
            assert stream_client.api_key == os.environ["OPENAI_API_KEY"]
            assert stream_client.max_retries == int(
                os.environ["AZURE_MAX_RETRIES"]
            ), f"{stream_client.max_retries} vs {os.environ['AZURE_MAX_RETRIES']}"
            assert stream_client.timeout == int(
                os.environ["AZURE_STREAM_TIMEOUT"]
            ), f"{stream_client.timeout} vs {os.environ['AZURE_STREAM_TIMEOUT']}"
            print("sync stream client set correctly!")

        router.reset()
    except Exception as e:
        traceback.print_exc()
        pytest.fail(f"Error occurred: {e}")


# test_reading_openai_keys_os_environ()


def test_router_anthropic_key_dynamic():
    anthropic_api_key = os.environ.pop("ANTHROPIC_API_KEY")
    model_list = [
        {
            "model_name": "anthropic-claude",
            "litellm_params": {
                "model": "claude-instant-1.2",
                "api_key": anthropic_api_key,
            },
        }
    ]

    router = Router(model_list=model_list)
    messages = [{"role": "user", "content": "Hey, how's it going?"}]
    router.completion(model="anthropic-claude", messages=messages)
    os.environ["ANTHROPIC_API_KEY"] = anthropic_api_key


def test_router_timeout():
    litellm.set_verbose = True
    from litellm._logging import verbose_logger
    import logging

    verbose_logger.setLevel(logging.DEBUG)
    model_list = [
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
                "model": "gpt-3.5-turbo",
                "api_key": "os.environ/OPENAI_API_KEY",
            },
        }
    ]
    router = Router(model_list=model_list)
    messages = [{"role": "user", "content": "Hey, how's it going?"}]
    start_time = time.time()
    try:
        res = router.completion(
            model="gpt-3.5-turbo", messages=messages, timeout=0.0001
        )
        print(res)
        pytest.fail("this should have timed out")
    except litellm.exceptions.Timeout as e:
        print("got timeout exception")
        print(e)
        print(vars(e))
        pass


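# Hedged async counterpart to test_router_timeout (a sketch, not in the original suite).
# Assumption: the per-request `timeout` kwarg is honored by `acompletion` the same way it is
# by the sync `completion` call above, surfacing as `litellm.exceptions.Timeout`.
@pytest.mark.asyncio
async def test_router_timeout_async():
    model_list = [
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
                "model": "gpt-3.5-turbo",
                "api_key": "os.environ/OPENAI_API_KEY",
            },
        }
    ]
    router = Router(model_list=model_list)
    messages = [{"role": "user", "content": "Hey, how's it going?"}]
    try:
        await router.acompletion(
            model="gpt-3.5-turbo", messages=messages, timeout=0.0001
        )
        pytest.fail("this should have timed out")
    except litellm.exceptions.Timeout as e:
        print("got timeout exception", e)

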
@pytest.mark.asyncio
async def test_router_amoderation():
    model_list = [
        {
            "model_name": "openai-moderations",
            "litellm_params": {
                "model": "text-moderation-stable",
                "api_key": os.getenv("OPENAI_API_KEY", None),
            },
        }
    ]

    router = Router(model_list=model_list)
    result = await router.amoderation(
        model="openai-moderations", input="this is valid good text"
    )

    print("moderation result", result)


def test_router_add_deployment():
    initial_model_list = [
        {
            "model_name": "fake-openai-endpoint",
            "litellm_params": {
                "model": "openai/my-fake-model",
                "api_key": "my-fake-key",
                "api_base": "https://openai-function-calling-workers.tasslexyz.workers.dev/",
            },
        },
    ]
    router = Router(model_list=initial_model_list)

    init_model_id_list = router.get_model_ids()

    print(f"init_model_id_list: {init_model_id_list}")

    router.add_deployment(
        deployment=Deployment(
            model_name="gpt-instruct",
            litellm_params=LiteLLM_Params(model="gpt-3.5-turbo-instruct"),
            model_info=ModelInfo(),
        )
    )

    new_model_id_list = router.get_model_ids()

    print(f"new_model_id_list: {new_model_id_list}")

    assert len(new_model_id_list) > len(init_model_id_list)

    assert new_model_id_list[1] != new_model_id_list[0]


@pytest.mark.asyncio
async def test_router_text_completion_client():
    # This tests if we re-use the Async OpenAI client
    # This test fails when we create a new Async OpenAI client per request
    try:
        model_list = [
            {
                "model_name": "fake-openai-endpoint",
                "litellm_params": {
                    "model": "text-completion-openai/gpt-3.5-turbo-instruct",
                    "api_key": os.getenv("OPENAI_API_KEY", None),
                    "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
                },
            }
        ]
        router = Router(model_list=model_list, debug_level="DEBUG", set_verbose=True)
        tasks = []
        for _ in range(300):
            tasks.append(
                router.atext_completion(
                    model="fake-openai-endpoint",
                    prompt="hello from litellm test",
                )
            )

        # Execute all coroutines concurrently
        responses = await asyncio.gather(*tasks)
        print(responses)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
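

# Hedged sketch (not in the original suite) making the client re-use explicit. Assumption,
# based on the os.environ tests above: the async client is created once at router init and
# cached under f"{model_id}_async_client", so two lookups return the same object.
def test_async_client_is_cached_once():
    model_list = [
        {
            "model_name": "fake-openai-endpoint",
            "litellm_params": {
                "model": "text-completion-openai/gpt-3.5-turbo-instruct",
                "api_key": os.getenv("OPENAI_API_KEY", None),
                "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
            },
        }
    ]
    router = Router(model_list=model_list)
    model_id = router.model_list[0]["model_info"]["id"]

    client_1 = router.cache.get_cache(f"{model_id}_async_client")
    client_2 = router.cache.get_cache(f"{model_id}_async_client")

    assert client_1 is not None
    assert client_1 is client_2  # same cached object, not a new client per lookup
    router.reset()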