# litellm-mirror/tests/local_testing/test_router_init.py
# # this tests if the router is initialized correctly
# import asyncio
# import os
# import sys
# import time
# import traceback
# import pytest
# sys.path.insert(
#     0, os.path.abspath("../..")
# )  # Adds the parent directory to the system path
# from collections import defaultdict
# from concurrent.futures import ThreadPoolExecutor
# from dotenv import load_dotenv
# import litellm
# from litellm import Router
# load_dotenv()
# # every time we load the router we should have 4 clients:
# # Async
# # Sync
# # Async + Stream
# # Sync + Stream
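# A minimal sketch of the four-clients-per-deployment convention described above,
# assuming the same Azure env vars (AZURE_API_KEY / AZURE_API_VERSION / AZURE_API_BASE)
# used throughout these tests; the cache-key suffixes are the ones the tests below assert on.
def _sketch_four_clients_per_deployment():
    # illustrative helper, not collected by pytest; call manually if desired
    import os

    from litellm import Router

    router = Router(
        model_list=[
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": "azure/chatgpt-v-2",
                    "api_key": os.getenv("AZURE_API_KEY"),
                    "api_version": os.getenv("AZURE_API_VERSION"),
                    "api_base": os.getenv("AZURE_API_BASE"),
                },
            }
        ]
    )
    for deployment in router.model_list:
        model_id = deployment["model_info"]["id"]
        # one cached client per (deployment, suffix) combination
        for suffix in ("_client", "_async_client", "_stream_client", "_stream_async_client"):
            assert router.cache.get_cache(f"{model_id}{suffix}") is not None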
# def test_init_clients():
#     litellm.set_verbose = True
#     import logging
#     from litellm._logging import verbose_router_logger
#     verbose_router_logger.setLevel(logging.DEBUG)
#     try:
#         print("testing init 4 clients with diff timeouts")
#         model_list = [
#             {
#                 "model_name": "gpt-3.5-turbo",
#                 "litellm_params": {
#                     "model": "azure/chatgpt-v-2",
#                     "api_key": os.getenv("AZURE_API_KEY"),
#                     "api_version": os.getenv("AZURE_API_VERSION"),
#                     "api_base": os.getenv("AZURE_API_BASE"),
#                     "timeout": 0.01,
#                     "stream_timeout": 0.000_001,
#                     "max_retries": 7,
#                 },
#             },
#         ]
#         router = Router(model_list=model_list, set_verbose=True)
#         for elem in router.model_list:
#             model_id = elem["model_info"]["id"]
#             assert router.cache.get_cache(f"{model_id}_client") is not None
#             assert router.cache.get_cache(f"{model_id}_async_client") is not None
#             assert router.cache.get_cache(f"{model_id}_stream_client") is not None
#             assert router.cache.get_cache(f"{model_id}_stream_async_client") is not None
#             # check if timeout for stream/non stream clients is set correctly
#             async_client = router.cache.get_cache(f"{model_id}_async_client")
#             stream_async_client = router.cache.get_cache(
#                 f"{model_id}_stream_async_client"
#             )
#             assert async_client.timeout == 0.01
#             assert stream_async_client.timeout == 0.000_001
#             print(vars(async_client))
#             print()
#             print(async_client._base_url)
#             assert (
#                 async_client._base_url
#                 == "https://openai-gpt-4-test-v-1.openai.azure.com/openai/"
#             )
#             assert (
#                 stream_async_client._base_url
#                 == "https://openai-gpt-4-test-v-1.openai.azure.com/openai/"
#             )
#         print("PASSED !")
#     except Exception as e:
#         traceback.print_exc()
#         pytest.fail(f"Error occurred: {e}")
# # test_init_clients()
# def test_init_clients_basic():
#     litellm.set_verbose = True
#     try:
#         print("Test basic client init")
#         model_list = [
#             {
#                 "model_name": "gpt-3.5-turbo",
#                 "litellm_params": {
#                     "model": "azure/chatgpt-v-2",
#                     "api_key": os.getenv("AZURE_API_KEY"),
#                     "api_version": os.getenv("AZURE_API_VERSION"),
#                     "api_base": os.getenv("AZURE_API_BASE"),
#                 },
#             },
#         ]
#         router = Router(model_list=model_list)
#         for elem in router.model_list:
#             model_id = elem["model_info"]["id"]
#             assert router.cache.get_cache(f"{model_id}_client") is not None
#             assert router.cache.get_cache(f"{model_id}_async_client") is not None
#             assert router.cache.get_cache(f"{model_id}_stream_client") is not None
#             assert router.cache.get_cache(f"{model_id}_stream_async_client") is not None
#         print("PASSED !")
#         # see if we can init clients without timeout or max retries set
#     except Exception as e:
#         traceback.print_exc()
#         pytest.fail(f"Error occurred: {e}")
# # test_init_clients_basic()
# def test_init_clients_basic_azure_cloudflare():
#     # init azure + cloudflare
#     # init OpenAI gpt-3.5
#     # init OpenAI text-embedding
#     # init OpenAI compatible - Mistral/mistral-medium
#     # init OpenAI compatible - xinference/bge
#     litellm.set_verbose = True
#     try:
#         print("Test basic client init")
#         model_list = [
#             {
#                 "model_name": "azure-cloudflare",
#                 "litellm_params": {
#                     "model": "azure/chatgpt-v-2",
#                     "api_key": os.getenv("AZURE_API_KEY"),
#                     "api_version": os.getenv("AZURE_API_VERSION"),
#                     "api_base": "https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1",
#                 },
#             },
#             {
#                 "model_name": "gpt-openai",
#                 "litellm_params": {
#                     "model": "gpt-3.5-turbo",
#                     "api_key": os.getenv("OPENAI_API_KEY"),
#                 },
#             },
#             {
#                 "model_name": "text-embedding-ada-002",
#                 "litellm_params": {
#                     "model": "text-embedding-ada-002",
#                     "api_key": os.getenv("OPENAI_API_KEY"),
#                 },
#             },
#             {
#                 "model_name": "mistral",
#                 "litellm_params": {
#                     "model": "mistral/mistral-tiny",
#                     "api_key": os.getenv("MISTRAL_API_KEY"),
#                 },
#             },
#             {
#                 "model_name": "bge-base-en",
#                 "litellm_params": {
#                     "model": "xinference/bge-base-en",
#                     "api_base": "http://127.0.0.1:9997/v1",
#                     "api_key": os.getenv("OPENAI_API_KEY"),
#                 },
#             },
#         ]
#         router = Router(model_list=model_list)
#         for elem in router.model_list:
#             model_id = elem["model_info"]["id"]
#             assert router.cache.get_cache(f"{model_id}_client") is not None
#             assert router.cache.get_cache(f"{model_id}_async_client") is not None
#             assert router.cache.get_cache(f"{model_id}_stream_client") is not None
#             assert router.cache.get_cache(f"{model_id}_stream_async_client") is not None
#         print("PASSED !")
#         # see if we can init clients without timeout or max retries set
#     except Exception as e:
#         traceback.print_exc()
#         pytest.fail(f"Error occurred: {e}")
# # test_init_clients_basic_azure_cloudflare()
# def test_timeouts_router():
# """
# Test the timeouts of the router with multiple clients. This HASas to raise a timeout error
# """
# import openai
# litellm.set_verbose = True
# try:
# print("testing init 4 clients with diff timeouts")
# model_list = [
# {
# "model_name": "gpt-3.5-turbo",
# "litellm_params": {
# "model": "azure/chatgpt-v-2",
# "api_key": os.getenv("AZURE_API_KEY"),
# "api_version": os.getenv("AZURE_API_VERSION"),
# "api_base": os.getenv("AZURE_API_BASE"),
# "timeout": 0.000001,
# "stream_timeout": 0.000_001,
# },
# },
# ]
# router = Router(model_list=model_list, num_retries=0)
# print("PASSED !")
# async def test():
# try:
# await router.acompletion(
# model="gpt-3.5-turbo",
# messages=[
# {"role": "user", "content": "hello, write a 20 pg essay"}
# ],
# )
# except Exception as e:
# raise e
# asyncio.run(test())
# except openai.APITimeoutError as e:
# print(
# "Passed: Raised correct exception. Got openai.APITimeoutError\nGood Job", e
# )
# print(type(e))
# pass
# except Exception as e:
# pytest.fail(
# f"Did not raise error `openai.APITimeoutError`. Instead raised error type: {type(e)}, Error: {e}"
# )
# # test_timeouts_router()
# def test_stream_timeouts_router():
# """
# Test the stream timeouts router. See if it selected the correct client with stream timeout
# """
# import openai
# litellm.set_verbose = True
# try:
# print("testing init 4 clients with diff timeouts")
# model_list = [
# {
# "model_name": "gpt-3.5-turbo",
# "litellm_params": {
# "model": "azure/chatgpt-v-2",
# "api_key": os.getenv("AZURE_API_KEY"),
# "api_version": os.getenv("AZURE_API_VERSION"),
# "api_base": os.getenv("AZURE_API_BASE"),
# "timeout": 200, # regular calls will not timeout, stream calls will
# "stream_timeout": 10,
# },
# },
# ]
# router = Router(model_list=model_list)
# print("PASSED !")
# data = {
# "model": "gpt-3.5-turbo",
# "messages": [{"role": "user", "content": "hello, write a 20 pg essay"}],
# "stream": True,
# }
# selected_client = router._get_client(
# deployment=router.model_list[0],
# kwargs=data,
# client_type=None,
# )
# print("Select client timeout", selected_client.timeout)
# assert selected_client.timeout == 10
# # make actual call
# response = router.completion(**data)
# for chunk in response:
# print(f"chunk: {chunk}")
# except openai.APITimeoutError as e:
# print(
# "Passed: Raised correct exception. Got openai.APITimeoutError\nGood Job", e
# )
# print(type(e))
# pass
# except Exception as e:
# pytest.fail(
# f"Did not raise error `openai.APITimeoutError`. Instead raised error type: {type(e)}, Error: {e}"
# )
# # test_stream_timeouts_router()
# def test_xinference_embedding():
#     # [Test Init Xinference] this tests if we init xinference on the router correctly
#     # [Test Exception Mapping] tests that xinference is an openai compatible provider
#     print("Testing init xinference")
#     print(
#         "this tests if we create an OpenAI client for Xinference, with the correct API BASE"
#     )
#     model_list = [
#         {
#             "model_name": "xinference",
#             "litellm_params": {
#                 "model": "xinference/bge-base-en",
#                 "api_base": "os.environ/XINFERENCE_API_BASE",
#             },
#         }
#     ]
#     router = Router(model_list=model_list)
#     print(router.model_list)
#     print(router.model_list[0])
#     assert (
#         router.model_list[0]["litellm_params"]["api_base"] == "http://0.0.0.0:9997"
#     )  # set in env
#     openai_client = router._get_client(
#         deployment=router.model_list[0],
#         kwargs={"input": ["hello"], "model": "xinference"},
#     )
#     assert openai_client._base_url == "http://0.0.0.0:9997"
#     assert "xinference" in litellm.openai_compatible_providers
#     print("passed")
# # test_xinference_embedding()
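# A minimal sketch of the "os.environ/<VAR>" indirection exercised above: a litellm_params
# value written as "os.environ/NAME" should be resolved from the environment when the
# Router is constructed. The endpoint URL set here is only an assumed placeholder.
def _sketch_os_environ_api_base_resolution():
    # illustrative helper, not collected by pytest; call manually if desired
    import os

    from litellm import Router

    os.environ["XINFERENCE_API_BASE"] = "http://127.0.0.1:9997/v1"  # assumed local endpoint
    router = Router(
        model_list=[
            {
                "model_name": "xinference",
                "litellm_params": {
                    "model": "xinference/bge-base-en",
                    "api_base": "os.environ/XINFERENCE_API_BASE",
                },
            }
        ]
    )
    # the router resolves the env reference into the concrete URL at init time
    assert (
        router.model_list[0]["litellm_params"]["api_base"] == "http://127.0.0.1:9997/v1"
    )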
# def test_router_init_gpt_4_vision_enhancements():
#     try:
#         # tests that base_url is set when a base_url containing /openai/deployments is passed to the router
#         print("Testing Azure GPT_Vision enhancements")
#         model_list = [
#             {
#                 "model_name": "gpt-4-vision-enhancements",
#                 "litellm_params": {
#                     "model": "azure/gpt-4-vision",
#                     "api_key": os.getenv("AZURE_API_KEY"),
#                     "base_url": "https://gpt-4-vision-resource.openai.azure.com/openai/deployments/gpt-4-vision/extensions/",
#                     "dataSources": [
#                         {
#                             "type": "AzureComputerVision",
#                             "parameters": {
#                                 "endpoint": "os.environ/AZURE_VISION_ENHANCE_ENDPOINT",
#                                 "key": "os.environ/AZURE_VISION_ENHANCE_KEY",
#                             },
#                         }
#                     ],
#                 },
#             }
#         ]
#         router = Router(model_list=model_list)
#         print(router.model_list)
#         print(router.model_list[0])
#         assert (
#             router.model_list[0]["litellm_params"]["base_url"]
#             == "https://gpt-4-vision-resource.openai.azure.com/openai/deployments/gpt-4-vision/extensions/"
#         )  # set in env
#         assert (
#             router.model_list[0]["litellm_params"]["dataSources"][0]["parameters"][
#                 "endpoint"
#             ]
#             == os.environ["AZURE_VISION_ENHANCE_ENDPOINT"]
#         )
#         assert (
#             router.model_list[0]["litellm_params"]["dataSources"][0]["parameters"][
#                 "key"
#             ]
#             == os.environ["AZURE_VISION_ENHANCE_KEY"]
#         )
#         azure_client = router._get_client(
#             deployment=router.model_list[0],
#             kwargs={"stream": True, "model": "gpt-4-vision-enhancements"},
#             client_type="async",
#         )
#         assert (
#             azure_client._base_url
#             == "https://gpt-4-vision-resource.openai.azure.com/openai/deployments/gpt-4-vision/extensions/"
#         )
#         print("passed")
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")
# @pytest.mark.parametrize("sync_mode", [True, False])
# @pytest.mark.asyncio
# async def test_openai_with_organization(sync_mode):
#     try:
#         print("Testing OpenAI with organization")
#         model_list = [
#             {
#                 "model_name": "openai-bad-org",
#                 "litellm_params": {
#                     "model": "gpt-3.5-turbo",
#                     "organization": "org-ikDc4ex8NB",
#                 },
#             },
#             {
#                 "model_name": "openai-good-org",
#                 "litellm_params": {"model": "gpt-3.5-turbo"},
#             },
#         ]
#         router = Router(model_list=model_list)
#         print(router.model_list)
#         print(router.model_list[0])
#         if sync_mode:
#             openai_client = router._get_client(
#                 deployment=router.model_list[0],
#                 kwargs={"input": ["hello"], "model": "openai-bad-org"},
#             )
#             print(vars(openai_client))
#             assert openai_client.organization == "org-ikDc4ex8NB"
#             # bad org raises error
#             try:
#                 response = router.completion(
#                     model="openai-bad-org",
#                     messages=[{"role": "user", "content": "this is a test"}],
#                 )
#                 pytest.fail(
#                     "Request should have failed - This organization does not exist"
#                 )
#             except Exception as e:
#                 print("Got exception: " + str(e))
#                 assert "header should match organization for API key" in str(
#                     e
#                 ) or "No such organization" in str(e)
#             # good org works
#             response = router.completion(
#                 model="openai-good-org",
#                 messages=[{"role": "user", "content": "this is a test"}],
#                 max_tokens=5,
#             )
#         else:
#             openai_client = router._get_client(
#                 deployment=router.model_list[0],
#                 kwargs={"input": ["hello"], "model": "openai-bad-org"},
#                 client_type="async",
#             )
#             print(vars(openai_client))
#             assert openai_client.organization == "org-ikDc4ex8NB"
#             # bad org raises error
#             try:
#                 response = await router.acompletion(
#                     model="openai-bad-org",
#                     messages=[{"role": "user", "content": "this is a test"}],
#                 )
#                 pytest.fail(
#                     "Request should have failed - This organization does not exist"
#                 )
#             except Exception as e:
#                 print("Got exception: " + str(e))
#                 assert "header should match organization for API key" in str(
#                     e
#                 ) or "No such organization" in str(e)
#             # good org works
#             response = await router.acompletion(
#                 model="openai-good-org",
#                 messages=[{"role": "user", "content": "this is a test"}],
#                 max_tokens=5,
#             )
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")
# def test_init_clients_azure_command_r_plus():
#     # This tests that the router uses the OpenAI client for Azure/Command-R+
#     # For azure/command-r-plus we need to use openai.OpenAI because of how Azure requires requests to be sent
#     litellm.set_verbose = True
#     import logging
#     from litellm._logging import verbose_router_logger
#     verbose_router_logger.setLevel(logging.DEBUG)
#     try:
#         print("testing init 4 clients with diff timeouts")
#         model_list = [
#             {
#                 "model_name": "gpt-3.5-turbo",
#                 "litellm_params": {
#                     "model": "azure/command-r-plus",
#                     "api_key": os.getenv("AZURE_COHERE_API_KEY"),
#                     "api_base": os.getenv("AZURE_COHERE_API_BASE"),
#                     "timeout": 0.01,
#                     "stream_timeout": 0.000_001,
#                     "max_retries": 7,
#                 },
#             },
#         ]
#         router = Router(model_list=model_list, set_verbose=True)
#         for elem in router.model_list:
#             model_id = elem["model_info"]["id"]
#             async_client = router.cache.get_cache(f"{model_id}_async_client")
#             stream_async_client = router.cache.get_cache(
#                 f"{model_id}_stream_async_client"
#             )
#             # Assert the async clients used are OpenAI clients and not Azure clients
#             # For Azure/Command-R-Plus and Azure/Mistral the clients NEED to be OpenAI clients
#             # this is weirdness introduced on Azure's side
#             assert "openai.AsyncOpenAI" in str(async_client)
#             assert "openai.AsyncOpenAI" in str(stream_async_client)
#         print("PASSED !")
#     except Exception as e:
#         traceback.print_exc()
#         pytest.fail(f"Error occurred: {e}")
# @pytest.mark.asyncio
# async def test_aaaaatext_completion_with_organization():
#     try:
#         print("Testing Text OpenAI with organization")
#         model_list = [
#             {
#                 "model_name": "openai-bad-org",
#                 "litellm_params": {
#                     "model": "text-completion-openai/gpt-3.5-turbo-instruct",
#                     "api_key": os.getenv("OPENAI_API_KEY", None),
#                     "organization": "org-ikDc4ex8NB",
#                 },
#             },
#             {
#                 "model_name": "openai-good-org",
#                 "litellm_params": {
#                     "model": "text-completion-openai/gpt-3.5-turbo-instruct",
#                     "api_key": os.getenv("OPENAI_API_KEY", None),
#                     "organization": os.getenv("OPENAI_ORGANIZATION", None),
#                 },
#             },
#         ]
#         router = Router(model_list=model_list)
#         print(router.model_list)
#         print(router.model_list[0])
#         openai_client = router._get_client(
#             deployment=router.model_list[0],
#             kwargs={"input": ["hello"], "model": "openai-bad-org"},
#         )
#         print(vars(openai_client))
#         assert openai_client.organization == "org-ikDc4ex8NB"
#         # bad org raises error
#         try:
#             response = await router.atext_completion(
#                 model="openai-bad-org",
#                 prompt="this is a test",
#             )
#             pytest.fail("Request should have failed - This organization does not exist")
#         except Exception as e:
#             print("Got exception: " + str(e))
#             assert "header should match organization for API key" in str(
#                 e
#             ) or "No such organization" in str(e)
#         # good org works
#         response = await router.atext_completion(
#             model="openai-good-org",
#             prompt="this is a test",
#             max_tokens=5,
#         )
#         print("working response: ", response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")
# def test_init_clients_async_mode():
#     litellm.set_verbose = True
#     import logging
#     from litellm._logging import verbose_router_logger
#     from litellm.types.router import RouterGeneralSettings
#     verbose_router_logger.setLevel(logging.DEBUG)
#     try:
#         print("testing init 4 clients with diff timeouts")
#         model_list = [
#             {
#                 "model_name": "gpt-3.5-turbo",
#                 "litellm_params": {
#                     "model": "azure/chatgpt-v-2",
#                     "api_key": os.getenv("AZURE_API_KEY"),
#                     "api_version": os.getenv("AZURE_API_VERSION"),
#                     "api_base": os.getenv("AZURE_API_BASE"),
#                     "timeout": 0.01,
#                     "stream_timeout": 0.000_001,
#                     "max_retries": 7,
#                 },
#             },
#         ]
#         router = Router(
#             model_list=model_list,
#             set_verbose=True,
#             router_general_settings=RouterGeneralSettings(async_only_mode=True),
#         )
#         for elem in router.model_list:
#             model_id = elem["model_info"]["id"]
#             # sync clients not initialized in async_only_mode=True
#             assert router.cache.get_cache(f"{model_id}_client") is None
#             assert router.cache.get_cache(f"{model_id}_stream_client") is None
#             # only async clients initialized in async_only_mode=True
#             assert router.cache.get_cache(f"{model_id}_async_client") is not None
#             assert router.cache.get_cache(f"{model_id}_stream_async_client") is not None
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")
# @pytest.mark.parametrize(
#     "environment,expected_models",
#     [
#         ("development", ["gpt-3.5-turbo"]),
#         ("production", ["gpt-4", "gpt-3.5-turbo", "gpt-4o"]),
#     ],
# )
# def test_init_router_with_supported_environments(environment, expected_models):
# """
# Tests that the correct models are setup on router when LITELLM_ENVIRONMENT is set
# """
# os.environ["LITELLM_ENVIRONMENT"] = environment
# model_list = [
# {
# "model_name": "gpt-3.5-turbo",
# "litellm_params": {
# "model": "azure/chatgpt-v-2",
# "api_key": os.getenv("AZURE_API_KEY"),
# "api_version": os.getenv("AZURE_API_VERSION"),
# "api_base": os.getenv("AZURE_API_BASE"),
# "timeout": 0.01,
# "stream_timeout": 0.000_001,
# "max_retries": 7,
# },
# "model_info": {"supported_environments": ["development", "production"]},
# },
# {
# "model_name": "gpt-4",
# "litellm_params": {
# "model": "openai/gpt-4",
# "api_key": os.getenv("OPENAI_API_KEY"),
# "timeout": 0.01,
# "stream_timeout": 0.000_001,
# "max_retries": 7,
# },
# "model_info": {"supported_environments": ["production"]},
# },
# {
# "model_name": "gpt-4o",
# "litellm_params": {
# "model": "openai/gpt-4o",
# "api_key": os.getenv("OPENAI_API_KEY"),
# "timeout": 0.01,
# "stream_timeout": 0.000_001,
# "max_retries": 7,
# },
# "model_info": {"supported_environments": ["production"]},
# },
# ]
# router = Router(model_list=model_list, set_verbose=True)
# _model_list = router.get_model_names()
# print("model_list: ", _model_list)
# print("expected_models: ", expected_models)
# assert set(_model_list) == set(expected_models)
# os.environ.pop("LITELLM_ENVIRONMENT")