diff --git a/tests/local_testing/test_router_init.py b/tests/local_testing/test_router_init.py index 4fce5cbfcc..00b2daa764 100644 --- a/tests/local_testing/test_router_init.py +++ b/tests/local_testing/test_router_init.py @@ -1,704 +1,704 @@ -# this tests if the router is initialized correctly -import asyncio -import os -import sys -import time -import traceback - -import pytest - -sys.path.insert( - 0, os.path.abspath("../..") -) # Adds the parent directory to the system path -from collections import defaultdict -from concurrent.futures import ThreadPoolExecutor - -from dotenv import load_dotenv - -import litellm -from litellm import Router - -load_dotenv() - -# every time we load the router we should have 4 clients: -# Async -# Sync -# Async + Stream -# Sync + Stream - - -def test_init_clients(): - litellm.set_verbose = True - import logging - - from litellm._logging import verbose_router_logger - - verbose_router_logger.setLevel(logging.DEBUG) - try: - print("testing init 4 clients with diff timeouts") - model_list = [ - { - "model_name": "gpt-3.5-turbo", - "litellm_params": { - "model": "azure/chatgpt-v-2", - "api_key": os.getenv("AZURE_API_KEY"), - "api_version": os.getenv("AZURE_API_VERSION"), - "api_base": os.getenv("AZURE_API_BASE"), - "timeout": 0.01, - "stream_timeout": 0.000_001, - "max_retries": 7, - }, - }, - ] - router = Router(model_list=model_list, set_verbose=True) - for elem in router.model_list: - model_id = elem["model_info"]["id"] - assert router.cache.get_cache(f"{model_id}_client") is not None - assert router.cache.get_cache(f"{model_id}_async_client") is not None - assert router.cache.get_cache(f"{model_id}_stream_client") is not None - assert router.cache.get_cache(f"{model_id}_stream_async_client") is not None - - # check if timeout for stream/non stream clients is set correctly - async_client = router.cache.get_cache(f"{model_id}_async_client") - stream_async_client = router.cache.get_cache( - f"{model_id}_stream_async_client" - ) - - assert async_client.timeout == 0.01 - assert stream_async_client.timeout == 0.000_001 - print(vars(async_client)) - print() - print(async_client._base_url) - assert ( - async_client._base_url - == "https://openai-gpt-4-test-v-1.openai.azure.com/openai/" - ) - assert ( - stream_async_client._base_url - == "https://openai-gpt-4-test-v-1.openai.azure.com/openai/" - ) - - print("PASSED !") - - except Exception as e: - traceback.print_exc() - pytest.fail(f"Error occurred: {e}") - - -# test_init_clients() - - -def test_init_clients_basic(): - litellm.set_verbose = True - try: - print("Test basic client init") - model_list = [ - { - "model_name": "gpt-3.5-turbo", - "litellm_params": { - "model": "azure/chatgpt-v-2", - "api_key": os.getenv("AZURE_API_KEY"), - "api_version": os.getenv("AZURE_API_VERSION"), - "api_base": os.getenv("AZURE_API_BASE"), - }, - }, - ] - router = Router(model_list=model_list) - for elem in router.model_list: - model_id = elem["model_info"]["id"] - assert router.cache.get_cache(f"{model_id}_client") is not None - assert router.cache.get_cache(f"{model_id}_async_client") is not None - assert router.cache.get_cache(f"{model_id}_stream_client") is not None - assert router.cache.get_cache(f"{model_id}_stream_async_client") is not None - print("PASSED !") - - # see if we can init clients without timeout or max retries set - except Exception as e: - traceback.print_exc() - pytest.fail(f"Error occurred: {e}") - - -# test_init_clients_basic() - - -def test_init_clients_basic_azure_cloudflare(): - # init azure + cloudflare - # init OpenAI gpt-3.5 - # init OpenAI text-embedding - # init OpenAI comptaible - Mistral/mistral-medium - # init OpenAI compatible - xinference/bge - litellm.set_verbose = True - try: - print("Test basic client init") - model_list = [ - { - "model_name": "azure-cloudflare", - "litellm_params": { - "model": "azure/chatgpt-v-2", - "api_key": os.getenv("AZURE_API_KEY"), - "api_version": os.getenv("AZURE_API_VERSION"), - "api_base": "https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1", - }, - }, - { - "model_name": "gpt-openai", - "litellm_params": { - "model": "gpt-3.5-turbo", - "api_key": os.getenv("OPENAI_API_KEY"), - }, - }, - { - "model_name": "text-embedding-ada-002", - "litellm_params": { - "model": "text-embedding-ada-002", - "api_key": os.getenv("OPENAI_API_KEY"), - }, - }, - { - "model_name": "mistral", - "litellm_params": { - "model": "mistral/mistral-tiny", - "api_key": os.getenv("MISTRAL_API_KEY"), - }, - }, - { - "model_name": "bge-base-en", - "litellm_params": { - "model": "xinference/bge-base-en", - "api_base": "http://127.0.0.1:9997/v1", - "api_key": os.getenv("OPENAI_API_KEY"), - }, - }, - ] - router = Router(model_list=model_list) - for elem in router.model_list: - model_id = elem["model_info"]["id"] - assert router.cache.get_cache(f"{model_id}_client") is not None - assert router.cache.get_cache(f"{model_id}_async_client") is not None - assert router.cache.get_cache(f"{model_id}_stream_client") is not None - assert router.cache.get_cache(f"{model_id}_stream_async_client") is not None - print("PASSED !") - - # see if we can init clients without timeout or max retries set - except Exception as e: - traceback.print_exc() - pytest.fail(f"Error occurred: {e}") - - -# test_init_clients_basic_azure_cloudflare() - - -def test_timeouts_router(): - """ - Test the timeouts of the router with multiple clients. This HASas to raise a timeout error - """ - import openai - - litellm.set_verbose = True - try: - print("testing init 4 clients with diff timeouts") - model_list = [ - { - "model_name": "gpt-3.5-turbo", - "litellm_params": { - "model": "azure/chatgpt-v-2", - "api_key": os.getenv("AZURE_API_KEY"), - "api_version": os.getenv("AZURE_API_VERSION"), - "api_base": os.getenv("AZURE_API_BASE"), - "timeout": 0.000001, - "stream_timeout": 0.000_001, - }, - }, - ] - router = Router(model_list=model_list, num_retries=0) - - print("PASSED !") - - async def test(): - try: - await router.acompletion( - model="gpt-3.5-turbo", - messages=[ - {"role": "user", "content": "hello, write a 20 pg essay"} - ], - ) - except Exception as e: - raise e - - asyncio.run(test()) - except openai.APITimeoutError as e: - print( - "Passed: Raised correct exception. Got openai.APITimeoutError\nGood Job", e - ) - print(type(e)) - pass - except Exception as e: - pytest.fail( - f"Did not raise error `openai.APITimeoutError`. Instead raised error type: {type(e)}, Error: {e}" - ) - - -# test_timeouts_router() - - -def test_stream_timeouts_router(): - """ - Test the stream timeouts router. See if it selected the correct client with stream timeout - """ - import openai - - litellm.set_verbose = True - try: - print("testing init 4 clients with diff timeouts") - model_list = [ - { - "model_name": "gpt-3.5-turbo", - "litellm_params": { - "model": "azure/chatgpt-v-2", - "api_key": os.getenv("AZURE_API_KEY"), - "api_version": os.getenv("AZURE_API_VERSION"), - "api_base": os.getenv("AZURE_API_BASE"), - "timeout": 200, # regular calls will not timeout, stream calls will - "stream_timeout": 10, - }, - }, - ] - router = Router(model_list=model_list) - - print("PASSED !") - data = { - "model": "gpt-3.5-turbo", - "messages": [{"role": "user", "content": "hello, write a 20 pg essay"}], - "stream": True, - } - selected_client = router._get_client( - deployment=router.model_list[0], - kwargs=data, - client_type=None, - ) - print("Select client timeout", selected_client.timeout) - assert selected_client.timeout == 10 - - # make actual call - response = router.completion(**data) - - for chunk in response: - print(f"chunk: {chunk}") - except openai.APITimeoutError as e: - print( - "Passed: Raised correct exception. Got openai.APITimeoutError\nGood Job", e - ) - print(type(e)) - pass - except Exception as e: - pytest.fail( - f"Did not raise error `openai.APITimeoutError`. Instead raised error type: {type(e)}, Error: {e}" - ) - - -# test_stream_timeouts_router() - - -def test_xinference_embedding(): - # [Test Init Xinference] this tests if we init xinference on the router correctly - # [Test Exception Mapping] tests that xinference is an openai comptiable provider - print("Testing init xinference") - print( - "this tests if we create an OpenAI client for Xinference, with the correct API BASE" - ) - - model_list = [ - { - "model_name": "xinference", - "litellm_params": { - "model": "xinference/bge-base-en", - "api_base": "os.environ/XINFERENCE_API_BASE", - }, - } - ] - - router = Router(model_list=model_list) - - print(router.model_list) - print(router.model_list[0]) - - assert ( - router.model_list[0]["litellm_params"]["api_base"] == "http://0.0.0.0:9997" - ) # set in env - - openai_client = router._get_client( - deployment=router.model_list[0], - kwargs={"input": ["hello"], "model": "xinference"}, - ) - - assert openai_client._base_url == "http://0.0.0.0:9997" - assert "xinference" in litellm.openai_compatible_providers - print("passed") - - -# test_xinference_embedding() - - -def test_router_init_gpt_4_vision_enhancements(): - try: - # tests base_url set when any base_url with /openai/deployments passed to router - print("Testing Azure GPT_Vision enhancements") - - model_list = [ - { - "model_name": "gpt-4-vision-enhancements", - "litellm_params": { - "model": "azure/gpt-4-vision", - "api_key": os.getenv("AZURE_API_KEY"), - "base_url": "https://gpt-4-vision-resource.openai.azure.com/openai/deployments/gpt-4-vision/extensions/", - "dataSources": [ - { - "type": "AzureComputerVision", - "parameters": { - "endpoint": "os.environ/AZURE_VISION_ENHANCE_ENDPOINT", - "key": "os.environ/AZURE_VISION_ENHANCE_KEY", - }, - } - ], - }, - } - ] - - router = Router(model_list=model_list) - - print(router.model_list) - print(router.model_list[0]) - - assert ( - router.model_list[0]["litellm_params"]["base_url"] - == "https://gpt-4-vision-resource.openai.azure.com/openai/deployments/gpt-4-vision/extensions/" - ) # set in env - - assert ( - router.model_list[0]["litellm_params"]["dataSources"][0]["parameters"][ - "endpoint" - ] - == os.environ["AZURE_VISION_ENHANCE_ENDPOINT"] - ) - - assert ( - router.model_list[0]["litellm_params"]["dataSources"][0]["parameters"][ - "key" - ] - == os.environ["AZURE_VISION_ENHANCE_KEY"] - ) - - azure_client = router._get_client( - deployment=router.model_list[0], - kwargs={"stream": True, "model": "gpt-4-vision-enhancements"}, - client_type="async", - ) - - assert ( - azure_client._base_url - == "https://gpt-4-vision-resource.openai.azure.com/openai/deployments/gpt-4-vision/extensions/" - ) - print("passed") - except Exception as e: - pytest.fail(f"Error occurred: {e}") - - -@pytest.mark.parametrize("sync_mode", [True, False]) -@pytest.mark.asyncio -async def test_openai_with_organization(sync_mode): - try: - print("Testing OpenAI with organization") - model_list = [ - { - "model_name": "openai-bad-org", - "litellm_params": { - "model": "gpt-3.5-turbo", - "organization": "org-ikDc4ex8NB", - }, - }, - { - "model_name": "openai-good-org", - "litellm_params": {"model": "gpt-3.5-turbo"}, - }, - ] - - router = Router(model_list=model_list) - - print(router.model_list) - print(router.model_list[0]) - - if sync_mode: - openai_client = router._get_client( - deployment=router.model_list[0], - kwargs={"input": ["hello"], "model": "openai-bad-org"}, - ) - print(vars(openai_client)) - - assert openai_client.organization == "org-ikDc4ex8NB" - - # bad org raises error - - try: - response = router.completion( - model="openai-bad-org", - messages=[{"role": "user", "content": "this is a test"}], - ) - pytest.fail( - "Request should have failed - This organization does not exist" - ) - except Exception as e: - print("Got exception: " + str(e)) - assert "header should match organization for API key" in str( - e - ) or "No such organization" in str(e) - - # good org works - response = router.completion( - model="openai-good-org", - messages=[{"role": "user", "content": "this is a test"}], - max_tokens=5, - ) - else: - openai_client = router._get_client( - deployment=router.model_list[0], - kwargs={"input": ["hello"], "model": "openai-bad-org"}, - client_type="async", - ) - print(vars(openai_client)) - - assert openai_client.organization == "org-ikDc4ex8NB" - - # bad org raises error - - try: - response = await router.acompletion( - model="openai-bad-org", - messages=[{"role": "user", "content": "this is a test"}], - ) - pytest.fail( - "Request should have failed - This organization does not exist" - ) - except Exception as e: - print("Got exception: " + str(e)) - assert "header should match organization for API key" in str( - e - ) or "No such organization" in str(e) - - # good org works - response = await router.acompletion( - model="openai-good-org", - messages=[{"role": "user", "content": "this is a test"}], - max_tokens=5, - ) - - except Exception as e: - pytest.fail(f"Error occurred: {e}") - - -def test_init_clients_azure_command_r_plus(): - # This tests that the router uses the OpenAI client for Azure/Command-R+ - # For azure/command-r-plus we need to use openai.OpenAI because of how the Azure provider requires requests being sent - litellm.set_verbose = True - import logging - - from litellm._logging import verbose_router_logger - - verbose_router_logger.setLevel(logging.DEBUG) - try: - print("testing init 4 clients with diff timeouts") - model_list = [ - { - "model_name": "gpt-3.5-turbo", - "litellm_params": { - "model": "azure/command-r-plus", - "api_key": os.getenv("AZURE_COHERE_API_KEY"), - "api_base": os.getenv("AZURE_COHERE_API_BASE"), - "timeout": 0.01, - "stream_timeout": 0.000_001, - "max_retries": 7, - }, - }, - ] - router = Router(model_list=model_list, set_verbose=True) - for elem in router.model_list: - model_id = elem["model_info"]["id"] - async_client = router.cache.get_cache(f"{model_id}_async_client") - stream_async_client = router.cache.get_cache( - f"{model_id}_stream_async_client" - ) - # Assert the Async Clients used are OpenAI clients and not Azure - # For using Azure/Command-R-Plus and Azure/Mistral the clients NEED to be OpenAI clients used - # this is weirdness introduced on Azure's side - - assert "openai.AsyncOpenAI" in str(async_client) - assert "openai.AsyncOpenAI" in str(stream_async_client) - print("PASSED !") - - except Exception as e: - traceback.print_exc() - pytest.fail(f"Error occurred: {e}") - - -@pytest.mark.asyncio -async def test_aaaaatext_completion_with_organization(): - try: - print("Testing Text OpenAI with organization") - model_list = [ - { - "model_name": "openai-bad-org", - "litellm_params": { - "model": "text-completion-openai/gpt-3.5-turbo-instruct", - "api_key": os.getenv("OPENAI_API_KEY", None), - "organization": "org-ikDc4ex8NB", - }, - }, - { - "model_name": "openai-good-org", - "litellm_params": { - "model": "text-completion-openai/gpt-3.5-turbo-instruct", - "api_key": os.getenv("OPENAI_API_KEY", None), - "organization": os.getenv("OPENAI_ORGANIZATION", None), - }, - }, - ] - - router = Router(model_list=model_list) - - print(router.model_list) - print(router.model_list[0]) - - openai_client = router._get_client( - deployment=router.model_list[0], - kwargs={"input": ["hello"], "model": "openai-bad-org"}, - ) - print(vars(openai_client)) - - assert openai_client.organization == "org-ikDc4ex8NB" - - # bad org raises error - - try: - response = await router.atext_completion( - model="openai-bad-org", - prompt="this is a test", - ) - pytest.fail("Request should have failed - This organization does not exist") - except Exception as e: - print("Got exception: " + str(e)) - assert "header should match organization for API key" in str( - e - ) or "No such organization" in str(e) - - # good org works - response = await router.atext_completion( - model="openai-good-org", - prompt="this is a test", - max_tokens=5, - ) - print("working response: ", response) - - except Exception as e: - pytest.fail(f"Error occurred: {e}") - - -def test_init_clients_async_mode(): - litellm.set_verbose = True - import logging - - from litellm._logging import verbose_router_logger - from litellm.types.router import RouterGeneralSettings - - verbose_router_logger.setLevel(logging.DEBUG) - try: - print("testing init 4 clients with diff timeouts") - model_list = [ - { - "model_name": "gpt-3.5-turbo", - "litellm_params": { - "model": "azure/chatgpt-v-2", - "api_key": os.getenv("AZURE_API_KEY"), - "api_version": os.getenv("AZURE_API_VERSION"), - "api_base": os.getenv("AZURE_API_BASE"), - "timeout": 0.01, - "stream_timeout": 0.000_001, - "max_retries": 7, - }, - }, - ] - router = Router( - model_list=model_list, - set_verbose=True, - router_general_settings=RouterGeneralSettings(async_only_mode=True), - ) - for elem in router.model_list: - model_id = elem["model_info"]["id"] - - # sync clients not initialized in async_only_mode=True - assert router.cache.get_cache(f"{model_id}_client") is None - assert router.cache.get_cache(f"{model_id}_stream_client") is None - - # only async clients initialized in async_only_mode=True - assert router.cache.get_cache(f"{model_id}_async_client") is not None - assert router.cache.get_cache(f"{model_id}_stream_async_client") is not None - except Exception as e: - pytest.fail(f"Error occurred: {e}") - - -@pytest.mark.parametrize( - "environment,expected_models", - [ - ("development", ["gpt-3.5-turbo"]), - ("production", ["gpt-4", "gpt-3.5-turbo", "gpt-4o"]), - ], -) -def test_init_router_with_supported_environments(environment, expected_models): - """ - Tests that the correct models are setup on router when LITELLM_ENVIRONMENT is set - """ - os.environ["LITELLM_ENVIRONMENT"] = environment - model_list = [ - { - "model_name": "gpt-3.5-turbo", - "litellm_params": { - "model": "azure/chatgpt-v-2", - "api_key": os.getenv("AZURE_API_KEY"), - "api_version": os.getenv("AZURE_API_VERSION"), - "api_base": os.getenv("AZURE_API_BASE"), - "timeout": 0.01, - "stream_timeout": 0.000_001, - "max_retries": 7, - }, - "model_info": {"supported_environments": ["development", "production"]}, - }, - { - "model_name": "gpt-4", - "litellm_params": { - "model": "openai/gpt-4", - "api_key": os.getenv("OPENAI_API_KEY"), - "timeout": 0.01, - "stream_timeout": 0.000_001, - "max_retries": 7, - }, - "model_info": {"supported_environments": ["production"]}, - }, - { - "model_name": "gpt-4o", - "litellm_params": { - "model": "openai/gpt-4o", - "api_key": os.getenv("OPENAI_API_KEY"), - "timeout": 0.01, - "stream_timeout": 0.000_001, - "max_retries": 7, - }, - "model_info": {"supported_environments": ["production"]}, - }, - ] - router = Router(model_list=model_list, set_verbose=True) - _model_list = router.get_model_names() - - print("model_list: ", _model_list) - print("expected_models: ", expected_models) - - assert set(_model_list) == set(expected_models) - - os.environ.pop("LITELLM_ENVIRONMENT") +# # this tests if the router is initialized correctly +# import asyncio +# import os +# import sys +# import time +# import traceback + +# import pytest + +# sys.path.insert( +# 0, os.path.abspath("../..") +# ) # Adds the parent directory to the system path +# from collections import defaultdict +# from concurrent.futures import ThreadPoolExecutor + +# from dotenv import load_dotenv + +# import litellm +# from litellm import Router + +# load_dotenv() + +# # every time we load the router we should have 4 clients: +# # Async +# # Sync +# # Async + Stream +# # Sync + Stream + + +# def test_init_clients(): +# litellm.set_verbose = True +# import logging + +# from litellm._logging import verbose_router_logger + +# verbose_router_logger.setLevel(logging.DEBUG) +# try: +# print("testing init 4 clients with diff timeouts") +# model_list = [ +# { +# "model_name": "gpt-3.5-turbo", +# "litellm_params": { +# "model": "azure/chatgpt-v-2", +# "api_key": os.getenv("AZURE_API_KEY"), +# "api_version": os.getenv("AZURE_API_VERSION"), +# "api_base": os.getenv("AZURE_API_BASE"), +# "timeout": 0.01, +# "stream_timeout": 0.000_001, +# "max_retries": 7, +# }, +# }, +# ] +# router = Router(model_list=model_list, set_verbose=True) +# for elem in router.model_list: +# model_id = elem["model_info"]["id"] +# assert router.cache.get_cache(f"{model_id}_client") is not None +# assert router.cache.get_cache(f"{model_id}_async_client") is not None +# assert router.cache.get_cache(f"{model_id}_stream_client") is not None +# assert router.cache.get_cache(f"{model_id}_stream_async_client") is not None + +# # check if timeout for stream/non stream clients is set correctly +# async_client = router.cache.get_cache(f"{model_id}_async_client") +# stream_async_client = router.cache.get_cache( +# f"{model_id}_stream_async_client" +# ) + +# assert async_client.timeout == 0.01 +# assert stream_async_client.timeout == 0.000_001 +# print(vars(async_client)) +# print() +# print(async_client._base_url) +# assert ( +# async_client._base_url +# == "https://openai-gpt-4-test-v-1.openai.azure.com/openai/" +# ) +# assert ( +# stream_async_client._base_url +# == "https://openai-gpt-4-test-v-1.openai.azure.com/openai/" +# ) + +# print("PASSED !") + +# except Exception as e: +# traceback.print_exc() +# pytest.fail(f"Error occurred: {e}") + + +# # test_init_clients() + + +# def test_init_clients_basic(): +# litellm.set_verbose = True +# try: +# print("Test basic client init") +# model_list = [ +# { +# "model_name": "gpt-3.5-turbo", +# "litellm_params": { +# "model": "azure/chatgpt-v-2", +# "api_key": os.getenv("AZURE_API_KEY"), +# "api_version": os.getenv("AZURE_API_VERSION"), +# "api_base": os.getenv("AZURE_API_BASE"), +# }, +# }, +# ] +# router = Router(model_list=model_list) +# for elem in router.model_list: +# model_id = elem["model_info"]["id"] +# assert router.cache.get_cache(f"{model_id}_client") is not None +# assert router.cache.get_cache(f"{model_id}_async_client") is not None +# assert router.cache.get_cache(f"{model_id}_stream_client") is not None +# assert router.cache.get_cache(f"{model_id}_stream_async_client") is not None +# print("PASSED !") + +# # see if we can init clients without timeout or max retries set +# except Exception as e: +# traceback.print_exc() +# pytest.fail(f"Error occurred: {e}") + + +# # test_init_clients_basic() + + +# def test_init_clients_basic_azure_cloudflare(): +# # init azure + cloudflare +# # init OpenAI gpt-3.5 +# # init OpenAI text-embedding +# # init OpenAI comptaible - Mistral/mistral-medium +# # init OpenAI compatible - xinference/bge +# litellm.set_verbose = True +# try: +# print("Test basic client init") +# model_list = [ +# { +# "model_name": "azure-cloudflare", +# "litellm_params": { +# "model": "azure/chatgpt-v-2", +# "api_key": os.getenv("AZURE_API_KEY"), +# "api_version": os.getenv("AZURE_API_VERSION"), +# "api_base": "https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1", +# }, +# }, +# { +# "model_name": "gpt-openai", +# "litellm_params": { +# "model": "gpt-3.5-turbo", +# "api_key": os.getenv("OPENAI_API_KEY"), +# }, +# }, +# { +# "model_name": "text-embedding-ada-002", +# "litellm_params": { +# "model": "text-embedding-ada-002", +# "api_key": os.getenv("OPENAI_API_KEY"), +# }, +# }, +# { +# "model_name": "mistral", +# "litellm_params": { +# "model": "mistral/mistral-tiny", +# "api_key": os.getenv("MISTRAL_API_KEY"), +# }, +# }, +# { +# "model_name": "bge-base-en", +# "litellm_params": { +# "model": "xinference/bge-base-en", +# "api_base": "http://127.0.0.1:9997/v1", +# "api_key": os.getenv("OPENAI_API_KEY"), +# }, +# }, +# ] +# router = Router(model_list=model_list) +# for elem in router.model_list: +# model_id = elem["model_info"]["id"] +# assert router.cache.get_cache(f"{model_id}_client") is not None +# assert router.cache.get_cache(f"{model_id}_async_client") is not None +# assert router.cache.get_cache(f"{model_id}_stream_client") is not None +# assert router.cache.get_cache(f"{model_id}_stream_async_client") is not None +# print("PASSED !") + +# # see if we can init clients without timeout or max retries set +# except Exception as e: +# traceback.print_exc() +# pytest.fail(f"Error occurred: {e}") + + +# # test_init_clients_basic_azure_cloudflare() + + +# def test_timeouts_router(): +# """ +# Test the timeouts of the router with multiple clients. This HASas to raise a timeout error +# """ +# import openai + +# litellm.set_verbose = True +# try: +# print("testing init 4 clients with diff timeouts") +# model_list = [ +# { +# "model_name": "gpt-3.5-turbo", +# "litellm_params": { +# "model": "azure/chatgpt-v-2", +# "api_key": os.getenv("AZURE_API_KEY"), +# "api_version": os.getenv("AZURE_API_VERSION"), +# "api_base": os.getenv("AZURE_API_BASE"), +# "timeout": 0.000001, +# "stream_timeout": 0.000_001, +# }, +# }, +# ] +# router = Router(model_list=model_list, num_retries=0) + +# print("PASSED !") + +# async def test(): +# try: +# await router.acompletion( +# model="gpt-3.5-turbo", +# messages=[ +# {"role": "user", "content": "hello, write a 20 pg essay"} +# ], +# ) +# except Exception as e: +# raise e + +# asyncio.run(test()) +# except openai.APITimeoutError as e: +# print( +# "Passed: Raised correct exception. Got openai.APITimeoutError\nGood Job", e +# ) +# print(type(e)) +# pass +# except Exception as e: +# pytest.fail( +# f"Did not raise error `openai.APITimeoutError`. Instead raised error type: {type(e)}, Error: {e}" +# ) + + +# # test_timeouts_router() + + +# def test_stream_timeouts_router(): +# """ +# Test the stream timeouts router. See if it selected the correct client with stream timeout +# """ +# import openai + +# litellm.set_verbose = True +# try: +# print("testing init 4 clients with diff timeouts") +# model_list = [ +# { +# "model_name": "gpt-3.5-turbo", +# "litellm_params": { +# "model": "azure/chatgpt-v-2", +# "api_key": os.getenv("AZURE_API_KEY"), +# "api_version": os.getenv("AZURE_API_VERSION"), +# "api_base": os.getenv("AZURE_API_BASE"), +# "timeout": 200, # regular calls will not timeout, stream calls will +# "stream_timeout": 10, +# }, +# }, +# ] +# router = Router(model_list=model_list) + +# print("PASSED !") +# data = { +# "model": "gpt-3.5-turbo", +# "messages": [{"role": "user", "content": "hello, write a 20 pg essay"}], +# "stream": True, +# } +# selected_client = router._get_client( +# deployment=router.model_list[0], +# kwargs=data, +# client_type=None, +# ) +# print("Select client timeout", selected_client.timeout) +# assert selected_client.timeout == 10 + +# # make actual call +# response = router.completion(**data) + +# for chunk in response: +# print(f"chunk: {chunk}") +# except openai.APITimeoutError as e: +# print( +# "Passed: Raised correct exception. Got openai.APITimeoutError\nGood Job", e +# ) +# print(type(e)) +# pass +# except Exception as e: +# pytest.fail( +# f"Did not raise error `openai.APITimeoutError`. Instead raised error type: {type(e)}, Error: {e}" +# ) + + +# # test_stream_timeouts_router() + + +# def test_xinference_embedding(): +# # [Test Init Xinference] this tests if we init xinference on the router correctly +# # [Test Exception Mapping] tests that xinference is an openai comptiable provider +# print("Testing init xinference") +# print( +# "this tests if we create an OpenAI client for Xinference, with the correct API BASE" +# ) + +# model_list = [ +# { +# "model_name": "xinference", +# "litellm_params": { +# "model": "xinference/bge-base-en", +# "api_base": "os.environ/XINFERENCE_API_BASE", +# }, +# } +# ] + +# router = Router(model_list=model_list) + +# print(router.model_list) +# print(router.model_list[0]) + +# assert ( +# router.model_list[0]["litellm_params"]["api_base"] == "http://0.0.0.0:9997" +# ) # set in env + +# openai_client = router._get_client( +# deployment=router.model_list[0], +# kwargs={"input": ["hello"], "model": "xinference"}, +# ) + +# assert openai_client._base_url == "http://0.0.0.0:9997" +# assert "xinference" in litellm.openai_compatible_providers +# print("passed") + + +# # test_xinference_embedding() + + +# def test_router_init_gpt_4_vision_enhancements(): +# try: +# # tests base_url set when any base_url with /openai/deployments passed to router +# print("Testing Azure GPT_Vision enhancements") + +# model_list = [ +# { +# "model_name": "gpt-4-vision-enhancements", +# "litellm_params": { +# "model": "azure/gpt-4-vision", +# "api_key": os.getenv("AZURE_API_KEY"), +# "base_url": "https://gpt-4-vision-resource.openai.azure.com/openai/deployments/gpt-4-vision/extensions/", +# "dataSources": [ +# { +# "type": "AzureComputerVision", +# "parameters": { +# "endpoint": "os.environ/AZURE_VISION_ENHANCE_ENDPOINT", +# "key": "os.environ/AZURE_VISION_ENHANCE_KEY", +# }, +# } +# ], +# }, +# } +# ] + +# router = Router(model_list=model_list) + +# print(router.model_list) +# print(router.model_list[0]) + +# assert ( +# router.model_list[0]["litellm_params"]["base_url"] +# == "https://gpt-4-vision-resource.openai.azure.com/openai/deployments/gpt-4-vision/extensions/" +# ) # set in env + +# assert ( +# router.model_list[0]["litellm_params"]["dataSources"][0]["parameters"][ +# "endpoint" +# ] +# == os.environ["AZURE_VISION_ENHANCE_ENDPOINT"] +# ) + +# assert ( +# router.model_list[0]["litellm_params"]["dataSources"][0]["parameters"][ +# "key" +# ] +# == os.environ["AZURE_VISION_ENHANCE_KEY"] +# ) + +# azure_client = router._get_client( +# deployment=router.model_list[0], +# kwargs={"stream": True, "model": "gpt-4-vision-enhancements"}, +# client_type="async", +# ) + +# assert ( +# azure_client._base_url +# == "https://gpt-4-vision-resource.openai.azure.com/openai/deployments/gpt-4-vision/extensions/" +# ) +# print("passed") +# except Exception as e: +# pytest.fail(f"Error occurred: {e}") + + +# @pytest.mark.parametrize("sync_mode", [True, False]) +# @pytest.mark.asyncio +# async def test_openai_with_organization(sync_mode): +# try: +# print("Testing OpenAI with organization") +# model_list = [ +# { +# "model_name": "openai-bad-org", +# "litellm_params": { +# "model": "gpt-3.5-turbo", +# "organization": "org-ikDc4ex8NB", +# }, +# }, +# { +# "model_name": "openai-good-org", +# "litellm_params": {"model": "gpt-3.5-turbo"}, +# }, +# ] + +# router = Router(model_list=model_list) + +# print(router.model_list) +# print(router.model_list[0]) + +# if sync_mode: +# openai_client = router._get_client( +# deployment=router.model_list[0], +# kwargs={"input": ["hello"], "model": "openai-bad-org"}, +# ) +# print(vars(openai_client)) + +# assert openai_client.organization == "org-ikDc4ex8NB" + +# # bad org raises error + +# try: +# response = router.completion( +# model="openai-bad-org", +# messages=[{"role": "user", "content": "this is a test"}], +# ) +# pytest.fail( +# "Request should have failed - This organization does not exist" +# ) +# except Exception as e: +# print("Got exception: " + str(e)) +# assert "header should match organization for API key" in str( +# e +# ) or "No such organization" in str(e) + +# # good org works +# response = router.completion( +# model="openai-good-org", +# messages=[{"role": "user", "content": "this is a test"}], +# max_tokens=5, +# ) +# else: +# openai_client = router._get_client( +# deployment=router.model_list[0], +# kwargs={"input": ["hello"], "model": "openai-bad-org"}, +# client_type="async", +# ) +# print(vars(openai_client)) + +# assert openai_client.organization == "org-ikDc4ex8NB" + +# # bad org raises error + +# try: +# response = await router.acompletion( +# model="openai-bad-org", +# messages=[{"role": "user", "content": "this is a test"}], +# ) +# pytest.fail( +# "Request should have failed - This organization does not exist" +# ) +# except Exception as e: +# print("Got exception: " + str(e)) +# assert "header should match organization for API key" in str( +# e +# ) or "No such organization" in str(e) + +# # good org works +# response = await router.acompletion( +# model="openai-good-org", +# messages=[{"role": "user", "content": "this is a test"}], +# max_tokens=5, +# ) + +# except Exception as e: +# pytest.fail(f"Error occurred: {e}") + + +# def test_init_clients_azure_command_r_plus(): +# # This tests that the router uses the OpenAI client for Azure/Command-R+ +# # For azure/command-r-plus we need to use openai.OpenAI because of how the Azure provider requires requests being sent +# litellm.set_verbose = True +# import logging + +# from litellm._logging import verbose_router_logger + +# verbose_router_logger.setLevel(logging.DEBUG) +# try: +# print("testing init 4 clients with diff timeouts") +# model_list = [ +# { +# "model_name": "gpt-3.5-turbo", +# "litellm_params": { +# "model": "azure/command-r-plus", +# "api_key": os.getenv("AZURE_COHERE_API_KEY"), +# "api_base": os.getenv("AZURE_COHERE_API_BASE"), +# "timeout": 0.01, +# "stream_timeout": 0.000_001, +# "max_retries": 7, +# }, +# }, +# ] +# router = Router(model_list=model_list, set_verbose=True) +# for elem in router.model_list: +# model_id = elem["model_info"]["id"] +# async_client = router.cache.get_cache(f"{model_id}_async_client") +# stream_async_client = router.cache.get_cache( +# f"{model_id}_stream_async_client" +# ) +# # Assert the Async Clients used are OpenAI clients and not Azure +# # For using Azure/Command-R-Plus and Azure/Mistral the clients NEED to be OpenAI clients used +# # this is weirdness introduced on Azure's side + +# assert "openai.AsyncOpenAI" in str(async_client) +# assert "openai.AsyncOpenAI" in str(stream_async_client) +# print("PASSED !") + +# except Exception as e: +# traceback.print_exc() +# pytest.fail(f"Error occurred: {e}") + + +# @pytest.mark.asyncio +# async def test_aaaaatext_completion_with_organization(): +# try: +# print("Testing Text OpenAI with organization") +# model_list = [ +# { +# "model_name": "openai-bad-org", +# "litellm_params": { +# "model": "text-completion-openai/gpt-3.5-turbo-instruct", +# "api_key": os.getenv("OPENAI_API_KEY", None), +# "organization": "org-ikDc4ex8NB", +# }, +# }, +# { +# "model_name": "openai-good-org", +# "litellm_params": { +# "model": "text-completion-openai/gpt-3.5-turbo-instruct", +# "api_key": os.getenv("OPENAI_API_KEY", None), +# "organization": os.getenv("OPENAI_ORGANIZATION", None), +# }, +# }, +# ] + +# router = Router(model_list=model_list) + +# print(router.model_list) +# print(router.model_list[0]) + +# openai_client = router._get_client( +# deployment=router.model_list[0], +# kwargs={"input": ["hello"], "model": "openai-bad-org"}, +# ) +# print(vars(openai_client)) + +# assert openai_client.organization == "org-ikDc4ex8NB" + +# # bad org raises error + +# try: +# response = await router.atext_completion( +# model="openai-bad-org", +# prompt="this is a test", +# ) +# pytest.fail("Request should have failed - This organization does not exist") +# except Exception as e: +# print("Got exception: " + str(e)) +# assert "header should match organization for API key" in str( +# e +# ) or "No such organization" in str(e) + +# # good org works +# response = await router.atext_completion( +# model="openai-good-org", +# prompt="this is a test", +# max_tokens=5, +# ) +# print("working response: ", response) + +# except Exception as e: +# pytest.fail(f"Error occurred: {e}") + + +# def test_init_clients_async_mode(): +# litellm.set_verbose = True +# import logging + +# from litellm._logging import verbose_router_logger +# from litellm.types.router import RouterGeneralSettings + +# verbose_router_logger.setLevel(logging.DEBUG) +# try: +# print("testing init 4 clients with diff timeouts") +# model_list = [ +# { +# "model_name": "gpt-3.5-turbo", +# "litellm_params": { +# "model": "azure/chatgpt-v-2", +# "api_key": os.getenv("AZURE_API_KEY"), +# "api_version": os.getenv("AZURE_API_VERSION"), +# "api_base": os.getenv("AZURE_API_BASE"), +# "timeout": 0.01, +# "stream_timeout": 0.000_001, +# "max_retries": 7, +# }, +# }, +# ] +# router = Router( +# model_list=model_list, +# set_verbose=True, +# router_general_settings=RouterGeneralSettings(async_only_mode=True), +# ) +# for elem in router.model_list: +# model_id = elem["model_info"]["id"] + +# # sync clients not initialized in async_only_mode=True +# assert router.cache.get_cache(f"{model_id}_client") is None +# assert router.cache.get_cache(f"{model_id}_stream_client") is None + +# # only async clients initialized in async_only_mode=True +# assert router.cache.get_cache(f"{model_id}_async_client") is not None +# assert router.cache.get_cache(f"{model_id}_stream_async_client") is not None +# except Exception as e: +# pytest.fail(f"Error occurred: {e}") + + +# @pytest.mark.parametrize( +# "environment,expected_models", +# [ +# ("development", ["gpt-3.5-turbo"]), +# ("production", ["gpt-4", "gpt-3.5-turbo", "gpt-4o"]), +# ], +# ) +# def test_init_router_with_supported_environments(environment, expected_models): +# """ +# Tests that the correct models are setup on router when LITELLM_ENVIRONMENT is set +# """ +# os.environ["LITELLM_ENVIRONMENT"] = environment +# model_list = [ +# { +# "model_name": "gpt-3.5-turbo", +# "litellm_params": { +# "model": "azure/chatgpt-v-2", +# "api_key": os.getenv("AZURE_API_KEY"), +# "api_version": os.getenv("AZURE_API_VERSION"), +# "api_base": os.getenv("AZURE_API_BASE"), +# "timeout": 0.01, +# "stream_timeout": 0.000_001, +# "max_retries": 7, +# }, +# "model_info": {"supported_environments": ["development", "production"]}, +# }, +# { +# "model_name": "gpt-4", +# "litellm_params": { +# "model": "openai/gpt-4", +# "api_key": os.getenv("OPENAI_API_KEY"), +# "timeout": 0.01, +# "stream_timeout": 0.000_001, +# "max_retries": 7, +# }, +# "model_info": {"supported_environments": ["production"]}, +# }, +# { +# "model_name": "gpt-4o", +# "litellm_params": { +# "model": "openai/gpt-4o", +# "api_key": os.getenv("OPENAI_API_KEY"), +# "timeout": 0.01, +# "stream_timeout": 0.000_001, +# "max_retries": 7, +# }, +# "model_info": {"supported_environments": ["production"]}, +# }, +# ] +# router = Router(model_list=model_list, set_verbose=True) +# _model_list = router.get_model_names() + +# print("model_list: ", _model_list) +# print("expected_models: ", expected_models) + +# assert set(_model_list) == set(expected_models) + +# os.environ.pop("LITELLM_ENVIRONMENT")