(test) router.py revert to stable version

This commit is contained in:
ishaan-jaff 2024-01-23 09:05:19 -08:00
parent fa2c94aacb
commit 3f37cf0bec

View file

@ -783,6 +783,9 @@ def test_reading_keys_os_environ():
assert float(model["litellm_params"]["timeout"]) == float(
os.environ["AZURE_TIMEOUT"]
), f"{model['litellm_params']['timeout']} vs {os.environ['AZURE_TIMEOUT']}"
assert float(model["litellm_params"]["stream_timeout"]) == float(
os.environ["AZURE_STREAM_TIMEOUT"]
), f"{model['litellm_params']['stream_timeout']} vs {os.environ['AZURE_STREAM_TIMEOUT']}"
assert int(model["litellm_params"]["max_retries"]) == int(
os.environ["AZURE_MAX_RETRIES"]
), f"{model['litellm_params']['max_retries']} vs {os.environ['AZURE_MAX_RETRIES']}"
@ -791,7 +794,7 @@ def test_reading_keys_os_environ():
async_client: openai.AsyncAzureOpenAI = router.cache.get_cache(f"{model_id}_async_client") # type: ignore
assert async_client.api_key == os.environ["AZURE_API_KEY"]
assert async_client.base_url == os.environ["AZURE_API_BASE"]
assert async_client.max_retries == int(
assert async_client.max_retries == (
os.environ["AZURE_MAX_RETRIES"]
), f"{async_client.max_retries} vs {os.environ['AZURE_MAX_RETRIES']}"
assert async_client.timeout == (
@ -804,7 +807,7 @@ def test_reading_keys_os_environ():
stream_async_client: openai.AsyncAzureOpenAI = router.cache.get_cache(f"{model_id}_stream_async_client") # type: ignore
assert stream_async_client.api_key == os.environ["AZURE_API_KEY"]
assert stream_async_client.base_url == os.environ["AZURE_API_BASE"]
assert stream_async_client.max_retries == int(
assert stream_async_client.max_retries == (
os.environ["AZURE_MAX_RETRIES"]
), f"{stream_async_client.max_retries} vs {os.environ['AZURE_MAX_RETRIES']}"
assert stream_async_client.timeout == (
@ -816,7 +819,7 @@ def test_reading_keys_os_environ():
client: openai.AzureOpenAI = router.cache.get_cache(f"{model_id}_client") # type: ignore
assert client.api_key == os.environ["AZURE_API_KEY"]
assert client.base_url == os.environ["AZURE_API_BASE"]
assert client.max_retries == int(
assert client.max_retries == (
os.environ["AZURE_MAX_RETRIES"]
), f"{client.max_retries} vs {os.environ['AZURE_MAX_RETRIES']}"
assert client.timeout == (
@ -828,7 +831,7 @@ def test_reading_keys_os_environ():
stream_client: openai.AzureOpenAI = router.cache.get_cache(f"{model_id}_stream_client") # type: ignore
assert stream_client.api_key == os.environ["AZURE_API_KEY"]
assert stream_client.base_url == os.environ["AZURE_API_BASE"]
assert stream_client.max_retries == int(
assert stream_client.max_retries == (
os.environ["AZURE_MAX_RETRIES"]
), f"{stream_client.max_retries} vs {os.environ['AZURE_MAX_RETRIES']}"
assert stream_client.timeout == (
@ -880,6 +883,9 @@ def test_reading_openai_keys_os_environ():
assert float(model["litellm_params"]["timeout"]) == float(
os.environ["AZURE_TIMEOUT"]
), f"{model['litellm_params']['timeout']} vs {os.environ['AZURE_TIMEOUT']}"
assert float(model["litellm_params"]["stream_timeout"]) == float(
os.environ["AZURE_STREAM_TIMEOUT"]
), f"{model['litellm_params']['stream_timeout']} vs {os.environ['AZURE_STREAM_TIMEOUT']}"
assert int(model["litellm_params"]["max_retries"]) == int(
os.environ["AZURE_MAX_RETRIES"]
), f"{model['litellm_params']['max_retries']} vs {os.environ['AZURE_MAX_RETRIES']}"
@ -887,7 +893,7 @@ def test_reading_openai_keys_os_environ():
model_id = model["model_info"]["id"]
async_client: openai.AsyncOpenAI = router.cache.get_cache(key=f"{model_id}_async_client") # type: ignore
assert async_client.api_key == os.environ["OPENAI_API_KEY"]
assert async_client.max_retries == int(
assert async_client.max_retries == (
os.environ["AZURE_MAX_RETRIES"]
), f"{async_client.max_retries} vs {os.environ['AZURE_MAX_RETRIES']}"
assert async_client.timeout == (
@ -899,7 +905,7 @@ def test_reading_openai_keys_os_environ():
stream_async_client: openai.AsyncOpenAI = router.cache.get_cache(key=f"{model_id}_stream_async_client") # type: ignore
assert stream_async_client.api_key == os.environ["OPENAI_API_KEY"]
assert stream_async_client.max_retries == int(
assert stream_async_client.max_retries == (
os.environ["AZURE_MAX_RETRIES"]
), f"{stream_async_client.max_retries} vs {os.environ['AZURE_MAX_RETRIES']}"
assert stream_async_client.timeout == (
@ -910,7 +916,7 @@ def test_reading_openai_keys_os_environ():
print("\n Testing sync client")
client: openai.AzureOpenAI = router.cache.get_cache(key=f"{model_id}_client") # type: ignore
assert client.api_key == os.environ["OPENAI_API_KEY"]
assert client.max_retries == int(
assert client.max_retries == (
os.environ["AZURE_MAX_RETRIES"]
), f"{client.max_retries} vs {os.environ['AZURE_MAX_RETRIES']}"
assert client.timeout == (
@ -921,7 +927,7 @@ def test_reading_openai_keys_os_environ():
print("\n Testing sync stream client")
stream_client: openai.AzureOpenAI = router.cache.get_cache(key=f"{model_id}_stream_client") # type: ignore
assert stream_client.api_key == os.environ["OPENAI_API_KEY"]
assert stream_client.max_retries == int(
assert stream_client.max_retries == (
os.environ["AZURE_MAX_RETRIES"]
), f"{stream_client.max_retries} vs {os.environ['AZURE_MAX_RETRIES']}"
assert stream_client.timeout == (
@ -936,29 +942,27 @@ def test_reading_openai_keys_os_environ():
# test_reading_openai_keys_os_environ()
def test_router_timeout():
    """Check that a Router completion called with ``timeout=1`` returns promptly.

    Builds a single-deployment Router for ``gpt-3.5-turbo``, issues one
    completion request with a 1 second timeout, and asserts that the call
    either completes or raises ``litellm.exceptions.Timeout`` in under
    1.1 seconds.
    """
    # The "os.environ/..." strings are passed through as-is here; presumably
    # the Router resolves them from the environment -- confirm against Router.
    model_list = [
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
                "model": "gpt-3.5-turbo",
                "api_key": "os.environ/OPENAI_API_KEY",
                "timeout": "os.environ/AZURE_TIMEOUT",
                "stream_timeout": "os.environ/AZURE_STREAM_TIMEOUT",
                "max_retries": "os.environ/AZURE_MAX_RETRIES",
            },
        }
    ]
    router = Router(model_list=model_list)
    messages = [{"role": "user", "content": "Hey, how's it going?"}]

    # Use a monotonic clock: time.time() follows the wall clock and can jump
    # (NTP sync, manual adjustment), which would make the bound below flaky.
    start_time = time.perf_counter()
    try:
        router.completion(
            model="gpt-3.5-turbo", messages=messages, max_tokens=500, timeout=1
        )
    except litellm.exceptions.Timeout:
        # A timeout is an accepted outcome; only the elapsed time is checked.
        pass
    elapsed = time.perf_counter() - start_time

    assert elapsed < 1.1, f"router.completion took {elapsed:.3f}s, expected < 1.1s"
# def test_router_timeout():
# model_list = [
# {
# "model_name": "gpt-3.5-turbo",
# "litellm_params": {
# "model": "gpt-3.5-turbo",
# "api_key": "os.environ/OPENAI_API_KEY",
# "timeout": "os.environ/AZURE_TIMEOUT",
# "stream_timeout": "os.environ/AZURE_STREAM_TIMEOUT",
# "max_retries": "os.environ/AZURE_MAX_RETRIES",
# },
# }
# ]
# router = Router(model_list=model_list)
# messages = [{"role": "user", "content": "Hey, how's it going?"}]
# start_time = time.time()
# try:
# router.completion(
# model="gpt-3.5-turbo", messages=messages, max_tokens=500, timeout=1
# )
# except litellm.exceptions.Timeout as e:
# pass
# end_time = time.time()
# assert end_time - start_time < 1.1