Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 11:14:04 +00:00)

Commit 78f6fdcd92 (parent 9307f39daf): test_async_router_context_window_fallback

1 changed file with 18 additions and 64 deletions
@@ -718,61 +718,9 @@ def test_router_azure_acompletion():
         pytest.fail(f"Got unexpected exception on router! - {e}")


 # test_router_azure_acompletion()


-def test_router_context_window_fallback():
-    """
-    - Give a gpt-3.5-turbo model group with different context windows (4k vs. 16k)
-    - Send a 5k prompt
-    - Assert it works
-    """
-    import os
-
-    from large_text import text
-
-    litellm.set_verbose = False
-
-    print(f"len(text): {len(text)}")
-    try:
-        model_list = [
-            {
-                "model_name": "gpt-3.5-turbo", # openai model name
-                "litellm_params": { # params for litellm completion/embedding call
-                    "model": "azure/chatgpt-v-2",
-                    "api_key": os.getenv("AZURE_API_KEY"),
-                    "api_version": os.getenv("AZURE_API_VERSION"),
-                    "api_base": os.getenv("AZURE_API_BASE"),
-                    "base_model": "azure/gpt-35-turbo",
-                },
-            },
-            {
-                "model_name": "gpt-3.5-turbo-large", # openai model name
-                "litellm_params": { # params for litellm completion/embedding call
-                    "model": "gpt-3.5-turbo-1106",
-                    "api_key": os.getenv("OPENAI_API_KEY"),
-                },
-            },
-        ]
-
-        router = Router(model_list=model_list, set_verbose=True, context_window_fallbacks=[{"gpt-3.5-turbo": ["gpt-3.5-turbo-large"]}], num_retries=0) # type: ignore
-
-        response = router.completion(
-            model="gpt-3.5-turbo",
-            messages=[
-                {"role": "system", "content": text},
-                {"role": "user", "content": "Who was Alexander?"},
-            ],
-        )
-
-        print(f"response: {response}")
-        assert response.model == "gpt-3.5-turbo-1106"
-    except Exception as e:
-        pytest.fail(f"Got unexpected exception on router! - {str(e)}")
-
-
 @pytest.mark.asyncio
-async def test_async_router_context_window_fallback():
+@pytest.mark.parametrize("sync_mode", [True, False])
+async def test_async_router_context_window_fallback(sync_mode):
     """
     - Give a gpt-4 model group with different context windows (8192k vs. 128k)
     - Send a 10k prompt
@@ -806,7 +754,7 @@ async def test_async_router_context_window_fallback():
         ]

         router = Router(model_list=model_list, set_verbose=True, context_window_fallbacks=[{"gpt-4": ["gpt-4-turbo"]}], num_retries=0) # type: ignore
-
-        response = await router.acompletion(
-            model="gpt-4",
-            messages=[
+        if sync_mode is False:
+            response = await router.acompletion(
+                model="gpt-4",
+                messages=[
@@ -817,6 +765,12 @@ async def test_async_router_context_window_fallback():

-        print(f"response: {response}")
-        assert "gpt-4-turbo" in response.model
+            print(f"response: {response}")
+            assert "gpt-4-turbo" in response.model
+        else:
+            response = router.completion(
+                model="gpt-4",
+                messages=[{"role": "user", "content": "Who was Alexander?"}],
+            )
+            assert "gpt-4-turbo" in response.model
     except Exception as e:
         pytest.fail(f"Got unexpected exception on router! - {str(e)}")

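For reference, here is a minimal, self-contained sketch of the parametrized test this commit converges on: one test body that drives both `router.completion` and `router.acompletion` against a `Router` configured with `context_window_fallbacks`. The gpt-4 `model_list` is not visible in the hunks above, so the Azure deployment, its `base_model` value, and the `gpt-4-turbo` fallback entry below are assumptions modeled on the deleted gpt-3.5 test, and a repeated string stands in for the repo's `large_text.text` helper. Running it requires live Azure and OpenAI credentials.

```python
import os

import pytest

import litellm
from litellm import Router

# Stand-in for the repo's `large_text.text` helper (assumed): any prompt long
# enough to overflow the primary deployment's context window will do.
text = "Alexander the Great was a king of the ancient Greek kingdom of Macedon. " * 2000


@pytest.mark.asyncio
@pytest.mark.parametrize("sync_mode", [True, False])
async def test_async_router_context_window_fallback(sync_mode):
    """
    - Give a gpt-4 model group with different context windows
    - Send an oversized prompt
    - Assert the request lands on the fallback deployment
    """
    litellm.set_verbose = False

    model_list = [
        {
            "model_name": "gpt-4",  # smaller context window
            "litellm_params": {
                "model": "azure/chatgpt-v-2",  # assumed deployment name
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),
                "base_model": "azure/gpt-4",  # assumed; tells the router the true window size
            },
        },
        {
            "model_name": "gpt-4-turbo",  # larger context window
            "litellm_params": {
                "model": "gpt-4-turbo",  # assumed fallback model
                "api_key": os.getenv("OPENAI_API_KEY"),
            },
        },
    ]

    router = Router(
        model_list=model_list,
        set_verbose=True,
        context_window_fallbacks=[{"gpt-4": ["gpt-4-turbo"]}],
        num_retries=0,
    )  # type: ignore

    messages = [
        {"role": "system", "content": text},
        {"role": "user", "content": "Who was Alexander?"},
    ]

    # Same request, routed through either the sync or the async entry point.
    if sync_mode is False:
        response = await router.acompletion(model="gpt-4", messages=messages)
    else:
        response = router.completion(model="gpt-4", messages=messages)

    # The oversized prompt should have been re-routed to the larger deployment.
    print(f"response: {response}")
    assert "gpt-4-turbo" in response.model
```

Parametrizing on `sync_mode` lets a single test cover both call paths, which is presumably why the standalone sync fallback test could be deleted in the same commit.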