fix(router.py): allow user to control the latency routing time window

This commit is contained in:
Krrish Dholakia 2024-01-10 08:48:22 +05:30
parent 2b3fc15fa9
commit fe632c08a4
3 changed files with 59 additions and 7 deletions

View file

@ -51,9 +51,46 @@ def test_latency_updated():
latency_key = f"{model_group}_latency_map"
assert end_time - start_time == test_cache.get_cache(key=latency_key)[deployment_id][0]
# test_tpm_rpm_updated()
def test_latency_updated_custom_ttl():
    """
    Test that a user-supplied TTL (routing_args={"ttl": ...}) controls how long
    latency entries live in the router cache.

    The latency map must exist immediately after a success event is logged,
    and must be evicted once the TTL has elapsed.
    """
    test_cache = DualCache()
    model_list = []
    cache_time = 3  # seconds a latency entry should survive in the cache
    lowest_latency_logger = LowestLatencyLoggingHandler(
        router_cache=test_cache, model_list=model_list, routing_args={"ttl": cache_time}
    )

    model_group = "gpt-3.5-turbo"
    deployment_id = "1234"
    kwargs = {
        "litellm_params": {
            "metadata": {
                "model_group": "gpt-3.5-turbo",
                "deployment": "azure/chatgpt-v-2",
            },
            "model_info": {"id": deployment_id},
        }
    }

    start_time = time.time()
    response_obj = {"usage": {"total_tokens": 50}}
    time.sleep(5)  # simulate request latency so end_time - start_time > 0
    end_time = time.time()
    lowest_latency_logger.log_success_event(
        response_obj=response_obj,
        kwargs=kwargs,
        start_time=start_time,
        end_time=end_time,
    )

    latency_key = f"{model_group}_latency_map"
    assert isinstance(test_cache.get_cache(key=latency_key), dict)

    # Sleep slightly PAST the TTL: sleeping exactly `cache_time` races the
    # expiry check and makes this assertion flaky.
    time.sleep(cache_time + 0.5)
    assert test_cache.get_cache(key=latency_key) is None
def test_get_available_deployments():
test_cache = DualCache()