Litellm dev readd prompt caching (#7299)

* fix(router.py): re-add saving the model id on a prompt-caching-valid successful deployment

* fix(router.py): introduce optional pre_call_checks

isolates the prompt caching logic in a separate file (a registration sketch follows this list)

* fix(prompt_caching_deployment_check.py): fix import

* fix(router.py): new 'async_filter_deployments' event hook

allows a custom logger to filter the deployments returned to the routing strategy (a hook sketch follows this list)

* feat(prompt_caching_deployment_check.py): initial working commit of prompt-caching-based routing

* fix(cooldown_callbacks.py): fix linting error

* fix(budget_limiter.py): move the budget logger to the 'async_filter_deployments' hook

* test: add unit test

* test(test_router_helper_utils.py): add unit testing

* fix(budget_limiter.py): fix linting errors

* docs(config_settings.md): add 'optional_pre_call_checks' to router_settings param docs (a config sketch follows the diff below)
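
For orientation, a minimal registration sketch: Router and add_optional_pre_call_checks(["prompt_caching"]) are taken from the unit test in the diff below, while the model entry shown here is a made-up placeholder.

# minimal sketch; only add_optional_pre_call_checks(["prompt_caching"]) is
# confirmed by the unit test below, the model entry is a placeholder
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {"model": "gpt-3.5-turbo"},
        }
    ],
)
router.add_optional_pre_call_checks(["prompt_caching"])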
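
And a sketch of the filtering hook itself; the CustomLogger import path is litellm's usual one, but the hook signature is an assumption inferred from the router.async_callback_filter_deployments(...) call in the test below:

# hypothetical custom logger; the async_filter_deployments signature is
# inferred from the test's async_callback_filter_deployments(...) call
from litellm.integrations.custom_logger import CustomLogger


class KeepAllDeployments(CustomLogger):
    async def async_filter_deployments(
        self,
        model,
        healthy_deployments,
        messages,
        parent_otel_span=None,
        **kwargs,
    ):
        # return the subset of deployments the routing strategy may pick from;
        # a prompt-caching check would keep only the deployment whose cache
        # already holds this prompt prefix. Here we trivially keep everything.
        return healthy_deployments
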
Author: Krish Dholakia · 2024-12-18 15:13:49 -08:00 · committed by GitHub
parent d214d3cc3f
commit 2f08341a08
12 changed files with 276 additions and 74 deletions

test_router_helper_utils.py

@@ -1058,3 +1058,28 @@ def test_has_default_fallbacks(model_list, has_default_fallbacks, expected_resul
),
)
assert router._has_default_fallbacks() is expected_result
def test_add_optional_pre_call_checks(model_list):
    router = Router(model_list=model_list)
    router.add_optional_pre_call_checks(["prompt_caching"])
    # registering the check should install it as a litellm callback
    assert len(litellm.callbacks) > 0


@pytest.mark.asyncio
async def test_async_callback_filter_deployments(model_list):
    # imported in the original test (the budget limiter also uses the new hook)
    from litellm.router_strategy.budget_limiter import RouterBudgetLimiting

    router = Router(model_list=model_list)
    healthy_deployments = router.get_model_list(model_name="gpt-3.5-turbo")
    new_healthy_deployments = await router.async_callback_filter_deployments(
        model="gpt-3.5-turbo",
        healthy_deployments=healthy_deployments,
        messages=[],
        parent_otel_span=None,
    )
    # with no filtering callbacks registered, every deployment passes through
    assert len(new_healthy_deployments) == len(healthy_deployments)
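
Per the docs bullet above, the proxy config presumably exposes the same setting under router_settings; the YAML shape below is an assumption, with only the 'optional_pre_call_checks' key name taken from this commit's docs change:

# assumed config_settings.md-style proxy config; only the key name
# 'optional_pre_call_checks' comes from this commit
router_settings:
  optional_pre_call_checks: ["prompt_caching"]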