diff --git a/litellm/router.py b/litellm/router.py
index 2f333bf6b3..d10eee21e0 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -151,7 +151,6 @@ from litellm.utils import (
     get_llm_provider,
     get_secret,
     get_utc_datetime,
-    is_prompt_caching_valid_prompt,
     is_region_allowed,
 )
 
@@ -3383,30 +3382,6 @@ class Router:
                 litellm_router_instance=self,
                 deployment_id=id,
             )
-
-            ## PROMPT CACHING
-            prompt_cache = PromptCachingCache(
-                cache=self.cache,
-            )
-            if (
-                standard_logging_object["messages"] is not None
-                and isinstance(standard_logging_object["messages"], list)
-                and deployment_name is not None
-                and isinstance(deployment_name, str)
-            ):
-                valid_prompt = is_prompt_caching_valid_prompt(
-                    messages=standard_logging_object["messages"],  # type: ignore
-                    tools=None,
-                    model=deployment_name,
-                    custom_llm_provider=None,
-                )
-                if valid_prompt:
-                    await prompt_cache.async_add_model_id(
-                        model_id=id,
-                        messages=standard_logging_object["messages"],  # type: ignore
-                        tools=None,
-                    )
-
             return tpm_key
 
         except Exception as e:
@@ -5339,25 +5314,6 @@ class Router:
                 messages=messages,
                 request_kwargs=request_kwargs,
             )
-
-        if messages is not None and is_prompt_caching_valid_prompt(
-            messages=cast(List[AllMessageValues], messages),
-            model=model,
-            custom_llm_provider=None,
-        ):
-            prompt_cache = PromptCachingCache(
-                cache=self.cache,
-            )
-            healthy_deployment = (
-                await prompt_cache.async_get_prompt_caching_deployment(
-                    router=self,
-                    messages=cast(List[AllMessageValues], messages),
-                    tools=None,
-                )
-            )
-            if healthy_deployment is not None:
-                return healthy_deployment
-
         # check if user wants to do tag based routing
         healthy_deployments = await get_deployments_for_tag(  # type: ignore
             llm_router_instance=self,
diff --git a/tests/local_testing/test_anthropic_prompt_caching.py b/tests/local_testing/test_anthropic_prompt_caching.py
index ad63072d74..9bf3803314 100644
--- a/tests/local_testing/test_anthropic_prompt_caching.py
+++ b/tests/local_testing/test_anthropic_prompt_caching.py
@@ -603,6 +603,9 @@ def test_is_prompt_caching_enabled(anthropic_messages):
     [("anthropic_messages", True), ("normal_messages", False)],
 )
 @pytest.mark.asyncio()
+@pytest.mark.skip(
+    reason="BETA FEATURE - skipping since this led to a latency impact, beta feature that is not used as yet"
+)
 async def test_router_prompt_caching_model_stored(
     messages, expected_model_id, anthropic_messages
 ):
@@ -650,6 +653,9 @@ async def test_router_prompt_caching_model_stored(
 
 
 @pytest.mark.asyncio()
+@pytest.mark.skip(
+    reason="BETA FEATURE - skipping since this led to a latency impact, beta feature that is not used as yet"
+)
 async def test_router_with_prompt_caching(anthropic_messages):
     """
     if prompt caching supported model called with prompt caching valid prompt,