(fix) latency fix - revert prompt caching check on litellm router (#7211)

* attempt to fix latency issue

* fix latency issues for router prompt caching
Ishaan Jaff 2024-12-12 20:50:16 -08:00 committed by GitHub
parent 3de32f4106
commit 7ff9a905d2
2 changed files with 6 additions and 44 deletions
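
The commit message attributes the latency regression to prompt-caching checks running on the router's request hot path. Below is a minimal micro-benchmark sketch of how that per-request overhead could be measured; the model name, `mock_response`, and message content are placeholder assumptions, not part of this commit.

```python
# Hypothetical latency micro-benchmark: time router.acompletion() over repeated
# calls, run against a mocked deployment so no real provider is hit.
import asyncio
import time

from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {"model": "gpt-3.5-turbo", "mock_response": "ok"},
        }
    ]
)


async def main() -> None:
    latencies = []
    for _ in range(20):
        start = time.perf_counter()
        await router.acompletion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "hello"}],
        )
        latencies.append(time.perf_counter() - start)
    print(f"avg routing latency: {sum(latencies) / len(latencies):.4f}s")


asyncio.run(main())
```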

@@ -151,7 +151,6 @@ from litellm.utils import (
     get_llm_provider,
     get_secret,
     get_utc_datetime,
-    is_prompt_caching_valid_prompt,
     is_region_allowed,
 )
@@ -3383,30 +3382,6 @@ class Router:
                 litellm_router_instance=self,
                 deployment_id=id,
             )
-            ## PROMPT CACHING
-            prompt_cache = PromptCachingCache(
-                cache=self.cache,
-            )
-            if (
-                standard_logging_object["messages"] is not None
-                and isinstance(standard_logging_object["messages"], list)
-                and deployment_name is not None
-                and isinstance(deployment_name, str)
-            ):
-                valid_prompt = is_prompt_caching_valid_prompt(
-                    messages=standard_logging_object["messages"],  # type: ignore
-                    tools=None,
-                    model=deployment_name,
-                    custom_llm_provider=None,
-                )
-                if valid_prompt:
-                    await prompt_cache.async_add_model_id(
-                        model_id=id,
-                        messages=standard_logging_object["messages"],  # type: ignore
-                        tools=None,
-                    )
             return tpm_key
         except Exception as e:
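
The block removed above awaited a PromptCachingCache write inside the success callback for every request whose prompt qualified for prompt caching. A hypothetical alternative, not what this commit does, would be to keep the bookkeeping but push it off the hot path as a fire-and-forget task so the cache write no longer blocks the response:

```python
import asyncio
from typing import Any, List, Optional


async def _record_prompt_cache(
    prompt_cache: Any,  # a PromptCachingCache instance (see the removed block above)
    model_id: str,
    messages: Optional[List[dict]],
) -> None:
    """Background bookkeeping; errors are swallowed, never raised to the request path."""
    try:
        # Same call the removed block awaited inline in the success callback.
        await prompt_cache.async_add_model_id(
            model_id=model_id, messages=messages, tools=None
        )
    except Exception:
        pass  # a failed cache write should not affect the response


# In the success callback, instead of awaiting the cache write inline:
#     asyncio.create_task(_record_prompt_cache(prompt_cache, id, messages))
```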
@@ -5339,25 +5314,6 @@ class Router:
             messages=messages,
             request_kwargs=request_kwargs,
         )
-        if messages is not None and is_prompt_caching_valid_prompt(
-            messages=cast(List[AllMessageValues], messages),
-            model=model,
-            custom_llm_provider=None,
-        ):
-            prompt_cache = PromptCachingCache(
-                cache=self.cache,
-            )
-            healthy_deployment = (
-                await prompt_cache.async_get_prompt_caching_deployment(
-                    router=self,
-                    messages=cast(List[AllMessageValues], messages),
-                    tools=None,
-                )
-            )
-            if healthy_deployment is not None:
-                return healthy_deployment
         # check if user wants to do tag based routing
         healthy_deployments = await get_deployments_for_tag(  # type: ignore
             llm_router_instance=self,
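
This second removed block ran before tag-based routing during deployment selection: when a prompt was valid for prompt caching, it asked the PromptCachingCache for a deployment that had already served the same messages and, if found, returned it immediately, pinning repeated prompts to one deployment. After the revert, selection falls straight through to tag-based and normal routing. A rough way to observe the difference is sketched below; the method and field names are assumed from the public Router API, not taken from this commit.

```python
# Hypothetical check: with the prompt-caching lookup reverted, repeated identical
# prompts are no longer pinned to the deployment that first served them, so the
# selected deployment id can vary across calls to the same model group.
import asyncio

from litellm import Router

router = Router(
    model_list=[
        {"model_name": "m", "litellm_params": {"model": "gpt-3.5-turbo"}},
        {"model_name": "m", "litellm_params": {"model": "gpt-4o-mini"}},
    ],
    routing_strategy="simple-shuffle",
)


async def main() -> None:
    messages = [{"role": "user", "content": "identical prompt"}]
    for _ in range(3):
        deployment = await router.async_get_available_deployment(
            model="m", messages=messages
        )
        # Without the cache lookup, the chosen deployment can differ per call.
        print(deployment["model_info"]["id"])


asyncio.run(main())
```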