fix(litellm/caching/caching_handler.py): fix kwargs[litellm_params][preset_cache_key] not being propagated up the call stack during a cache hit, resulting in the x-litellm-cache-key header not being returned

2025-04-25 18:54:30 +00:00 · 2025-02-25 22:04:47 -05:00 · 2025-02-25 22:04:47 -05:00 · 5bcb50d4a2
commit 5bcb50d4a2
parent f9cee4c46b
1 changed files with 9 additions and 2 deletions
--- a/litellm/caching/caching_handler.py
+++ b/litellm/caching/caching_handler.py
@@ -476,14 +476,17 @@ class LLMCachingHandler:
        """
        if litellm.cache is None:
            return None
-
-        new_kwargs = kwargs.copy()
+        new_kwargs = kwargs
        new_kwargs.update(
            convert_args_to_kwargs(
                self.original_function,
                args,
            )
        )
+
+        if "litellm_params" not in new_kwargs:
+            new_kwargs["litellm_params"] = {}
+
        cached_result: Optional[Any] = None
        if call_type == CallTypes.aembedding.value and isinstance(
            new_kwargs["input"], list
@@ -503,6 +506,10 @@ class LLMCachingHandler:
        else:
            if litellm.cache._supports_async() is True:
                cached_result = await litellm.cache.async_get_cache(**new_kwargs)
+                
+                if cached_result == None and list(new_kwargs["litellm_params"].keys()) == ["preset_cache_key"]:
+                    del new_kwargs["litellm_params"]
+
            else:  # for s3 caching. [NOT RECOMMENDED IN PROD - this will slow down responses since boto3 is sync]
                cached_result = litellm.cache.get_cache(**new_kwargs)
        return cached_result