fix(caching.py): use bulk writes and BlockingConnectionPool for reads from Redis

2025-04-25 18:54:30 +00:00 · 2024-01-13 11:50:50 +05:30 · 2024-01-13 11:50:50 +05:30 · 01df37d8cf
commit 01df37d8cf
parent 007870390d
3 changed files with 109 additions and 19 deletions
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -2346,6 +2346,9 @@ def client(original_function):
                        kwargs["input"] = remaining_list

                        if len(non_null_list) > 0:
+                            print_verbose(
+                                f"EMBEDDING CACHE HIT! - {len(non_null_list)}"
+                            )
                            final_embedding_cached_response = EmbeddingResponse(
                                model=kwargs.get("model"),
                                data=[None] * len(original_kwargs_input),
@@ -2451,19 +2454,11 @@ def client(original_function):
                    if isinstance(result, EmbeddingResponse) and isinstance(
                        kwargs["input"], list
                    ):
-                        for idx, i in enumerate(kwargs["input"]):
-                            preset_cache_key = litellm.cache.get_cache_key(
-                                *args, **{**kwargs, "input": i}
+                        asyncio.create_task(
+                            litellm.cache.async_add_cache_pipeline(
+                                result, *args, **kwargs
                            )
-                            embedding_response = result.data[idx]
-                            asyncio.create_task(
-                                litellm.cache.async_add_cache(
-                                    embedding_response,
-                                    *args,
-                                    cache_key=preset_cache_key,
-                                )
-                            )
-                        # pass
+                        )
                    else:
                        asyncio.create_task(
                            litellm.cache.async_add_cache(