From f08bb7e41f99ff416a90a8e33edaa8f09ece4292 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Sat, 13 Jan 2024 16:19:30 +0530
Subject: [PATCH] fix(utils.py): exclude s3 caching from individual item
 caching for embedding list

S3 has no bulk-upload support, so caching each embedding item individually would require one upload per item and slow down calls.

https://github.com/BerriAI/litellm/pull/1417
---
 litellm/caching.py |  4 ++--
 litellm/utils.py   | 14 ++++++++++++--
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/litellm/caching.py b/litellm/caching.py
index 594310b319..c3fbaad6d1 100644
--- a/litellm/caching.py
+++ b/litellm/caching.py
@@ -444,9 +444,9 @@ class Cache:
         """
         if type == "redis":
             self.cache: BaseCache = RedisCache(host, port, password, **kwargs)
-        if type == "local":
+        elif type == "local":
             self.cache = InMemoryCache()
-        if type == "s3":
+        elif type == "s3":
             self.cache = S3Cache(
                 s3_bucket_name=s3_bucket_name,
                 s3_region_name=s3_region_name,
diff --git a/litellm/utils.py b/litellm/utils.py
index 344917118d..15494c3ef9 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -53,6 +53,7 @@ from .integrations.litedebugger import LiteDebugger
 from .proxy._types import KeyManagementSystem
 from openai import OpenAIError as OriginalError
 from openai._models import BaseModel as OpenAIObject
+from .caching import S3Cache
 from .exceptions import (
     AuthenticationError,
     BadRequestError,
@@ -2338,6 +2339,10 @@ def client(original_function):
                         call_type == CallTypes.aembedding.value
                         and cached_result is not None
                         and isinstance(cached_result, list)
+                        and litellm.cache is not None
+                        and not isinstance(
+                            litellm.cache.cache, S3Cache
+                        )  # s3 doesn't support bulk writing. Exclude.
                     ):
                         remaining_list = []
                         non_null_list = []
@@ -2458,8 +2463,13 @@ def client(original_function):
                 if isinstance(result, litellm.ModelResponse) or isinstance(
                     result, litellm.EmbeddingResponse
                 ):
-                    if isinstance(result, EmbeddingResponse) and isinstance(
-                        kwargs["input"], list
+                    if (
+                        isinstance(result, EmbeddingResponse)
+                        and isinstance(kwargs["input"], list)
+                        and litellm.cache is not None
+                        and not isinstance(
+                            litellm.cache.cache, S3Cache
+                        )  # s3 doesn't support bulk writing. Exclude.
                     ):
                         asyncio.create_task(
                             litellm.cache.async_add_cache_pipeline(