From f08bb7e41f99ff416a90a8e33edaa8f09ece4292 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 13 Jan 2024 16:19:30 +0530 Subject: [PATCH] fix(utils.py): exclude s3 caching from individual item caching for embedding list can't bulk upload to s3, so this will slow down calls https://github.com/BerriAI/litellm/pull/1417 --- litellm/caching.py | 4 ++-- litellm/utils.py | 14 ++++++++++++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/litellm/caching.py b/litellm/caching.py index 594310b319..c3fbaad6d1 100644 --- a/litellm/caching.py +++ b/litellm/caching.py @@ -444,9 +444,9 @@ class Cache: """ if type == "redis": self.cache: BaseCache = RedisCache(host, port, password, **kwargs) - if type == "local": + elif type == "local": self.cache = InMemoryCache() - if type == "s3": + elif type == "s3": self.cache = S3Cache( s3_bucket_name=s3_bucket_name, s3_region_name=s3_region_name, diff --git a/litellm/utils.py b/litellm/utils.py index 344917118d..15494c3ef9 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -53,6 +53,7 @@ from .integrations.litedebugger import LiteDebugger from .proxy._types import KeyManagementSystem from openai import OpenAIError as OriginalError from openai._models import BaseModel as OpenAIObject +from .caching import S3Cache from .exceptions import ( AuthenticationError, BadRequestError, @@ -2338,6 +2339,10 @@ def client(original_function): call_type == CallTypes.aembedding.value and cached_result is not None and isinstance(cached_result, list) + and litellm.cache is not None + and not isinstance( + litellm.cache.cache, S3Cache + ) # s3 doesn't support bulk writing. Exclude. ): remaining_list = [] non_null_list = [] @@ -2458,8 +2463,13 @@ def client(original_function): if isinstance(result, litellm.ModelResponse) or isinstance( result, litellm.EmbeddingResponse ): - if isinstance(result, EmbeddingResponse) and isinstance( - kwargs["input"], list + if ( + isinstance(result, EmbeddingResponse) + and isinstance(kwargs["input"], list) + and litellm.cache is not None + and not isinstance( + litellm.cache.cache, S3Cache + ) # s3 doesn't support bulk writing. Exclude. ): asyncio.create_task( litellm.cache.async_add_cache_pipeline(