feat(batch_redis_get.py): batch redis GET requests for a given key + call type

Reduces the number of GET requests we're making in high-throughput scenarios.
This commit is contained in:
Krrish Dholakia 2024-03-15 14:40:11 -07:00
parent e033e84720
commit 226953e1d8
5 changed files with 189 additions and 5 deletions

View file

@ -72,7 +72,7 @@ from .integrations.litedebugger import LiteDebugger
from .proxy._types import KeyManagementSystem
from openai import OpenAIError as OriginalError
from openai._models import BaseModel as OpenAIObject
from .caching import S3Cache, RedisSemanticCache
from .caching import S3Cache, RedisSemanticCache, RedisCache
from .exceptions import (
AuthenticationError,
BadRequestError,
@ -2806,7 +2806,9 @@ def client(original_function):
):
if len(cached_result) == 1 and cached_result[0] is None:
cached_result = None
elif isinstance(litellm.cache.cache, RedisSemanticCache):
elif isinstance(
litellm.cache.cache, RedisSemanticCache
) or isinstance(litellm.cache.cache, RedisCache):
preset_cache_key = litellm.cache.get_cache_key(*args, **kwargs)
kwargs["preset_cache_key"] = (
preset_cache_key # for streaming calls, we need to pass the preset_cache_key