Qdrant semantic caching added

2025-04-25 18:54:30 +00:00 · 2024-08-02 21:07:19 +05:30 · 2024-08-02 21:07:19 +05:30 · 851db5ecea
commit 851db5ecea
parent c64b44aa0e
3 changed files with 449 additions and 5 deletions
--- a/litellm/utils.py
+++ b/litellm/utils.py
@ -113,7 +113,7 @@ import importlib.metadata
 from openai import OpenAIError as OriginalError

 from ._logging import verbose_logger
-from .caching import RedisCache, RedisSemanticCache, S3Cache
+from .caching import RedisCache, RedisSemanticCache, S3Cache, QdrantSemanticCache
 from .exceptions import (
    APIConnectionError,
    APIError,
@ -1114,6 +1114,14 @@ def client(original_function):
                        cached_result = await litellm.cache.async_get_cache(
                            *args, **kwargs
                        )
+                    elif isinstance(litellm.cache.cache, QdrantSemanticCache):
+                        preset_cache_key = litellm.cache.get_cache_key(*args, **kwargs)
+                        kwargs["preset_cache_key"] = (
+                            preset_cache_key  # for streaming calls, we need to pass the preset_cache_key
+                        )
+                        cached_result = await litellm.cache.async_get_cache(
+                            *args, **kwargs
+                        )
                    else:  # for s3 caching. [NOT RECOMMENDED IN PROD - this will slow down responses since boto3 is sync]
                        preset_cache_key = litellm.cache.get_cache_key(*args, **kwargs)
                        kwargs["preset_cache_key"] = (