Qdrant semantic caching added

This commit is contained in:
Haadi Rakhangi 2024-08-02 21:07:19 +05:30
parent c64b44aa0e
commit 851db5ecea
3 changed files with 449 additions and 5 deletions

View file

@@ -113,7 +113,7 @@ import importlib.metadata
from openai import OpenAIError as OriginalError
from ._logging import verbose_logger
from .caching import RedisCache, RedisSemanticCache, S3Cache
from .caching import RedisCache, RedisSemanticCache, S3Cache, QdrantSemanticCache
from .exceptions import (
APIConnectionError,
APIError,
@@ -1114,6 +1114,14 @@ def client(original_function):
cached_result = await litellm.cache.async_get_cache(
*args, **kwargs
)
elif isinstance(litellm.cache.cache, QdrantSemanticCache):
preset_cache_key = litellm.cache.get_cache_key(*args, **kwargs)
kwargs["preset_cache_key"] = (
preset_cache_key # for streaming calls, we need to pass the preset_cache_key
)
cached_result = await litellm.cache.async_get_cache(
*args, **kwargs
)
else: # for s3 caching. [NOT RECOMMENDED IN PROD - this will slow down responses since boto3 is sync]
preset_cache_key = litellm.cache.get_cache_key(*args, **kwargs)
kwargs["preset_cache_key"] = (