Merge pull request #5018 from haadirakhangi/main

Qdrant Semantic Caching
Ishaan Jaff 2024-08-21 08:50:43 -07:00 committed by GitHub
commit 7d0196191f
5 changed files with 694 additions and 6 deletions


@@ -121,7 +121,7 @@ import importlib.metadata
 from openai import OpenAIError as OriginalError
 from ._logging import verbose_logger
-from .caching import RedisCache, RedisSemanticCache, S3Cache
+from .caching import RedisCache, RedisSemanticCache, S3Cache, QdrantSemanticCache
 from .exceptions import (
     APIConnectionError,
     APIError,
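
For context, here is a minimal sketch of how the newly imported QdrantSemanticCache might be exercised from user code, assuming semantic caching is enabled globally via litellm.Cache with a qdrant-semantic type; the keyword argument names below are assumptions for illustration, not taken from this commit (the litellm caching docs define the exact parameters).

import litellm
from litellm import Cache

# Enable semantic caching backed by Qdrant (parameter names are assumed for illustration).
litellm.cache = Cache(
    type="qdrant-semantic",                           # assumed cache type string
    qdrant_api_base="https://your-qdrant-host:6333",  # assumed: Qdrant endpoint
    qdrant_api_key="your-qdrant-api-key",             # assumed: Qdrant credentials
    qdrant_collection_name="litellm-semantic-cache",  # assumed: collection holding cached embeddings
    similarity_threshold=0.8,                         # assumed: minimum similarity for a cache hit
)

# Subsequent completion calls go through the cached client wrapper patched in this diff.
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "What is semantic caching?"}],
)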
@@ -1164,6 +1164,14 @@ def client(original_function):
             cached_result = await litellm.cache.async_get_cache(
                 *args, **kwargs
             )
+        elif isinstance(litellm.cache.cache, QdrantSemanticCache):
+            preset_cache_key = litellm.cache.get_cache_key(*args, **kwargs)
+            kwargs["preset_cache_key"] = (
+                preset_cache_key  # for streaming calls, we need to pass the preset_cache_key
+            )
+            cached_result = await litellm.cache.async_get_cache(
+                *args, **kwargs
+            )
         else:  # for s3 caching. [NOT RECOMMENDED IN PROD - this will slow down responses since boto3 is sync]
             preset_cache_key = litellm.cache.get_cache_key(*args, **kwargs)
             kwargs["preset_cache_key"] = (