mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 11:14:04 +00:00
Merge pull request #5018 from haadirakhangi/main
Qdrant Semantic Caching
This commit is contained in:
commit
a34aeafdb5
5 changed files with 694 additions and 6 deletions
|
@@ -121,7 +121,7 @@ import importlib.metadata
|
|||
from openai import OpenAIError as OriginalError
|
||||
|
||||
from ._logging import verbose_logger
|
||||
from .caching import RedisCache, RedisSemanticCache, S3Cache
|
||||
from .caching import RedisCache, RedisSemanticCache, S3Cache, QdrantSemanticCache
|
||||
from .exceptions import (
|
||||
APIConnectionError,
|
||||
APIError,
|
||||
|
@@ -1164,6 +1164,14 @@ def client(original_function):
|
|||
cached_result = await litellm.cache.async_get_cache(
|
||||
*args, **kwargs
|
||||
)
|
||||
elif isinstance(litellm.cache.cache, QdrantSemanticCache):
|
||||
preset_cache_key = litellm.cache.get_cache_key(*args, **kwargs)
|
||||
kwargs["preset_cache_key"] = (
|
||||
preset_cache_key # for streaming calls, we need to pass the preset_cache_key
|
||||
)
|
||||
cached_result = await litellm.cache.async_get_cache(
|
||||
*args, **kwargs
|
||||
)
|
||||
else: # for s3 caching. [NOT RECOMMENDED IN PROD - this will slow down responses since boto3 is sync]
|
||||
preset_cache_key = litellm.cache.get_cache_key(*args, **kwargs)
|
||||
kwargs["preset_cache_key"] = (
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue