fixes for using qdrant with litellm proxy

Ishaan Jaff 2024-08-21 12:36:41 -07:00
parent a39eff07df
commit 8c83fb3f34
3 changed files with 16 additions and 19 deletions

@@ -161,8 +161,7 @@ random_number = random.randint(
 print("testing semantic caching")
 litellm.cache = Cache(
     type="qdrant-semantic",
-    qdrant_host_type="cloud", # can be either 'cloud' or 'local'
-    qdrant_url=os.environ["QDRANT_URL"],
+    qdrant_url=os.environ["QDRANT_API_BASE"],
     qdrant_api_key=os.environ["QDRANT_API_KEY"],
     qdrant_collection_name="your_collection_name", # any name of your collection
     similarity_threshold=0.7, # similarity threshold for cache hits, 0 == no similarity, 1 = exact matches, 0.5 == 50% similarity
@@ -496,7 +495,6 @@ def __init__(
     qdrant_collection_name: Optional[str] = None,
     qdrant_quantization_config: Optional[str] = None,
     qdrant_semantic_cache_embedding_model="text-embedding-ada-002",
-    qdrant_host_type: Optional[Literal["local","cloud"]] = "local",
     **kwargs
 ):
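
For context, a minimal, self-contained sketch of the setup the updated docs describe. It assumes QDRANT_API_BASE, QDRANT_API_KEY, and OPENAI_API_KEY are exported; the collection name, model, and prompts are placeholders rather than values taken from this diff.

import os

import litellm
from litellm import completion
from litellm.caching import Cache

# Point the cache at Qdrant; the cluster url now comes from QDRANT_API_BASE.
litellm.cache = Cache(
    type="qdrant-semantic",
    qdrant_url=os.environ["QDRANT_API_BASE"],
    qdrant_api_key=os.environ["QDRANT_API_KEY"],
    qdrant_collection_name="litellm-semantic-cache",  # placeholder collection name
    similarity_threshold=0.7,  # 1.0 == exact match, lower values accept looser matches
)

# Two semantically similar prompts; the second call should normally be
# answered from the Qdrant-backed cache instead of the model.
for prompt in ["What is the capital of France?", "Tell me the capital city of France."]:
    response = completion(
        model="gpt-3.5-turbo",  # any model you have credentials for
        messages=[{"role": "user", "content": prompt}],
        caching=True,
    )
    print(response.choices[0].message.content)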

@@ -1252,7 +1252,7 @@ class QdrantSemanticCache(BaseCache):
         self.embedding_model = embedding_model
         headers = {}
         if qdrant_url is None:
-            qdrant_url = os.getenv("QDRANT_URL")
+            qdrant_url = os.getenv("QDRANT_URL") or os.getenv("QDRANT_API_BASE")
         if qdrant_api_key is None:
             qdrant_api_key = os.getenv("QDRANT_API_KEY")
         if qdrant_url is not None and qdrant_api_key is not None:
@@ -2116,7 +2116,6 @@ class Cache:
         qdrant_collection_name: Optional[str] = None,
         qdrant_quantization_config: Optional[str] = None,
         qdrant_semantic_cache_embedding_model="text-embedding-ada-002",
-        qdrant_host_type: Optional[Literal["local", "cloud"]] = "local",
         **kwargs,
     ):
         """
@@ -2128,8 +2127,7 @@ class Cache:
             port (int, optional): The port number for the Redis cache. Required if type is "redis".
             password (str, optional): The password for the Redis cache. Required if type is "redis".
             qdrant_url (str, optional): The url for your qdrant cluster. Required if type is "qdrant-semantic".
-            qdrant_api_key (str, optional): The api_key for the local or cloud qdrant cluster. Required if qdrant_host_type is "cloud" and optional if qdrant_host_type is "local".
-            qdrant_host_type (str, optional): Can be either "local" or "cloud". Should be "local" when you are running a local qdrant cluster or "cloud" when you are using a qdrant cloud cluster.
+            qdrant_api_key (str, optional): The api_key for the local or cloud qdrant cluster.
             qdrant_collection_name (str, optional): The name for your qdrant collection. Required if type is "qdrant-semantic".
             similarity_threshold (float, optional): The similarity threshold for semantic-caching, Required if type is "redis-semantic" or "qdrant-semantic".
@@ -2164,7 +2162,6 @@ class Cache:
                 similarity_threshold=similarity_threshold,
                 quantization_config=qdrant_quantization_config,
                 embedding_model=qdrant_semantic_cache_embedding_model,
-                host_type=qdrant_host_type,
             )
         elif type == "local":
             self.cache = InMemoryCache()
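
The fallback added in QdrantSemanticCache means that, when qdrant_url is not passed explicitly, either QDRANT_URL or the proxy-style QDRANT_API_BASE resolves to the cluster url. A small standalone helper illustrating the lookup order (not the library's own function; the url below is a placeholder):

import os
from typing import Optional

def resolve_qdrant_url(qdrant_url: Optional[str] = None) -> Optional[str]:
    """Mirror the lookup order above: explicit argument, then QDRANT_URL, then QDRANT_API_BASE."""
    if qdrant_url is None:
        qdrant_url = os.getenv("QDRANT_URL") or os.getenv("QDRANT_API_BASE")
    return qdrant_url

# Example: only QDRANT_API_BASE is set, as in the proxy config below.
os.environ.pop("QDRANT_URL", None)
os.environ["QDRANT_API_BASE"] = "https://my-cluster.example.qdrant.io"  # placeholder
assert resolve_qdrant_url() == "https://my-cluster.example.qdrant.io"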

@@ -4,15 +4,17 @@ model_list:
       model: openai/fake
       api_key: fake-key
       api_base: https://exampleopenaiendpoint-production.up.railway.app/
-guardrails:
-  - guardrail_name: "lakera-pre-guard"
+  - model_name: openai-embedding
     litellm_params:
-      guardrail: lakera # supported values: "aporia", "bedrock", "lakera"
-      mode: "during_call"
-      api_key: os.environ/LAKERA_API_KEY
-      api_base: os.environ/LAKERA_API_BASE
-      category_thresholds:
-        prompt_injection: 0.1
-        jailbreak: 0.1
+      model: openai/text-embedding-3-small
+      api_key: os.environ/OPENAI_API_KEY
+litellm_settings:
+  set_verbose: True
+  cache: True # set cache responses to True, litellm defaults to using a redis cache
+  cache_params:
+    type: qdrant-semantic
+    qdrant_semantic_cache_embedding_model: openai-embedding
+    qdrant_collection_name: test_collection
+    qdrant_quantization_config: binary
+    similarity_threshold: 0.8 # similarity threshold for semantic cache
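
To exercise this config end to end, the proxy would be started with QDRANT_API_BASE, QDRANT_API_KEY, and OPENAI_API_KEY set, and near-duplicate requests sent through it. A rough client-side sketch using the OpenAI SDK; the proxy address, master key, and chat model name are assumptions, not values taken from this diff.

from openai import OpenAI

client = OpenAI(
    base_url="http://0.0.0.0:4000",  # assumed local litellm proxy address
    api_key="sk-1234",               # placeholder proxy master key
)

for prompt in ["What is LiteLLM?", "Explain what LiteLLM is."]:
    resp = client.chat.completions.create(
        model="fake-openai-endpoint",  # assumed model_name from the proxy's model_list
        messages=[{"role": "user", "content": prompt}],
    )
    print(resp.choices[0].message.content)

With similarity_threshold set to 0.8, the second, near-duplicate prompt should normally be served from the Qdrant-backed semantic cache rather than the upstream model.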