fixes for using qdrant with litellm proxy

Ishaan Jaff 2024-08-21 12:36:41 -07:00
parent a39eff07df
commit 8c83fb3f34
3 changed files with 16 additions and 19 deletions

@@ -161,8 +161,7 @@ random_number = random.randint(
 print("testing semantic caching")
 litellm.cache = Cache(
     type="qdrant-semantic",
-    qdrant_host_type="cloud", # can be either 'cloud' or 'local'
-    qdrant_url=os.environ["QDRANT_URL"],
+    qdrant_url=os.environ["QDRANT_API_BASE"],
     qdrant_api_key=os.environ["QDRANT_API_KEY"],
     qdrant_collection_name="your_collection_name", # any name of your collection
     similarity_threshold=0.7, # similarity threshold for cache hits, 0 == no similarity, 1 = exact matches, 0.5 == 50% similarity
@@ -496,7 +495,6 @@ def __init__(
     qdrant_collection_name: Optional[str] = None,
     qdrant_quantization_config: Optional[str] = None,
     qdrant_semantic_cache_embedding_model="text-embedding-ada-002",
-    qdrant_host_type: Optional[Literal["local","cloud"]] = "local",
     **kwargs
 ):
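
For context, a minimal, self-contained sketch of the setup the updated docs describe. It assumes QDRANT_API_BASE, QDRANT_API_KEY, and OPENAI_API_KEY are exported; the collection name, model, and prompts are placeholders rather than values taken from this diff.

import os

import litellm
from litellm import completion
from litellm.caching import Cache

# Point the cache at Qdrant; the cluster url now comes from QDRANT_API_BASE.
litellm.cache = Cache(
    type="qdrant-semantic",
    qdrant_url=os.environ["QDRANT_API_BASE"],
    qdrant_api_key=os.environ["QDRANT_API_KEY"],
    qdrant_collection_name="litellm-semantic-cache",  # placeholder collection name
    similarity_threshold=0.7,  # 1.0 == exact match, lower values accept looser matches
)

# Two semantically similar prompts; the second call should normally be
# answered from the Qdrant-backed cache instead of the model.
for prompt in ["What is the capital of France?", "Tell me the capital city of France."]:
    response = completion(
        model="gpt-3.5-turbo",  # any model you have credentials for
        messages=[{"role": "user", "content": prompt}],
        caching=True,
    )
    print(response.choices[0].message.content)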

@@ -1252,7 +1252,7 @@ class QdrantSemanticCache(BaseCache):
         self.embedding_model = embedding_model
         headers = {}
         if qdrant_url is None:
-            qdrant_url = os.getenv("QDRANT_URL")
+            qdrant_url = os.getenv("QDRANT_URL") or os.getenv("QDRANT_API_BASE")
         if qdrant_api_key is None:
             qdrant_api_key = os.getenv("QDRANT_API_KEY")
         if qdrant_url is not None and qdrant_api_key is not None:
@@ -2116,7 +2116,6 @@ class Cache:
         qdrant_collection_name: Optional[str] = None,
         qdrant_quantization_config: Optional[str] = None,
         qdrant_semantic_cache_embedding_model="text-embedding-ada-002",
-        qdrant_host_type: Optional[Literal["local", "cloud"]] = "local",
         **kwargs,
     ):
         """
@@ -2128,8 +2127,7 @@ class Cache:
             port (int, optional): The port number for the Redis cache. Required if type is "redis".
             password (str, optional): The password for the Redis cache. Required if type is "redis".
             qdrant_url (str, optional): The url for your qdrant cluster. Required if type is "qdrant-semantic".
-            qdrant_api_key (str, optional): The api_key for the local or cloud qdrant cluster. Required if qdrant_host_type is "cloud" and optional if qdrant_host_type is "local".
-            qdrant_host_type (str, optional): Can be either "local" or "cloud". Should be "local" when you are running a local qdrant cluster or "cloud" when you are using a qdrant cloud cluster.
+            qdrant_api_key (str, optional): The api_key for the local or cloud qdrant cluster.
             qdrant_collection_name (str, optional): The name for your qdrant collection. Required if type is "qdrant-semantic".
             similarity_threshold (float, optional): The similarity threshold for semantic-caching, Required if type is "redis-semantic" or "qdrant-semantic".
@@ -2164,7 +2162,6 @@ class Cache:
                 similarity_threshold=similarity_threshold,
                 quantization_config=qdrant_quantization_config,
                 embedding_model=qdrant_semantic_cache_embedding_model,
-                host_type=qdrant_host_type,
             )
         elif type == "local":
             self.cache = InMemoryCache()
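
The fallback added in QdrantSemanticCache means that, when qdrant_url is not passed explicitly, either QDRANT_URL or the proxy-style QDRANT_API_BASE resolves to the cluster url. A small standalone helper illustrating the lookup order (not the library's own function; the url below is a placeholder):

import os
from typing import Optional

def resolve_qdrant_url(qdrant_url: Optional[str] = None) -> Optional[str]:
    """Mirror the lookup order above: explicit argument, then QDRANT_URL, then QDRANT_API_BASE."""
    if qdrant_url is None:
        qdrant_url = os.getenv("QDRANT_URL") or os.getenv("QDRANT_API_BASE")
    return qdrant_url

# Example: only QDRANT_API_BASE is set, as in the proxy config below.
os.environ.pop("QDRANT_URL", None)
os.environ["QDRANT_API_BASE"] = "https://my-cluster.example.qdrant.io"  # placeholder
assert resolve_qdrant_url() == "https://my-cluster.example.qdrant.io"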

@@ -4,15 +4,17 @@ model_list:
       model: openai/fake
       api_key: fake-key
       api_base: https://exampleopenaiendpoint-production.up.railway.app/
-guardrails:
-  - guardrail_name: "lakera-pre-guard"
+  - model_name: openai-embedding
     litellm_params:
-      guardrail: lakera # supported values: "aporia", "bedrock", "lakera"
-      mode: "during_call"
-      api_key: os.environ/LAKERA_API_KEY
-      api_base: os.environ/LAKERA_API_BASE
-      category_thresholds:
-        prompt_injection: 0.1
-        jailbreak: 0.1
+      model: openai/text-embedding-3-small
+      api_key: os.environ/OPENAI_API_KEY
+litellm_settings:
+  set_verbose: True
+  cache: True # set cache responses to True, litellm defaults to using a redis cache
+  cache_params:
+    type: qdrant-semantic
+    qdrant_semantic_cache_embedding_model: openai-embedding
+    qdrant_collection_name: test_collection
+    qdrant_quantization_config: binary
+    similarity_threshold: 0.8 # similarity threshold for semantic cache
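
To exercise this config end to end, the proxy would be started with QDRANT_API_BASE, QDRANT_API_KEY, and OPENAI_API_KEY set, and near-duplicate requests sent through it. A rough client-side sketch using the OpenAI SDK; the proxy address, master key, and chat model name are assumptions, not values taken from this diff.

from openai import OpenAI

client = OpenAI(
    base_url="http://0.0.0.0:4000",  # assumed local litellm proxy address
    api_key="sk-1234",               # placeholder proxy master key
)

for prompt in ["What is LiteLLM?", "Explain what LiteLLM is."]:
    resp = client.chat.completions.create(
        model="fake-openai-endpoint",  # assumed model_name from the proxy's model_list
        messages=[{"role": "user", "content": prompt}],
    )
    print(resp.choices[0].message.content)

With similarity_threshold set to 0.8, the second, near-duplicate prompt should normally be served from the Qdrant-backed semantic cache rather than the upstream model.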