mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-27 11:43:54 +00:00
fixes for using qdrant with litellm proxy
This commit is contained in:
parent
9dbd1b089b
commit
c6dfd2d276
3 changed files with 16 additions and 19 deletions
|
@ -161,8 +161,7 @@ random_number = random.randint(
|
|||
print("testing semantic caching")
|
||||
litellm.cache = Cache(
|
||||
type="qdrant-semantic",
|
||||
qdrant_host_type="cloud", # can be either 'cloud' or 'local'
|
||||
qdrant_url=os.environ["QDRANT_URL"],
|
||||
qdrant_url=os.environ["QDRANT_API_BASE"],
|
||||
qdrant_api_key=os.environ["QDRANT_API_KEY"],
|
||||
qdrant_collection_name="your_collection_name", # any name of your collection
|
||||
similarity_threshold=0.7, # similarity threshold for cache hits: 0 == no similarity, 0.5 == 50% similarity, 1 == exact match
|
||||
|
@ -496,7 +495,6 @@ def __init__(
|
|||
qdrant_collection_name: Optional[str] = None,
|
||||
qdrant_quantization_config: Optional[str] = None,
|
||||
qdrant_semantic_cache_embedding_model="text-embedding-ada-002",
|
||||
qdrant_host_type: Optional[Literal["local","cloud"]] = "local",
|
||||
|
||||
**kwargs
|
||||
):
|
||||
|
|
|
@ -1252,7 +1252,7 @@ class QdrantSemanticCache(BaseCache):
|
|||
self.embedding_model = embedding_model
|
||||
headers = {}
|
||||
if qdrant_url is None:
|
||||
qdrant_url = os.getenv("QDRANT_URL")
|
||||
qdrant_url = os.getenv("QDRANT_URL") or os.getenv("QDRANT_API_BASE")
|
||||
if qdrant_api_key is None:
|
||||
qdrant_api_key = os.getenv("QDRANT_API_KEY")
|
||||
if qdrant_url is not None and qdrant_api_key is not None:
|
||||
|
@ -2116,7 +2116,6 @@ class Cache:
|
|||
qdrant_collection_name: Optional[str] = None,
|
||||
qdrant_quantization_config: Optional[str] = None,
|
||||
qdrant_semantic_cache_embedding_model="text-embedding-ada-002",
|
||||
qdrant_host_type: Optional[Literal["local", "cloud"]] = "local",
|
||||
**kwargs,
|
||||
):
|
||||
"""
|
||||
|
@ -2128,8 +2127,7 @@ class Cache:
|
|||
port (int, optional): The port number for the Redis cache. Required if type is "redis".
|
||||
password (str, optional): The password for the Redis cache. Required if type is "redis".
|
||||
qdrant_url (str, optional): The url for your qdrant cluster. Required if type is "qdrant-semantic".
|
||||
qdrant_api_key (str, optional): The api_key for the local or cloud qdrant cluster. Required if qdrant_host_type is "cloud" and optional if qdrant_host_type is "local".
|
||||
qdrant_host_type (str, optional): Can be either "local" or "cloud". Should be "local" when you are running a local qdrant cluster or "cloud" when you are using a qdrant cloud cluster.
|
||||
qdrant_api_key (str, optional): The api_key for the local or cloud qdrant cluster.
|
||||
qdrant_collection_name (str, optional): The name for your qdrant collection. Required if type is "qdrant-semantic".
|
||||
similarity_threshold (float, optional): The similarity threshold for semantic-caching. Required if type is "redis-semantic" or "qdrant-semantic".
|
||||
|
||||
|
@ -2164,7 +2162,6 @@ class Cache:
|
|||
similarity_threshold=similarity_threshold,
|
||||
quantization_config=qdrant_quantization_config,
|
||||
embedding_model=qdrant_semantic_cache_embedding_model,
|
||||
host_type=qdrant_host_type,
|
||||
)
|
||||
elif type == "local":
|
||||
self.cache = InMemoryCache()
|
||||
|
|
|
@ -4,15 +4,17 @@ model_list:
|
|||
model: openai/fake
|
||||
api_key: fake-key
|
||||
api_base: https://exampleopenaiendpoint-production.up.railway.app/
|
||||
|
||||
guardrails:
|
||||
- guardrail_name: "lakera-pre-guard"
|
||||
- model_name: openai-embedding
|
||||
litellm_params:
|
||||
guardrail: lakera # supported values: "aporia", "bedrock", "lakera"
|
||||
mode: "during_call"
|
||||
api_key: os.environ/LAKERA_API_KEY
|
||||
api_base: os.environ/LAKERA_API_BASE
|
||||
category_thresholds:
|
||||
prompt_injection: 0.1
|
||||
jailbreak: 0.1
|
||||
|
||||
model: openai/text-embedding-3-small
|
||||
api_key: os.environ/OPENAI_API_KEY
|
||||
|
||||
litellm_settings:
|
||||
set_verbose: True
|
||||
cache: True # set cache responses to True, litellm defaults to using a redis cache
|
||||
cache_params:
|
||||
type: qdrant-semantic
|
||||
qdrant_semantic_cache_embedding_model: openai-embedding
|
||||
qdrant_collection_name: test_collection
|
||||
qdrant_quantization_config: binary
|
||||
similarity_threshold: 0.8 # similarity threshold for semantic cache
|
Loading…
Add table
Add a link
Reference in a new issue