diff --git a/.circleci/requirements.txt b/.circleci/requirements.txt
index 356a9840f5..cada0f605e 100644
--- a/.circleci/requirements.txt
+++ b/.circleci/requirements.txt
@@ -4,7 +4,8 @@ python-dotenv
 tiktoken
 importlib_metadata
 cohere
-redis
+redis==5.2.1
+redisvl==0.4.1
 anthropic
 orjson==3.9.15
 pydantic==2.7.1
diff --git a/.gitignore b/.gitignore
index dab6d4ec81..1cdedb83fc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+.python-version
 .venv
 .env
 .newenv
diff --git a/Dockerfile b/Dockerfile
index dd699c795b..e0ae2ed091 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -37,9 +37,6 @@ RUN pip install dist/*.whl
 # install dependencies as wheels
 RUN pip wheel --no-cache-dir --wheel-dir=/wheels/ -r requirements.txt
 
-# install semantic-cache [Experimental]- we need this here and not in requirements.txt because redisvl pins to pydantic 1.0
-RUN pip install redisvl==0.0.7 --no-deps
-
 # ensure pyjwt is used, not jwt
 RUN pip uninstall jwt -y
 RUN pip uninstall PyJWT -y
diff --git a/docker/Dockerfile.database b/docker/Dockerfile.database
index 02eb286180..f0b5eec8f9 100644
--- a/docker/Dockerfile.database
+++ b/docker/Dockerfile.database
@@ -59,9 +59,6 @@ COPY --from=builder /wheels/ /wheels/
 # Install the built wheel using pip; again using a wildcard if it's the only file
 RUN pip install *.whl /wheels/* --no-index --find-links=/wheels/ && rm -f *.whl && rm -rf /wheels
 
-# install semantic-cache [Experimental]- we need this here and not in requirements.txt because redisvl pins to pydantic 1.0
-RUN pip install redisvl==0.0.7 --no-deps
-
 # ensure pyjwt is used, not jwt
 RUN pip uninstall jwt -y
 RUN pip uninstall PyJWT -y
diff --git a/docker/Dockerfile.non_root b/docker/Dockerfile.non_root
index 3a4cdb59d5..079778cafb 100644
--- a/docker/Dockerfile.non_root
+++ b/docker/Dockerfile.non_root
@@ -14,7 +14,7 @@ SHELL ["/bin/bash", "-o", "pipefail", "-c"]
 
 # Install build dependencies
 RUN apt-get clean && apt-get update && \
-    apt-get install -y gcc python3-dev && \
+    apt-get install -y gcc g++ python3-dev && \
     rm -rf /var/lib/apt/lists/*
 
 RUN pip install --no-cache-dir --upgrade pip && \
@@ -56,10 +56,8 @@ COPY --from=builder /wheels/ /wheels/
 # Install the built wheel using pip; again using a wildcard if it's the only file
 RUN pip install *.whl /wheels/* --no-index --find-links=/wheels/ && rm -f *.whl && rm -rf /wheels
 
-# install semantic-cache [Experimental]- we need this here and not in requirements.txt because redisvl pins to pydantic 1.0
 # ensure pyjwt is used, not jwt
-RUN pip install redisvl==0.0.7 --no-deps --no-cache-dir && \
-    pip uninstall jwt -y && \
+RUN pip uninstall jwt -y && \
     pip uninstall PyJWT -y && \
     pip install PyJWT==2.9.0 --no-cache-dir
 
diff --git a/docs/my-website/docs/caching/all_caches.md b/docs/my-website/docs/caching/all_caches.md
index dc1951cc77..08f99ee321 100644
--- a/docs/my-website/docs/caching/all_caches.md
+++ b/docs/my-website/docs/caching/all_caches.md
@@ -26,7 +26,7 @@ Install redis
 ```
 pip install redis
 ```
 
-For the hosted version you can setup your own Redis DB here: https://app.redislabs.com/
+For the hosted version you can set up your own Redis DB here: https://redis.io/try-free/
 
 ```python
 import litellm
 litellm.cache = Cache(type="redis", host=<host>, port=<port>, password=<password>)
@@ -37,11 +37,11 @@
-Install redis
+Install the redisvl client
 
 ```shell
-pip install redisvl==0.0.7
+pip install redisvl==0.4.1
 ```
 
-For the hosted version you can setup your own Redis DB here: https://app.redislabs.com/
+For the hosted version you can set up your own Redis DB here: https://redis.io/try-free/
 
```python import litellm @@ -114,6 +114,7 @@ litellm.cache = Cache( port=os.environ["REDIS_PORT"], password=os.environ["REDIS_PASSWORD"], similarity_threshold=0.8, # similarity threshold for cache hits, 0 == no similarity, 1 = exact matches, 0.5 == 50% similarity + ttl=120, redis_semantic_cache_embedding_model="text-embedding-ada-002", # this model is passed to litellm.embedding(), any litellm.embedding() model is supported here ) response1 = completion( @@ -471,11 +472,13 @@ def __init__( password: Optional[str] = None, namespace: Optional[str] = None, default_in_redis_ttl: Optional[float] = None, - similarity_threshold: Optional[float] = None, - redis_semantic_cache_use_async=False, - redis_semantic_cache_embedding_model="text-embedding-ada-002", redis_flush_size=None, + # redis semantic cache params + similarity_threshold: Optional[float] = None, + redis_semantic_cache_embedding_model: str = "text-embedding-ada-002", + redis_semantic_cache_index_name: Optional[str] = None, + # s3 Bucket, boto3 configuration s3_bucket_name: Optional[str] = None, s3_region_name: Optional[str] = None, diff --git a/litellm/caching/caching.py b/litellm/caching/caching.py index 415c49edff..affb8e3855 100644 --- a/litellm/caching/caching.py +++ b/litellm/caching/caching.py @@ -88,16 +88,16 @@ class Cache: s3_aws_session_token: Optional[str] = None, s3_config: Optional[Any] = None, s3_path: Optional[str] = None, - redis_semantic_cache_use_async=False, - redis_semantic_cache_embedding_model="text-embedding-ada-002", + redis_semantic_cache_embedding_model: str = "text-embedding-ada-002", + redis_semantic_cache_index_name: Optional[str] = None, redis_flush_size: Optional[int] = None, redis_startup_nodes: Optional[List] = None, - disk_cache_dir=None, + disk_cache_dir: Optional[str] = None, qdrant_api_base: Optional[str] = None, qdrant_api_key: Optional[str] = None, qdrant_collection_name: Optional[str] = None, qdrant_quantization_config: Optional[str] = None, - qdrant_semantic_cache_embedding_model="text-embedding-ada-002", + qdrant_semantic_cache_embedding_model: str = "text-embedding-ada-002", **kwargs, ): """ @@ -170,8 +170,8 @@ class Cache: port=port, password=password, similarity_threshold=similarity_threshold, - use_async=redis_semantic_cache_use_async, embedding_model=redis_semantic_cache_embedding_model, + index_name=redis_semantic_cache_index_name, **kwargs, ) elif type == LiteLLMCacheType.QDRANT_SEMANTIC: diff --git a/litellm/caching/redis_semantic_cache.py b/litellm/caching/redis_semantic_cache.py index b609286a55..f46bb661ef 100644 --- a/litellm/caching/redis_semantic_cache.py +++ b/litellm/caching/redis_semantic_cache.py @@ -1,337 +1,437 @@ """ -Redis Semantic Cache implementation +Redis Semantic Cache implementation for LiteLLM -Has 4 methods: - - set_cache - - get_cache - - async_set_cache - - async_get_cache +The RedisSemanticCache provides semantic caching functionality using Redis as a backend. +This cache stores responses based on the semantic similarity of prompts rather than +exact matching, allowing for more flexible caching of LLM responses. + +This implementation uses RedisVL's SemanticCache to find semantically similar prompts +and their cached responses. 
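+
+Example usage (an illustrative sketch, not an excerpt from the test suite: the
+connection values below are placeholders, and a reachable Redis instance plus
+valid credentials for the configured embedding provider are assumed):
+
+    from litellm.caching.redis_semantic_cache import RedisSemanticCache
+
+    cache = RedisSemanticCache(
+        redis_url="redis://:mypassword@localhost:6379",  # placeholder URL
+        similarity_threshold=0.8,  # accept matches within cosine distance 0.2
+    )
+    messages = [{"role": "user", "content": "What is semantic caching?"}]
+    cache.set_cache(key="", value="<serialized model response>", messages=messages)
+    cached = cache.get_cache(key="", messages=messages)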
""" import ast import asyncio import json -from typing import Any +import os +from typing import Any, Dict, List, Optional, Tuple import litellm from litellm._logging import print_verbose - +from litellm.litellm_core_utils.prompt_templates.common_utils import get_str_from_messages from .base_cache import BaseCache class RedisSemanticCache(BaseCache): + """ + Redis-backed semantic cache for LLM responses. + + This cache uses vector similarity to find semantically similar prompts that have been + previously sent to the LLM, allowing for cache hits even when prompts are not identical + but carry similar meaning. + """ + + DEFAULT_REDIS_INDEX_NAME: str = "litellm_semantic_cache_index" + def __init__( self, - host=None, - port=None, - password=None, - redis_url=None, - similarity_threshold=None, - use_async=False, - embedding_model="text-embedding-ada-002", + host: Optional[str] = None, + port: Optional[str] = None, + password: Optional[str] = None, + redis_url: Optional[str] = None, + similarity_threshold: Optional[float] = None, + embedding_model: str = "text-embedding-ada-002", + index_name: Optional[str] = None, **kwargs, ): - from redisvl.index import SearchIndex - - print_verbose( - "redis semantic-cache initializing INDEX - litellm_semantic_cache_index" - ) - if similarity_threshold is None: - raise Exception("similarity_threshold must be provided, passed None") - self.similarity_threshold = similarity_threshold - self.embedding_model = embedding_model - schema = { - "index": { - "name": "litellm_semantic_cache_index", - "prefix": "litellm", - "storage_type": "hash", - }, - "fields": { - "text": [{"name": "response"}], - "vector": [ - { - "name": "litellm_embedding", - "dims": 1536, - "distance_metric": "cosine", - "algorithm": "flat", - "datatype": "float32", - } - ], - }, - } - if redis_url is None: - # if no url passed, check if host, port and password are passed, if not raise an Exception - if host is None or port is None or password is None: - # try checking env for host, port and password - import os - - host = os.getenv("REDIS_HOST") - port = os.getenv("REDIS_PORT") - password = os.getenv("REDIS_PASSWORD") - if host is None or port is None or password is None: - raise Exception("Redis host, port, and password must be provided") - - redis_url = "redis://:" + password + "@" + host + ":" + port - print_verbose(f"redis semantic-cache redis_url: {redis_url}") - if use_async is False: - self.index = SearchIndex.from_dict(schema) - self.index.connect(redis_url=redis_url) - try: - self.index.create(overwrite=False) # don't overwrite existing index - except Exception as e: - print_verbose(f"Got exception creating semantic cache index: {str(e)}") - elif use_async is True: - schema["index"]["name"] = "litellm_semantic_cache_index_async" - self.index = SearchIndex.from_dict(schema) - self.index.connect(redis_url=redis_url, use_async=True) - - # - def _get_cache_logic(self, cached_response: Any): """ - Common 'get_cache_logic' across sync + async redis client implementations + Initialize the Redis Semantic Cache. 
+
+        Args:
+            host: Redis host address
+            port: Redis port
+            password: Redis password
+            redis_url: Full Redis URL (alternative to separate host/port/password)
+            similarity_threshold: Threshold for semantic similarity (0.0 to 1.0)
+                where 1.0 requires exact matches and 0.0 accepts any match
+            embedding_model: Model to use for generating embeddings
+            index_name: Name for the Redis index
+            **kwargs: Additional arguments passed to the Redis client
+
+        Raises:
+            ValueError: If similarity_threshold is not provided or required Redis
+                connection information is missing
+        """
+        from redisvl.extensions.llmcache import SemanticCache
+        from redisvl.utils.vectorize import CustomTextVectorizer
+
+        if index_name is None:
+            index_name = self.DEFAULT_REDIS_INDEX_NAME
+
+        print_verbose(f"Redis semantic-cache initializing index - {index_name}")
+
+        # Validate similarity threshold
+        if similarity_threshold is None:
+            raise ValueError("similarity_threshold must be provided, passed None")
+
+        # Store configuration
+        self.similarity_threshold = similarity_threshold
+
+        # Convert similarity threshold [0,1] to distance threshold [0,2]
+        # For cosine distance: 0 = most similar, 2 = least similar,
+        # whereas for similarity: 1 = most similar, 0 = least similar
+        # e.g., similarity_threshold=0.8 maps to distance_threshold=0.2
+        self.distance_threshold = 1 - similarity_threshold
+        self.embedding_model = embedding_model
+
+        # Set up Redis connection
+        if redis_url is None:
+            try:
+                # Attempt to use provided parameters or fall back to environment variables
+                host = host or os.environ['REDIS_HOST']
+                port = port or os.environ['REDIS_PORT']
+                password = password or os.environ['REDIS_PASSWORD']
+            except KeyError as e:
+                # Raise a more informative exception if any of the required keys are missing
+                missing_var = e.args[0]
+                raise ValueError(f"Missing required Redis configuration: {missing_var}. "
+                                 f"Provide {missing_var} or redis_url.") from e
+
+            redis_url = f"redis://:{password}@{host}:{port}"
+
+        print_verbose(f"Redis semantic-cache redis_url: {redis_url}")
+
+        # Initialize the Redis vectorizer and cache
+        cache_vectorizer = CustomTextVectorizer(self._get_embedding)
+
+        self.llmcache = SemanticCache(
+            name=index_name,
+            redis_url=redis_url,
+            vectorizer=cache_vectorizer,
+            distance_threshold=self.distance_threshold,
+            overwrite=False,
+        )
+
+    def _get_ttl(self, **kwargs) -> Optional[int]:
+        """
+        Get the TTL (time-to-live) value for cache entries.
+
+        Args:
+            **kwargs: Keyword arguments that may contain a custom TTL
+
+        Returns:
+            Optional[int]: The TTL value in seconds, or None if no TTL should be applied
+        """
+        ttl = kwargs.get("ttl")
+        if ttl is not None:
+            ttl = int(ttl)
+        return ttl
+
+    def _get_embedding(self, prompt: str) -> List[float]:
+        """
+        Generate an embedding vector for the given prompt using the configured embedding model.
+
+        Args:
+            prompt: The text to generate an embedding for
+
+        Returns:
+            List[float]: The embedding vector
+        """
+        # Create an embedding from the prompt
+        embedding_response = litellm.embedding(
+            model=self.embedding_model,
+            input=prompt,
+            cache={"no-store": True, "no-cache": True},
+        )
+        embedding = embedding_response["data"][0]["embedding"]
+        return embedding
+
+    def _get_cache_logic(self, cached_response: Any) -> Any:
+        """
+        Process the cached response to prepare it for use.
+ + Args: + cached_response: The raw cached response + + Returns: + The processed cache response, or None if input was None """ if cached_response is None: return cached_response - # check if cached_response is bytes + # Convert bytes to string if needed if isinstance(cached_response, bytes): cached_response = cached_response.decode("utf-8") + # Convert string representation to Python object try: - cached_response = json.loads( - cached_response - ) # Convert string to dictionary - except Exception: - cached_response = ast.literal_eval(cached_response) + cached_response = json.loads(cached_response) + except json.JSONDecodeError: + try: + cached_response = ast.literal_eval(cached_response) + except (ValueError, SyntaxError) as e: + print_verbose(f"Error parsing cached response: {str(e)}") + return None + return cached_response - def set_cache(self, key, value, **kwargs): - import numpy as np - - print_verbose(f"redis semantic-cache set_cache, kwargs: {kwargs}") - - # get the prompt - messages = kwargs["messages"] - prompt = "".join(message["content"] for message in messages) - - # create an embedding for prompt - embedding_response = litellm.embedding( - model=self.embedding_model, - input=prompt, - cache={"no-store": True, "no-cache": True}, - ) - - # get the embedding - embedding = embedding_response["data"][0]["embedding"] - - # make the embedding a numpy array, convert to bytes - embedding_bytes = np.array(embedding, dtype=np.float32).tobytes() - value = str(value) - assert isinstance(value, str) - - new_data = [ - {"response": value, "prompt": prompt, "litellm_embedding": embedding_bytes} - ] - - # Add more data - self.index.load(new_data) - - return - - def get_cache(self, key, **kwargs): - print_verbose(f"sync redis semantic-cache get_cache, kwargs: {kwargs}") - from redisvl.query import VectorQuery - - # query - # get the messages - messages = kwargs["messages"] - prompt = "".join(message["content"] for message in messages) - - # convert to embedding - embedding_response = litellm.embedding( - model=self.embedding_model, - input=prompt, - cache={"no-store": True, "no-cache": True}, - ) - - # get the embedding - embedding = embedding_response["data"][0]["embedding"] - - query = VectorQuery( - vector=embedding, - vector_field_name="litellm_embedding", - return_fields=["response", "prompt", "vector_distance"], - num_results=1, - ) - - results = self.index.query(query) - if results is None: - return None - if isinstance(results, list): - if len(results) == 0: - return None - - vector_distance = results[0]["vector_distance"] - vector_distance = float(vector_distance) - similarity = 1 - vector_distance - cached_prompt = results[0]["prompt"] - - # check similarity, if more than self.similarity_threshold, return results - print_verbose( - f"semantic cache: similarity threshold: {self.similarity_threshold}, similarity: {similarity}, prompt: {prompt}, closest_cached_prompt: {cached_prompt}" - ) - if similarity > self.similarity_threshold: - # cache hit ! - cached_value = results[0]["response"] - print_verbose( - f"got a cache hit, similarity: {similarity}, Current prompt: {prompt}, cached_prompt: {cached_prompt}" - ) - return self._get_cache_logic(cached_response=cached_value) - else: - # cache miss ! - return None - - pass - - async def async_set_cache(self, key, value, **kwargs): - import numpy as np - - from litellm.proxy.proxy_server import llm_model_list, llm_router + def set_cache(self, key: str, value: Any, **kwargs) -> None: + """ + Store a value in the semantic cache. 
+
+        Args:
+            key: The cache key (not directly used in semantic caching)
+            value: The response value to cache
+            **kwargs: Additional arguments including 'messages' for the prompt
+                and optional 'ttl' for time-to-live
+        """
+        print_verbose(f"Redis semantic-cache set_cache, kwargs: {kwargs}")
         try:
-            await self.index.acreate(overwrite=False)  # don't overwrite existing index
+            # Extract the prompt from messages
+            messages = kwargs.get("messages", [])
+            if not messages:
+                print_verbose("No messages provided for semantic caching")
+                return
+
+            prompt = get_str_from_messages(messages)
+            value_str = str(value)
+
+            # Get TTL and store in Redis semantic cache
+            ttl = self._get_ttl(**kwargs)
+            if ttl is not None:
+                self.llmcache.store(prompt, value_str, ttl=int(ttl))
+            else:
+                self.llmcache.store(prompt, value_str)
         except Exception as e:
-            print_verbose(f"Got exception creating semantic cache index: {str(e)}")
-        print_verbose(f"async redis semantic-cache set_cache, kwargs: {kwargs}")
+            # NOTE: don't reference value_str here; it may be unbound if the
+            # failure happened before it was assigned
+            print_verbose(f"Error setting value in the Redis semantic cache: {str(e)}")
 
-        # get the prompt
-        messages = kwargs["messages"]
-        prompt = "".join(message["content"] for message in messages)
-        # create an embedding for prompt
-        router_model_names = (
-            [m["model_name"] for m in llm_model_list]
-            if llm_model_list is not None
-            else []
-        )
-        if llm_router is not None and self.embedding_model in router_model_names:
-            user_api_key = kwargs.get("metadata", {}).get("user_api_key", "")
-            embedding_response = await llm_router.aembedding(
-                model=self.embedding_model,
-                input=prompt,
-                cache={"no-store": True, "no-cache": True},
-                metadata={
-                    "user_api_key": user_api_key,
-                    "semantic-cache-embedding": True,
-                    "trace_id": kwargs.get("metadata", {}).get("trace_id", None),
-                },
-            )
-        else:
-            # convert to embedding
-            embedding_response = await litellm.aembedding(
-                model=self.embedding_model,
-                input=prompt,
-                cache={"no-store": True, "no-cache": True},
-            )
+    def get_cache(self, key: str, **kwargs) -> Any:
+        """
+        Retrieve a semantically similar cached response.
+ + Args: + key: The cache key (not directly used in semantic caching) + **kwargs: Additional arguments including 'messages' for the prompt + + Returns: + The cached response if a semantically similar prompt is found, else None + """ + print_verbose(f"Redis semantic-cache get_cache, kwargs: {kwargs}") - # get the embedding - embedding = embedding_response["data"][0]["embedding"] + try: + # Extract the prompt from messages + messages = kwargs.get("messages", []) + if not messages: + print_verbose("No messages provided for semantic cache lookup") + return None + + prompt = get_str_from_messages(messages) + # Check the cache for semantically similar prompts + results = self.llmcache.check(prompt=prompt) - # make the embedding a numpy array, convert to bytes - embedding_bytes = np.array(embedding, dtype=np.float32).tobytes() - value = str(value) - assert isinstance(value, str) - - new_data = [ - {"response": value, "prompt": prompt, "litellm_embedding": embedding_bytes} - ] - - # Add more data - await self.index.aload(new_data) - return - - async def async_get_cache(self, key, **kwargs): - print_verbose(f"async redis semantic-cache get_cache, kwargs: {kwargs}") - from redisvl.query import VectorQuery - - from litellm.proxy.proxy_server import llm_model_list, llm_router - - # query - # get the messages - messages = kwargs["messages"] - prompt = "".join(message["content"] for message in messages) - - router_model_names = ( - [m["model_name"] for m in llm_model_list] - if llm_model_list is not None - else [] - ) - if llm_router is not None and self.embedding_model in router_model_names: - user_api_key = kwargs.get("metadata", {}).get("user_api_key", "") - embedding_response = await llm_router.aembedding( - model=self.embedding_model, - input=prompt, - cache={"no-store": True, "no-cache": True}, - metadata={ - "user_api_key": user_api_key, - "semantic-cache-embedding": True, - "trace_id": kwargs.get("metadata", {}).get("trace_id", None), - }, - ) - else: - # convert to embedding - embedding_response = await litellm.aembedding( - model=self.embedding_model, - input=prompt, - cache={"no-store": True, "no-cache": True}, - ) - - # get the embedding - embedding = embedding_response["data"][0]["embedding"] - - query = VectorQuery( - vector=embedding, - vector_field_name="litellm_embedding", - return_fields=["response", "prompt", "vector_distance"], - ) - results = await self.index.aquery(query) - if results is None: - kwargs.setdefault("metadata", {})["semantic-similarity"] = 0.0 - return None - if isinstance(results, list): - if len(results) == 0: - kwargs.setdefault("metadata", {})["semantic-similarity"] = 0.0 + # Return None if no similar prompts found + if not results: return None - vector_distance = results[0]["vector_distance"] - vector_distance = float(vector_distance) - similarity = 1 - vector_distance - cached_prompt = results[0]["prompt"] + # Process the best matching result + cache_hit = results[0] + vector_distance = float(cache_hit["vector_distance"]) + + # Convert vector distance back to similarity score + # For cosine distance: 0 = most similar, 2 = least similar + # While similarity: 1 = most similar, 0 = least similar + similarity = 1 - vector_distance + + cached_prompt = cache_hit["prompt"] + cached_response = cache_hit["response"] - # check similarity, if more than self.similarity_threshold, return results - print_verbose( - f"semantic cache: similarity threshold: {self.similarity_threshold}, similarity: {similarity}, prompt: {prompt}, closest_cached_prompt: {cached_prompt}" - ) - - # 
update kwargs["metadata"] with similarity, don't rewrite the original metadata - kwargs.setdefault("metadata", {})["semantic-similarity"] = similarity - - if similarity > self.similarity_threshold: - # cache hit ! - cached_value = results[0]["response"] print_verbose( - f"got a cache hit, similarity: {similarity}, Current prompt: {prompt}, cached_prompt: {cached_prompt}" + f"Cache hit: similarity threshold: {self.similarity_threshold}, " + f"actual similarity: {similarity}, " + f"current prompt: {prompt}, " + f"cached prompt: {cached_prompt}" ) - return self._get_cache_logic(cached_response=cached_value) - else: - # cache miss ! - return None - pass + + return self._get_cache_logic(cached_response=cached_response) + except Exception as e: + print_verbose(f"Error retrieving from Redis semantic cache: {str(e)}") + + async def _get_async_embedding(self, prompt: str, **kwargs) -> List[float]: + """ + Asynchronously generate an embedding for the given prompt. + + Args: + prompt: The text to generate an embedding for + **kwargs: Additional arguments that may contain metadata + + Returns: + List[float]: The embedding vector + """ + from litellm.proxy.proxy_server import llm_model_list, llm_router - async def _index_info(self): - return await self.index.ainfo() + # Route the embedding request through the proxy if appropriate + router_model_names = ( + [m["model_name"] for m in llm_model_list] + if llm_model_list is not None + else [] + ) + + try: + if llm_router is not None and self.embedding_model in router_model_names: + # Use the router for embedding generation + user_api_key = kwargs.get("metadata", {}).get("user_api_key", "") + embedding_response = await llm_router.aembedding( + model=self.embedding_model, + input=prompt, + cache={"no-store": True, "no-cache": True}, + metadata={ + "user_api_key": user_api_key, + "semantic-cache-embedding": True, + "trace_id": kwargs.get("metadata", {}).get("trace_id", None), + }, + ) + else: + # Generate embedding directly + embedding_response = await litellm.aembedding( + model=self.embedding_model, + input=prompt, + cache={"no-store": True, "no-cache": True}, + ) - async def async_set_cache_pipeline(self, cache_list, **kwargs): - tasks = [] - for val in cache_list: - tasks.append(self.async_set_cache(val[0], val[1], **kwargs)) - await asyncio.gather(*tasks) + # Extract and return the embedding vector + return embedding_response["data"][0]["embedding"] + except Exception as e: + print_verbose(f"Error generating async embedding: {str(e)}") + raise ValueError(f"Failed to generate embedding: {str(e)}") from e + + async def async_set_cache(self, key: str, value: Any, **kwargs) -> None: + """ + Asynchronously store a value in the semantic cache. 
+
+        Args:
+            key: The cache key (not directly used in semantic caching)
+            value: The response value to cache
+            **kwargs: Additional arguments including 'messages' for the prompt
+                and optional 'ttl' for time-to-live
+        """
+        print_verbose(f"Async Redis semantic-cache set_cache, kwargs: {kwargs}")
+
+        try:
+            # Extract the prompt from messages
+            messages = kwargs.get("messages", [])
+            if not messages:
+                print_verbose("No messages provided for semantic caching")
+                return
+
+            prompt = get_str_from_messages(messages)
+            value_str = str(value)
+
+            # Generate an embedding for the prompt; the response is stored as text
+            prompt_embedding = await self._get_async_embedding(prompt, **kwargs)
+
+            # Get TTL and store in Redis semantic cache
+            ttl = self._get_ttl(**kwargs)
+            if ttl is not None:
+                await self.llmcache.astore(
+                    prompt,
+                    value_str,
+                    vector=prompt_embedding,  # Pass through custom embedding
+                    ttl=ttl
+                )
+            else:
+                await self.llmcache.astore(
+                    prompt,
+                    value_str,
+                    vector=prompt_embedding  # Pass through custom embedding
+                )
+        except Exception as e:
+            print_verbose(f"Error in async_set_cache: {str(e)}")
+
+    async def async_get_cache(self, key: str, **kwargs) -> Any:
+        """
+        Asynchronously retrieve a semantically similar cached response.
+
+        Args:
+            key: The cache key (not directly used in semantic caching)
+            **kwargs: Additional arguments including 'messages' for the prompt
+
+        Returns:
+            The cached response if a semantically similar prompt is found, else None
+        """
+        print_verbose(f"Async Redis semantic-cache get_cache, kwargs: {kwargs}")
+
+        try:
+            # Extract the prompt from messages
+            messages = kwargs.get("messages", [])
+            if not messages:
+                print_verbose("No messages provided for semantic cache lookup")
+                kwargs.setdefault("metadata", {})["semantic-similarity"] = 0.0
+                return None
+
+            prompt = get_str_from_messages(messages)
+
+            # Generate embedding for the prompt
+            prompt_embedding = await self._get_async_embedding(prompt, **kwargs)
+
+            # Check the cache for semantically similar prompts
+            results = await self.llmcache.acheck(
+                prompt=prompt,
+                vector=prompt_embedding
+            )
+
+            # Handle results / cache hit
+            if not results:
+                kwargs.setdefault("metadata", {})["semantic-similarity"] = 0.0
+                return None
+
+            cache_hit = results[0]
+            vector_distance = float(cache_hit["vector_distance"])
+
+            # Convert vector distance back to similarity
+            # For cosine distance: 0 = most similar, 2 = least similar,
+            # whereas for similarity: 1 = most similar, 0 = least similar
+            similarity = 1 - vector_distance
+
+            cached_prompt = cache_hit["prompt"]
+            cached_response = cache_hit["response"]
+
+            # update kwargs["metadata"] with similarity, don't rewrite the original metadata
+            kwargs.setdefault("metadata", {})["semantic-similarity"] = similarity
+
+            print_verbose(
+                f"Cache hit: similarity threshold: {self.similarity_threshold}, "
+                f"actual similarity: {similarity}, "
+                f"current prompt: {prompt}, "
+                f"cached prompt: {cached_prompt}"
+            )
+
+            return self._get_cache_logic(cached_response=cached_response)
+        except Exception as e:
+            print_verbose(f"Error in async_get_cache: {str(e)}")
+            kwargs.setdefault("metadata", {})["semantic-similarity"] = 0.0
+
+    async def _index_info(self) -> Dict[str, Any]:
+        """
+        Get information about the Redis index.
+ + Returns: + Dict[str, Any]: Information about the Redis index + """ + aindex = await self.llmcache._get_async_index() + return await aindex.info() + + async def async_set_cache_pipeline(self, cache_list: List[Tuple[str, Any]], **kwargs) -> None: + """ + Asynchronously store multiple values in the semantic cache. + + Args: + cache_list: List of (key, value) tuples to cache + **kwargs: Additional arguments + """ + try: + tasks = [] + for val in cache_list: + tasks.append(self.async_set_cache(val[0], val[1], **kwargs)) + await asyncio.gather(*tasks) + except Exception as e: + print_verbose(f"Error in async_set_cache_pipeline: {str(e)}") diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index dc353bb2de..cac416e75f 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -1784,9 +1784,6 @@ class ProxyConfig: reset_color_code, cache_password, ) - if cache_type == "redis-semantic": - # by default this should always be async - cache_params.update({"redis_semantic_cache_use_async": True}) # users can pass os.environ/ variables on the proxy - we should read them from the env for key, value in cache_params.items(): diff --git a/poetry.lock b/poetry.lock index 5834dd2e70..fc2b4743bf 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.0.0 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.0.1 and should not be changed by hand. [[package]] name = "aiohappyeyeballs" @@ -215,7 +215,7 @@ description = "Timeout context manager for asyncio programs" optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_full_version < \"3.11.3\" and extra == \"proxy\" or python_version < \"3.11\"" +markers = "python_full_version < \"3.11.3\" and (extra == \"extra-proxy\" or extra == \"proxy\" or python_version < \"3.11\")" files = [ {file = "async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c"}, {file = "async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"}, @@ -428,8 +428,8 @@ files = [ jmespath = ">=0.7.1,<2.0.0" python-dateutil = ">=2.1,<3.0.0" urllib3 = [ - {version = ">=1.25.4,<1.27", markers = "python_version < \"3.10\""}, {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version >= \"3.10\""}, + {version = ">=1.25.4,<1.27", markers = "python_version < \"3.10\""}, ] [package.extras] @@ -671,6 +671,25 @@ files = [ ] markers = {main = "platform_system == \"Windows\"", dev = "platform_system == \"Windows\" or sys_platform == \"win32\""} +[[package]] +name = "coloredlogs" +version = "15.0.1" +description = "Colored terminal output for Python's logging module" +optional = true +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +groups = ["main"] +markers = "python_version >= \"3.9\" and extra == \"extra-proxy\" and python_version < \"3.14\"" +files = [ + {file = "coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934"}, + {file = "coloredlogs-15.0.1.tar.gz", hash = "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0"}, +] + +[package.dependencies] +humanfriendly = ">=9.1" + +[package.extras] +cron = ["capturer (>=2.4)"] + [[package]] name = "cryptography" version = "43.0.3" @@ -791,15 +810,15 @@ test = ["pytest (>=6)"] [[package]] name = "fastapi" -version = "0.115.11" +version = "0.115.12" description = 
"FastAPI framework, high performance, easy to learn, fast to code, ready for production" optional = true python-versions = ">=3.8" groups = ["main"] markers = "extra == \"proxy\"" files = [ - {file = "fastapi-0.115.11-py3-none-any.whl", hash = "sha256:32e1541b7b74602e4ef4a0260ecaf3aadf9d4f19590bba3e1bf2ac4666aa2c64"}, - {file = "fastapi-0.115.11.tar.gz", hash = "sha256:cc81f03f688678b92600a65a5e618b93592c65005db37157147204d8924bf94f"}, + {file = "fastapi-0.115.12-py3-none-any.whl", hash = "sha256:e94613d6c05e27be7ffebdd6ea5f388112e5e430c8f7d6494a9d1d88d43e814d"}, + {file = "fastapi-0.115.12.tar.gz", hash = "sha256:1e2c2a2646905f9e83d32f04a3f86aff4a286669c6c950ca95b5fd68c2602681"}, ] [package.dependencies] @@ -1024,12 +1043,12 @@ files = [ google-auth = ">=2.14.1,<3.0.0" googleapis-common-protos = ">=1.56.2,<2.0.0" grpcio = [ - {version = ">=1.33.2,<2.0dev", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, {version = ">=1.49.1,<2.0dev", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}, + {version = ">=1.33.2,<2.0dev", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, ] grpcio-status = [ - {version = ">=1.33.2,<2.0.dev0", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, {version = ">=1.49.1,<2.0.dev0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}, + {version = ">=1.33.2,<2.0.dev0", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, ] proto-plus = [ {version = ">=1.22.3,<2.0.0", markers = "python_version < \"3.13\""}, @@ -1135,7 +1154,7 @@ description = "HTTP/2-based RPC framework" optional = true python-versions = ">=3.8" groups = ["main"] -markers = "extra == \"extra-proxy\"" +markers = "extra == \"extra-proxy\" and python_version < \"3.11\"" files = [ {file = "grpcio-1.70.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:95469d1977429f45fe7df441f586521361e235982a0b39e33841549143ae2851"}, {file = "grpcio-1.70.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:ed9718f17fbdb472e33b869c77a16d0b55e166b100ec57b016dc7de9c8d236bf"}, @@ -1197,6 +1216,71 @@ files = [ [package.extras] protobuf = ["grpcio-tools (>=1.70.0)"] +[[package]] +name = "grpcio" +version = "1.71.0" +description = "HTTP/2-based RPC framework" +optional = true +python-versions = ">=3.9" +groups = ["main"] +markers = "python_version >= \"3.11\" and extra == \"extra-proxy\"" +files = [ + {file = "grpcio-1.71.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:c200cb6f2393468142eb50ab19613229dcc7829b5ccee8b658a36005f6669fdd"}, + {file = "grpcio-1.71.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:b2266862c5ad664a380fbbcdbdb8289d71464c42a8c29053820ee78ba0119e5d"}, + {file = "grpcio-1.71.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:0ab8b2864396663a5b0b0d6d79495657ae85fa37dcb6498a2669d067c65c11ea"}, + {file = "grpcio-1.71.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c30f393f9d5ff00a71bb56de4aa75b8fe91b161aeb61d39528db6b768d7eac69"}, + {file = "grpcio-1.71.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f250ff44843d9a0615e350c77f890082102a0318d66a99540f54769c8766ab73"}, + {file = "grpcio-1.71.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e6d8de076528f7c43a2f576bc311799f89d795aa6c9b637377cc2b1616473804"}, + {file = "grpcio-1.71.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:9b91879d6da1605811ebc60d21ab6a7e4bae6c35f6b63a061d61eb818c8168f6"}, + {file = 
"grpcio-1.71.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f71574afdf944e6652203cd1badcda195b2a27d9c83e6d88dc1ce3cfb73b31a5"}, + {file = "grpcio-1.71.0-cp310-cp310-win32.whl", hash = "sha256:8997d6785e93308f277884ee6899ba63baafa0dfb4729748200fcc537858a509"}, + {file = "grpcio-1.71.0-cp310-cp310-win_amd64.whl", hash = "sha256:7d6ac9481d9d0d129224f6d5934d5832c4b1cddb96b59e7eba8416868909786a"}, + {file = "grpcio-1.71.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:d6aa986318c36508dc1d5001a3ff169a15b99b9f96ef5e98e13522c506b37eef"}, + {file = "grpcio-1.71.0-cp311-cp311-macosx_10_14_universal2.whl", hash = "sha256:d2c170247315f2d7e5798a22358e982ad6eeb68fa20cf7a820bb74c11f0736e7"}, + {file = "grpcio-1.71.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:e6f83a583ed0a5b08c5bc7a3fe860bb3c2eac1f03f1f63e0bc2091325605d2b7"}, + {file = "grpcio-1.71.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4be74ddeeb92cc87190e0e376dbc8fc7736dbb6d3d454f2fa1f5be1dee26b9d7"}, + {file = "grpcio-1.71.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4dd0dfbe4d5eb1fcfec9490ca13f82b089a309dc3678e2edabc144051270a66e"}, + {file = "grpcio-1.71.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a2242d6950dc892afdf9e951ed7ff89473aaf744b7d5727ad56bdaace363722b"}, + {file = "grpcio-1.71.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:0fa05ee31a20456b13ae49ad2e5d585265f71dd19fbd9ef983c28f926d45d0a7"}, + {file = "grpcio-1.71.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3d081e859fb1ebe176de33fc3adb26c7d46b8812f906042705346b314bde32c3"}, + {file = "grpcio-1.71.0-cp311-cp311-win32.whl", hash = "sha256:d6de81c9c00c8a23047136b11794b3584cdc1460ed7cbc10eada50614baa1444"}, + {file = "grpcio-1.71.0-cp311-cp311-win_amd64.whl", hash = "sha256:24e867651fc67717b6f896d5f0cac0ec863a8b5fb7d6441c2ab428f52c651c6b"}, + {file = "grpcio-1.71.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:0ff35c8d807c1c7531d3002be03221ff9ae15712b53ab46e2a0b4bb271f38537"}, + {file = "grpcio-1.71.0-cp312-cp312-macosx_10_14_universal2.whl", hash = "sha256:b78a99cd1ece4be92ab7c07765a0b038194ded2e0a26fd654591ee136088d8d7"}, + {file = "grpcio-1.71.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:dc1a1231ed23caac1de9f943d031f1bc38d0f69d2a3b243ea0d664fc1fbd7fec"}, + {file = "grpcio-1.71.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e6beeea5566092c5e3c4896c6d1d307fb46b1d4bdf3e70c8340b190a69198594"}, + {file = "grpcio-1.71.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5170929109450a2c031cfe87d6716f2fae39695ad5335d9106ae88cc32dc84c"}, + {file = "grpcio-1.71.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:5b08d03ace7aca7b2fadd4baf291139b4a5f058805a8327bfe9aece7253b6d67"}, + {file = "grpcio-1.71.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:f903017db76bf9cc2b2d8bdd37bf04b505bbccad6be8a81e1542206875d0e9db"}, + {file = "grpcio-1.71.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:469f42a0b410883185eab4689060a20488a1a0a00f8bbb3cbc1061197b4c5a79"}, + {file = "grpcio-1.71.0-cp312-cp312-win32.whl", hash = "sha256:ad9f30838550695b5eb302add33f21f7301b882937460dd24f24b3cc5a95067a"}, + {file = "grpcio-1.71.0-cp312-cp312-win_amd64.whl", hash = "sha256:652350609332de6dac4ece254e5d7e1ff834e203d6afb769601f286886f6f3a8"}, + {file = "grpcio-1.71.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:cebc1b34ba40a312ab480ccdb396ff3c529377a2fce72c45a741f7215bfe8379"}, + {file = 
"grpcio-1.71.0-cp313-cp313-macosx_10_14_universal2.whl", hash = "sha256:85da336e3649a3d2171e82f696b5cad2c6231fdd5bad52616476235681bee5b3"}, + {file = "grpcio-1.71.0-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:f9a412f55bb6e8f3bb000e020dbc1e709627dcb3a56f6431fa7076b4c1aab0db"}, + {file = "grpcio-1.71.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:47be9584729534660416f6d2a3108aaeac1122f6b5bdbf9fd823e11fe6fbaa29"}, + {file = "grpcio-1.71.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c9c80ac6091c916db81131d50926a93ab162a7e97e4428ffc186b6e80d6dda4"}, + {file = "grpcio-1.71.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:789d5e2a3a15419374b7b45cd680b1e83bbc1e52b9086e49308e2c0b5bbae6e3"}, + {file = "grpcio-1.71.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:1be857615e26a86d7363e8a163fade914595c81fec962b3d514a4b1e8760467b"}, + {file = "grpcio-1.71.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:a76d39b5fafd79ed604c4be0a869ec3581a172a707e2a8d7a4858cb05a5a7637"}, + {file = "grpcio-1.71.0-cp313-cp313-win32.whl", hash = "sha256:74258dce215cb1995083daa17b379a1a5a87d275387b7ffe137f1d5131e2cfbb"}, + {file = "grpcio-1.71.0-cp313-cp313-win_amd64.whl", hash = "sha256:22c3bc8d488c039a199f7a003a38cb7635db6656fa96437a8accde8322ce2366"}, + {file = "grpcio-1.71.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:c6a0a28450c16809f94e0b5bfe52cabff63e7e4b97b44123ebf77f448534d07d"}, + {file = "grpcio-1.71.0-cp39-cp39-macosx_10_14_universal2.whl", hash = "sha256:a371e6b6a5379d3692cc4ea1cb92754d2a47bdddeee755d3203d1f84ae08e03e"}, + {file = "grpcio-1.71.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:39983a9245d37394fd59de71e88c4b295eb510a3555e0a847d9965088cdbd033"}, + {file = "grpcio-1.71.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9182e0063112e55e74ee7584769ec5a0b4f18252c35787f48738627e23a62b97"}, + {file = "grpcio-1.71.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:693bc706c031aeb848849b9d1c6b63ae6bcc64057984bb91a542332b75aa4c3d"}, + {file = "grpcio-1.71.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:20e8f653abd5ec606be69540f57289274c9ca503ed38388481e98fa396ed0b41"}, + {file = "grpcio-1.71.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:8700a2a57771cc43ea295296330daaddc0d93c088f0a35cc969292b6db959bf3"}, + {file = "grpcio-1.71.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d35a95f05a8a2cbe8e02be137740138b3b2ea5f80bd004444e4f9a1ffc511e32"}, + {file = "grpcio-1.71.0-cp39-cp39-win32.whl", hash = "sha256:f9c30c464cb2ddfbc2ddf9400287701270fdc0f14be5f08a1e3939f1e749b455"}, + {file = "grpcio-1.71.0-cp39-cp39-win_amd64.whl", hash = "sha256:63e41b91032f298b3e973b3fa4093cbbc620c875e2da7b93e249d4728b54559a"}, + {file = "grpcio-1.71.0.tar.gz", hash = "sha256:2b85f7820475ad3edec209d3d89a7909ada16caab05d3f2e08a7e8ae3200a55c"}, +] + +[package.extras] +protobuf = ["grpcio-tools (>=1.71.0)"] + [[package]] name = "grpcio-status" version = "1.70.0" @@ -1204,7 +1288,7 @@ description = "Status proto mapping for gRPC" optional = true python-versions = ">=3.8" groups = ["main"] -markers = "extra == \"extra-proxy\"" +markers = "extra == \"extra-proxy\" and python_version < \"3.11\"" files = [ {file = "grpcio_status-1.70.0-py3-none-any.whl", hash = "sha256:fc5a2ae2b9b1c1969cc49f3262676e6854aa2398ec69cb5bd6c47cd501904a85"}, {file = "grpcio_status-1.70.0.tar.gz", hash = "sha256:0e7b42816512433b18b9d764285ff029bde059e9d41f8fe10a60631bd8348101"}, @@ -1215,6 +1299,24 @@ 
googleapis-common-protos = ">=1.5.5" grpcio = ">=1.70.0" protobuf = ">=5.26.1,<6.0dev" +[[package]] +name = "grpcio-status" +version = "1.71.0" +description = "Status proto mapping for gRPC" +optional = true +python-versions = ">=3.9" +groups = ["main"] +markers = "python_version >= \"3.11\" and extra == \"extra-proxy\"" +files = [ + {file = "grpcio_status-1.71.0-py3-none-any.whl", hash = "sha256:843934ef8c09e3e858952887467f8256aac3910c55f077a359a65b2b3cde3e68"}, + {file = "grpcio_status-1.71.0.tar.gz", hash = "sha256:11405fed67b68f406b3f3c7c5ae5104a79d2d309666d10d61b152e91d28fb968"}, +] + +[package.dependencies] +googleapis-common-protos = ">=1.5.5" +grpcio = ">=1.71.0" +protobuf = ">=5.26.1,<6.0dev" + [[package]] name = "gunicorn" version = "23.0.0" @@ -1332,6 +1434,22 @@ testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gr torch = ["safetensors[torch]", "torch"] typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)"] +[[package]] +name = "humanfriendly" +version = "10.0" +description = "Human friendly output for text interfaces using Python" +optional = true +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +groups = ["main"] +markers = "python_version >= \"3.9\" and extra == \"extra-proxy\" and python_version < \"3.14\"" +files = [ + {file = "humanfriendly-10.0-py2.py3-none-any.whl", hash = "sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477"}, + {file = "humanfriendly-10.0.tar.gz", hash = "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc"}, +] + +[package.dependencies] +pyreadline3 = {version = "*", markers = "sys_platform == \"win32\" and python_version >= \"3.8\""} + [[package]] name = "idna" version = "3.10" @@ -1397,14 +1515,14 @@ type = ["pytest-mypy"] [[package]] name = "iniconfig" -version = "2.0.0" +version = "2.1.0" description = "brain-dead simple config-ini parsing" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" groups = ["dev"] files = [ - {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, - {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, + {file = "iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760"}, + {file = "iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7"}, ] [[package]] @@ -1659,6 +1777,45 @@ files = [ {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, ] +[[package]] +name = "ml-dtypes" +version = "0.4.1" +description = "" +optional = true +python-versions = ">=3.9" +groups = ["main"] +markers = "python_version >= \"3.9\" and extra == \"extra-proxy\" and python_version < \"3.14\"" +files = [ + {file = "ml_dtypes-0.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:1fe8b5b5e70cd67211db94b05cfd58dace592f24489b038dc6f9fe347d2e07d5"}, + {file = "ml_dtypes-0.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c09a6d11d8475c2a9fd2bc0695628aec105f97cab3b3a3fb7c9660348ff7d24"}, + {file = "ml_dtypes-0.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f5e8f75fa371020dd30f9196e7d73babae2abd51cf59bdd56cb4f8de7e13354"}, + {file = "ml_dtypes-0.4.1-cp310-cp310-win_amd64.whl", hash = 
"sha256:15fdd922fea57e493844e5abb930b9c0bd0af217d9edd3724479fc3d7ce70e3f"}, + {file = "ml_dtypes-0.4.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:2d55b588116a7085d6e074cf0cdb1d6fa3875c059dddc4d2c94a4cc81c23e975"}, + {file = "ml_dtypes-0.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e138a9b7a48079c900ea969341a5754019a1ad17ae27ee330f7ebf43f23877f9"}, + {file = "ml_dtypes-0.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:74c6cfb5cf78535b103fde9ea3ded8e9f16f75bc07789054edc7776abfb3d752"}, + {file = "ml_dtypes-0.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:274cc7193dd73b35fb26bef6c5d40ae3eb258359ee71cd82f6e96a8c948bdaa6"}, + {file = "ml_dtypes-0.4.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:827d3ca2097085cf0355f8fdf092b888890bb1b1455f52801a2d7756f056f54b"}, + {file = "ml_dtypes-0.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:772426b08a6172a891274d581ce58ea2789cc8abc1c002a27223f314aaf894e7"}, + {file = "ml_dtypes-0.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:126e7d679b8676d1a958f2651949fbfa182832c3cd08020d8facd94e4114f3e9"}, + {file = "ml_dtypes-0.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:df0fb650d5c582a9e72bb5bd96cfebb2cdb889d89daff621c8fbc60295eba66c"}, + {file = "ml_dtypes-0.4.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:e35e486e97aee577d0890bc3bd9e9f9eece50c08c163304008587ec8cfe7575b"}, + {file = "ml_dtypes-0.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:560be16dc1e3bdf7c087eb727e2cf9c0e6a3d87e9f415079d2491cc419b3ebf5"}, + {file = "ml_dtypes-0.4.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad0b757d445a20df39035c4cdeed457ec8b60d236020d2560dbc25887533cf50"}, + {file = "ml_dtypes-0.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:ef0d7e3fece227b49b544fa69e50e607ac20948f0043e9f76b44f35f229ea450"}, + {file = "ml_dtypes-0.4.1.tar.gz", hash = "sha256:fad5f2de464fd09127e49b7fd1252b9006fb43d2edc1ff112d390c324af5ca7a"}, +] + +[package.dependencies] +numpy = [ + {version = ">=1.23.3", markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, + {version = ">=1.21.2", markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, + {version = ">1.20", markers = "python_version < \"3.10\""}, + {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, +] + +[package.extras] +dev = ["absl-py", "pyink", "pylint (>=2.6.0)", "pytest", "pytest-xdist"] + [[package]] name = "msal" version = "1.32.0" @@ -1889,6 +2046,119 @@ files = [ {file = "nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f"}, ] +[[package]] +name = "numpy" +version = "1.26.4" +description = "Fundamental package for array computing in Python" +optional = true +python-versions = ">=3.9" +groups = ["main"] +markers = "python_version >= \"3.9\" and extra == \"extra-proxy\" and python_version < \"3.12\"" +files = [ + {file = "numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0"}, + {file = "numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a"}, + {file = "numpy-1.26.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d209d8969599b27ad20994c8e41936ee0964e6da07478d6c35016bc386b66ad4"}, + {file = 
"numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f"}, + {file = "numpy-1.26.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:62b8e4b1e28009ef2846b4c7852046736bab361f7aeadeb6a5b89ebec3c7055a"}, + {file = "numpy-1.26.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a4abb4f9001ad2858e7ac189089c42178fcce737e4169dc61321660f1a96c7d2"}, + {file = "numpy-1.26.4-cp310-cp310-win32.whl", hash = "sha256:bfe25acf8b437eb2a8b2d49d443800a5f18508cd811fea3181723922a8a82b07"}, + {file = "numpy-1.26.4-cp310-cp310-win_amd64.whl", hash = "sha256:b97fe8060236edf3662adfc2c633f56a08ae30560c56310562cb4f95500022d5"}, + {file = "numpy-1.26.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71"}, + {file = "numpy-1.26.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef"}, + {file = "numpy-1.26.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ab55401287bfec946ced39700c053796e7cc0e3acbef09993a9ad2adba6ca6e"}, + {file = "numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:666dbfb6ec68962c033a450943ded891bed2d54e6755e35e5835d63f4f6931d5"}, + {file = "numpy-1.26.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:96ff0b2ad353d8f990b63294c8986f1ec3cb19d749234014f4e7eb0112ceba5a"}, + {file = "numpy-1.26.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:60dedbb91afcbfdc9bc0b1f3f402804070deed7392c23eb7a7f07fa857868e8a"}, + {file = "numpy-1.26.4-cp311-cp311-win32.whl", hash = "sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20"}, + {file = "numpy-1.26.4-cp311-cp311-win_amd64.whl", hash = "sha256:cd25bcecc4974d09257ffcd1f098ee778f7834c3ad767fe5db785be9a4aa9cb2"}, + {file = "numpy-1.26.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218"}, + {file = "numpy-1.26.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b"}, + {file = "numpy-1.26.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b"}, + {file = "numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:675d61ffbfa78604709862923189bad94014bef562cc35cf61d3a07bba02a7ed"}, + {file = "numpy-1.26.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ab47dbe5cc8210f55aa58e4805fe224dac469cde56b9f731a4c098b91917159a"}, + {file = "numpy-1.26.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1dda2e7b4ec9dd512f84935c5f126c8bd8b9f2fc001e9f54af255e8c5f16b0e0"}, + {file = "numpy-1.26.4-cp312-cp312-win32.whl", hash = "sha256:50193e430acfc1346175fcbdaa28ffec49947a06918b7b92130744e81e640110"}, + {file = "numpy-1.26.4-cp312-cp312-win_amd64.whl", hash = "sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818"}, + {file = "numpy-1.26.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7349ab0fa0c429c82442a27a9673fc802ffdb7c7775fad780226cb234965e53c"}, + {file = "numpy-1.26.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:52b8b60467cd7dd1e9ed082188b4e6bb35aa5cdd01777621a1658910745b90be"}, + {file = "numpy-1.26.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5241e0a80d808d70546c697135da2c613f30e28251ff8307eb72ba696945764"}, + {file = 
"numpy-1.26.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3"}, + {file = "numpy-1.26.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:679b0076f67ecc0138fd2ede3a8fd196dddc2ad3254069bcb9faf9a79b1cebcd"}, + {file = "numpy-1.26.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:47711010ad8555514b434df65f7d7b076bb8261df1ca9bb78f53d3b2db02e95c"}, + {file = "numpy-1.26.4-cp39-cp39-win32.whl", hash = "sha256:a354325ee03388678242a4d7ebcd08b5c727033fcff3b2f536aea978e15ee9e6"}, + {file = "numpy-1.26.4-cp39-cp39-win_amd64.whl", hash = "sha256:3373d5d70a5fe74a2c1bb6d2cfd9609ecf686d47a2d7b1d37a8f3b6bf6003aea"}, + {file = "numpy-1.26.4-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:afedb719a9dcfc7eaf2287b839d8198e06dcd4cb5d276a3df279231138e83d30"}, + {file = "numpy-1.26.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95a7476c59002f2f6c590b9b7b998306fba6a5aa646b1e22ddfeaf8f78c3a29c"}, + {file = "numpy-1.26.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7e50d0a0cc3189f9cb0aeb3a6a6af18c16f59f004b866cd2be1c14b36134a4a0"}, + {file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"}, +] + +[[package]] +name = "numpy" +version = "2.2.4" +description = "Fundamental package for array computing in Python" +optional = true +python-versions = ">=3.10" +groups = ["main"] +markers = "python_version >= \"3.12\" and extra == \"extra-proxy\" and python_version < \"3.14\"" +files = [ + {file = "numpy-2.2.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8146f3550d627252269ac42ae660281d673eb6f8b32f113538e0cc2a9aed42b9"}, + {file = "numpy-2.2.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e642d86b8f956098b564a45e6f6ce68a22c2c97a04f5acd3f221f57b8cb850ae"}, + {file = "numpy-2.2.4-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:a84eda42bd12edc36eb5b53bbcc9b406820d3353f1994b6cfe453a33ff101775"}, + {file = "numpy-2.2.4-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:4ba5054787e89c59c593a4169830ab362ac2bee8a969249dc56e5d7d20ff8df9"}, + {file = "numpy-2.2.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7716e4a9b7af82c06a2543c53ca476fa0b57e4d760481273e09da04b74ee6ee2"}, + {file = "numpy-2.2.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:adf8c1d66f432ce577d0197dceaac2ac00c0759f573f28516246351c58a85020"}, + {file = "numpy-2.2.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:218f061d2faa73621fa23d6359442b0fc658d5b9a70801373625d958259eaca3"}, + {file = "numpy-2.2.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:df2f57871a96bbc1b69733cd4c51dc33bea66146b8c63cacbfed73eec0883017"}, + {file = "numpy-2.2.4-cp310-cp310-win32.whl", hash = "sha256:a0258ad1f44f138b791327961caedffbf9612bfa504ab9597157806faa95194a"}, + {file = "numpy-2.2.4-cp310-cp310-win_amd64.whl", hash = "sha256:0d54974f9cf14acf49c60f0f7f4084b6579d24d439453d5fc5805d46a165b542"}, + {file = "numpy-2.2.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e9e0a277bb2eb5d8a7407e14688b85fd8ad628ee4e0c7930415687b6564207a4"}, + {file = "numpy-2.2.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9eeea959168ea555e556b8188da5fa7831e21d91ce031e95ce23747b7609f8a4"}, + {file = "numpy-2.2.4-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:bd3ad3b0a40e713fc68f99ecfd07124195333f1e689387c180813f0e94309d6f"}, + {file = "numpy-2.2.4-cp311-cp311-macosx_14_0_x86_64.whl", hash = 
"sha256:cf28633d64294969c019c6df4ff37f5698e8326db68cc2b66576a51fad634880"}, + {file = "numpy-2.2.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2fa8fa7697ad1646b5c93de1719965844e004fcad23c91228aca1cf0800044a1"}, + {file = "numpy-2.2.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f4162988a360a29af158aeb4a2f4f09ffed6a969c9776f8f3bdee9b06a8ab7e5"}, + {file = "numpy-2.2.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:892c10d6a73e0f14935c31229e03325a7b3093fafd6ce0af704be7f894d95687"}, + {file = "numpy-2.2.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:db1f1c22173ac1c58db249ae48aa7ead29f534b9a948bc56828337aa84a32ed6"}, + {file = "numpy-2.2.4-cp311-cp311-win32.whl", hash = "sha256:ea2bb7e2ae9e37d96835b3576a4fa4b3a97592fbea8ef7c3587078b0068b8f09"}, + {file = "numpy-2.2.4-cp311-cp311-win_amd64.whl", hash = "sha256:f7de08cbe5551911886d1ab60de58448c6df0f67d9feb7d1fb21e9875ef95e91"}, + {file = "numpy-2.2.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a7b9084668aa0f64e64bd00d27ba5146ef1c3a8835f3bd912e7a9e01326804c4"}, + {file = "numpy-2.2.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:dbe512c511956b893d2dacd007d955a3f03d555ae05cfa3ff1c1ff6df8851854"}, + {file = "numpy-2.2.4-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:bb649f8b207ab07caebba230d851b579a3c8711a851d29efe15008e31bb4de24"}, + {file = "numpy-2.2.4-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:f34dc300df798742b3d06515aa2a0aee20941c13579d7a2f2e10af01ae4901ee"}, + {file = "numpy-2.2.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3f7ac96b16955634e223b579a3e5798df59007ca43e8d451a0e6a50f6bfdfba"}, + {file = "numpy-2.2.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f92084defa704deadd4e0a5ab1dc52d8ac9e8a8ef617f3fbb853e79b0ea3592"}, + {file = "numpy-2.2.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7a4e84a6283b36632e2a5b56e121961f6542ab886bc9e12f8f9818b3c266bfbb"}, + {file = "numpy-2.2.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:11c43995255eb4127115956495f43e9343736edb7fcdb0d973defd9de14cd84f"}, + {file = "numpy-2.2.4-cp312-cp312-win32.whl", hash = "sha256:65ef3468b53269eb5fdb3a5c09508c032b793da03251d5f8722b1194f1790c00"}, + {file = "numpy-2.2.4-cp312-cp312-win_amd64.whl", hash = "sha256:2aad3c17ed2ff455b8eaafe06bcdae0062a1db77cb99f4b9cbb5f4ecb13c5146"}, + {file = "numpy-2.2.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1cf4e5c6a278d620dee9ddeb487dc6a860f9b199eadeecc567f777daace1e9e7"}, + {file = "numpy-2.2.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1974afec0b479e50438fc3648974268f972e2d908ddb6d7fb634598cdb8260a0"}, + {file = "numpy-2.2.4-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:79bd5f0a02aa16808fcbc79a9a376a147cc1045f7dfe44c6e7d53fa8b8a79392"}, + {file = "numpy-2.2.4-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:3387dd7232804b341165cedcb90694565a6015433ee076c6754775e85d86f1fc"}, + {file = "numpy-2.2.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f527d8fdb0286fd2fd97a2a96c6be17ba4232da346931d967a0630050dfd298"}, + {file = "numpy-2.2.4-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bce43e386c16898b91e162e5baaad90c4b06f9dcbe36282490032cec98dc8ae7"}, + {file = "numpy-2.2.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:31504f970f563d99f71a3512d0c01a645b692b12a63630d6aafa0939e52361e6"}, + {file = "numpy-2.2.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = 
"sha256:81413336ef121a6ba746892fad881a83351ee3e1e4011f52e97fba79233611fd"}, + {file = "numpy-2.2.4-cp313-cp313-win32.whl", hash = "sha256:f486038e44caa08dbd97275a9a35a283a8f1d2f0ee60ac260a1790e76660833c"}, + {file = "numpy-2.2.4-cp313-cp313-win_amd64.whl", hash = "sha256:207a2b8441cc8b6a2a78c9ddc64d00d20c303d79fba08c577752f080c4007ee3"}, + {file = "numpy-2.2.4-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:8120575cb4882318c791f839a4fd66161a6fa46f3f0a5e613071aae35b5dd8f8"}, + {file = "numpy-2.2.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a761ba0fa886a7bb33c6c8f6f20213735cb19642c580a931c625ee377ee8bd39"}, + {file = "numpy-2.2.4-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:ac0280f1ba4a4bfff363a99a6aceed4f8e123f8a9b234c89140f5e894e452ecd"}, + {file = "numpy-2.2.4-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:879cf3a9a2b53a4672a168c21375166171bc3932b7e21f622201811c43cdd3b0"}, + {file = "numpy-2.2.4-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f05d4198c1bacc9124018109c5fba2f3201dbe7ab6e92ff100494f236209c960"}, + {file = "numpy-2.2.4-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2f085ce2e813a50dfd0e01fbfc0c12bbe5d2063d99f8b29da30e544fb6483b8"}, + {file = "numpy-2.2.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:92bda934a791c01d6d9d8e038363c50918ef7c40601552a58ac84c9613a665bc"}, + {file = "numpy-2.2.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ee4d528022f4c5ff67332469e10efe06a267e32f4067dc76bb7e2cddf3cd25ff"}, + {file = "numpy-2.2.4-cp313-cp313t-win32.whl", hash = "sha256:05c076d531e9998e7e694c36e8b349969c56eadd2cdcd07242958489d79a7286"}, + {file = "numpy-2.2.4-cp313-cp313t-win_amd64.whl", hash = "sha256:188dcbca89834cc2e14eb2f106c96d6d46f200fe0200310fc29089657379c58d"}, + {file = "numpy-2.2.4-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7051ee569db5fbac144335e0f3b9c2337e0c8d5c9fee015f259a5bd70772b7e8"}, + {file = "numpy-2.2.4-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:ab2939cd5bec30a7430cbdb2287b63151b77cf9624de0532d629c9a1c59b1d5c"}, + {file = "numpy-2.2.4-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d0f35b19894a9e08639fd60a1ec1978cb7f5f7f1eace62f38dd36be8aecdef4d"}, + {file = "numpy-2.2.4-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:b4adfbbc64014976d2f91084915ca4e626fbf2057fb81af209c1a6d776d23e3d"}, + {file = "numpy-2.2.4.tar.gz", hash = "sha256:9ba03692a45d3eef66559efe1d1096c4b9b75c0986b5dff5530c378fb8331d4f"}, +] + [[package]] name = "oauthlib" version = "3.2.2" @@ -1909,14 +2179,14 @@ signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] [[package]] name = "openai" -version = "1.66.3" +version = "1.68.2" description = "The official Python library for the openai API" optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "openai-1.66.3-py3-none-any.whl", hash = "sha256:a427c920f727711877ab17c11b95f1230b27767ba7a01e5b66102945141ceca9"}, - {file = "openai-1.66.3.tar.gz", hash = "sha256:8dde3aebe2d081258d4159c4cb27bdc13b5bb3f7ea2201d9bd940b9a89faf0c9"}, + {file = "openai-1.68.2-py3-none-any.whl", hash = "sha256:24484cb5c9a33b58576fdc5acf0e5f92603024a4e39d0b99793dfa1eb14c2b36"}, + {file = "openai-1.68.2.tar.gz", hash = "sha256:b720f0a95a1dbe1429c0d9bb62096a0d98057bcda82516f6e8af10284bdd5b19"}, ] [package.dependencies] @@ -1932,6 +2202,7 @@ typing-extensions = ">=4.11,<5" [package.extras] datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] realtime = 
["websockets (>=13,<15)"] +voice-helpers = ["numpy (>=2.0.2)", "sounddevice (>=0.5.1)"] [[package]] name = "orjson" @@ -2249,24 +2520,24 @@ testing = ["google-api-core (>=1.31.5)"] [[package]] name = "protobuf" -version = "5.29.3" +version = "5.29.4" description = "" optional = true python-versions = ">=3.8" groups = ["main"] markers = "extra == \"extra-proxy\"" files = [ - {file = "protobuf-5.29.3-cp310-abi3-win32.whl", hash = "sha256:3ea51771449e1035f26069c4c7fd51fba990d07bc55ba80701c78f886bf9c888"}, - {file = "protobuf-5.29.3-cp310-abi3-win_amd64.whl", hash = "sha256:a4fa6f80816a9a0678429e84973f2f98cbc218cca434abe8db2ad0bffc98503a"}, - {file = "protobuf-5.29.3-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:a8434404bbf139aa9e1300dbf989667a83d42ddda9153d8ab76e0d5dcaca484e"}, - {file = "protobuf-5.29.3-cp38-abi3-manylinux2014_aarch64.whl", hash = "sha256:daaf63f70f25e8689c072cfad4334ca0ac1d1e05a92fc15c54eb9cf23c3efd84"}, - {file = "protobuf-5.29.3-cp38-abi3-manylinux2014_x86_64.whl", hash = "sha256:c027e08a08be10b67c06bf2370b99c811c466398c357e615ca88c91c07f0910f"}, - {file = "protobuf-5.29.3-cp38-cp38-win32.whl", hash = "sha256:84a57163a0ccef3f96e4b6a20516cedcf5bb3a95a657131c5c3ac62200d23252"}, - {file = "protobuf-5.29.3-cp38-cp38-win_amd64.whl", hash = "sha256:b89c115d877892a512f79a8114564fb435943b59067615894c3b13cd3e1fa107"}, - {file = "protobuf-5.29.3-cp39-cp39-win32.whl", hash = "sha256:0eb32bfa5219fc8d4111803e9a690658aa2e6366384fd0851064b963b6d1f2a7"}, - {file = "protobuf-5.29.3-cp39-cp39-win_amd64.whl", hash = "sha256:6ce8cc3389a20693bfde6c6562e03474c40851b44975c9b2bf6df7d8c4f864da"}, - {file = "protobuf-5.29.3-py3-none-any.whl", hash = "sha256:0a18ed4a24198528f2333802eb075e59dea9d679ab7a6c5efb017a59004d849f"}, - {file = "protobuf-5.29.3.tar.gz", hash = "sha256:5da0f41edaf117bde316404bad1a486cb4ededf8e4a54891296f648e8e076620"}, + {file = "protobuf-5.29.4-cp310-abi3-win32.whl", hash = "sha256:13eb236f8eb9ec34e63fc8b1d6efd2777d062fa6aaa68268fb67cf77f6839ad7"}, + {file = "protobuf-5.29.4-cp310-abi3-win_amd64.whl", hash = "sha256:bcefcdf3976233f8a502d265eb65ea740c989bacc6c30a58290ed0e519eb4b8d"}, + {file = "protobuf-5.29.4-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:307ecba1d852ec237e9ba668e087326a67564ef83e45a0189a772ede9e854dd0"}, + {file = "protobuf-5.29.4-cp38-abi3-manylinux2014_aarch64.whl", hash = "sha256:aec4962f9ea93c431d5714ed1be1c93f13e1a8618e70035ba2b0564d9e633f2e"}, + {file = "protobuf-5.29.4-cp38-abi3-manylinux2014_x86_64.whl", hash = "sha256:d7d3f7d1d5a66ed4942d4fefb12ac4b14a29028b209d4bfb25c68ae172059922"}, + {file = "protobuf-5.29.4-cp38-cp38-win32.whl", hash = "sha256:1832f0515b62d12d8e6ffc078d7e9eb06969aa6dc13c13e1036e39d73bebc2de"}, + {file = "protobuf-5.29.4-cp38-cp38-win_amd64.whl", hash = "sha256:476cb7b14914c780605a8cf62e38c2a85f8caff2e28a6a0bad827ec7d6c85d68"}, + {file = "protobuf-5.29.4-cp39-cp39-win32.whl", hash = "sha256:fd32223020cb25a2cc100366f1dedc904e2d71d9322403224cdde5fdced0dabe"}, + {file = "protobuf-5.29.4-cp39-cp39-win_amd64.whl", hash = "sha256:678974e1e3a9b975b8bc2447fca458db5f93a2fb6b0c8db46b6675b5b5346812"}, + {file = "protobuf-5.29.4-py3-none-any.whl", hash = "sha256:3fde11b505e1597f71b875ef2fc52062b6a9740e5f7c8997ce878b6009145862"}, + {file = "protobuf-5.29.4.tar.gz", hash = "sha256:4f1dfcd7997b31ef8f53ec82781ff434a28bf71d9102ddde14d076adcfc78c99"}, ] [[package]] @@ -2520,6 +2791,22 @@ cffi = ">=1.4.1" docs = ["sphinx (>=1.6.5)", "sphinx-rtd-theme"] tests = ["hypothesis (>=3.27.0)", "pytest (>=3.2.1,!=3.3.0)"] +[[package]] +name 
= "pyreadline3" +version = "3.5.4" +description = "A python implementation of GNU readline." +optional = true +python-versions = ">=3.8" +groups = ["main"] +markers = "python_version >= \"3.9\" and extra == \"extra-proxy\" and sys_platform == \"win32\" and python_version < \"3.14\"" +files = [ + {file = "pyreadline3-3.5.4-py3-none-any.whl", hash = "sha256:eaf8e6cc3c49bcccf145fc6067ba8643d1df34d604a1ec0eccbf7a18e6d3fae6"}, + {file = "pyreadline3-3.5.4.tar.gz", hash = "sha256:8d57d53039a1c75adba8e50dd3d992b28143480816187ea5efbd5c78e6c885b7"}, +] + +[package.extras] +dev = ["build", "flake8", "mypy", "pytest", "twine"] + [[package]] name = "pytest" version = "7.4.4" @@ -2543,6 +2830,25 @@ tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} [package.extras] testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] +[[package]] +name = "pytest-asyncio" +version = "0.21.2" +description = "Pytest support for asyncio" +optional = false +python-versions = ">=3.7" +groups = ["dev"] +files = [ + {file = "pytest_asyncio-0.21.2-py3-none-any.whl", hash = "sha256:ab664c88bb7998f711d8039cacd4884da6430886ae8bbd4eded552ed2004f16b"}, + {file = "pytest_asyncio-0.21.2.tar.gz", hash = "sha256:d67738fc232b94b326b9d060750beb16e0074210b98dd8b58a5239fa2a154f45"}, +] + +[package.dependencies] +pytest = ">=7.0.0" + +[package.extras] +docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1.0)"] +testing = ["coverage (>=6.2)", "flaky (>=3.5.0)", "hypothesis (>=5.7.1)", "mypy (>=0.931)", "pytest-trio (>=0.7.0)"] + [[package]] name = "pytest-mock" version = "3.14.0" @@ -2605,6 +2911,22 @@ files = [ {file = "python_multipart-0.0.18.tar.gz", hash = "sha256:7a68db60c8bfb82e460637fa4750727b45af1d5e2ed215593f917f64694d34fe"}, ] +[[package]] +name = "python-ulid" +version = "3.0.0" +description = "Universally unique lexicographically sortable identifier" +optional = true +python-versions = ">=3.9" +groups = ["main"] +markers = "python_version >= \"3.9\" and extra == \"extra-proxy\" and python_version < \"3.14\"" +files = [ + {file = "python_ulid-3.0.0-py3-none-any.whl", hash = "sha256:e4c4942ff50dbd79167ad01ac725ec58f924b4018025ce22c858bfcff99a5e31"}, + {file = "python_ulid-3.0.0.tar.gz", hash = "sha256:e50296a47dc8209d28629a22fc81ca26c00982c78934bd7766377ba37ea49a9f"}, +] + +[package.extras] +pydantic = ["pydantic (>=2.0)"] + [[package]] name = "pyyaml" version = "6.0.2" @@ -2675,7 +2997,7 @@ description = "Python client for Redis database and key-value store" optional = true python-versions = ">=3.8" groups = ["main"] -markers = "extra == \"proxy\"" +markers = "python_version >= \"3.9\" and extra == \"extra-proxy\" and python_version < \"3.14\" or extra == \"proxy\"" files = [ {file = "redis-5.2.1-py3-none-any.whl", hash = "sha256:ee7e1056b9aea0f04c6c2ed59452947f34c4940ee025f5dd83e6a6418b6989e4"}, {file = "redis-5.2.1.tar.gz", hash = "sha256:16f2e22dff21d5125e8481515e386711a34cbec50f0e44413dd7d9c060a54e0f"}, @@ -2688,6 +3010,42 @@ async-timeout = {version = ">=4.0.3", markers = "python_full_version < \"3.11.3\ hiredis = ["hiredis (>=3.0.0)"] ocsp = ["cryptography (>=36.0.1)", "pyopenssl (==23.2.1)", "requests (>=2.31.0)"] +[[package]] +name = "redisvl" +version = "0.4.1" +description = "Python client library and CLI for using Redis as a vector database" +optional = true +python-versions = "<3.14,>=3.9" +groups = ["main"] +markers = "python_version >= \"3.9\" and extra == \"extra-proxy\" and python_version < \"3.14\"" +files = [ 
+ {file = "redisvl-0.4.1-py3-none-any.whl", hash = "sha256:6db5d5bc95b1fe8032a1cdae74ce1c65bc7fe9054e5429b5d34d5a91d28bae5f"}, + {file = "redisvl-0.4.1.tar.gz", hash = "sha256:fd6a36426ba94792c0efca20915c31232d4ee3cc58eb23794a62c142696401e6"}, +] + +[package.dependencies] +coloredlogs = ">=15.0,<16.0" +ml-dtypes = ">=0.4.0,<0.5.0" +numpy = [ + {version = ">=1,<2", markers = "python_version < \"3.12\""}, + {version = ">=1.26.0,<3", markers = "python_version >= \"3.12\""}, +] +pydantic = ">=2,<3" +python-ulid = ">=3.0.0,<4.0.0" +pyyaml = ">=5.4,<7.0" +redis = ">=5.0,<6.0" +tabulate = ">=0.9.0,<0.10.0" +tenacity = ">=8.2.2" + +[package.extras] +bedrock = ["boto3[bedrock] (>=1.36.0,<2.0.0)"] +cohere = ["cohere (>=4.44)"] +mistralai = ["mistralai (>=1.0.0)"] +openai = ["openai (>=1.13.0,<2.0.0)"] +sentence-transformers = ["scipy (<1.15)", "scipy (>=1.15,<2.0)", "sentence-transformers (>=3.4.0,<4.0.0)"] +vertexai = ["google-cloud-aiplatform (>=1.26,<2.0)", "protobuf (>=5.29.1,<6.0.0)"] +voyageai = ["voyageai (>=0.2.2)"] + [[package]] name = "referencing" version = "0.35.1" @@ -2961,15 +3319,15 @@ files = [ [[package]] name = "rq" -version = "2.1.0" +version = "2.2.0" description = "RQ is a simple, lightweight, library for creating background jobs, and processing them." optional = true python-versions = ">=3.8" groups = ["main"] markers = "extra == \"proxy\"" files = [ - {file = "rq-2.1.0-py3-none-any.whl", hash = "sha256:3c6892c6ca848e5fb47c1875399a66f13656bf0e123bf725d9aa9a12718e2fdf"}, - {file = "rq-2.1.0.tar.gz", hash = "sha256:764585b6cab69ef1412f4aee523347e5aa7ece3ca175c118b1d92223dd8c2826"}, + {file = "rq-2.2.0-py3-none-any.whl", hash = "sha256:dacbfe1ccb79a45c8cd95dec7951620679fa0195570b63da3f9347622d33accc"}, + {file = "rq-2.2.0.tar.gz", hash = "sha256:b636760f1e4c183022031c142faa0483e687885824e9732ba2953f994104e203"}, ] [package.dependencies] @@ -3056,6 +3414,39 @@ typing-extensions = {version = ">=3.10.0", markers = "python_version < \"3.10\"" [package.extras] full = ["httpx (>=0.27.0,<0.29.0)", "itsdangerous", "jinja2", "python-multipart (>=0.0.18)", "pyyaml"] +[[package]] +name = "tabulate" +version = "0.9.0" +description = "Pretty-print tabular data" +optional = true +python-versions = ">=3.7" +groups = ["main"] +markers = "python_version >= \"3.9\" and extra == \"extra-proxy\" and python_version < \"3.14\"" +files = [ + {file = "tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f"}, + {file = "tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c"}, +] + +[package.extras] +widechars = ["wcwidth"] + +[[package]] +name = "tenacity" +version = "9.0.0" +description = "Retry code until it succeeds" +optional = true +python-versions = ">=3.8" +groups = ["main"] +markers = "python_version >= \"3.9\" and extra == \"extra-proxy\" and python_version < \"3.14\"" +files = [ + {file = "tenacity-9.0.0-py3-none-any.whl", hash = "sha256:93de0c98785b27fcf659856aa9f54bfbd399e29969b0621bc7f762bd441b4539"}, + {file = "tenacity-9.0.0.tar.gz", hash = "sha256:807f37ca97d62aa361264d497b0e31e92b8027044942bfa756160d908320d73b"}, +] + +[package.extras] +doc = ["reno", "sphinx"] +test = ["pytest", "tornado (>=4.5)", "typeguard"] + [[package]] name = "tiktoken" version = "0.7.0" @@ -3234,15 +3625,15 @@ files = [ [[package]] name = "tzdata" -version = "2025.1" +version = "2025.2" description = "Provider of IANA time zone data" optional = true python-versions = ">=2" groups = ["main"] markers = "extra 
== \"proxy\" and platform_system == \"Windows\"" files = [ - {file = "tzdata-2025.1-py2.py3-none-any.whl", hash = "sha256:7e127113816800496f027041c570f50bcd464a020098a3b6b199517772303639"}, - {file = "tzdata-2025.1.tar.gz", hash = "sha256:24894909e88cdb28bd1636c6887801df64cb485bd593f2fd83ef29075a81d694"}, + {file = "tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8"}, + {file = "tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9"}, ] [[package]] @@ -3607,10 +3998,10 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", type = ["pytest-mypy"] [extras] -extra-proxy = ["azure-identity", "azure-keyvault-secrets", "google-cloud-kms", "prisma", "resend"] +extra-proxy = ["azure-identity", "azure-keyvault-secrets", "google-cloud-kms", "prisma", "redisvl", "resend"] proxy = ["PyJWT", "apscheduler", "backoff", "boto3", "cryptography", "fastapi", "fastapi-sso", "gunicorn", "orjson", "pynacl", "python-multipart", "pyyaml", "rq", "uvicorn", "uvloop", "websockets"] [metadata] lock-version = "2.1" python-versions = ">=3.8.1,<4.0, !=3.9.7" -content-hash = "55078af47c1af79bd3ebadacb7ba92844d550a577bb0c49f5096693701ea4322" +content-hash = "6850286db1cedd6507c4688767fde27c2f8cc8e657a0a0d792656664eec63d5d" diff --git a/pyproject.toml b/pyproject.toml index 58fd5a7ded..a34f0c5dbb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,6 +53,7 @@ resend = {version = "^0.8.0", optional = true} pynacl = {version = "^1.5.0", optional = true} websockets = {version = "^13.1.0", optional = true} boto3 = {version = "1.34.34", optional = true} +redisvl = {version = "^0.4.1", optional = true, markers = "python_version >= '3.9' and python_version < '3.14'"} [tool.poetry.extras] proxy = [ @@ -80,6 +81,7 @@ extra_proxy = [ "azure-keyvault-secrets", "google-cloud-kms", "resend", + "redisvl" ] [tool.isort] @@ -94,6 +96,7 @@ black = "^23.12.0" mypy = "^1.0" pytest = "^7.4.3" pytest-mock = "^3.12.0" +pytest-asyncio = "^0.21.1" [build-system] requires = ["poetry-core", "wheel"] diff --git a/requirements.txt b/requirements.txt index 93a23e0f2f..621a4d1dd2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,8 +9,8 @@ uvicorn==0.29.0 # server dep gunicorn==23.0.0 # server dep uvloop==0.21.0 # uvicorn dep, gives us much better performance under load boto3==1.34.34 # aws bedrock/sagemaker calls -redis==5.0.0 # caching -numpy==2.1.1 # semantic caching +redis==5.2.1 # redis caching +redisvl==0.4.1 # semantic caching prisma==0.11.0 # for db mangum==0.17.0 # for aws lambda functions pynacl==1.5.0 # for encrypting keys diff --git a/tests/litellm/caching/test_redis_cache.py b/tests/litellm/caching/test_redis_cache.py index 10064c0b13..8750242706 100644 --- a/tests/litellm/caching/test_redis_cache.py +++ b/tests/litellm/caching/test_redis_cache.py @@ -1,13 +1,8 @@ -import asyncio -import json import os import sys -import time from unittest.mock import MagicMock, patch -import httpx import pytest -import respx from fastapi.testclient import TestClient sys.path.insert( @@ -18,9 +13,18 @@ from unittest.mock import AsyncMock from litellm.caching.redis_cache import RedisCache +@pytest.fixture +def redis_no_ping(): + """Patch RedisCache initialization to prevent async ping tasks from being created""" + with patch('asyncio.get_running_loop') as mock_get_loop: + # Either raise an exception or return a mock that will handle the task creation + mock_get_loop.side_effect = RuntimeError("No running 
event loop") + yield + + @pytest.mark.parametrize("namespace", [None, "test"]) @pytest.mark.asyncio -async def test_redis_cache_async_increment(namespace, monkeypatch): +async def test_redis_cache_async_increment(namespace, monkeypatch, redis_no_ping): monkeypatch.setenv("REDIS_HOST", "https://my-test-host") redis_cache = RedisCache(namespace=namespace) # Create an AsyncMock for the Redis client @@ -47,10 +51,46 @@ async def test_redis_cache_async_increment(namespace, monkeypatch): @pytest.mark.asyncio -async def test_redis_client_init_with_socket_timeout(monkeypatch): +async def test_redis_client_init_with_socket_timeout(monkeypatch, redis_no_ping): monkeypatch.setenv("REDIS_HOST", "my-fake-host") redis_cache = RedisCache(socket_timeout=1.0) assert redis_cache.redis_kwargs["socket_timeout"] == 1.0 client = redis_cache.init_async_client() assert client is not None assert client.connection_pool.connection_kwargs["socket_timeout"] == 1.0 + + +@pytest.mark.asyncio +async def test_redis_cache_async_batch_get_cache(monkeypatch, redis_no_ping): + monkeypatch.setenv("REDIS_HOST", "https://my-test-host") + redis_cache = RedisCache() + + # Create an AsyncMock for the Redis client + mock_redis_instance = AsyncMock() + + # Make sure the mock can be used as an async context manager + mock_redis_instance.__aenter__.return_value = mock_redis_instance + mock_redis_instance.__aexit__.return_value = None + + # Setup the return value for mget + mock_redis_instance.mget.return_value = [ + b'{"key1": "value1"}', + None, + b'{"key3": "value3"}' + ] + + test_keys = ["key1", "key2", "key3"] + + with patch.object( + redis_cache, "init_async_client", return_value=mock_redis_instance + ): + # Call async_batch_get_cache + result = await redis_cache.async_batch_get_cache(key_list=test_keys) + + # Verify mget was called with the correct keys + mock_redis_instance.mget.assert_called_once() + + # Check that results were properly decoded + assert result["key1"] == {"key1": "value1"} + assert result["key2"] is None + assert result["key3"] == {"key3": "value3"} diff --git a/tests/litellm/caching/test_redis_semantic_cache.py b/tests/litellm/caching/test_redis_semantic_cache.py new file mode 100644 index 0000000000..142f7990c4 --- /dev/null +++ b/tests/litellm/caching/test_redis_semantic_cache.py @@ -0,0 +1,130 @@ +import os +import sys +from unittest.mock import MagicMock, patch, AsyncMock + +import pytest + +sys.path.insert( + 0, os.path.abspath("../../..") +) # Adds the parent directory to the system path + + +# Tests for RedisSemanticCache +def test_redis_semantic_cache_initialization(monkeypatch): + # Mock the redisvl import + semantic_cache_mock = MagicMock() + with patch.dict("sys.modules", { + "redisvl.extensions.llmcache": MagicMock(SemanticCache=semantic_cache_mock), + "redisvl.utils.vectorize": MagicMock(CustomTextVectorizer=MagicMock()) + }): + from litellm.caching.redis_semantic_cache import RedisSemanticCache + + # Set environment variables + monkeypatch.setenv("REDIS_HOST", "localhost") + monkeypatch.setenv("REDIS_PORT", "6379") + monkeypatch.setenv("REDIS_PASSWORD", "test_password") + + # Initialize the cache with a similarity threshold + redis_semantic_cache = RedisSemanticCache(similarity_threshold=0.8) + + # Verify the semantic cache was initialized with correct parameters + assert redis_semantic_cache.similarity_threshold == 0.8 + + # Use pytest.approx for floating point comparison to handle precision issues + assert redis_semantic_cache.distance_threshold == pytest.approx(0.2, abs=1e-10) + assert 
diff --git a/tests/litellm/caching/test_redis_semantic_cache.py b/tests/litellm/caching/test_redis_semantic_cache.py
new file mode 100644
index 0000000000..142f7990c4
--- /dev/null
+++ b/tests/litellm/caching/test_redis_semantic_cache.py
@@ -0,0 +1,130 @@
+import os
+import sys
+from unittest.mock import MagicMock, patch, AsyncMock
+
+import pytest
+
+sys.path.insert(
+    0, os.path.abspath("../../..")
+)  # Adds the parent directory to the system path
+
+
+# Tests for RedisSemanticCache
+def test_redis_semantic_cache_initialization(monkeypatch):
+    # Mock the redisvl import
+    semantic_cache_mock = MagicMock()
+    with patch.dict("sys.modules", {
+        "redisvl.extensions.llmcache": MagicMock(SemanticCache=semantic_cache_mock),
+        "redisvl.utils.vectorize": MagicMock(CustomTextVectorizer=MagicMock())
+    }):
+        from litellm.caching.redis_semantic_cache import RedisSemanticCache
+
+        # Set environment variables
+        monkeypatch.setenv("REDIS_HOST", "localhost")
+        monkeypatch.setenv("REDIS_PORT", "6379")
+        monkeypatch.setenv("REDIS_PASSWORD", "test_password")
+
+        # Initialize the cache with a similarity threshold
+        redis_semantic_cache = RedisSemanticCache(similarity_threshold=0.8)
+
+        # Verify the semantic cache was initialized with correct parameters
+        assert redis_semantic_cache.similarity_threshold == 0.8
+
+        # Use pytest.approx for floating point comparison to handle precision issues
+        assert redis_semantic_cache.distance_threshold == pytest.approx(0.2, abs=1e-10)
+        assert redis_semantic_cache.embedding_model == "text-embedding-ada-002"
+
+        # Test initialization with missing similarity_threshold
+        with pytest.raises(ValueError, match="similarity_threshold must be provided"):
+            RedisSemanticCache()
+
+
+def test_redis_semantic_cache_get_cache(monkeypatch):
+    # Mock the redisvl import and embedding function
+    semantic_cache_mock = MagicMock()
+    custom_vectorizer_mock = MagicMock()
+
+    with patch.dict("sys.modules", {
+        "redisvl.extensions.llmcache": MagicMock(SemanticCache=semantic_cache_mock),
+        "redisvl.utils.vectorize": MagicMock(CustomTextVectorizer=custom_vectorizer_mock)
+    }):
+        from litellm.caching.redis_semantic_cache import RedisSemanticCache
+
+        # Set environment variables
+        monkeypatch.setenv("REDIS_HOST", "localhost")
+        monkeypatch.setenv("REDIS_PORT", "6379")
+        monkeypatch.setenv("REDIS_PASSWORD", "test_password")
+
+        # Initialize cache
+        redis_semantic_cache = RedisSemanticCache(similarity_threshold=0.8)
+
+        # Mock the llmcache.check method to return a result
+        mock_result = [
+            {
+                "prompt": "What is the capital of France?",
+                "response": '{"content": "Paris is the capital of France."}',
+                "vector_distance": 0.1  # Distance of 0.1 means similarity of 0.9
+            }
+        ]
+        redis_semantic_cache.llmcache.check = MagicMock(return_value=mock_result)
+
+        # Mock the embedding function
+        with patch("litellm.embedding", return_value={"data": [{"embedding": [0.1, 0.2, 0.3]}]}):
+            # Test get_cache with a message
+            result = redis_semantic_cache.get_cache(
+                key="test_key",
+                messages=[{"content": "What is the capital of France?"}]
+            )
+
+            # Verify result is properly parsed
+            assert result == {"content": "Paris is the capital of France."}
+
+            # Verify llmcache.check was called
+            redis_semantic_cache.llmcache.check.assert_called_once()
+
+
+@pytest.mark.asyncio
+async def test_redis_semantic_cache_async_get_cache(monkeypatch):
+    # Mock the redisvl import
+    semantic_cache_mock = MagicMock()
+    custom_vectorizer_mock = MagicMock()
+
+    with patch.dict("sys.modules", {
+        "redisvl.extensions.llmcache": MagicMock(SemanticCache=semantic_cache_mock),
+        "redisvl.utils.vectorize": MagicMock(CustomTextVectorizer=custom_vectorizer_mock)
+    }):
+        from litellm.caching.redis_semantic_cache import RedisSemanticCache
+
+        # Set environment variables
+        monkeypatch.setenv("REDIS_HOST", "localhost")
+        monkeypatch.setenv("REDIS_PORT", "6379")
+        monkeypatch.setenv("REDIS_PASSWORD", "test_password")
+
+        # Initialize cache
+        redis_semantic_cache = RedisSemanticCache(similarity_threshold=0.8)
+
+        # Mock the async methods
+        mock_result = [
+            {
+                "prompt": "What is the capital of France?",
+                "response": '{"content": "Paris is the capital of France."}',
+                "vector_distance": 0.1  # Distance of 0.1 means similarity of 0.9
+            }
+        ]
+
+        redis_semantic_cache.llmcache.acheck = AsyncMock(return_value=mock_result)
+        redis_semantic_cache._get_async_embedding = AsyncMock(return_value=[0.1, 0.2, 0.3])
+
+        # Test async_get_cache with a message
+        result = await redis_semantic_cache.async_get_cache(
+            key="test_key",
+            messages=[{"content": "What is the capital of France?"}],
+            metadata={}
+        )
+
+        # Verify result is properly parsed
+        assert result == {"content": "Paris is the capital of France."}
+
+        # Verify methods were called
+        redis_semantic_cache._get_async_embedding.assert_called_once()
+        redis_semantic_cache.llmcache.acheck.assert_called_once()
\ No newline at end of file
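These tests stub redisvl via `patch.dict("sys.modules", ...)` so `RedisSemanticCache` can be imported without the optional dependency installed. The same trick in isolation, with a hypothetical module name:

```python
import sys
from unittest.mock import MagicMock, patch

# "some_optional_dep" is a placeholder; any name already present in
# sys.modules is returned by `import` without touching the filesystem.
fake_module = MagicMock()
with patch.dict(sys.modules, {"some_optional_dep": fake_module}):
    import some_optional_dep

    assert some_optional_dep is fake_module
```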
diff --git a/tests/local_testing/test_caching.py b/tests/local_testing/test_caching.py
index ac04d06c12..e4815f2023 100644
--- a/tests/local_testing/test_caching.py
+++ b/tests/local_testing/test_caching.py
@@ -794,7 +794,7 @@ def test_redis_cache_completion():
     response3 = completion(
         model="gpt-3.5-turbo", messages=messages, caching=True, temperature=0.5
     )
-    response4 = completion(model="azure/chatgpt-v-2", messages=messages, caching=True)
+    response4 = completion(model="gpt-4o-mini", messages=messages, caching=True)
 
     print("\nresponse 1", response1)
     print("\nresponse 2", response2)
@@ -1690,20 +1690,12 @@ def test_cache_context_managers():
     print("VARS of litellm.cache", vars(litellm.cache))
 
 
-# test_cache_context_managers()
-
-
-@pytest.mark.skip(reason="beta test - new redis semantic cache")
 def test_redis_semantic_cache_completion():
     litellm.set_verbose = True
     import logging
 
     logging.basicConfig(level=logging.DEBUG)
 
-    random_number = random.randint(
-        1, 100000
-    )  # add a random number to ensure it's always adding /reading from cache
-
     print("testing semantic caching")
     litellm.cache = Cache(
         type="redis-semantic",
@@ -1718,33 +1710,30 @@ def test_redis_semantic_cache_completion():
         messages=[
             {
                 "role": "user",
-                "content": f"write a one sentence poem about: {random_number}",
+                "content": "write a one sentence poem about summer",
             }
         ],
         max_tokens=20,
     )
     print(f"response1: {response1}")
 
-    random_number = random.randint(1, 100000)
-
     response2 = completion(
         model="gpt-3.5-turbo",
         messages=[
             {
                 "role": "user",
-                "content": f"write a one sentence poem about: {random_number}",
+                "content": "write a one sentence poem about summertime",
             }
         ],
         max_tokens=20,
     )
-    print(f"response2: {response1}")
+    print(f"response2: {response2}")
     assert response1.id == response2.id
 
 
 # test_redis_cache_completion()
 
 
-@pytest.mark.skip(reason="beta test - new redis semantic cache")
 @pytest.mark.asyncio
 async def test_redis_semantic_cache_acompletion():
     litellm.set_verbose = True
@@ -1752,38 +1741,32 @@ async def test_redis_semantic_cache_acompletion():
 
     logging.basicConfig(level=logging.DEBUG)
 
-    random_number = random.randint(
-        1, 100000
-    )  # add a random number to ensure it's always adding / reading from cache
-
    print("testing semantic caching")
     litellm.cache = Cache(
         type="redis-semantic",
         host=os.environ["REDIS_HOST"],
         port=os.environ["REDIS_PORT"],
         password=os.environ["REDIS_PASSWORD"],
-        similarity_threshold=0.8,
-        redis_semantic_cache_use_async=True,
+        similarity_threshold=0.7,
     )
     response1 = await litellm.acompletion(
         model="gpt-3.5-turbo",
         messages=[
             {
                 "role": "user",
-                "content": f"write a one sentence poem about: {random_number}",
+                "content": "write a one sentence poem about summer",
             }
         ],
         max_tokens=5,
     )
     print(f"response1: {response1}")
 
-    random_number = random.randint(1, 100000)
     response2 = await litellm.acompletion(
         model="gpt-3.5-turbo",
         messages=[
             {
                 "role": "user",
-                "content": f"write a one sentence poem about: {random_number}",
+                "content": "write a one sentence poem about summertime",
             }
         ],
         max_tokens=5,