From 4b798624dcaeb2bc9ae5c47ddded57483fc0966b Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 6 Feb 2024 10:53:28 -0800 Subject: [PATCH] (docs) redis cache --- docs/my-website/docs/caching/redis_cache.md | 68 +++++++++++++++++++-- 1 file changed, 64 insertions(+), 4 deletions(-) diff --git a/docs/my-website/docs/caching/redis_cache.md b/docs/my-website/docs/caching/redis_cache.md index 8a580f087c..7b21d35b6c 100644 --- a/docs/my-website/docs/caching/redis_cache.md +++ b/docs/my-website/docs/caching/redis_cache.md @@ -1,11 +1,11 @@ import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; -# Caching - In-Memory, Redis, s3 +# Caching - In-Memory, Redis, s3, Redis Semantic Cache [**See Code**](https://github.com/BerriAI/litellm/blob/main/litellm/caching.py) -## Initialize Cache - In Memory, Redis, s3 Bucket +## Initialize Cache - In Memory, Redis, s3 Bucket, Redis Semantic Cache @@ -18,7 +18,7 @@ pip install redis ``` For the hosted version you can setup your own Redis DB here: https://app.redislabs.com/ -### Quick Start + ```python import litellm from litellm import completion @@ -55,7 +55,7 @@ Set AWS environment variables AWS_ACCESS_KEY_ID = "AKI*******" AWS_SECRET_ACCESS_KEY = "WOl*****" ``` -### Quick Start + ```python import litellm from litellm import completion @@ -80,6 +80,66 @@ response2 = completion( + + +Install redis +```shell +pip install redisvl==0.0.7 +``` + +For the hosted version you can setup your own Redis DB here: https://app.redislabs.com/ + +```python +import litellm +from litellm import completion +from litellm.caching import Cache + +random_number = random.randint( + 1, 100000 +) # add a random number to ensure it's always adding / reading from cache + +print("testing semantic caching") +litellm.cache = Cache( + type="redis-semantic", + host=os.environ["REDIS_HOST"], + port=os.environ["REDIS_PORT"], + password=os.environ["REDIS_PASSWORD"], + similarity_threshold=0.8, + 
redis_semantic_cache_embedding_model="text-embedding-ada-002", # this model is passed to litellm.embedding(), any litellm.embedding() model is supported here +) +response1 = completion( +    model="gpt-3.5-turbo", +    messages=[ +        { +            "role": "user", +            "content": f"write a one sentence poem about: {random_number}", +        } +    ], +    max_tokens=20, +) +print(f"response1: {response1}") + +random_number = random.randint(1, 100000) + +response2 = completion( +    model="gpt-3.5-turbo", +    messages=[ +        { +            "role": "user", +            "content": f"write a one sentence poem about: {random_number}", +        } +    ], +    max_tokens=20, +) +print(f"response2: {response2}") +assert response1.id == response2.id +# response1 == response2, response 1 is cached +``` + + + + + ### Quick Start