caching updates

ishaan-jaff 2023-09-08 18:06:46 -07:00
parent 47c1f57a24
commit 0ab62f13e8
4 changed files with 31 additions and 7 deletions

@@ -30,6 +30,7 @@ class RedisCache():
 # cached_response is stored as bytes (b'{...}'); convert it back to a response dictionary
 cached_response = cached_response.decode("utf-8") # Convert bytes to string
 cached_response = json.loads(cached_response) # Convert string to dictionary
+cached_response['cache'] = True # set cache-hit flag to True
 return cached_response
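
The `cache` flag set on a hit lets a caller tell a cached response from a fresh one. A minimal sketch of checking it, reusing the Redis-backed Cache setup from this commit's tests (the environment variables are placeholders):

import os
import litellm
from litellm import completion
from litellm.caching import Cache

# Redis-backed cache, configured as in this commit's test file
litellm.cache = Cache(
    type="redis",
    host=os.environ["REDIS_HOST"],
    port=os.environ["REDIS_PORT"],
    password=os.environ["REDIS_PASSWORD"],
)

messages = [{"role": "user", "content": "hello"}]
response = completion(model="gpt-3.5-turbo", messages=messages, caching=True)

# On a hit, get_cache returns the stored dictionary with the flag set above;
# on a miss, completion returns a regular ModelResponse.
if isinstance(response, dict) and response.get("cache"):
    print("served from cache")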

@@ -114,6 +114,7 @@ def completion(
     top_k=40,
     request_timeout=0, # unused var for old version of OpenAI API
     fallbacks=[],
+    caching = False,
 ) -> ModelResponse:
     args = locals()
     try:
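
The new `caching` parameter (default False) makes reading from the cache an explicit per-call opt-in rather than a purely global setting; the client-wrapper hunk below enforces this. A two-line sketch of the contrast, with model and messages as placeholders:

r1 = completion(model="gpt-3.5-turbo", messages=messages, caching=True)   # may be served from cache
r2 = completion(model="gpt-3.5-turbo", messages=messages, caching=False)  # always calls the API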

@@ -12,7 +12,7 @@ import pytest
 import litellm
 from litellm import embedding, completion
 from litellm.caching import Cache
-litellm.set_verbose=True
+# litellm.set_verbose=True
 messages = [{"role": "user", "content": "who is ishaan Github? "}]
 # comment
@@ -270,5 +270,26 @@ def test_redis_cache_completion():
 # test_redis_cache_completion()
+
+# redis cache with custom keys
+def custom_get_cache_key(*args, **kwargs):
+    # return key to use for your cache:
+    key = kwargs.get("model", "") + str(kwargs.get("messages", "")) + str(kwargs.get("temperature", "")) + str(kwargs.get("logit_bias", ""))
+    print("key for cache", key)
+    return key
+
+def test_custom_redis_cache_with_key():
+    messages = [{"role": "user", "content": "how many stars does litellm have? "}]
+    litellm.cache = Cache(type="redis", host=os.environ['REDIS_HOST'], port=os.environ['REDIS_PORT'], password=os.environ['REDIS_PASSWORD'])
+    litellm.cache.get_cache_key = custom_get_cache_key
+
+    response1 = completion(model="gpt-3.5-turbo", messages=messages, temperature=0.1, caching=True)
+    response2 = completion(model="gpt-3.5-turbo", messages=messages, temperature=0.1, caching=True)
+    response3 = completion(model="gpt-3.5-turbo", messages=messages, temperature=0.1, caching=False)
+
+    print(f"response1: {response1}")
+    print(f"response2: {response2}")
+    print(f"response3: {response3}")
+
+# test_custom_redis_cache_with_key()
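
Note the design choice in the custom key: it concatenates model, messages, temperature, and logit_bias, so a cached response is reused only when all four match. Omitting a field (say, temperature) from the key would make calls that differ only in that field share cache entries.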

@@ -520,12 +520,13 @@ def client(original_function):
 if (litellm.caching or litellm.caching_with_models) and litellm.cache is None:
     litellm.cache = Cache()
-# checking cache
-if (litellm.cache != None or litellm.caching or litellm.caching_with_models):
-    print_verbose(f"LiteLLM: Checking Cache")
-    cached_result = litellm.cache.get_cache(*args, **kwargs)
-    if cached_result != None:
-        return cached_result
+if kwargs.get("caching", False): # allow users to control returning cached responses from the completion function
+    # checking cache
+    if (litellm.cache != None or litellm.caching or litellm.caching_with_models):
+        print_verbose(f"LiteLLM: Checking Cache")
+        cached_result = litellm.cache.get_cache(*args, **kwargs)
+        if cached_result != None:
+            return cached_result
 # MODEL CALL
 result = original_function(*args, **kwargs)
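
Gating the read on kwargs.get("caching", False) keeps the wrapper's check-then-call shape. A minimal sketch of that overall pattern, assuming litellm's Cache.add_cache(result, *args, **kwargs) for the write-back step, which this hunk does not show:

import functools
import litellm

def client(original_function):
    @functools.wraps(original_function)
    def wrapper(*args, **kwargs):
        # read side: consult the cache only when this call opted in
        if kwargs.get("caching", False) and litellm.cache is not None:
            cached_result = litellm.cache.get_cache(*args, **kwargs)
            if cached_result is not None:
                return cached_result
        # MODEL CALL
        result = original_function(*args, **kwargs)
        # write side (assumed): store the fresh result under the same key
        if kwargs.get("caching", False) and litellm.cache is not None:
            litellm.cache.add_cache(result, *args, **kwargs)
        return result
    return wrapper

This is simplified; the actual wrapper in the hunk above also honors the global litellm.caching / litellm.caching_with_models flags.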