forked from phoenix/litellm-mirror
caching updates
parent 47c1f57a24
commit 0ab62f13e8
4 changed files with 31 additions and 7 deletions
@@ -30,6 +30,7 @@ class RedisCache():
        # cached_response is in `b{}` format; convert it to a ModelResponse dict
        cached_response = cached_response.decode("utf-8")  # Convert bytes to string
        cached_response = json.loads(cached_response)  # Convert string to dictionary
        cached_response['cache'] = True  # set cache-hit flag to True
        return cached_response
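For context, a minimal sketch of the Redis read path this hunk touches, assuming a redis.Redis client and JSON-serialized responses; everything outside the diffed lines (class name, constructor, set_cache) is illustrative rather than litellm's exact implementation:

import json
import redis

class RedisCacheSketch:
    def __init__(self, host, port, password=None):
        # plain redis client; litellm's actual constructor may differ
        self.redis_client = redis.Redis(host=host, port=port, password=password)

    def set_cache(self, key, value):
        # store the response as JSON so it can be decoded on the way back out
        self.redis_client.set(key, json.dumps(value))

    def get_cache(self, key):
        cached_response = self.redis_client.get(key)
        if cached_response is None:
            return None  # cache miss
        # redis returns bytes: decode, parse JSON, then flag it as a cache hit
        cached_response = cached_response.decode("utf-8")
        cached_response = json.loads(cached_response)
        cached_response["cache"] = True
        return cached_response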
@@ -114,6 +114,7 @@ def completion(
    top_k=40,
    request_timeout=0, # unused var for old version of OpenAI API
    fallbacks=[],
    caching = False,
) -> ModelResponse:
    args = locals()
    try:
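The new caching flag is opt-in per call. A hedged usage sketch; the model name is a placeholder, and Cache() with no arguments is assumed to give an in-process cache (the tests below show the Redis variant):

import litellm
from litellm import completion
from litellm.caching import Cache

litellm.cache = Cache()  # assumed in-process default; Redis setup is shown in the tests below
messages = [{"role": "user", "content": "Hey, how's it going?"}]

response1 = completion(model="gpt-3.5-turbo", messages=messages, caching=True)   # calls the model
response2 = completion(model="gpt-3.5-turbo", messages=messages, caching=True)   # can be served from cache
response3 = completion(model="gpt-3.5-turbo", messages=messages, caching=False)  # skips the cache check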
@@ -12,7 +12,7 @@ import pytest
import litellm
from litellm import embedding, completion
from litellm.caching import Cache
litellm.set_verbose=True
# litellm.set_verbose=True

messages = [{"role": "user", "content": "who is ishaan Github? "}]
# comment
@@ -270,5 +270,26 @@ def test_redis_cache_completion():

# test_redis_cache_completion()

# redis cache with custom keys
def custom_get_cache_key(*args, **kwargs):
    # return key to use for your cache:
    key = kwargs.get("model", "") + str(kwargs.get("messages", "")) + str(kwargs.get("temperature", "")) + str(kwargs.get("logit_bias", ""))
    print("key for cache", key)
    return key

def test_custom_redis_cache_with_key():
    messages = [{"role": "user", "content": "how many stars does litellm have? "}]
    litellm.cache = Cache(type="redis", host=os.environ['REDIS_HOST'], port=os.environ['REDIS_PORT'], password=os.environ['REDIS_PASSWORD'])
    litellm.cache.get_cache_key = custom_get_cache_key

    response1 = completion(model="gpt-3.5-turbo", messages=messages, temperature=0.1, caching=True)
    response2 = completion(model="gpt-3.5-turbo", messages=messages, temperature=0.1, caching=True)
    response3 = completion(model="gpt-3.5-turbo", messages=messages, temperature=0.1, caching=False)

    print(f"response1: {response1}")
    print(f"response2: {response2}")
    print(f"response3: {response3}")

# test_custom_redis_cache_with_key()
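The test works by monkey-patching get_cache_key on the cache object. A hedged sketch of how such a hook typically participates in lookups; the class and method bodies here are illustrative, not litellm's actual Cache internals:

class CacheWithKeyHook:
    def __init__(self):
        self.store = {}  # in-memory stand-in for the real backend

    def get_cache_key(self, *args, **kwargs):
        # default key derived from the call arguments; the test above swaps
        # this method out for custom_get_cache_key
        return kwargs.get("model", "") + str(kwargs.get("messages", ""))

    def get_cache(self, *args, **kwargs):
        # every lookup routes through get_cache_key, so overriding that one
        # method changes which calls count as "the same request"
        return self.store.get(self.get_cache_key(*args, **kwargs))

    def add_cache(self, result, *args, **kwargs):
        self.store[self.get_cache_key(*args, **kwargs)] = result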
@@ -520,12 +520,13 @@ def client(original_function):
        if (litellm.caching or litellm.caching_with_models) and litellm.cache is None:
            litellm.cache = Cache()

        # checking cache
        if (litellm.cache != None or litellm.caching or litellm.caching_with_models):
            print_verbose(f"LiteLLM: Checking Cache")
            cached_result = litellm.cache.get_cache(*args, **kwargs)
            if cached_result != None:
                return cached_result
        if kwargs.get("caching", False): # allow users to control returning cached responses from the completion function
            # checking cache
            if (litellm.cache != None or litellm.caching or litellm.caching_with_models):
                print_verbose(f"LiteLLM: Checking Cache")
                cached_result = litellm.cache.get_cache(*args, **kwargs)
                if cached_result != None:
                    return cached_result

        # MODEL CALL
        result = original_function(*args, **kwargs)
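The net effect of this hunk is that the cache is consulted only when the caller explicitly passes caching=True, rather than whenever a global cache happens to be configured. A hedged sketch of that gating pattern as a standalone decorator (not litellm's exact client wrapper):

import functools

import litellm

def with_optional_caching(original_function):
    @functools.wraps(original_function)
    def wrapper(*args, **kwargs):
        # only look in the cache when the caller opts in with caching=True
        if kwargs.get("caching", False) and litellm.cache is not None:
            cached_result = litellm.cache.get_cache(*args, **kwargs)
            if cached_result is not None:
                return cached_result
        # cache miss, caching disabled, or no cache configured: call the model
        return original_function(*args, **kwargs)
    return wrapper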