Mirror of https://github.com/BerriAI/litellm.git

commit 0ab62f13e8: caching updates
parent 47c1f57a24
4 changed files with 31 additions and 7 deletions
```diff
@@ -30,6 +30,7 @@ class RedisCache():
             # cached_response is in `b{}` convert it to ModelResponse
             cached_response = cached_response.decode("utf-8") # Convert bytes to string
             cached_response = json.loads(cached_response) # Convert string to dictionary
+            cached_response['cache'] = True # set cache-hit flag to True
             return cached_response
```
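The added line tags every Redis hit so callers can tell a cached response from a fresh one. A minimal, self-contained sketch of that decode-and-flag round trip, with a plain dict standing in for the Redis client (redis-py returns stored values as bytes, which is why the decode step exists):

```python
import json

store = {}  # plain dict standing in for a Redis client

def set_cache(key, value):
    # redis-py stores and returns values as bytes
    store[key] = json.dumps(value).encode("utf-8")

def get_cache(key):
    cached_response = store.get(key)
    if cached_response is not None:
        cached_response = cached_response.decode("utf-8")  # Convert bytes to string
        cached_response = json.loads(cached_response)      # Convert string to dictionary
        cached_response['cache'] = True                    # set cache-hit flag to True
        return cached_response

set_cache("k", {"choices": [{"message": {"content": "hi"}}]})
print(get_cache("k")["cache"])  # True
```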
```diff
@@ -114,6 +114,7 @@ def completion(
     top_k=40,
     request_timeout=0, # unused var for old version of OpenAI API
     fallbacks=[],
+    caching = False,
 ) -> ModelResponse:
     args = locals()
     try:
```
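With the new `caching` parameter, cache reads become a per-call opt-in rather than a global switch. A sketch of how a caller might use it, assuming an OpenAI key is configured and using the same no-argument `Cache()` constructor that appears in the last hunk below:

```python
import litellm
from litellm import completion
from litellm.caching import Cache

litellm.cache = Cache()  # default cache; the Redis variant is shown in the tests below

messages = [{"role": "user", "content": "Hey, how's it going?"}]
# First call is a miss and hits the API; the second identical call
# can be served from the cache because caching=True.
response1 = completion(model="gpt-3.5-turbo", messages=messages, caching=True)
response2 = completion(model="gpt-3.5-turbo", messages=messages, caching=True)
```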
```diff
@@ -12,7 +12,7 @@ import pytest
 import litellm
 from litellm import embedding, completion
 from litellm.caching import Cache
-litellm.set_verbose=True
+# litellm.set_verbose=True
 
 messages = [{"role": "user", "content": "who is ishaan Github? "}]
 # comment
```
```diff
@@ -270,5 +270,26 @@ def test_redis_cache_completion():
 
 # test_redis_cache_completion()
 
+
+# redis cache with custom keys
+def custom_get_cache_key(*args, **kwargs):
+    # return key to use for your cache:
+    key = kwargs.get("model", "") + str(kwargs.get("messages", "")) + str(kwargs.get("temperature", "")) + str(kwargs.get("logit_bias", ""))
+    print("key for cache", key)
+    return key
+
+def test_custom_redis_cache_with_key():
+    messages = [{"role": "user", "content": "how many stars does litellm have? "}]
+    litellm.cache = Cache(type="redis", host=os.environ['REDIS_HOST'], port=os.environ['REDIS_PORT'], password=os.environ['REDIS_PASSWORD'])
+    litellm.cache.get_cache_key = custom_get_cache_key
+
+    response1 = completion(model="gpt-3.5-turbo", messages=messages, temperature=0.1, caching=True)
+    response2 = completion(model="gpt-3.5-turbo", messages=messages, temperature=0.1, caching=True)
+    response3 = completion(model="gpt-3.5-turbo", messages=messages, temperature=0.1, caching=False)
+
+    print(f"response1: {response1}")
+    print(f"response2: {response2}")
+    print(f"response3: {response3}")
+
+# test_custom_redis_cache_with_key()
```
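The custom key concatenates model, messages, temperature, and logit_bias, so any two calls that agree on those fields resolve to the same cache entry. That property can be checked offline, without Redis or an API key:

```python
def custom_get_cache_key(*args, **kwargs):
    # same key function as in the test above
    return kwargs.get("model", "") + str(kwargs.get("messages", "")) + str(kwargs.get("temperature", "")) + str(kwargs.get("logit_bias", ""))

msgs = [{"role": "user", "content": "how many stars does litellm have? "}]
k1 = custom_get_cache_key(model="gpt-3.5-turbo", messages=msgs, temperature=0.1)
k2 = custom_get_cache_key(model="gpt-3.5-turbo", messages=msgs, temperature=0.1)
k3 = custom_get_cache_key(model="gpt-3.5-turbo", messages=msgs, temperature=0.7)

assert k1 == k2  # identical params -> same key -> second call is a cache hit
assert k1 != k3  # different temperature -> different key -> cache miss
```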
```diff
@@ -520,12 +520,13 @@ def client(original_function):
         if (litellm.caching or litellm.caching_with_models) and litellm.cache is None:
             litellm.cache = Cache()
 
-        # checking cache
-        if (litellm.cache != None or litellm.caching or litellm.caching_with_models):
-            print_verbose(f"LiteLLM: Checking Cache")
-            cached_result = litellm.cache.get_cache(*args, **kwargs)
-            if cached_result != None:
-                return cached_result
+        if kwargs.get("caching", False): # allow users to control returning cached responses from the completion function
+            # checking cache
+            if (litellm.cache != None or litellm.caching or litellm.caching_with_models):
+                print_verbose(f"LiteLLM: Checking Cache")
+                cached_result = litellm.cache.get_cache(*args, **kwargs)
+                if cached_result != None:
+                    return cached_result
 
         # MODEL CALL
         result = original_function(*args, **kwargs)
```
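The wrapper now consults the cache only when the caller passed `caching=True`; otherwise it falls straight through to the model call. A simplified, self-contained sketch of that decorator pattern (the dict cache, the `make_key` helper, and the write-back after the call are illustrative stand-ins, not litellm's API; the real key logic lives in `Cache.get_cache_key`, and the write-back happens elsewhere in `client()`):

```python
import functools

_cache = {}

def make_key(kwargs):
    # hypothetical key helper; litellm uses Cache.get_cache_key instead
    return str(kwargs.get("model")) + str(kwargs.get("messages"))

def with_cache(fn):
    @functools.wraps(fn)
    def wrapper(*args, **kwargs):
        if kwargs.get("caching", False):  # per-call opt-in, as in the hunk above
            key = make_key(kwargs)
            if key in _cache:
                return _cache[key]        # cache hit: skip the model call
            result = fn(*args, **kwargs)  # MODEL CALL
            _cache[key] = result          # write-back (simplified)
            return result
        return fn(*args, **kwargs)        # caching disabled: always call through
    return wrapper

@with_cache
def fake_completion(model=None, messages=None, caching=False):
    return {"model": model, "echo": messages}

r1 = fake_completion(model="m", messages="hi", caching=True)  # miss: function runs
r2 = fake_completion(model="m", messages="hi", caching=True)  # hit: same object returned
assert r1 is r2
```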