forked from phoenix/litellm-mirror
cache improvements
commit 1799f1bfe9
parent 43fea52d7c

4 changed files with 15 additions and 15 deletions
@@ -30,9 +30,9 @@ togetherai_api_key: Optional[str] = None
 baseten_key: Optional[str] = None
 use_client = False
 logging = True
-caching = False
-cache: Optional[Cache] = None # set to litellm.caching Cache() object
-caching_with_models = False # if you want the caching key to be model + prompt
+caching = False # deprecated soon
+caching_with_models = False # if you want the caching key to be model + prompt # deprecated soon
+cache: Optional[Cache] = None # cache object
 model_alias_map: Dict[str, str] = {}
 model_cost = {
     "babbage-002": {
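The flag changes above steer callers toward the new Cache object. A minimal usage sketch, assuming only the names visible in this diff (litellm.cache, litellm.caching, Cache imported from litellm.caching) and that an API key is already configured; the prompt is a placeholder:

import litellm
from litellm import completion
from litellm.caching import Cache

messages = [{"role": "user", "content": "Hey, how's it going?"}]  # placeholder prompt

# New style: attach a Cache() object; the client wrapper consults it before each model call.
litellm.cache = Cache()

# Old style, marked "deprecated soon" above, still works for now:
# litellm.caching = True

response1 = completion(model="gpt-3.5-turbo", messages=messages)
response2 = completion(model="gpt-3.5-turbo", messages=messages)  # returned from cache when the call matches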
@@ -75,7 +75,6 @@ class Cache():
             if cache_key is not None:
                 self.cache.set_cache(cache_key, result)
         except:
             pass
-

@@ -83,6 +82,3 @@ class Cache():
 
 
 
-
-
-
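For context on the Cache hunk above: the write path only stores a result when a key could be built, and it swallows cache errors so a caching failure never breaks the completion call. A rough sketch of that pattern, not litellm's actual implementation; the in-memory store and the key scheme are hypothetical:

from typing import Any, Optional

class InMemoryCacheSketch:
    """Toy stand-in for the object behind self.cache in the hunk above."""
    def __init__(self) -> None:
        self.cache_dict: dict = {}

    def get_cache(self, key: str) -> Optional[Any]:
        return self.cache_dict.get(key)

    def set_cache(self, key: str, value: Any) -> None:
        self.cache_dict[key] = value

def add_cache(cache: InMemoryCacheSketch, result: Any, *args, **kwargs) -> None:
    # Same shape as the guarded write above: build a key, write only if one
    # exists, and never let a cache failure break the main call path.
    try:
        cache_key = f"{kwargs.get('model')}:{kwargs.get('messages')}"  # hypothetical key scheme
        if cache_key is not None:
            cache.set_cache(cache_key, result)
    except Exception:
        pass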
@@ -29,13 +29,11 @@ def test_caching():
         if response2 != response1:
             print(f"response1: {response1}")
             print(f"response2: {response2}")
-            pytest.fail(f"Error occurred: {e}")
+            pytest.fail(f"Error occurred: responses are not equal")
     except Exception as e:
         litellm.caching = False
-        print(f"error occurred: {traceback.format_exc()}")
         pytest.fail(f"Error occurred: {e}")
-

 def test_caching_with_models():
     litellm.caching_with_models = True
     response1 = completion(model="gpt-3.5-turbo", messages=messages)
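A hypothetical companion test, not part of this commit, that drives the new litellm.cache object directly with the same equality check test_caching() uses; the test name and messages list are placeholders:

import pytest
import litellm
from litellm import completion
from litellm.caching import Cache

messages = [{"role": "user", "content": "Hey, how's it going?"}]  # placeholder prompt

def test_caching_with_cache_object():
    try:
        litellm.cache = Cache()
        response1 = completion(model="gpt-3.5-turbo", messages=messages)
        response2 = completion(model="gpt-3.5-turbo", messages=messages)
        litellm.cache = None
        if response2 != response1:
            print(f"response1: {response1}")
            print(f"response2: {response2}")
            pytest.fail(f"Error occurred: responses are not equal")
    except Exception as e:
        litellm.cache = None
        pytest.fail(f"Error occurred: {e}")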
@@ -26,6 +26,7 @@ from .exceptions import (
     OpenAIError,
 )
 from typing import List, Dict, Union, Optional
+from .caching import Cache
 
 
 ####### ENVIRONMENT VARIABLES ####################
@@ -402,11 +403,16 @@ def client(original_function):
             kwargs["litellm_call_id"] = litellm_call_id
             start_time = datetime.datetime.now()
             # [OPTIONAL] CHECK CACHE
-            if (litellm.caching or litellm.caching_with_models or litellm.cache != None) and (
-                cached_result := litellm.cache.get_cache(*args, **kwargs)
-            ) is not None:
-                result = cached_result
-                return result
+            # remove this after deprecating litellm.caching
+            if (litellm.caching or litellm.caching_with_models) and litellm.cache is None:
+                litellm.cache = Cache()
+
+            # checking cache
+            if (litellm.cache != None or litellm.caching or litellm.caching_with_models):
+                cached_result = litellm.cache.get_cache(*args, **kwargs)
+                if cached_result != None:
+                    return cached_result
+
             # MODEL CALL
             result = original_function(*args, **kwargs)
             if "stream" in kwargs and kwargs["stream"] == True:
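The new wrapper logic above boils down to: create a Cache() on demand when only the deprecated flags are set, ask the cache first, and fall through to the real call on a miss. A standalone sketch of that check-then-call flow, not the actual litellm client decorator:

import functools

def cached_client(original_function, cache):
    """Wrap original_function so a cache hit short-circuits the real call."""
    @functools.wraps(original_function)
    def wrapper(*args, **kwargs):
        if cache is not None:
            cached_result = cache.get_cache(*args, **kwargs)  # same call shape as litellm.cache.get_cache
            if cached_result is not None:
                return cached_result  # cache hit: skip the model call
        return original_function(*args, **kwargs)  # cache miss: make the real call
    return wrapper

The real wrapper additionally assigns litellm.cache = Cache() when the deprecated litellm.caching / litellm.caching_with_models flags are set, as the hunk above shows.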