fix(utils.py): fix 'no-cache': true when caching is turned on

Krrish Dholakia 2024-04-23 12:58:30 -07:00
parent 0b604146ae
commit 161e836427
4 changed files with 70 additions and 25 deletions
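
For context, a minimal sketch of the behavior this commit targets (assuming a reachable Redis instance and the REDIS_* environment variables used in the tests below): with a global cache configured, a call that passes cache={"no-cache": True} should bypass the cache and return a fresh response.

import os
import litellm
from litellm.caching import Cache

# Global Redis-backed cache, mirroring the test setup in this commit.
litellm.cache = Cache(
    type="redis",
    host=os.environ["REDIS_HOST"],
    port=os.environ["REDIS_PORT"],
    password=os.environ["REDIS_PASSWORD"],
)

messages = [{"role": "user", "content": "hello who are you"}]

# First call populates the cache.
response1 = litellm.completion(model="gpt-3.5-turbo", messages=messages, caching=True)

# Second call opts out via per-request cache controls; with this fix it must
# not return the cached response, so the two response ids should differ.
response2 = litellm.completion(
    model="gpt-3.5-turbo",
    messages=messages,
    caching=True,
    cache={"no-cache": True},
)
assert response1.id != response2.id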

View file

@@ -129,8 +129,6 @@ class PrometheusServicesLogger:
         if self.mock_testing:
             self.mock_testing_success_calls += 1
 
-        print(f"payload call type: {payload.call_type}")
-
         if payload.service.value in self.payload_to_prometheus_map:
             prom_objects = self.payload_to_prometheus_map[payload.service.value]
             for obj in prom_objects:
@@ -151,8 +149,6 @@ class PrometheusServicesLogger:
         if self.mock_testing:
             self.mock_testing_failure_calls += 1
 
-        print(f"payload call type: {payload.call_type}")
-
         if payload.service.value in self.payload_to_prometheus_map:
             prom_objects = self.payload_to_prometheus_map[payload.service.value]
             for obj in prom_objects:
@@ -170,8 +166,6 @@ class PrometheusServicesLogger:
         if self.mock_testing:
             self.mock_testing_success_calls += 1
 
-        print(f"payload call type: {payload.call_type}")
-
         if payload.service.value in self.payload_to_prometheus_map:
             prom_objects = self.payload_to_prometheus_map[payload.service.value]
             for obj in prom_objects:
@@ -193,8 +187,6 @@ class PrometheusServicesLogger:
         if self.mock_testing:
             self.mock_testing_failure_calls += 1
 
-        print(f"payload call type: {payload.call_type}")
-
         if payload.service.value in self.payload_to_prometheus_map:
             prom_objects = self.payload_to_prometheus_map[payload.service.value]
             for obj in prom_objects:

View file

@@ -18,10 +18,10 @@ model_list:
       api_version: "2023-07-01-preview"
       stream_timeout: 0.001
     model_name: azure-gpt-3.5
-  # - model_name: text-embedding-ada-002
-  #   litellm_params:
-  #     model: text-embedding-ada-002
-  #     api_key: os.environ/OPENAI_API_KEY
+  - model_name: text-embedding-ada-002
+    litellm_params:
+      model: text-embedding-ada-002
+      api_key: os.environ/OPENAI_API_KEY
   - model_name: gpt-instruct
     litellm_params:
       model: text-completion-openai/gpt-3.5-turbo-instruct
@@ -42,6 +42,9 @@ litellm_settings:
   success_callback: ["prometheus"]
   failure_callback: ["prometheus"]
   service_callback: ["prometheus_system"]
+  cache: True
+  cache_params:
+    type: "redis"
 
 general_settings:
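
With cache: True enabled in litellm_settings above, a caller can still opt out of caching per request. A minimal sketch, assuming a locally running proxy at http://0.0.0.0:4000 with a placeholder key, and assuming the proxy forwards the request-level cache field to the SDK (both the URL and key here are assumptions, not part of this commit):

import openai

# Hypothetical local proxy endpoint and key; adjust to your deployment.
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

# Ask the proxy to skip the cache for this one request.
response = client.chat.completions.create(
    model="azure-gpt-3.5",
    messages=[{"role": "user", "content": "hello who are you"}],
    extra_body={"cache": {"no-cache": True}},
)
print(response)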

View file

@@ -1111,6 +1111,7 @@ async def test_cache_control_overrides():
                 "content": "hello who are you" + unique_num,
             }
         ],
+        caching=True,
     )
 
     print(response1)
@@ -1125,6 +1126,55 @@ async def test_cache_control_overrides():
                 "content": "hello who are you" + unique_num,
             }
         ],
+        caching=True,
         cache={"no-cache": True},
     )
 
     print(response2)
     assert response1.id != response2.id
+
+
+def test_sync_cache_control_overrides():
+    # we use the cache controls to ensure there is no cache hit on this test
+    litellm.cache = Cache(
+        type="redis",
+        host=os.environ["REDIS_HOST"],
+        port=os.environ["REDIS_PORT"],
+        password=os.environ["REDIS_PASSWORD"],
+    )
+    print("Testing cache override")
+    litellm.set_verbose = True
+    import uuid
+
+    unique_num = str(uuid.uuid4())
+
+    start_time = time.time()
+
+    response1 = litellm.completion(
+        model="gpt-3.5-turbo",
+        messages=[
+            {
+                "role": "user",
+                "content": "hello who are you" + unique_num,
+            }
+        ],
+        caching=True,
+    )
+
+    print(response1)
+
+    time.sleep(2)
+
+    response2 = litellm.completion(
+        model="gpt-3.5-turbo",
+        messages=[
+            {
+                "role": "user",
+                "content": "hello who are you" + unique_num,
+            }
+        ],
+        caching=True,
+        cache={"no-cache": True},
+    )

View file

@@ -2720,6 +2720,7 @@ def client(original_function):
                 )
             # if caching is false or cache["no-cache"]==True, don't run this
             if (
+                (
                     (
                         (
                             kwargs.get("caching", None) is None
@@ -2727,9 +2728,10 @@ def client(original_function):
                             and litellm.cache is not None
                         )
                         or kwargs.get("caching", False) == True
-                        or (
-                            kwargs.get("cache", None) is not None
-                            and kwargs.get("cache", {}).get("no-cache", False) != True
-                        )
+                    )
+                    and (
+                        kwargs.get("cache", None) is None
+                        or kwargs["cache"].get("no-cache", False) != True
+                    )
                 )
                 and kwargs.get("aembedding", False) != True
@@ -3011,9 +3013,8 @@ def client(original_function):
                 )
             # [OPTIONAL] CHECK CACHE
             print_verbose(f"litellm.cache: {litellm.cache}")
             print_verbose(
-                f"kwargs[caching]: {kwargs.get('caching', False)}; litellm.cache: {litellm.cache}"
+                f"kwargs[caching]: {kwargs.get('caching', False)}; litellm.cache: {litellm.cache}; kwargs.get('cache'): {kwargs.get('cache', None)}"
             )
             # if caching is false, don't run this
             final_embedding_cached_response = None
@@ -3025,10 +3026,9 @@ def client(original_function):
                     and litellm.cache is not None
                 )
                 or kwargs.get("caching", False) == True
-                or (
-                    kwargs.get("cache", None) is not None
-                    and kwargs.get("cache").get("no-cache", False) != True
-                )
+            ) and (
+                kwargs.get("cache", None) is None
+                or kwargs["cache"].get("no-cache", False) != True
             ):  # allow users to control returning cached responses from the completion function
                 # checking cache
                 print_verbose("INSIDE CHECKING CACHE")
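
Both conditions above reduce to the same rule: check the cache only when caching is enabled and the per-request cache controls do not set "no-cache": True. A minimal sketch of that predicate as a standalone helper (hypothetical, not part of litellm; kwargs are the call's keyword arguments and global_cache stands in for litellm.cache):

def should_check_cache(kwargs: dict, global_cache) -> bool:
    # Caching is on if it is enabled globally (and not overridden per call)
    # or requested explicitly via caching=True.
    caching_enabled = (
        kwargs.get("caching", None) is None
        and kwargs.get("cache", None) is None
        and global_cache is not None
    ) or kwargs.get("caching", False) == True

    # Per-request cache controls win: cache={"no-cache": True} forces a fresh call.
    no_cache_requested = (
        kwargs.get("cache", None) is not None
        and kwargs["cache"].get("no-cache", False) == True
    )

    return caching_enabled and not no_cache_requested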