forked from phoenix/litellm-mirror
fix(utils.py): fix 'no-cache': true when caching is turned on
This commit is contained in:
parent 0b604146ae
commit 161e836427
4 changed files with 70 additions and 25 deletions
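The change in short: when a caller passes the per-request cache control cache={"no-cache": True}, the client wrapper in utils.py should skip the cache lookup even though caching is turned on globally (or via caching=True). A minimal sketch of the call pattern this commit fixes, lifted from the tests added below (model and prompt are illustrative):

import litellm

# With litellm.cache configured globally, "no-cache" must still force a fresh call,
# so two identical requests should yield responses with different ids.
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hello who are you"}],
    caching=True,
    cache={"no-cache": True},
)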
@@ -129,8 +129,6 @@ class PrometheusServicesLogger:
         if self.mock_testing:
             self.mock_testing_success_calls += 1
 
-        print(f"payload call type: {payload.call_type}")
-
         if payload.service.value in self.payload_to_prometheus_map:
             prom_objects = self.payload_to_prometheus_map[payload.service.value]
             for obj in prom_objects:
@@ -151,8 +149,6 @@ class PrometheusServicesLogger:
         if self.mock_testing:
             self.mock_testing_failure_calls += 1
 
-        print(f"payload call type: {payload.call_type}")
-
         if payload.service.value in self.payload_to_prometheus_map:
             prom_objects = self.payload_to_prometheus_map[payload.service.value]
             for obj in prom_objects:
@@ -170,8 +166,6 @@ class PrometheusServicesLogger:
         if self.mock_testing:
             self.mock_testing_success_calls += 1
 
-        print(f"payload call type: {payload.call_type}")
-
         if payload.service.value in self.payload_to_prometheus_map:
             prom_objects = self.payload_to_prometheus_map[payload.service.value]
             for obj in prom_objects:
@@ -193,8 +187,6 @@ class PrometheusServicesLogger:
         if self.mock_testing:
             self.mock_testing_failure_calls += 1
 
-        print(f"payload call type: {payload.call_type}")
-
         if payload.service.value in self.payload_to_prometheus_map:
             prom_objects = self.payload_to_prometheus_map[payload.service.value]
             for obj in prom_objects:
@@ -18,10 +18,10 @@ model_list:
       api_version: "2023-07-01-preview"
       stream_timeout: 0.001
     model_name: azure-gpt-3.5
-  # - model_name: text-embedding-ada-002
-  #   litellm_params:
-  #     model: text-embedding-ada-002
-  #     api_key: os.environ/OPENAI_API_KEY
+  - model_name: text-embedding-ada-002
+    litellm_params:
+      model: text-embedding-ada-002
+      api_key: os.environ/OPENAI_API_KEY
   - model_name: gpt-instruct
     litellm_params:
       model: text-completion-openai/gpt-3.5-turbo-instruct
@@ -42,6 +42,9 @@ litellm_settings:
   success_callback: ["prometheus"]
   failure_callback: ["prometheus"]
   service_callback: ["prometheus_system"]
+  cache: True
+  cache_params:
+    type: "redis"
 
 general_settings:
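The config hunks above uncomment the text-embedding-ada-002 entry and turn on a Redis-backed cache for the proxy. For reference, a minimal sketch of the equivalent in-code setup, mirroring the new test added below; it assumes Cache is importable from litellm.caching and that REDIS_HOST, REDIS_PORT, and REDIS_PASSWORD are set in the environment:

import os

import litellm
from litellm.caching import Cache

# Equivalent of `cache: True` + `cache_params: {type: "redis"}` in the proxy config:
# a global Redis-backed cache that the completion wrapper consults by default.
litellm.cache = Cache(
    type="redis",
    host=os.environ["REDIS_HOST"],
    port=os.environ["REDIS_PORT"],
    password=os.environ["REDIS_PASSWORD"],
)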
@@ -1111,6 +1111,7 @@ async def test_cache_control_overrides():
                 "content": "hello who are you" + unique_num,
             }
         ],
+        caching=True,
     )
 
     print(response1)
@@ -1125,6 +1126,55 @@ async def test_cache_control_overrides():
                 "content": "hello who are you" + unique_num,
             }
         ],
+        caching=True,
+        cache={"no-cache": True},
+    )
+
+    print(response2)
+
+    assert response1.id != response2.id
+
+
+def test_sync_cache_control_overrides():
+    # we use the cache controls to ensure there is no cache hit on this test
+    litellm.cache = Cache(
+        type="redis",
+        host=os.environ["REDIS_HOST"],
+        port=os.environ["REDIS_PORT"],
+        password=os.environ["REDIS_PASSWORD"],
+    )
+    print("Testing cache override")
+    litellm.set_verbose = True
+    import uuid
+
+    unique_num = str(uuid.uuid4())
+
+    start_time = time.time()
+
+    response1 = litellm.completion(
+        model="gpt-3.5-turbo",
+        messages=[
+            {
+                "role": "user",
+                "content": "hello who are you" + unique_num,
+            }
+        ],
+        caching=True,
+    )
+
+    print(response1)
+
+    time.sleep(2)
+
+    response2 = litellm.completion(
+        model="gpt-3.5-turbo",
+        messages=[
+            {
+                "role": "user",
+                "content": "hello who are you" + unique_num,
+            }
+        ],
+        caching=True,
         cache={"no-cache": True},
     )
@@ -2722,14 +2722,16 @@ def client(original_function):
             if (
                 (
                     (
-                        kwargs.get("caching", None) is None
-                        and kwargs.get("cache", None) is None
-                        and litellm.cache is not None
-                    )
-                    or kwargs.get("caching", False) == True
-                    or (
-                        kwargs.get("cache", None) is not None
-                        and kwargs.get("cache", {}).get("no-cache", False) != True
+                        (
+                            kwargs.get("caching", None) is None
+                            and kwargs.get("cache", None) is None
+                            and litellm.cache is not None
+                        )
+                        or kwargs.get("caching", False) == True
+                    )
+                    and (
+                        kwargs.get("cache", None) is None
+                        or kwargs["cache"].get("no-cache", False) != True
                     )
                 )
                 and kwargs.get("aembedding", False) != True
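Restated outside the decorator, the rewritten gate requires both that caching is enabled (implicitly via litellm.cache or explicitly via caching=True) and that the request has not opted out; under the old grouping, caching=True alone satisfied the check, so cache={"no-cache": True} was ignored. A sketch of the new predicate (the helper name is hypothetical; the condition mirrors the hunk above):

def _should_check_cache(kwargs, global_cache):
    # Caching is on either implicitly (global cache set, no per-call overrides)
    # or explicitly via caching=True.
    caching_enabled = (
        kwargs.get("caching", None) is None
        and kwargs.get("cache", None) is None
        and global_cache is not None
    ) or kwargs.get("caching", False) == True
    # The per-request override now applies in every case, not only the implicit one.
    no_cache_override = (
        kwargs.get("cache", None) is not None
        and kwargs["cache"].get("no-cache", False) == True
    )
    return caching_enabled and not no_cache_override

# e.g. _should_check_cache({"caching": True, "cache": {"no-cache": True}}, global_cache=None) -> False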
@@ -3011,9 +3013,8 @@ def client(original_function):
             )
 
             # [OPTIONAL] CHECK CACHE
-            print_verbose(f"litellm.cache: {litellm.cache}")
             print_verbose(
-                f"kwargs[caching]: {kwargs.get('caching', False)}; litellm.cache: {litellm.cache}"
+                f"kwargs[caching]: {kwargs.get('caching', False)}; litellm.cache: {litellm.cache}; kwargs.get('cache'): {kwargs.get('cache', None)}"
             )
             # if caching is false, don't run this
             final_embedding_cached_response = None
@@ -3025,10 +3026,9 @@ def client(original_function):
                     and litellm.cache is not None
                 )
                 or kwargs.get("caching", False) == True
-                or (
-                    kwargs.get("cache", None) is not None
-                    and kwargs.get("cache").get("no-cache", False) != True
-                )
+            ) and (
+                kwargs.get("cache", None) is None
+                or kwargs["cache"].get("no-cache", False) != True
             ):  # allow users to control returning cached responses from the completion function
                 # checking cache
                 print_verbose("INSIDE CHECKING CACHE")