fix(utils.py): honor cache={"no-cache": True} even when caching is turned on

2024-04-23 12:58:30 -07:00 · 2024-04-23 12:58:30 -07:00 · 161e836427
commit 161e836427
parent 0b604146ae
4 changed files with 70 additions and 25 deletions
--- a/litellm/integrations/prometheus_services.py
+++ b/litellm/integrations/prometheus_services.py
@ -129,8 +129,6 @@ class PrometheusServicesLogger:
        if self.mock_testing:
            self.mock_testing_success_calls += 1
        print(f"payload call type: {payload.call_type}")
        if payload.service.value in self.payload_to_prometheus_map:
            prom_objects = self.payload_to_prometheus_map[payload.service.value]
            for obj in prom_objects:
@ -151,8 +149,6 @@ class PrometheusServicesLogger:
        if self.mock_testing:
            self.mock_testing_failure_calls += 1
        print(f"payload call type: {payload.call_type}")
        if payload.service.value in self.payload_to_prometheus_map:
            prom_objects = self.payload_to_prometheus_map[payload.service.value]
            for obj in prom_objects:
@ -170,8 +166,6 @@ class PrometheusServicesLogger:
        if self.mock_testing:
            self.mock_testing_success_calls += 1
        print(f"payload call type: {payload.call_type}")
        if payload.service.value in self.payload_to_prometheus_map:
            prom_objects = self.payload_to_prometheus_map[payload.service.value]
            for obj in prom_objects:
@ -193,8 +187,6 @@ class PrometheusServicesLogger:
        if self.mock_testing:
            self.mock_testing_failure_calls += 1
        print(f"payload call type: {payload.call_type}")
        if payload.service.value in self.payload_to_prometheus_map:
            prom_objects = self.payload_to_prometheus_map[payload.service.value]
            for obj in prom_objects:
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@ -18,10 +18,10 @@ model_list:
      api_version: "2023-07-01-preview"
      stream_timeout: 0.001
  model_name: azure-gpt-3.5
-# - model_name: text-embedding-ada-002
+- model_name: text-embedding-ada-002
-#   litellm_params:
+  litellm_params:
-#     model: text-embedding-ada-002
+    model: text-embedding-ada-002
-#     api_key: os.environ/OPENAI_API_KEY
+    api_key: os.environ/OPENAI_API_KEY
 - model_name: gpt-instruct
  litellm_params:
    model: text-completion-openai/gpt-3.5-turbo-instruct
@ -42,6 +42,9 @@ litellm_settings:
  success_callback: ["prometheus"]
  failure_callback: ["prometheus"]
  service_callback: ["prometheus_system"]
  cache: True
  cache_params:
    type: "redis"
 general_settings:
--- a/litellm/tests/test_caching.py
+++ b/litellm/tests/test_caching.py
@ -1111,6 +1111,7 @@ async def test_cache_control_overrides():
                "content": "hello who are you" + unique_num,
            }
        ],
        caching=True,
    )
    print(response1)
@ -1125,6 +1126,55 @@ async def test_cache_control_overrides():
                "content": "hello who are you" + unique_num,
            }
        ],
        caching=True,
        cache={"no-cache": True},
    )
    print(response2)
    assert response1.id != response2.id
 def test_sync_cache_control_overrides():
    # we use the cache controls to ensure there is no cache hit on this test
    litellm.cache = Cache(
        type="redis",
        host=os.environ["REDIS_HOST"],
        port=os.environ["REDIS_PORT"],
        password=os.environ["REDIS_PASSWORD"],
    )
    print("Testing cache override")
    litellm.set_verbose = True
    import uuid
    unique_num = str(uuid.uuid4())
    start_time = time.time()
    response1 = litellm.completion(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "user",
                "content": "hello who are you" + unique_num,
            }
        ],
        caching=True,
    )
    print(response1)
    time.sleep(2)
    response2 = litellm.completion(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "user",
                "content": "hello who are you" + unique_num,
            }
        ],
        caching=True,
        cache={"no-cache": True},
    )
--- a/litellm/utils.py
+++ b/litellm/utils.py
@ -2722,14 +2722,16 @@ def client(original_function):
            if (
                (
                    (
-                        kwargs.get("caching", None) is None
+                        (
-                        and kwargs.get("cache", None) is None
+                            kwargs.get("caching", None) is None
-                        and litellm.cache is not None
+                            and kwargs.get("cache", None) is None
                            and litellm.cache is not None
                        )
                        or kwargs.get("caching", False) == True
                    )
-                    or kwargs.get("caching", False) == True
+                    and (
-                    or (
+                        kwargs.get("cache", None) is None
-                        kwargs.get("cache", None) is not None
+                        or kwargs["cache"].get("no-cache", False) != True
                        and kwargs.get("cache", {}).get("no-cache", False) != True
                    )
                )
                and kwargs.get("aembedding", False) != True
@ -3011,9 +3013,8 @@ def client(original_function):
                    )
            # [OPTIONAL] CHECK CACHE
            print_verbose(f"litellm.cache: {litellm.cache}")
            print_verbose(
-                f"kwargs[caching]: {kwargs.get('caching', False)}; litellm.cache: {litellm.cache}"
+                f"kwargs[caching]: {kwargs.get('caching', False)}; litellm.cache: {litellm.cache}; kwargs.get('cache'): {kwargs.get('cache', None)}"
            )
            # if caching is false, don't run this
            final_embedding_cached_response = None
@ -3025,10 +3026,9 @@ def client(original_function):
                    and litellm.cache is not None
                )
                or kwargs.get("caching", False) == True
-                or (
+            ) and (
-                    kwargs.get("cache", None) is not None
+                kwargs.get("cache", None) is None
-                    and kwargs.get("cache").get("no-cache", False) != True
+                or kwargs["cache"].get("no-cache", False) != True
                )
            ):  # allow users to control returning cached responses from the completion function
                # checking cache
                print_verbose("INSIDE CHECKING CACHE")