fix(litellm_logging.py): fix async caching for sync streaming calls (don't do it)

Checks if call is async before running async caching for streaming call Fixes https://github.com/BerriAI/litellm/issues/4511#issuecomment-2233211808
2024-07-17 11:15:30 -07:00 · 2024-07-17 11:15:30 -07:00 · d2e64f21f3
commit d2e64f21f3
parent e7f8ee2aba
2 changed files with 22 additions and 7 deletions
--- a/litellm/litellm_core_utils/litellm_logging.py
+++ b/litellm/litellm_core_utils/litellm_logging.py
@ -1337,7 +1337,14 @@ class Logging:
                if kwargs.get("no-log", False) == True:
                    print_verbose("no-log request, skipping logging")
                    continue
-                if callback == "cache" and litellm.cache is not None:
+                if (
+                    callback == "cache"
+                    and litellm.cache is not None
+                    and self.model_call_details.get("litellm_params", {}).get(
+                        "acompletion", False
+                    )
+                    is True
+                ):
                    # set_cache once complete streaming response is built
                    print_verbose("async success_callback: reaches cache for logging!")
                    kwargs = self.model_call_details
--- a/litellm/tests/test_caching.py
+++ b/litellm/tests/test_caching.py
@ -1579,18 +1579,21 @@ async def test_redis_semantic_cache_acompletion():
    assert response1.id == response2.id


-def test_caching_redis_simple(caplog):
+def test_caching_redis_simple(caplog, capsys):
    """
    Relevant issue - https://github.com/BerriAI/litellm/issues/4511
    """
+    litellm.set_verbose = True  ## REQUIRED FOR TEST.
    litellm.cache = Cache(
        type="redis", url=os.getenv("REDIS_SSL_URL")
    )  # passing `supported_call_types = ["completion"]` has no effect

    s = time.time()
+
+    uuid_str = str(uuid.uuid4())
    x = completion(
-        model="gpt-4o",
-        messages=[{"role": "user", "content": "Hello, how are you? Wink"}],
+        model="gpt-3.5-turbo",
+        messages=[{"role": "user", "content": f"Hello, how are you? Wink {uuid_str}"}],
        stream=True,
    )
    for m in x:
@ -1599,8 +1602,8 @@ def test_caching_redis_simple(caplog):

    s2 = time.time()
    x = completion(
-        model="gpt-4o",
-        messages=[{"role": "user", "content": "Hello, how are you? Wink"}],
+        model="gpt-3.5-turbo",
+        messages=[{"role": "user", "content": f"Hello, how are you? Wink {uuid_str}"}],
        stream=True,
    )
    for m in x:
@ -1609,11 +1612,15 @@ def test_caching_redis_simple(caplog):

    redis_async_caching_error = False
    redis_service_logging_error = False
+    captured = capsys.readouterr()
    captured_logs = [rec.message for rec in caplog.records]

    print(f"captured_logs: {captured_logs}")
    for item in captured_logs:
-        if "Error connecting to Async Redis client" in item:
+        if (
+            "Error connecting to Async Redis client" in item
+            or "Set ASYNC Redis Cache" in item
+        ):
            redis_async_caching_error = True

        if "ServiceLogging.async_service_success_hook" in item:
@ -1621,3 +1628,4 @@ def test_caching_redis_simple(caplog):

    assert redis_async_caching_error is False
    assert redis_service_logging_error is False
+    assert "async success_callback: reaches cache for logging" not in captured.out