fix(litellm_logging.py): fix async caching for sync streaming calls (don't do it)

Checks whether the call is async before running async caching for a streaming call, so synchronous streaming calls no longer trigger the async cache write

Fixes https://github.com/BerriAI/litellm/issues/4511#issuecomment-2233211808
Krrish Dholakia 2024-07-17 11:15:30 -07:00
parent e7f8ee2aba
commit d2e64f21f3
2 changed files with 22 additions and 7 deletions

@@ -1337,7 +1337,14 @@ class Logging:
 if kwargs.get("no-log", False) == True:
     print_verbose("no-log request, skipping logging")
     continue
-if callback == "cache" and litellm.cache is not None:
+if (
+    callback == "cache"
+    and litellm.cache is not None
+    and self.model_call_details.get("litellm_params", {}).get(
+        "acompletion", False
+    )
+    is True
+):
     # set_cache once complete streaming response is built
     print_verbose("async success_callback: reaches cache for logging!")
     kwargs = self.model_call_details
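
To make the gating logic easier to see in isolation, below is a minimal, runnable sketch of the check this diff introduces: the "cache" callback in the async success path only fires when a cache is configured and the original request was made through the async entry point (the acompletion flag in litellm_params). The helper name should_run_async_cache and the simplified dictionaries are illustrative only, not part of the litellm codebase.

from typing import Any, Dict, Optional


def should_run_async_cache(
    callback: str,
    cache: Optional[Any],
    model_call_details: Dict[str, Any],
) -> bool:
    # Only run the async cache write when a cache is configured AND the
    # original call was async (acompletion=True); sync streaming calls skip it.
    is_async_call = model_call_details.get("litellm_params", {}).get(
        "acompletion", False
    )
    return callback == "cache" and cache is not None and is_async_call is True


# Sync streaming call: acompletion is False, so the async cache write is skipped.
sync_details = {"litellm_params": {"acompletion": False}}
assert should_run_async_cache("cache", object(), sync_details) is False

# Async streaming call: acompletion is True, so the cache write proceeds.
async_details = {"litellm_params": {"acompletion": True}}
assert should_run_async_cache("cache", object(), async_details) is True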