commit 2c20b3726b
Yuki Watanabe, 2025-04-23 00:48:24 -07:00, committed by GitHub
9 changed files with 189 additions and 104 deletions


@@ -8,12 +8,13 @@ import os
 import re
 import subprocess
 import sys
 import threading
 import time
 import traceback
 import uuid
 from datetime import datetime as dt_object
 from functools import lru_cache
-from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union, cast
+from typing import Any, Callable, Dict, List, Literal, Optional, Sequence, Tuple, Union, cast

 from pydantic import BaseModel
@@ -1226,7 +1227,36 @@ class Logging(LiteLLMLoggingBaseClass):
         except Exception as e:
             raise Exception(f"[Non-Blocking] LiteLLM.Success_Call Error: {str(e)}")

-    def success_handler(  # noqa: PLR0915
+    def success_handler(
+        self,
+        result=None,
+        start_time=None,
+        end_time=None,
+        cache_hit=None,
+        synchronous=None,
+        **kwargs,
+    ):
+        """
+        Execute the success handler function in a sync or async manner.
+
+        If the `synchronous` argument is not provided, the global `litellm.sync_logging` config is used.
+        """
+        if synchronous is None:
+            synchronous = litellm.sync_logging
+
+        if synchronous:
+            self._success_handler(result, start_time, end_time, cache_hit, **kwargs)
+        else:
+            executor.submit(
+                self._success_handler,
+                result,
+                start_time,
+                end_time,
+                cache_hit,
+                **kwargs,
+            )
+
+    def _success_handler(  # noqa: PLR0915
         self, result=None, start_time=None, end_time=None, cache_hit=None, **kwargs
     ):
         verbose_logger.debug(
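The hunk above turns `success_handler` into a thin dispatcher: it either runs the real handler inline or submits it to a shared thread pool. A minimal, self-contained sketch of the same dispatch pattern (the `sync_logging` flag, `handle_success`, and the pool here are illustrative stand-ins, not LiteLLM internals):

from concurrent.futures import ThreadPoolExecutor

executor = ThreadPoolExecutor(max_workers=4)
sync_logging = False  # stand-in for the global litellm.sync_logging flag


def handle_success(result):
    # Stand-in for the real work: user callbacks, spend tracking, cache writes.
    print(f"logged: {result}")


def success_handler(result, synchronous=None):
    # Fall back to the global flag when the caller does not choose explicitly.
    if synchronous is None:
        synchronous = sync_logging
    if synchronous:
        handle_success(result)  # blocks the caller until logging finishes
    else:
        executor.submit(handle_success, result)  # fire-and-forget on the pool


success_handler("response-123")  # queued on the thread pool
success_handler("response-456", synchronous=True)  # runs inline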
@@ -2376,12 +2406,7 @@ class Logging(LiteLLMLoggingBaseClass):
         if self._should_run_sync_callbacks_for_async_calls() is False:
             return

-        executor.submit(
-            self.success_handler,
-            result,
-            start_time,
-            end_time,
-        )
+        self.success_handler(result, start_time, end_time)

     def _should_run_sync_callbacks_for_async_calls(self) -> bool:
         """


@@ -1445,32 +1445,47 @@ class CustomStreamWrapper:
         """
         Runs success logging in a thread and adds the response to the cache
         """
-        if litellm.disable_streaming_logging is True:
-            """
-            [NOT RECOMMENDED]
-            Set this via `litellm.disable_streaming_logging = True`.
-
-            Disables streaming logging.
-            """
-            return
-        ## ASYNC LOGGING
-        # Create an event loop for the new thread
-        if self.logging_loop is not None:
-            future = asyncio.run_coroutine_threadsafe(
-                self.logging_obj.async_success_handler(
-                    processed_chunk, None, None, cache_hit
-                ),
-                loop=self.logging_loop,
-            )
-            future.result()
-        else:
-            asyncio.run(
-                self.logging_obj.async_success_handler(
-                    processed_chunk, None, None, cache_hit
-                )
-            )
-        ## SYNC LOGGING
-        self.logging_obj.success_handler(processed_chunk, None, None, cache_hit)
+
+        def _run():
+            if litellm.disable_streaming_logging is True:
+                """
+                [NOT RECOMMENDED]
+                Set this via `litellm.disable_streaming_logging = True`.
+
+                Disables streaming logging.
+                """
+                return
+            if not litellm.sync_logging:
+                ## ASYNC LOGGING
+                # Create an event loop for the new thread
+                if self.logging_loop is not None:
+                    future = asyncio.run_coroutine_threadsafe(
+                        self.logging_obj.async_success_handler(
+                            processed_chunk, None, None, cache_hit
+                        ),
+                        loop=self.logging_loop,
+                    )
+                    future.result()
+                else:
+                    asyncio.run(
+                        self.logging_obj.async_success_handler(
+                            processed_chunk, None, None, cache_hit
+                        )
+                    )
+            ## SYNC LOGGING
+            self.logging_obj.success_handler(processed_chunk, None, None, cache_hit)

-        ## Sync store in cache
-        if self.logging_obj._llm_caching_handler is not None:
-            self.logging_obj._llm_caching_handler._sync_add_streaming_response_to_cache(
-                processed_chunk
-            )
+            ## Sync store in cache
+            if self.logging_obj._llm_caching_handler is not None:
+                self.logging_obj._llm_caching_handler._sync_add_streaming_response_to_cache(
+                    processed_chunk
+                )
+
+        if litellm.sync_logging:
+            _run()
+        else:
+            executor.submit(_run)

     def finish_reason_handler(self):
         model_response = self.model_response_creator()
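The async branch inside `_run` uses `asyncio.run_coroutine_threadsafe`, the standard-library primitive for submitting a coroutine to an event loop owned by another thread and blocking on its result. A self-contained illustration of that primitive (all names invented for the demo):

import asyncio
import threading

# A dedicated logging loop running in a background thread.
loop = asyncio.new_event_loop()
threading.Thread(target=loop.run_forever, daemon=True).start()


async def log_chunk(chunk):
    await asyncio.sleep(0.1)  # pretend to do async I/O
    return f"logged {chunk}"


# From a worker thread: schedule the coroutine on the logging loop and
# block until it completes, mirroring future.result() in the diff.
future = asyncio.run_coroutine_threadsafe(log_chunk("chunk-1"), loop)
print(future.result())

loop.call_soon_threadsafe(loop.stop)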
@@ -1522,11 +1537,8 @@ class CustomStreamWrapper:
                     completion_start_time=datetime.datetime.now()
                 )
             ## LOGGING
-            executor.submit(
-                self.run_success_logging_and_cache_storage,
-                response,
-                cache_hit,
-            )  # log response
+            self.run_success_logging_and_cache_storage(response, cache_hit)

             choice = response.choices[0]
             if isinstance(choice, StreamingChoices):
                 self.response_uptil_now += choice.delta.get("content", "") or ""
@@ -1576,21 +1588,12 @@ class CustomStreamWrapper:
                     ),
                     cache_hit=cache_hit,
                 )
-                executor.submit(
-                    self.logging_obj.success_handler,
-                    complete_streaming_response.model_copy(deep=True),
-                    None,
-                    None,
-                    cache_hit,
-                )
+                logging_result = complete_streaming_response.model_copy(deep=True)
             else:
-                executor.submit(
-                    self.logging_obj.success_handler,
-                    response,
-                    None,
-                    None,
-                    cache_hit,
-                )
+                logging_result = response
+            self.logging_obj.success_handler(logging_result, None, None, cache_hit)

             if self.sent_stream_usage is False and self.send_stream_usage is True:
                 self.sent_stream_usage = True
                 return response
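A note on `model_copy(deep=True)`: it is Pydantic v2's deep-copy API, and the plausible reason it survives this refactor is to hand the logging handler a snapshot that later mutations of the response cannot affect. A small sketch of the shallow/deep difference:

from typing import List

from pydantic import BaseModel


class Choice(BaseModel):
    content: str


class Response(BaseModel):
    choices: List[Choice]


resp = Response(choices=[Choice(content="hi")])
shallow = resp.model_copy()  # nested models are shared
snapshot = resp.model_copy(deep=True)  # nested models are duplicated

resp.choices[0].content = "mutated"
print(shallow.choices[0].content)  # "mutated" -- shares the nested Choice
print(snapshot.choices[0].content)  # "hi" -- isolated copy, safe to log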
@@ -1602,11 +1605,7 @@ class CustomStreamWrapper:
                 usage = calculate_total_usage(chunks=self.chunks)
                 processed_chunk._hidden_params["usage"] = usage
                 ## LOGGING
-                executor.submit(
-                    self.run_success_logging_and_cache_storage,
-                    processed_chunk,
-                    cache_hit,
-                )  # log response
+                self.run_success_logging_and_cache_storage(processed_chunk, cache_hit)
                 return processed_chunk
         except Exception as e:
             traceback_exception = traceback.format_exc()
@@ -1762,22 +1761,19 @@ class CustomStreamWrapper:
                     self.sent_stream_usage = True
                     return response

-            asyncio.create_task(
-                self.logging_obj.async_success_handler(
-                    complete_streaming_response,
-                    cache_hit=cache_hit,
-                    start_time=None,
-                    end_time=None,
-                )
-            )
-
-            executor.submit(
-                self.logging_obj.success_handler,
-                complete_streaming_response,
+            logging_params = dict(
+                result=complete_streaming_response,
                 cache_hit=cache_hit,
                 start_time=None,
                 end_time=None,
             )
+            if litellm.sync_logging:
+                await self.logging_obj.async_success_handler(**logging_params)
+            else:
+                asyncio.create_task(
+                    self.logging_obj.async_success_handler(**logging_params)
+                )
+
+            self.logging_obj.success_handler(**logging_params)

             raise StopAsyncIteration  # Re-raise StopIteration
         else:
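The last hunk is where the semantic difference is most visible: `await` guarantees the async handler finishes before `StopAsyncIteration` propagates, while `asyncio.create_task` merely schedules it. A toy demonstration of that difference (not LiteLLM code):

import asyncio


async def log_success():
    await asyncio.sleep(0.1)
    print("logging finished")


async def stream_done(sync_logging: bool):
    if sync_logging:
        await log_success()  # guaranteed to finish before we return
    else:
        asyncio.create_task(log_success())  # only scheduled; may be cut off
    print("stream closed")


asyncio.run(stream_done(sync_logging=True))   # prints both lines, in order
asyncio.run(stream_done(sync_logging=False))  # "stream closed"; the task likely never runs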