commit 2c20b3726b
Yuki Watanabe, 2025-04-23 00:48:24 -07:00, committed by GitHub
9 changed files with 189 additions and 104 deletions


@@ -8,12 +8,13 @@ import os
 import re
 import subprocess
 import sys
 import threading
 import time
 import traceback
 import uuid
 from datetime import datetime as dt_object
 from functools import lru_cache
-from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union, cast
+from typing import Any, Callable, Dict, List, Literal, Optional, Sequence, Tuple, Union, cast

 from pydantic import BaseModel
@@ -1226,7 +1227,36 @@ class Logging(LiteLLMLoggingBaseClass):
         except Exception as e:
             raise Exception(f"[Non-Blocking] LiteLLM.Success_Call Error: {str(e)}")

-    def success_handler(  # noqa: PLR0915
+    def success_handler(
+        self,
+        result=None,
+        start_time=None,
+        end_time=None,
+        cache_hit=None,
+        synchronous=None,
+        **kwargs,
+    ):
+        """
+        Execute the success handler function in a sync or async manner.
+
+        If the `synchronous` argument is not provided, the global `litellm.sync_logging` config is used.
+        """
+        if synchronous is None:
+            synchronous = litellm.sync_logging
+
+        if synchronous:
+            self._success_handler(result, start_time, end_time, cache_hit, **kwargs)
+        else:
+            executor.submit(
+                self._success_handler,
+                result,
+                start_time,
+                end_time,
+                cache_hit,
+                **kwargs,
+            )
+
+    def _success_handler(  # noqa: PLR0915
         self, result=None, start_time=None, end_time=None, cache_hit=None, **kwargs
     ):
         verbose_logger.debug(
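The hunk above turns `success_handler` into a thin dispatcher: it either runs the real handler inline or submits it to a shared thread pool. A minimal, self-contained sketch of the same dispatch pattern (the `sync_logging` flag, `handle_success`, and the pool here are illustrative stand-ins, not LiteLLM internals):

from concurrent.futures import ThreadPoolExecutor

executor = ThreadPoolExecutor(max_workers=4)
sync_logging = False  # stand-in for the global litellm.sync_logging flag


def handle_success(result):
    # Stand-in for the real work: user callbacks, spend tracking, cache writes.
    print(f"logged: {result}")


def success_handler(result, synchronous=None):
    # Fall back to the global flag when the caller does not choose explicitly.
    if synchronous is None:
        synchronous = sync_logging
    if synchronous:
        handle_success(result)  # blocks the caller until logging finishes
    else:
        executor.submit(handle_success, result)  # fire-and-forget on the pool


success_handler("response-123")  # queued on the thread pool
success_handler("response-456", synchronous=True)  # runs inline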
@@ -2376,12 +2406,7 @@ class Logging(LiteLLMLoggingBaseClass):
         if self._should_run_sync_callbacks_for_async_calls() is False:
             return

-        executor.submit(
-            self.success_handler,
-            result,
-            start_time,
-            end_time,
-        )
+        self.success_handler(result, start_time, end_time)

     def _should_run_sync_callbacks_for_async_calls(self) -> bool:
         """


@@ -1445,32 +1445,47 @@ class CustomStreamWrapper:
         """
         Runs success logging in a thread and adds the response to the cache
         """
-        if litellm.disable_streaming_logging is True:
-            """
-            [NOT RECOMMENDED]
-            Set this via `litellm.disable_streaming_logging = True`.
-
-            Disables streaming logging.
-            """
-            return
-        ## ASYNC LOGGING
-        # Create an event loop for the new thread
-        if self.logging_loop is not None:
-            future = asyncio.run_coroutine_threadsafe(
-                self.logging_obj.async_success_handler(
-                    processed_chunk, None, None, cache_hit
-                ),
-                loop=self.logging_loop,
-            )
-            future.result()
-        else:
-            asyncio.run(
-                self.logging_obj.async_success_handler(
-                    processed_chunk, None, None, cache_hit
-                )
-            )
-        ## SYNC LOGGING
-        self.logging_obj.success_handler(processed_chunk, None, None, cache_hit)
+
+        def _run():
+            if litellm.disable_streaming_logging is True:
+                """
+                [NOT RECOMMENDED]
+                Set this via `litellm.disable_streaming_logging = True`.
+
+                Disables streaming logging.
+                """
+                return
+            if not litellm.sync_logging:
+                ## ASYNC LOGGING
+                # Create an event loop for the new thread
+                if self.logging_loop is not None:
+                    future = asyncio.run_coroutine_threadsafe(
+                        self.logging_obj.async_success_handler(
+                            processed_chunk, None, None, cache_hit
+                        ),
+                        loop=self.logging_loop,
+                    )
+                    future.result()
+                else:
+                    asyncio.run(
+                        self.logging_obj.async_success_handler(
+                            processed_chunk, None, None, cache_hit
+                        )
+                    )
+            ## SYNC LOGGING
+            self.logging_obj.success_handler(processed_chunk, None, None, cache_hit)

-        ## Sync store in cache
-        if self.logging_obj._llm_caching_handler is not None:
-            self.logging_obj._llm_caching_handler._sync_add_streaming_response_to_cache(
-                processed_chunk
-            )
+            ## Sync store in cache
+            if self.logging_obj._llm_caching_handler is not None:
+                self.logging_obj._llm_caching_handler._sync_add_streaming_response_to_cache(
+                    processed_chunk
+                )
+
+        if litellm.sync_logging:
+            _run()
+        else:
+            executor.submit(_run)

     def finish_reason_handler(self):
         model_response = self.model_response_creator()
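The async branch inside `_run` uses `asyncio.run_coroutine_threadsafe`, the standard-library primitive for submitting a coroutine to an event loop owned by another thread and blocking on its result. A self-contained illustration of that primitive (all names invented for the demo):

import asyncio
import threading

# A dedicated logging loop running in a background thread.
loop = asyncio.new_event_loop()
threading.Thread(target=loop.run_forever, daemon=True).start()


async def log_chunk(chunk):
    await asyncio.sleep(0.1)  # pretend to do async I/O
    return f"logged {chunk}"


# From a worker thread: schedule the coroutine on the logging loop and
# block until it completes, mirroring future.result() in the diff.
future = asyncio.run_coroutine_threadsafe(log_chunk("chunk-1"), loop)
print(future.result())

loop.call_soon_threadsafe(loop.stop)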
@@ -1522,11 +1537,8 @@ class CustomStreamWrapper:
                     completion_start_time=datetime.datetime.now()
                 )
             ## LOGGING
-            executor.submit(
-                self.run_success_logging_and_cache_storage,
-                response,
-                cache_hit,
-            )  # log response
+            self.run_success_logging_and_cache_storage(response, cache_hit)

             choice = response.choices[0]
             if isinstance(choice, StreamingChoices):
                 self.response_uptil_now += choice.delta.get("content", "") or ""
@@ -1576,21 +1588,12 @@ class CustomStreamWrapper:
                     ),
                     cache_hit=cache_hit,
                 )
-                executor.submit(
-                    self.logging_obj.success_handler,
-                    complete_streaming_response.model_copy(deep=True),
-                    None,
-                    None,
-                    cache_hit,
-                )
+                logging_result = complete_streaming_response.model_copy(deep=True)
             else:
-                executor.submit(
-                    self.logging_obj.success_handler,
-                    response,
-                    None,
-                    None,
-                    cache_hit,
-                )
+                logging_result = response
+            self.logging_obj.success_handler(logging_result, None, None, cache_hit)

             if self.sent_stream_usage is False and self.send_stream_usage is True:
                 self.sent_stream_usage = True
                 return response
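A note on `model_copy(deep=True)`: it is Pydantic v2's deep-copy API, and the plausible reason it survives this refactor is to hand the logging handler a snapshot that later mutations of the response cannot affect. A small sketch of the shallow/deep difference:

from typing import List

from pydantic import BaseModel


class Choice(BaseModel):
    content: str


class Response(BaseModel):
    choices: List[Choice]


resp = Response(choices=[Choice(content="hi")])
shallow = resp.model_copy()  # nested models are shared
snapshot = resp.model_copy(deep=True)  # nested models are duplicated

resp.choices[0].content = "mutated"
print(shallow.choices[0].content)  # "mutated" -- shares the nested Choice
print(snapshot.choices[0].content)  # "hi" -- isolated copy, safe to log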
@@ -1602,11 +1605,7 @@ class CustomStreamWrapper:
                 usage = calculate_total_usage(chunks=self.chunks)
                 processed_chunk._hidden_params["usage"] = usage
                 ## LOGGING
-                executor.submit(
-                    self.run_success_logging_and_cache_storage,
-                    processed_chunk,
-                    cache_hit,
-                )  # log response
+                self.run_success_logging_and_cache_storage(processed_chunk, cache_hit)
                 return processed_chunk
         except Exception as e:
             traceback_exception = traceback.format_exc()
@@ -1762,22 +1761,19 @@ class CustomStreamWrapper:
                     self.sent_stream_usage = True
                     return response

-            asyncio.create_task(
-                self.logging_obj.async_success_handler(
-                    complete_streaming_response,
-                    cache_hit=cache_hit,
-                    start_time=None,
-                    end_time=None,
-                )
-            )
-
-            executor.submit(
-                self.logging_obj.success_handler,
-                complete_streaming_response,
+            logging_params = dict(
+                result=complete_streaming_response,
                 cache_hit=cache_hit,
                 start_time=None,
                 end_time=None,
             )
+            if litellm.sync_logging:
+                await self.logging_obj.async_success_handler(**logging_params)
+            else:
+                asyncio.create_task(
+                    self.logging_obj.async_success_handler(**logging_params)
+                )
+
+            self.logging_obj.success_handler(**logging_params)

             raise StopAsyncIteration  # Re-raise StopIteration
         else:
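The last hunk is where the semantic difference is most visible: `await` guarantees the async handler finishes before `StopAsyncIteration` propagates, while `asyncio.create_task` merely schedules it. A toy demonstration of that difference (not LiteLLM code):

import asyncio


async def log_success():
    await asyncio.sleep(0.1)
    print("logging finished")


async def stream_done(sync_logging: bool):
    if sync_logging:
        await log_success()  # guaranteed to finish before we return
    else:
        asyncio.create_task(log_success())  # only scheduled; may be cut off
    print("stream closed")


asyncio.run(stream_done(sync_logging=True))   # prints both lines, in order
asyncio.run(stream_done(sync_logging=False))  # "stream closed"; the task likely never runs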