mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-27 03:34:10 +00:00
Merge 57f1d436e1
into f5996b2f6b
This commit is contained in:
commit
2c20b3726b
9 changed files with 189 additions and 104 deletions
|
@ -8,12 +8,13 @@ import os
|
|||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
import traceback
|
||||
import uuid
|
||||
from datetime import datetime as dt_object
|
||||
from functools import lru_cache
|
||||
from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union, cast
|
||||
from typing import Any, Callable, Dict, List, Literal, Optional, Sequence, Tuple, Union, cast
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
@ -1226,7 +1227,36 @@ class Logging(LiteLLMLoggingBaseClass):
|
|||
except Exception as e:
|
||||
raise Exception(f"[Non-Blocking] LiteLLM.Success_Call Error: {str(e)}")
|
||||
|
||||
def success_handler( # noqa: PLR0915
|
||||
def success_handler(
    self,
    result=None,
    start_time=None,
    end_time=None,
    cache_hit=None,
    synchronous=None,
    **kwargs,
):
    """
    Execute the success handler function in a sync or async manner.

    If the ``synchronous`` argument is not provided, the global
    `litellm.sync_logging` config is used.

    Args:
        result: Forwarded unchanged to ``self._success_handler``.
        start_time: Forwarded unchanged to ``self._success_handler``.
        end_time: Forwarded unchanged to ``self._success_handler``.
        cache_hit: Forwarded unchanged to ``self._success_handler``.
        synchronous: Per-call override. ``None`` means "use
            ``litellm.sync_logging``"; any truthy value runs the handler
            inline; any other falsy value hands it to ``executor``.
        **kwargs: Extra keyword arguments forwarded to
            ``self._success_handler``.

    Returns:
        None. On the non-synchronous path the object returned by
        ``executor.submit`` is discarded, so the handler's outcome is not
        observed here.
    """
    # Only fall back to the global setting when the caller expressed no
    # preference; an explicit False must still select the executor path.
    if synchronous is None:
        synchronous = litellm.sync_logging

    if synchronous:
        # Inline: the caller waits until the handler has finished.
        self._success_handler(result, start_time, end_time, cache_hit, **kwargs)
    else:
        # Deferred: hand the same call to the module-level executor.
        executor.submit(
            self._success_handler,
            result,
            start_time,
            end_time,
            cache_hit,
            **kwargs,
        )
|
||||
|
||||
|
||||
def _success_handler( # noqa: PLR0915
|
||||
self, result=None, start_time=None, end_time=None, cache_hit=None, **kwargs
|
||||
):
|
||||
verbose_logger.debug(
|
||||
|
@ -2376,12 +2406,7 @@ class Logging(LiteLLMLoggingBaseClass):
|
|||
if self._should_run_sync_callbacks_for_async_calls() is False:
|
||||
return
|
||||
|
||||
executor.submit(
|
||||
self.success_handler,
|
||||
result,
|
||||
start_time,
|
||||
end_time,
|
||||
)
|
||||
self.success_handler(result, start_time, end_time)
|
||||
|
||||
def _should_run_sync_callbacks_for_async_calls(self) -> bool:
|
||||
"""
|
||||
|
|
|
@ -1445,32 +1445,47 @@ class CustomStreamWrapper:
|
|||
"""
|
||||
Runs success logging in a thread and adds the response to the cache
|
||||
"""
|
||||
if litellm.disable_streaming_logging is True:
|
||||
"""
|
||||
[NOT RECOMMENDED]
|
||||
Set this via `litellm.disable_streaming_logging = True`.
|
||||
def _run():
|
||||
if litellm.disable_streaming_logging is True:
|
||||
"""
|
||||
[NOT RECOMMENDED]
|
||||
Set this via `litellm.disable_streaming_logging = True`.
|
||||
|
||||
Disables streaming logging.
|
||||
"""
|
||||
return
|
||||
## ASYNC LOGGING
|
||||
# Create an event loop for the new thread
|
||||
if self.logging_loop is not None:
|
||||
future = asyncio.run_coroutine_threadsafe(
|
||||
self.logging_obj.async_success_handler(
|
||||
processed_chunk, None, None, cache_hit
|
||||
),
|
||||
loop=self.logging_loop,
|
||||
)
|
||||
future.result()
|
||||
else:
|
||||
asyncio.run(
|
||||
self.logging_obj.async_success_handler(
|
||||
processed_chunk, None, None, cache_hit
|
||||
Disables streaming logging.
|
||||
"""
|
||||
return
|
||||
|
||||
if not litellm.sync_logging:
|
||||
## ASYNC LOGGING
|
||||
# Create an event loop for the new thread
|
||||
if self.logging_loop is not None:
|
||||
future = asyncio.run_coroutine_threadsafe(
|
||||
self.logging_obj.async_success_handler(
|
||||
processed_chunk, None, None, cache_hit
|
||||
),
|
||||
loop=self.logging_loop,
|
||||
)
|
||||
future.result()
|
||||
else:
|
||||
asyncio.run(
|
||||
self.logging_obj.async_success_handler(
|
||||
processed_chunk, None, None, cache_hit
|
||||
)
|
||||
)
|
||||
|
||||
## SYNC LOGGING
|
||||
self.logging_obj.success_handler(processed_chunk, None, None, cache_hit)
|
||||
|
||||
## Sync store in cache
|
||||
if self.logging_obj._llm_caching_handler is not None:
|
||||
self.logging_obj._llm_caching_handler._sync_add_streaming_response_to_cache(
|
||||
processed_chunk
|
||||
)
|
||||
)
|
||||
## SYNC LOGGING
|
||||
self.logging_obj.success_handler(processed_chunk, None, None, cache_hit)
|
||||
|
||||
if litellm.sync_logging:
|
||||
_run()
|
||||
else:
|
||||
executor.submit(_run)
|
||||
|
||||
def finish_reason_handler(self):
|
||||
model_response = self.model_response_creator()
|
||||
|
@ -1522,11 +1537,8 @@ class CustomStreamWrapper:
|
|||
completion_start_time=datetime.datetime.now()
|
||||
)
|
||||
## LOGGING
|
||||
executor.submit(
|
||||
self.run_success_logging_and_cache_storage,
|
||||
response,
|
||||
cache_hit,
|
||||
) # log response
|
||||
self.run_success_logging_and_cache_storage(response, cache_hit)
|
||||
|
||||
choice = response.choices[0]
|
||||
if isinstance(choice, StreamingChoices):
|
||||
self.response_uptil_now += choice.delta.get("content", "") or ""
|
||||
|
@ -1576,21 +1588,12 @@ class CustomStreamWrapper:
|
|||
),
|
||||
cache_hit=cache_hit,
|
||||
)
|
||||
executor.submit(
|
||||
self.logging_obj.success_handler,
|
||||
complete_streaming_response.model_copy(deep=True),
|
||||
None,
|
||||
None,
|
||||
cache_hit,
|
||||
)
|
||||
logging_result = complete_streaming_response.model_copy(deep=True)
|
||||
else:
|
||||
executor.submit(
|
||||
self.logging_obj.success_handler,
|
||||
response,
|
||||
None,
|
||||
None,
|
||||
cache_hit,
|
||||
)
|
||||
logging_result = response
|
||||
|
||||
self.logging_obj.success_handler(logging_result, None, None, cache_hit)
|
||||
|
||||
if self.sent_stream_usage is False and self.send_stream_usage is True:
|
||||
self.sent_stream_usage = True
|
||||
return response
|
||||
|
@ -1602,11 +1605,7 @@ class CustomStreamWrapper:
|
|||
usage = calculate_total_usage(chunks=self.chunks)
|
||||
processed_chunk._hidden_params["usage"] = usage
|
||||
## LOGGING
|
||||
executor.submit(
|
||||
self.run_success_logging_and_cache_storage,
|
||||
processed_chunk,
|
||||
cache_hit,
|
||||
) # log response
|
||||
self.run_success_logging_and_cache_storage(processed_chunk, cache_hit)
|
||||
return processed_chunk
|
||||
except Exception as e:
|
||||
traceback_exception = traceback.format_exc()
|
||||
|
@ -1762,22 +1761,19 @@ class CustomStreamWrapper:
|
|||
self.sent_stream_usage = True
|
||||
return response
|
||||
|
||||
asyncio.create_task(
|
||||
self.logging_obj.async_success_handler(
|
||||
complete_streaming_response,
|
||||
cache_hit=cache_hit,
|
||||
start_time=None,
|
||||
end_time=None,
|
||||
)
|
||||
)
|
||||
|
||||
executor.submit(
|
||||
self.logging_obj.success_handler,
|
||||
complete_streaming_response,
|
||||
logging_params = dict(
|
||||
result=complete_streaming_response,
|
||||
cache_hit=cache_hit,
|
||||
start_time=None,
|
||||
end_time=None,
|
||||
)
|
||||
if litellm.sync_logging:
|
||||
await self.logging_obj.async_success_handler(**logging_params)
|
||||
else:
|
||||
asyncio.create_task(self.logging_obj.async_success_handler(**logging_params))
|
||||
|
||||
self.logging_obj.success_handler(**logging_params)
|
||||
|
||||
raise StopAsyncIteration # Re-raise StopIteration
|
||||
else:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue