mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 11:14:04 +00:00
Fixes https://github.com/BerriAI/litellm/issues/5401
This commit is contained in:
parent
8ce4b8e195
commit
3e8f5009f4
3 changed files with 103 additions and 26 deletions
|
@ -610,12 +610,23 @@ class Logging:
|
||||||
self.model_call_details["litellm_params"]["metadata"][
|
self.model_call_details["litellm_params"]["metadata"][
|
||||||
"hidden_params"
|
"hidden_params"
|
||||||
] = result._hidden_params
|
] = result._hidden_params
|
||||||
|
## STANDARDIZED LOGGING PAYLOAD
|
||||||
|
|
||||||
|
self.model_call_details["standard_logging_object"] = (
|
||||||
|
get_standard_logging_object_payload(
|
||||||
|
kwargs=self.model_call_details,
|
||||||
|
init_response_obj=result,
|
||||||
|
start_time=start_time,
|
||||||
|
end_time=end_time,
|
||||||
|
logging_obj=self,
|
||||||
|
)
|
||||||
|
)
|
||||||
else: # streaming chunks + image gen.
|
else: # streaming chunks + image gen.
|
||||||
self.model_call_details["response_cost"] = None
|
self.model_call_details["response_cost"] = None
|
||||||
|
|
||||||
if (
|
if (
|
||||||
litellm.max_budget
|
litellm.max_budget
|
||||||
and self.stream == False
|
and self.stream is False
|
||||||
and result is not None
|
and result is not None
|
||||||
and "content" in result
|
and "content" in result
|
||||||
):
|
):
|
||||||
|
@ -628,17 +639,6 @@ class Logging:
|
||||||
total_time=float_diff,
|
total_time=float_diff,
|
||||||
)
|
)
|
||||||
|
|
||||||
## STANDARDIZED LOGGING PAYLOAD
|
|
||||||
|
|
||||||
self.model_call_details["standard_logging_object"] = (
|
|
||||||
get_standard_logging_object_payload(
|
|
||||||
kwargs=self.model_call_details,
|
|
||||||
init_response_obj=result,
|
|
||||||
start_time=start_time,
|
|
||||||
end_time=end_time,
|
|
||||||
logging_obj=self,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
return start_time, end_time, result
|
return start_time, end_time, result
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise Exception(f"[Non-Blocking] LiteLLM.Success_Call Error: {str(e)}")
|
raise Exception(f"[Non-Blocking] LiteLLM.Success_Call Error: {str(e)}")
|
||||||
|
@ -646,9 +646,7 @@ class Logging:
|
||||||
def success_handler(
|
def success_handler(
|
||||||
self, result=None, start_time=None, end_time=None, cache_hit=None, **kwargs
|
self, result=None, start_time=None, end_time=None, cache_hit=None, **kwargs
|
||||||
):
|
):
|
||||||
verbose_logger.debug(
|
print_verbose(f"Logging Details LiteLLM-Success Call: Cache_hit={cache_hit}")
|
||||||
f"Logging Details LiteLLM-Success Call: Cache_hit={cache_hit}"
|
|
||||||
)
|
|
||||||
start_time, end_time, result = self._success_handler_helper_fn(
|
start_time, end_time, result = self._success_handler_helper_fn(
|
||||||
start_time=start_time,
|
start_time=start_time,
|
||||||
end_time=end_time,
|
end_time=end_time,
|
||||||
|
@ -695,6 +693,16 @@ class Logging:
|
||||||
self.model_call_details["response_cost"] = (
|
self.model_call_details["response_cost"] = (
|
||||||
self._response_cost_calculator(result=complete_streaming_response)
|
self._response_cost_calculator(result=complete_streaming_response)
|
||||||
)
|
)
|
||||||
|
## STANDARDIZED LOGGING PAYLOAD
|
||||||
|
self.model_call_details["standard_logging_object"] = (
|
||||||
|
get_standard_logging_object_payload(
|
||||||
|
kwargs=self.model_call_details,
|
||||||
|
init_response_obj=complete_streaming_response,
|
||||||
|
start_time=start_time,
|
||||||
|
end_time=end_time,
|
||||||
|
logging_obj=self,
|
||||||
|
)
|
||||||
|
)
|
||||||
if self.dynamic_success_callbacks is not None and isinstance(
|
if self.dynamic_success_callbacks is not None and isinstance(
|
||||||
self.dynamic_success_callbacks, list
|
self.dynamic_success_callbacks, list
|
||||||
):
|
):
|
||||||
|
@ -714,7 +722,6 @@ class Logging:
|
||||||
)
|
)
|
||||||
|
|
||||||
## LOGGING HOOK ##
|
## LOGGING HOOK ##
|
||||||
|
|
||||||
for callback in callbacks:
|
for callback in callbacks:
|
||||||
if isinstance(callback, CustomLogger):
|
if isinstance(callback, CustomLogger):
|
||||||
self.model_call_details, result = callback.logging_hook(
|
self.model_call_details, result = callback.logging_hook(
|
||||||
|
@ -726,7 +733,7 @@ class Logging:
|
||||||
for callback in callbacks:
|
for callback in callbacks:
|
||||||
try:
|
try:
|
||||||
litellm_params = self.model_call_details.get("litellm_params", {})
|
litellm_params = self.model_call_details.get("litellm_params", {})
|
||||||
if litellm_params.get("no-log", False) == True:
|
if litellm_params.get("no-log", False) is True:
|
||||||
# proxy cost tracking cal backs should run
|
# proxy cost tracking cal backs should run
|
||||||
if not (
|
if not (
|
||||||
isinstance(callback, CustomLogger)
|
isinstance(callback, CustomLogger)
|
||||||
|
@ -1192,6 +1199,7 @@ class Logging:
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
result = self.model_call_details["complete_response"]
|
result = self.model_call_details["complete_response"]
|
||||||
|
|
||||||
callback.log_success_event(
|
callback.log_success_event(
|
||||||
kwargs=self.model_call_details,
|
kwargs=self.model_call_details,
|
||||||
response_obj=result,
|
response_obj=result,
|
||||||
|
@ -1199,7 +1207,7 @@ class Logging:
|
||||||
end_time=end_time,
|
end_time=end_time,
|
||||||
)
|
)
|
||||||
if (
|
if (
|
||||||
callable(callback) == True
|
callable(callback) is True
|
||||||
and self.model_call_details.get("litellm_params", {}).get(
|
and self.model_call_details.get("litellm_params", {}).get(
|
||||||
"acompletion", False
|
"acompletion", False
|
||||||
)
|
)
|
||||||
|
@ -1301,6 +1309,7 @@ class Logging:
|
||||||
result=complete_streaming_response
|
result=complete_streaming_response
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
verbose_logger.debug(
|
verbose_logger.debug(
|
||||||
f"Model={self.model}; cost={self.model_call_details['response_cost']}"
|
f"Model={self.model}; cost={self.model_call_details['response_cost']}"
|
||||||
)
|
)
|
||||||
|
@ -1310,6 +1319,16 @@ class Logging:
|
||||||
)
|
)
|
||||||
self.model_call_details["response_cost"] = None
|
self.model_call_details["response_cost"] = None
|
||||||
|
|
||||||
|
## STANDARDIZED LOGGING PAYLOAD
|
||||||
|
self.model_call_details["standard_logging_object"] = (
|
||||||
|
get_standard_logging_object_payload(
|
||||||
|
kwargs=self.model_call_details,
|
||||||
|
init_response_obj=complete_streaming_response,
|
||||||
|
start_time=start_time,
|
||||||
|
end_time=end_time,
|
||||||
|
logging_obj=self,
|
||||||
|
)
|
||||||
|
)
|
||||||
if self.dynamic_async_success_callbacks is not None and isinstance(
|
if self.dynamic_async_success_callbacks is not None and isinstance(
|
||||||
self.dynamic_async_success_callbacks, list
|
self.dynamic_async_success_callbacks, list
|
||||||
):
|
):
|
||||||
|
|
|
@ -1297,3 +1297,53 @@ def test_aaastandard_logging_payload_cache_hit():
|
||||||
assert standard_logging_object["cache_hit"] is True
|
assert standard_logging_object["cache_hit"] is True
|
||||||
assert standard_logging_object["response_cost"] == 0
|
assert standard_logging_object["response_cost"] == 0
|
||||||
assert standard_logging_object["saved_cache_cost"] > 0
|
assert standard_logging_object["saved_cache_cost"] > 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_logging_async_cache_hit_sync_call():
|
||||||
|
from litellm.types.utils import StandardLoggingPayload
|
||||||
|
|
||||||
|
litellm.cache = Cache()
|
||||||
|
|
||||||
|
response = litellm.completion(
|
||||||
|
model="gpt-3.5-turbo",
|
||||||
|
messages=[{"role": "user", "content": "Hey, how's it going?"}],
|
||||||
|
caching=True,
|
||||||
|
stream=True,
|
||||||
|
)
|
||||||
|
for chunk in response:
|
||||||
|
print(chunk)
|
||||||
|
|
||||||
|
time.sleep(3)
|
||||||
|
customHandler = CompletionCustomHandler()
|
||||||
|
litellm.callbacks = [customHandler]
|
||||||
|
litellm.success_callback = []
|
||||||
|
|
||||||
|
with patch.object(
|
||||||
|
customHandler, "log_success_event", new=MagicMock()
|
||||||
|
) as mock_client:
|
||||||
|
resp = litellm.completion(
|
||||||
|
model="gpt-3.5-turbo",
|
||||||
|
messages=[{"role": "user", "content": "Hey, how's it going?"}],
|
||||||
|
caching=True,
|
||||||
|
stream=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
for chunk in resp:
|
||||||
|
print(chunk)
|
||||||
|
|
||||||
|
time.sleep(2)
|
||||||
|
mock_client.assert_called_once()
|
||||||
|
|
||||||
|
assert "standard_logging_object" in mock_client.call_args.kwargs["kwargs"]
|
||||||
|
assert (
|
||||||
|
mock_client.call_args.kwargs["kwargs"]["standard_logging_object"]
|
||||||
|
is not None
|
||||||
|
)
|
||||||
|
|
||||||
|
standard_logging_object: StandardLoggingPayload = mock_client.call_args.kwargs[
|
||||||
|
"kwargs"
|
||||||
|
]["standard_logging_object"]
|
||||||
|
|
||||||
|
assert standard_logging_object["cache_hit"] is True
|
||||||
|
assert standard_logging_object["response_cost"] == 0
|
||||||
|
assert standard_logging_object["saved_cache_cost"] > 0
|
||||||
|
|
|
@ -10548,8 +10548,8 @@ class CustomStreamWrapper:
|
||||||
"""
|
"""
|
||||||
self.logging_loop = loop
|
self.logging_loop = loop
|
||||||
|
|
||||||
def run_success_logging_in_thread(self, processed_chunk):
|
def run_success_logging_in_thread(self, processed_chunk, cache_hit: bool):
|
||||||
if litellm.disable_streaming_logging == True:
|
if litellm.disable_streaming_logging is True:
|
||||||
"""
|
"""
|
||||||
[NOT RECOMMENDED]
|
[NOT RECOMMENDED]
|
||||||
Set this via `litellm.disable_streaming_logging = True`.
|
Set this via `litellm.disable_streaming_logging = True`.
|
||||||
|
@ -10561,14 +10561,20 @@ class CustomStreamWrapper:
|
||||||
# Create an event loop for the new thread
|
# Create an event loop for the new thread
|
||||||
if self.logging_loop is not None:
|
if self.logging_loop is not None:
|
||||||
future = asyncio.run_coroutine_threadsafe(
|
future = asyncio.run_coroutine_threadsafe(
|
||||||
self.logging_obj.async_success_handler(processed_chunk),
|
self.logging_obj.async_success_handler(
|
||||||
|
processed_chunk, None, None, cache_hit
|
||||||
|
),
|
||||||
loop=self.logging_loop,
|
loop=self.logging_loop,
|
||||||
)
|
)
|
||||||
result = future.result()
|
result = future.result()
|
||||||
else:
|
else:
|
||||||
asyncio.run(self.logging_obj.async_success_handler(processed_chunk))
|
asyncio.run(
|
||||||
|
self.logging_obj.async_success_handler(
|
||||||
|
processed_chunk, None, None, cache_hit
|
||||||
|
)
|
||||||
|
)
|
||||||
## SYNC LOGGING
|
## SYNC LOGGING
|
||||||
self.logging_obj.success_handler(processed_chunk)
|
self.logging_obj.success_handler(processed_chunk, None, None, cache_hit)
|
||||||
|
|
||||||
def finish_reason_handler(self):
|
def finish_reason_handler(self):
|
||||||
model_response = self.model_response_creator()
|
model_response = self.model_response_creator()
|
||||||
|
@ -10616,7 +10622,8 @@ class CustomStreamWrapper:
|
||||||
continue
|
continue
|
||||||
## LOGGING
|
## LOGGING
|
||||||
threading.Thread(
|
threading.Thread(
|
||||||
target=self.run_success_logging_in_thread, args=(response,)
|
target=self.run_success_logging_in_thread,
|
||||||
|
args=(response, cache_hit),
|
||||||
).start() # log response
|
).start() # log response
|
||||||
self.response_uptil_now += (
|
self.response_uptil_now += (
|
||||||
response.choices[0].delta.get("content", "") or ""
|
response.choices[0].delta.get("content", "") or ""
|
||||||
|
@ -10678,8 +10685,8 @@ class CustomStreamWrapper:
|
||||||
processed_chunk._hidden_params["usage"] = usage
|
processed_chunk._hidden_params["usage"] = usage
|
||||||
## LOGGING
|
## LOGGING
|
||||||
threading.Thread(
|
threading.Thread(
|
||||||
target=self.logging_obj.success_handler,
|
target=self.run_success_logging_in_thread,
|
||||||
args=(processed_chunk, None, None, cache_hit),
|
args=(processed_chunk, cache_hit),
|
||||||
).start() # log response
|
).start() # log response
|
||||||
return processed_chunk
|
return processed_chunk
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
@ -10776,6 +10783,7 @@ class CustomStreamWrapper:
|
||||||
if processed_chunk is None:
|
if processed_chunk is None:
|
||||||
continue
|
continue
|
||||||
## LOGGING
|
## LOGGING
|
||||||
|
## LOGGING
|
||||||
threading.Thread(
|
threading.Thread(
|
||||||
target=self.logging_obj.success_handler,
|
target=self.logging_obj.success_handler,
|
||||||
args=(processed_chunk, None, None, cache_hit),
|
args=(processed_chunk, None, None, cache_hit),
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue