fix(utils.py): correctly log streaming cache hits (#5417) (#5426)

Fixes https://github.com/BerriAI/litellm/issues/5401
This commit is contained in:
Krish Dholakia 2024-08-28 22:50:33 -07:00 committed by GitHub
parent 8ce4b8e195
commit 3e8f5009f4
3 changed files with 103 additions and 26 deletions

View file

@ -10548,8 +10548,8 @@ class CustomStreamWrapper:
"""
self.logging_loop = loop
def run_success_logging_in_thread(self, processed_chunk):
if litellm.disable_streaming_logging == True:
def run_success_logging_in_thread(self, processed_chunk, cache_hit: bool):
if litellm.disable_streaming_logging is True:
"""
[NOT RECOMMENDED]
Set this via `litellm.disable_streaming_logging = True`.
@ -10561,14 +10561,20 @@ class CustomStreamWrapper:
# Create an event loop for the new thread
if self.logging_loop is not None:
future = asyncio.run_coroutine_threadsafe(
self.logging_obj.async_success_handler(processed_chunk),
self.logging_obj.async_success_handler(
processed_chunk, None, None, cache_hit
),
loop=self.logging_loop,
)
result = future.result()
else:
asyncio.run(self.logging_obj.async_success_handler(processed_chunk))
asyncio.run(
self.logging_obj.async_success_handler(
processed_chunk, None, None, cache_hit
)
)
## SYNC LOGGING
self.logging_obj.success_handler(processed_chunk)
self.logging_obj.success_handler(processed_chunk, None, None, cache_hit)
def finish_reason_handler(self):
model_response = self.model_response_creator()
@ -10616,7 +10622,8 @@ class CustomStreamWrapper:
continue
## LOGGING
threading.Thread(
target=self.run_success_logging_in_thread, args=(response,)
target=self.run_success_logging_in_thread,
args=(response, cache_hit),
).start() # log response
self.response_uptil_now += (
response.choices[0].delta.get("content", "") or ""
@ -10678,8 +10685,8 @@ class CustomStreamWrapper:
processed_chunk._hidden_params["usage"] = usage
## LOGGING
threading.Thread(
target=self.logging_obj.success_handler,
args=(processed_chunk, None, None, cache_hit),
target=self.run_success_logging_in_thread,
args=(processed_chunk, cache_hit),
).start() # log response
return processed_chunk
except Exception as e:
@ -10776,6 +10783,7 @@ class CustomStreamWrapper:
if processed_chunk is None:
continue
## LOGGING
## LOGGING
threading.Thread(
target=self.logging_obj.success_handler,
args=(processed_chunk, None, None, cache_hit),