mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 19:24:27 +00:00)
fix(streaming_handler.py): support logging complete streaming response on cache hit
This commit is contained in:
parent ba6369e359
commit 301375bf84

3 changed files with 66 additions and 22 deletions
@@ -1481,6 +1481,15 @@ class CustomStreamWrapper:
                 processed_chunk
             )
 
+    async def async_cache_streaming_response(self, processed_chunk, cache_hit: bool):
+        """
+        Caches the streaming response
+        """
+        if not cache_hit and self.logging_obj._llm_caching_handler is not None:
+            await self.logging_obj._llm_caching_handler._add_streaming_response_to_cache(
+                processed_chunk
+            )
+
     def run_success_logging_and_cache_storage(self, processed_chunk, cache_hit: bool):
         """
         Runs success logging in a thread and adds the response to the cache
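The first hunk adds an async helper that only writes to the cache when the response was not itself served from cache and a caching handler is configured. Below is a minimal sketch of that guard, with a hypothetical CacheHandler and StreamWrapperSketch standing in for litellm's internals; only the method name _add_streaming_response_to_cache comes from the diff above.

import asyncio
from typing import Any, List, Optional


class CacheHandler:
    """Hypothetical stand-in for litellm's _llm_caching_handler."""

    def __init__(self) -> None:
        self._store: List[Any] = []

    async def _add_streaming_response_to_cache(self, processed_chunk: Any) -> None:
        # Stand-in for the real async cache write.
        self._store.append(processed_chunk)


class StreamWrapperSketch:
    """Illustrates only the guard applied by async_cache_streaming_response."""

    def __init__(self, caching_handler: Optional[CacheHandler]) -> None:
        self.caching_handler = caching_handler

    async def async_cache_streaming_response(
        self, processed_chunk: Any, cache_hit: bool
    ) -> None:
        # Skip the write when the response was served from cache,
        # or when no caching handler is configured.
        if not cache_hit and self.caching_handler is not None:
            await self.caching_handler._add_streaming_response_to_cache(processed_chunk)


async def _demo() -> None:
    wrapper = StreamWrapperSketch(CacheHandler())
    await wrapper.async_cache_streaming_response({"content": "hello"}, cache_hit=False)
    await wrapper.async_cache_streaming_response({"content": "cached"}, cache_hit=True)  # no-op


asyncio.run(_demo())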
@@ -1711,13 +1720,6 @@ class CustomStreamWrapper:
                     if processed_chunk is None:
                         continue
 
-                    if self.logging_obj._llm_caching_handler is not None:
-                        asyncio.create_task(
-                            self.logging_obj._llm_caching_handler._add_streaming_response_to_cache(
-                                processed_chunk=cast(ModelResponse, processed_chunk),
-                            )
-                        )
-
                     choice = processed_chunk.choices[0]
                     if isinstance(choice, StreamingChoices):
                         self.response_uptil_now += choice.delta.get("content", "") or ""
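The second hunk removes the per-chunk cache task from the async iteration path, so nothing is written to the cache while individual chunks are being yielded. A hedged sketch (not litellm code) of the resulting shape: chunks are only accumulated during iteration, and a single cache write happens for the complete response, as the next hunk does.

import asyncio
from typing import AsyncIterator, List


async def fake_stream() -> AsyncIterator[str]:
    # Hypothetical stand-in for an LLM token stream.
    for piece in ("Hel", "lo ", "world"):
        yield piece


async def consume_and_cache_once(cache: List[str]) -> str:
    chunks: List[str] = []
    async for chunk in fake_stream():
        chunks.append(chunk)  # no cache write per chunk
    complete = "".join(chunks)
    cache.append(complete)  # single write for the whole response
    return complete


cache: List[str] = []
print(asyncio.run(consume_and_cache_once(cache)))  # Hello world
print(cache)  # ['Hello world']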
@@ -1788,6 +1790,14 @@ class CustomStreamWrapper:
                         "usage",
                         getattr(complete_streaming_response, "usage"),
                     )
+                asyncio.create_task(
+                    self.async_cache_streaming_response(
+                        processed_chunk=complete_streaming_response.model_copy(
+                            deep=True
+                        ),
+                        cache_hit=cache_hit,
+                    )
+                )
                 if self.sent_stream_usage is False and self.send_stream_usage is True:
                     self.sent_stream_usage = True
                     return response
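The third hunk schedules the new helper as a fire-and-forget task at the point where the complete streaming response has been assembled, passing a deep copy so the cached object is isolated from anything done with the response that is returned to the caller. A small sketch of that pattern, using a hypothetical Response model and slow_cache_write coroutine; model_copy(deep=True) is the Pydantic v2 deep-copy call seen in the diff.

import asyncio

from pydantic import BaseModel


class Response(BaseModel):
    # Hypothetical response model standing in for the complete streaming response.
    content: str


async def slow_cache_write(resp: Response) -> None:
    await asyncio.sleep(0.1)  # pretend this is an async cache backend
    print("cached:", resp.content)


async def main() -> None:
    resp = Response(content="complete streaming response")
    # Schedule the cache write without awaiting it, so returning the final
    # response to the caller is not delayed by the cache backend.
    task = asyncio.create_task(slow_cache_write(resp.model_copy(deep=True)))
    # Later mutations of the original do not affect the deep-copied object
    # handed to the cache task.
    resp.content = "mutated after return"
    await task  # prints "cached: complete streaming response"


asyncio.run(main())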