Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-25 18:54:30 +00:00
fix(streaming_handler.py): support logging complete streaming response on cache hit
This commit is contained in:
parent dd9e79adbd
commit c4b2e0ae3d
3 changed files with 66 additions and 22 deletions
@@ -790,6 +790,7 @@ class LLMCachingHandler:
            - Else append the chunk to self.async_streaming_chunks
        """
        complete_streaming_response: Optional[
            Union[ModelResponse, TextCompletionResponse]
        ] = _assemble_complete_response_from_streaming_chunks(
@@ -800,7 +801,6 @@ class LLMCachingHandler:
            streaming_chunks=self.async_streaming_chunks,
            is_async=True,
        )

        # if a complete_streaming_response is assembled, add it to the cache
        if complete_streaming_response is not None:
            await self.async_set_cache(
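The hunks above show the point where LLMCachingHandler assembles the accumulated streaming chunks into a complete response and, once one exists, writes it to the cache so that a later cache hit can log the full streaming response. Below is a minimal, self-contained sketch of that accumulate-then-assemble-then-cache pattern; every name in it (StreamChunk, InMemoryCache, assemble_complete_response, StreamingCacheHandler) is a hypothetical stand-in for illustration, not litellm's actual API.

# Minimal sketch of the pattern touched by this commit. All names here are
# hypothetical stand-ins, not litellm's real classes or signatures.
import asyncio
from dataclasses import dataclass, field
from typing import Dict, List, Optional


@dataclass
class StreamChunk:
    content: str
    usage: Optional[dict] = None  # only the final chunk carries usage


@dataclass
class InMemoryCache:
    store: Dict[str, str] = field(default_factory=dict)

    async def async_set(self, key: str, value: str) -> None:
        self.store[key] = value


def assemble_complete_response(chunks: List[StreamChunk]) -> Optional[str]:
    # Only assemble once the final (usage-bearing) chunk has arrived;
    # otherwise signal that the stream is still incomplete.
    if not chunks or chunks[-1].usage is None:
        return None
    return "".join(c.content for c in chunks)


class StreamingCacheHandler:
    def __init__(self, cache: InMemoryCache, cache_key: str) -> None:
        self.cache = cache
        self.cache_key = cache_key
        self.streaming_chunks: List[StreamChunk] = []

    async def add_chunk(self, chunk: StreamChunk) -> None:
        # - If the chunk carries a 'usage' field, assemble the complete response
        # - Else just append the chunk and wait for more
        self.streaming_chunks.append(chunk)
        complete = assemble_complete_response(self.streaming_chunks)
        # If a complete response was assembled, add it to the cache so a
        # later cache hit can log the full streaming response.
        if complete is not None:
            await self.cache.async_set(self.cache_key, complete)


async def main() -> None:
    handler = StreamingCacheHandler(InMemoryCache(), cache_key="demo")
    await handler.add_chunk(StreamChunk(content="Hello, "))
    await handler.add_chunk(StreamChunk(content="world!", usage={"total_tokens": 4}))
    print(handler.cache.store)  # {'demo': 'Hello, world!'}


asyncio.run(main())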