fix(streaming_handler.py): support logging complete streaming response on cache hit

This commit is contained in:
Krrish Dholakia 2025-03-17 18:10:39 -07:00
parent dd9e79adbd
commit c4b2e0ae3d
3 changed files with 66 additions and 22 deletions

View file

@@ -790,6 +790,7 @@ class LLMCachingHandler:
- Else append the chunk to self.async_streaming_chunks
"""
complete_streaming_response: Optional[
Union[ModelResponse, TextCompletionResponse]
] = _assemble_complete_response_from_streaming_chunks(
@@ -800,7 +801,6 @@ class LLMCachingHandler:
streaming_chunks=self.async_streaming_chunks,
is_async=True,
)
# if a complete_streaming_response is assembled, add it to the cache
if complete_streaming_response is not None:
await self.async_set_cache(