fix(streaming_handler.py): support logging complete streaming response on cache hit

2025-04-25 18:54:30 +00:00 · 2025-03-17 18:10:39 -07:00 · 2025-03-17 18:10:39 -07:00 · c4b2e0ae3d
commit c4b2e0ae3d
parent dd9e79adbd
3 changed files with 66 additions and 22 deletions
--- a/litellm/caching/caching_handler.py
+++ b/litellm/caching/caching_handler.py
@@ -790,6 +790,7 @@ class LLMCachingHandler:
        - Else append the chunk to self.async_streaming_chunks

        """
+
        complete_streaming_response: Optional[
            Union[ModelResponse, TextCompletionResponse]
        ] = _assemble_complete_response_from_streaming_chunks(
@@ -800,7 +801,6 @@ class LLMCachingHandler:
            streaming_chunks=self.async_streaming_chunks,
            is_async=True,
        )
-
        # if a complete_streaming_response is assembled, add it to the cache
        if complete_streaming_response is not None:
            await self.async_set_cache(