(refactor) use helper function _assemble_complete_response_from_streaming_chunks to assemble complete responses in caching and logging callbacks (#6220)

* (refactor) use _assemble_complete_response_from_streaming_chunks

* add unit test for test_assemble_complete_response_from_streaming_chunks_1

* fix assemble complete_streaming_response

* add logging_testing to config

* add logging_coverage in codecov

* add test_assemble_complete_response_from_streaming_chunks_3

* add unit tests for _assemble_complete_response_from_streaming_chunks

* remove unused / junk function

* add test for streaming_chunks when assembly fails
Ishaan Jaff, 2024-10-15 12:45:12 +05:30 (committed by GitHub)
parent e9a46b992c · commit a69c670baa
9 changed files with 571 additions and 90 deletions
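
The refactor replaces duplicated chunk-assembly logic in the caching and logging callbacks with a single shared helper. Reconstructed from the code removed in the diff below and the new call site, the helper plausibly looks like the following sketch; the real implementation lives in litellm/litellm_core_utils/logging_utils.py and may differ in details, and the type annotations here are assumptions:

```python
# A sketch of the shared helper, inferred from the call site and the code it
# replaces (see the diff below). Not the verbatim implementation.
import datetime
from typing import Any, Dict, List, Optional, Union

import litellm
from litellm._logging import verbose_logger
from litellm.types.utils import ModelResponse, TextCompletionResponse


def _assemble_complete_response_from_streaming_chunks(
    result: Any,
    start_time: datetime.datetime,
    end_time: datetime.datetime,
    request_kwargs: Dict[str, Any],
    streaming_chunks: List[Any],
    is_async: bool,
) -> Optional[Union[ModelResponse, TextCompletionResponse]]:
    # is_async exists so the sync and async callbacks share one signature;
    # this sketch does not branch on it.
    streaming_chunks.append(result)
    if result.choices[0].finish_reason is None:
        # Not the last chunk yet: buffer it and report nothing to cache/log.
        return None
    try:
        # Final chunk seen: stitch the buffered chunks into a full response.
        return litellm.stream_chunk_builder(
            streaming_chunks,
            messages=request_kwargs.get("messages", None),
            start_time=start_time,
            end_time=end_time,
        )
    except Exception as e:
        # Fail soft, as the replaced code did: log and return None
        # rather than raising inside a logging/caching callback.
        verbose_logger.exception(
            "Error occurred building stream chunk in success logging: {}".format(
                str(e)
            )
        )
        return None
```

Centralizing the try/except around litellm.stream_chunk_builder gives every callback the same fail-soft behavior: a malformed stream degrades to a None complete response instead of raising mid-logging.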


@@ -26,6 +26,9 @@ from litellm.caching.caching import (
     RedisSemanticCache,
     S3Cache,
 )
+from litellm.litellm_core_utils.logging_utils import (
+    _assemble_complete_response_from_streaming_chunks,
+)
 from litellm.types.rerank import RerankResponse
 from litellm.types.utils import (
     CallTypes,
@@ -517,28 +520,14 @@ class LLMCachingHandler:
         """
         complete_streaming_response: Optional[
             Union[ModelResponse, TextCompletionResponse]
-        ] = None
-        if (
-            processed_chunk.choices[0].finish_reason is not None
-        ):  # if it's the last chunk
-            self.async_streaming_chunks.append(processed_chunk)
-            try:
-                end_time: datetime.datetime = datetime.datetime.now()
-                complete_streaming_response = litellm.stream_chunk_builder(
-                    self.async_streaming_chunks,
-                    messages=self.request_kwargs.get("messages", None),
-                    start_time=self.start_time,
-                    end_time=end_time,
-                )
-            except Exception as e:
-                verbose_logger.exception(
-                    "Error occurred building stream chunk in success logging: {}".format(
-                        str(e)
-                    )
-                )
-                complete_streaming_response = None
-        else:
-            self.async_streaming_chunks.append(processed_chunk)
+        ] = _assemble_complete_response_from_streaming_chunks(
+            result=processed_chunk,
+            start_time=self.start_time,
+            end_time=datetime.datetime.now(),
+            request_kwargs=self.request_kwargs,
+            streaming_chunks=self.async_streaming_chunks,
+            is_async=True,
+        )
         # if a complete_streaming_response is assembled, add it to the cache
         if complete_streaming_response is not None:
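
The commit message also adds unit tests for the helper. A minimal pytest-style sketch of the contract those tests likely cover (the test name and the SimpleNamespace stand-in chunk are illustrative assumptions, not the repo's actual tests): intermediate chunks are buffered and yield None; only a chunk carrying a finish_reason triggers assembly.

```python
# Hypothetical test sketch. Before the final chunk, only
# choices[0].finish_reason is inspected, so a SimpleNamespace can stand in
# for a real streaming chunk here.
import datetime
from types import SimpleNamespace

from litellm.litellm_core_utils.logging_utils import (
    _assemble_complete_response_from_streaming_chunks,
)


def test_intermediate_chunk_is_buffered_and_yields_none():
    chunk = SimpleNamespace(choices=[SimpleNamespace(finish_reason=None)])
    streaming_chunks: list = []
    now = datetime.datetime.now()

    result = _assemble_complete_response_from_streaming_chunks(
        result=chunk,
        start_time=now,
        end_time=now,
        request_kwargs={},
        streaming_chunks=streaming_chunks,
        is_async=False,
    )

    assert result is None              # no finish_reason yet -> nothing assembled
    assert len(streaming_chunks) == 1  # chunk buffered for final assembly
```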