(refactor) use helper function _assemble_complete_response_from_streaming_chunks to assemble complete responses in caching and logging callbacks (#6220)

* (refactor) use _assemble_complete_response_from_streaming_chunks

* add unit test for test_assemble_complete_response_from_streaming_chunks_1

* fix assemble complete_streaming_response

* add logging_testing to config

* add logging_coverage in codecov

* add test_assemble_complete_response_from_streaming_chunks_3

* add unit tests for _assemble_complete_response_from_streaming_chunks

* remove unused / junk function

* add test for streaming_chunks when assembly fails
Ishaan Jaff, 2024-10-15 12:45:12 +05:30 (committed by GitHub)
parent e9a46b992c · commit a69c670baa
9 changed files with 571 additions and 90 deletions
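
The refactor replaces duplicated chunk-assembly logic in the caching and logging callbacks with a single shared helper. Reconstructed from the code removed in the diff below and the new call site, the helper plausibly looks like the following sketch; the real implementation lives in litellm/litellm_core_utils/logging_utils.py and may differ in details, and the type annotations here are assumptions:

```python
# A sketch of the shared helper, inferred from the call site and the code it
# replaces (see the diff below). Not the verbatim implementation.
import datetime
from typing import Any, Dict, List, Optional, Union

import litellm
from litellm._logging import verbose_logger
from litellm.types.utils import ModelResponse, TextCompletionResponse


def _assemble_complete_response_from_streaming_chunks(
    result: Any,
    start_time: datetime.datetime,
    end_time: datetime.datetime,
    request_kwargs: Dict[str, Any],
    streaming_chunks: List[Any],
    is_async: bool,
) -> Optional[Union[ModelResponse, TextCompletionResponse]]:
    # is_async exists so the sync and async callbacks share one signature;
    # this sketch does not branch on it.
    streaming_chunks.append(result)
    if result.choices[0].finish_reason is None:
        # Not the last chunk yet: buffer it and report nothing to cache/log.
        return None
    try:
        # Final chunk seen: stitch the buffered chunks into a full response.
        return litellm.stream_chunk_builder(
            streaming_chunks,
            messages=request_kwargs.get("messages", None),
            start_time=start_time,
            end_time=end_time,
        )
    except Exception as e:
        # Fail soft, as the replaced code did: log and return None
        # rather than raising inside a logging/caching callback.
        verbose_logger.exception(
            "Error occurred building stream chunk in success logging: {}".format(
                str(e)
            )
        )
        return None
```

Centralizing the try/except around litellm.stream_chunk_builder gives every callback the same fail-soft behavior: a malformed stream degrades to a None complete response instead of raising mid-logging.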


@@ -26,6 +26,9 @@ from litellm.caching.caching import (
     RedisSemanticCache,
     S3Cache,
 )
+from litellm.litellm_core_utils.logging_utils import (
+    _assemble_complete_response_from_streaming_chunks,
+)
 from litellm.types.rerank import RerankResponse
 from litellm.types.utils import (
     CallTypes,
@@ -517,28 +520,14 @@ class LLMCachingHandler:
         """
         complete_streaming_response: Optional[
             Union[ModelResponse, TextCompletionResponse]
-        ] = None
-        if (
-            processed_chunk.choices[0].finish_reason is not None
-        ):  # if it's the last chunk
-            self.async_streaming_chunks.append(processed_chunk)
-            try:
-                end_time: datetime.datetime = datetime.datetime.now()
-                complete_streaming_response = litellm.stream_chunk_builder(
-                    self.async_streaming_chunks,
-                    messages=self.request_kwargs.get("messages", None),
-                    start_time=self.start_time,
-                    end_time=end_time,
-                )
-            except Exception as e:
-                verbose_logger.exception(
-                    "Error occurred building stream chunk in success logging: {}".format(
-                        str(e)
-                    )
-                )
-                complete_streaming_response = None
-        else:
-            self.async_streaming_chunks.append(processed_chunk)
+        ] = _assemble_complete_response_from_streaming_chunks(
+            result=processed_chunk,
+            start_time=self.start_time,
+            end_time=datetime.datetime.now(),
+            request_kwargs=self.request_kwargs,
+            streaming_chunks=self.async_streaming_chunks,
+            is_async=True,
+        )
         # if a complete_streaming_response is assembled, add it to the cache
         if complete_streaming_response is not None:
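
The commit message also adds unit tests for the helper. A minimal pytest-style sketch of the contract those tests likely cover (the test name and the SimpleNamespace stand-in chunk are illustrative assumptions, not the repo's actual tests): intermediate chunks are buffered and yield None; only a chunk carrying a finish_reason triggers assembly.

```python
# Hypothetical test sketch. Before the final chunk, only
# choices[0].finish_reason is inspected, so a SimpleNamespace can stand in
# for a real streaming chunk here.
import datetime
from types import SimpleNamespace

from litellm.litellm_core_utils.logging_utils import (
    _assemble_complete_response_from_streaming_chunks,
)


def test_intermediate_chunk_is_buffered_and_yields_none():
    chunk = SimpleNamespace(choices=[SimpleNamespace(finish_reason=None)])
    streaming_chunks: list = []
    now = datetime.datetime.now()

    result = _assemble_complete_response_from_streaming_chunks(
        result=chunk,
        start_time=now,
        end_time=now,
        request_kwargs={},
        streaming_chunks=streaming_chunks,
        is_async=False,
    )

    assert result is None              # no finish_reason yet -> nothing assembled
    assert len(streaming_chunks) == 1  # chunk buffered for final assembly
```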