diff --git a/.circleci/config.yml b/.circleci/config.yml index dbe4ac5fb..1116ad741 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -328,6 +328,48 @@ jobs: paths: - llm_translation_coverage.xml - llm_translation_coverage + logging_testing: + docker: + - image: cimg/python:3.11 + auth: + username: ${DOCKERHUB_USERNAME} + password: ${DOCKERHUB_PASSWORD} + working_directory: ~/project + + steps: + - checkout + - run: + name: Install Dependencies + command: | + python -m pip install --upgrade pip + python -m pip install -r requirements.txt + pip install "pytest==7.3.1" + pip install "pytest-retry==1.6.3" + pip install "pytest-cov==5.0.0" + pip install "pytest-asyncio==0.21.1" + pip install "respx==0.21.1" + # Run pytest and generate JUnit XML report + - run: + name: Run tests + command: | + pwd + ls + python -m pytest -vv tests/logging_callback_tests --cov=litellm --cov-report=xml -x -s -v --junitxml=test-results/junit.xml --durations=5 + no_output_timeout: 120m + - run: + name: Rename the coverage files + command: | + mv coverage.xml logging_coverage.xml + mv .coverage logging_coverage + + # Store test results + - store_test_results: + path: test-results + - persist_to_workspace: + root: . + paths: + - logging_coverage.xml + - logging_coverage installing_litellm_on_python: docker: - image: circleci/python:3.8 @@ -769,7 +811,7 @@ jobs: python -m venv venv . venv/bin/activate pip install coverage - coverage combine llm_translation_coverage litellm_router_coverage local_testing_coverage litellm_assistants_api_coverage ui_endpoint_testing_coverage + coverage combine llm_translation_coverage logging_coverage litellm_router_coverage local_testing_coverage litellm_assistants_api_coverage ui_endpoint_testing_coverage coverage xml - codecov/upload: file: ./coverage.xml @@ -1005,9 +1047,16 @@ workflows: only: - main - /litellm_.*/ + - logging_testing: + filters: + branches: + only: + - main + - /litellm_.*/ - upload-coverage: requires: - llm_translation_testing + - logging_testing - litellm_router_testing - local_testing - litellm_assistants_api_testing @@ -1036,6 +1085,7 @@ workflows: - build_and_test - load_testing - llm_translation_testing + - logging_testing - litellm_router_testing - litellm_assistants_api_testing - ui_endpoint_testing diff --git a/litellm/caching/caching_handler.py b/litellm/caching/caching_handler.py index 264fb405b..479b3bd1f 100644 --- a/litellm/caching/caching_handler.py +++ b/litellm/caching/caching_handler.py @@ -26,6 +26,9 @@ from litellm.caching.caching import ( RedisSemanticCache, S3Cache, ) +from litellm.litellm_core_utils.logging_utils import ( + _assemble_complete_response_from_streaming_chunks, +) from litellm.types.rerank import RerankResponse from litellm.types.utils import ( CallTypes, @@ -517,28 +520,14 @@ class LLMCachingHandler: """ complete_streaming_response: Optional[ Union[ModelResponse, TextCompletionResponse] - ] = None - if ( - processed_chunk.choices[0].finish_reason is not None - ): # if it's the last chunk - self.async_streaming_chunks.append(processed_chunk) - try: - end_time: datetime.datetime = datetime.datetime.now() - complete_streaming_response = litellm.stream_chunk_builder( - self.async_streaming_chunks, - messages=self.request_kwargs.get("messages", None), - start_time=self.start_time, - end_time=end_time, - ) - except Exception as e: - verbose_logger.exception( - "Error occurred building stream chunk in success logging: {}".format( - str(e) - ) - ) - complete_streaming_response = None - else: - self.async_streaming_chunks.append(processed_chunk) + ] = _assemble_complete_response_from_streaming_chunks( + result=processed_chunk, + start_time=self.start_time, + end_time=datetime.datetime.now(), + request_kwargs=self.request_kwargs, + streaming_chunks=self.async_streaming_chunks, + is_async=True, + ) # if a complete_streaming_response is assembled, add it to the cache if complete_streaming_response is not None: diff --git a/litellm/integrations/gcs_bucket/gcs_bucket.py b/litellm/integrations/gcs_bucket/gcs_bucket.py index 744361453..3d99c0257 100644 --- a/litellm/integrations/gcs_bucket/gcs_bucket.py +++ b/litellm/integrations/gcs_bucket/gcs_bucket.py @@ -12,9 +12,6 @@ import litellm from litellm._logging import verbose_logger from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.gcs_bucket.gcs_bucket_base import GCSBucketBase -from litellm.litellm_core_utils.logging_utils import ( - convert_litellm_response_object_to_dict, -) from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler from litellm.proxy._types import CommonProxyErrors, SpendLogsMetadata, SpendLogsPayload from litellm.types.utils import ( diff --git a/litellm/integrations/gcs_bucket/gcs_bucket_base.py b/litellm/integrations/gcs_bucket/gcs_bucket_base.py index b9e0930be..dac0790b6 100644 --- a/litellm/integrations/gcs_bucket/gcs_bucket_base.py +++ b/litellm/integrations/gcs_bucket/gcs_bucket_base.py @@ -10,9 +10,6 @@ from pydantic import BaseModel, Field import litellm from litellm._logging import verbose_logger from litellm.integrations.custom_logger import CustomLogger -from litellm.litellm_core_utils.logging_utils import ( - convert_litellm_response_object_to_dict, -) from litellm.llms.custom_httpx.http_handler import ( get_async_httpx_client, httpxSpecialProvider, diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index 1d4a3e3cc..f7afbb7b8 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -86,6 +86,7 @@ from ..integrations.supabase import Supabase from ..integrations.traceloop import TraceloopLogger from ..integrations.weights_biases import WeightsBiasesLogger from .exception_mapping_utils import _get_response_headers +from .logging_utils import _assemble_complete_response_from_streaming_chunks try: from ..proxy.enterprise.enterprise_callbacks.generic_api_callback import ( @@ -878,32 +879,24 @@ class Logging: # print(f"original response in success handler: {self.model_call_details['original_response']}") try: verbose_logger.debug(f"success callbacks: {litellm.success_callback}") + ## BUILD COMPLETE STREAMED RESPONSE - complete_streaming_response = None + complete_streaming_response: Optional[ + Union[ModelResponse, TextCompletionResponse] + ] = None if "complete_streaming_response" in self.model_call_details: return # break out of this. - if self.stream and isinstance(result, ModelResponse): - if ( - result.choices[0].finish_reason is not None - ): # if it's the last chunk - self.sync_streaming_chunks.append(result) - # print_verbose(f"final set of received chunks: {self.sync_streaming_chunks}") - try: - complete_streaming_response = litellm.stream_chunk_builder( - self.sync_streaming_chunks, - messages=self.model_call_details.get("messages", None), - start_time=start_time, - end_time=end_time, - ) - except Exception as e: - verbose_logger.exception( - "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while building complete streaming response in success logging {}".format( - str(e) - ) - ) - complete_streaming_response = None - else: - self.sync_streaming_chunks.append(result) + if self.stream: + complete_streaming_response: Optional[ + Union[ModelResponse, TextCompletionResponse] + ] = _assemble_complete_response_from_streaming_chunks( + result=result, + start_time=start_time, + end_time=end_time, + request_kwargs=self.model_call_details, + streaming_chunks=self.sync_streaming_chunks, + is_async=False, + ) _caching_complete_streaming_response: Optional[ Union[ModelResponse, TextCompletionResponse] ] = None @@ -1495,29 +1488,23 @@ class Logging: start_time=start_time, end_time=end_time, result=result, cache_hit=cache_hit ) ## BUILD COMPLETE STREAMED RESPONSE - complete_streaming_response = None if "async_complete_streaming_response" in self.model_call_details: return # break out of this. - if self.stream: - if result.choices[0].finish_reason is not None: # if it's the last chunk - self.streaming_chunks.append(result) - # verbose_logger.debug(f"final set of received chunks: {self.streaming_chunks}") - try: - complete_streaming_response = litellm.stream_chunk_builder( - self.streaming_chunks, - messages=self.model_call_details.get("messages", None), - start_time=start_time, - end_time=end_time, - ) - except Exception as e: - verbose_logger.exception( - "Error occurred building stream chunk in success logging: {}".format( - str(e) - ) - ) - complete_streaming_response = None - else: - self.streaming_chunks.append(result) + complete_streaming_response: Optional[ + Union[ModelResponse, TextCompletionResponse] + ] = None + if self.stream is True: + complete_streaming_response: Optional[ + Union[ModelResponse, TextCompletionResponse] + ] = _assemble_complete_response_from_streaming_chunks( + result=result, + start_time=start_time, + end_time=end_time, + request_kwargs=self.model_call_details, + streaming_chunks=self.streaming_chunks, + is_async=True, + ) + if complete_streaming_response is not None: print_verbose("Async success callbacks: Got a complete streaming response") diff --git a/litellm/litellm_core_utils/logging_utils.py b/litellm/litellm_core_utils/logging_utils.py index 7fa1be9d8..0a9fc8342 100644 --- a/litellm/litellm_core_utils/logging_utils.py +++ b/litellm/litellm_core_utils/logging_utils.py @@ -1,4 +1,8 @@ -from typing import TYPE_CHECKING, Any, Optional, Union +from datetime import datetime +from typing import TYPE_CHECKING, Any, List, Optional, Union + +from litellm._logging import verbose_logger +from litellm.types.utils import ModelResponse, TextCompletionResponse if TYPE_CHECKING: from litellm import ModelResponse as _ModelResponse @@ -15,21 +19,6 @@ Helper utils used for logging callbacks """ -def convert_litellm_response_object_to_dict(response_obj: Any) -> dict: - """ - Convert a LiteLLM response object to a dictionary - - """ - if isinstance(response_obj, dict): - return response_obj - for _type in litellm.ALL_LITELLM_RESPONSE_TYPES: - if isinstance(response_obj, _type): - return response_obj.model_dump() - - # If it's not a LiteLLM type, return the object as is - return dict(response_obj) - - def convert_litellm_response_object_to_str( response_obj: Union[Any, LiteLLMModelResponse] ) -> Optional[str]: @@ -46,3 +35,55 @@ def convert_litellm_response_object_to_str( return response_str return None + + +def _assemble_complete_response_from_streaming_chunks( + result: Union[ModelResponse, TextCompletionResponse], + start_time: datetime, + end_time: datetime, + request_kwargs: dict, + streaming_chunks: List[Any], + is_async: bool, +): + """ + Assemble a complete response from a streaming chunks + + - assemble a complete streaming response if result.choices[0].finish_reason is not None + - else append the chunk to the streaming_chunks + + + Args: + result: ModelResponse + start_time: datetime + end_time: datetime + request_kwargs: dict + streaming_chunks: List[Any] + is_async: bool + + Returns: + Optional[Union[ModelResponse, TextCompletionResponse]]: Complete streaming response + + """ + complete_streaming_response: Optional[ + Union[ModelResponse, TextCompletionResponse] + ] = None + if result.choices[0].finish_reason is not None: # if it's the last chunk + streaming_chunks.append(result) + try: + complete_streaming_response = litellm.stream_chunk_builder( + chunks=streaming_chunks, + messages=request_kwargs.get("messages", None), + start_time=start_time, + end_time=end_time, + ) + except Exception as e: + log_message = ( + "Error occurred building stream chunk in {} success logging: {}".format( + "async" if is_async else "sync", str(e) + ) + ) + verbose_logger.exception(log_message) + complete_streaming_response = None + else: + streaming_chunks.append(result) + return complete_streaming_response diff --git a/tests/local_testing/test_custom_callback_input.py b/tests/local_testing/test_custom_callback_input.py index c079123e7..33ae09bc1 100644 --- a/tests/local_testing/test_custom_callback_input.py +++ b/tests/local_testing/test_custom_callback_input.py @@ -280,6 +280,9 @@ class CompletionCustomHandler( async def async_log_success_event(self, kwargs, response_obj, start_time, end_time): try: + print( + "in async_log_success_event", kwargs, response_obj, start_time, end_time + ) self.states.append("async_success") ## START TIME assert isinstance(start_time, datetime) @@ -522,6 +525,7 @@ async def test_async_chat_azure_stream(): @pytest.mark.asyncio async def test_async_chat_openai_stream_options(): try: + litellm.set_verbose = True customHandler = CompletionCustomHandler() litellm.callbacks = [customHandler] with patch.object( @@ -536,7 +540,7 @@ async def test_async_chat_openai_stream_options(): async for chunk in response: continue - + print("mock client args list=", mock_client.await_args_list) mock_client.assert_awaited_once() except Exception as e: pytest.fail(f"An exception occurred: {str(e)}") diff --git a/tests/logging_callback_tests/conftest.py b/tests/logging_callback_tests/conftest.py new file mode 100644 index 000000000..eca0bc431 --- /dev/null +++ b/tests/logging_callback_tests/conftest.py @@ -0,0 +1,54 @@ +# conftest.py + +import importlib +import os +import sys + +import pytest + +sys.path.insert( + 0, os.path.abspath("../..") +) # Adds the parent directory to the system path +import litellm + + +@pytest.fixture(scope="function", autouse=True) +def setup_and_teardown(): + """ + This fixture reloads litellm before every function. To speed up testing by removing callbacks being chained. + """ + curr_dir = os.getcwd() # Get the current working directory + sys.path.insert( + 0, os.path.abspath("../..") + ) # Adds the project directory to the system path + + import litellm + from litellm import Router + + importlib.reload(litellm) + import asyncio + + loop = asyncio.get_event_loop_policy().new_event_loop() + asyncio.set_event_loop(loop) + print(litellm) + # from litellm import Router, completion, aembedding, acompletion, embedding + yield + + # Teardown code (executes after the yield point) + loop.close() # Close the loop created earlier + asyncio.set_event_loop(None) # Remove the reference to the loop + + +def pytest_collection_modifyitems(config, items): + # Separate tests in 'test_amazing_proxy_custom_logger.py' and other tests + custom_logger_tests = [ + item for item in items if "custom_logger" in item.parent.name + ] + other_tests = [item for item in items if "custom_logger" not in item.parent.name] + + # Sort tests based on their names + custom_logger_tests.sort(key=lambda x: x.name) + other_tests.sort(key=lambda x: x.name) + + # Reorder the items list + items[:] = custom_logger_tests + other_tests diff --git a/tests/logging_callback_tests/test_assemble_streaming_responses.py b/tests/logging_callback_tests/test_assemble_streaming_responses.py new file mode 100644 index 000000000..5b52194bf --- /dev/null +++ b/tests/logging_callback_tests/test_assemble_streaming_responses.py @@ -0,0 +1,362 @@ +""" +Testing for _assemble_complete_response_from_streaming_chunks + +- Test 1 - ModelResponse with 1 list of streaming chunks. Assert chunks are added to the streaming_chunks, after final chunk sent assert complete_streaming_response is not None +- Test 2 - TextCompletionResponse with 1 list of streaming chunks. Assert chunks are added to the streaming_chunks, after final chunk sent assert complete_streaming_response is not None +- Test 3 - Have multiple lists of streaming chunks, Assert that chunks are added to the correct list and that complete_streaming_response is None. After final chunk sent assert complete_streaming_response is not None +- Test 4 - build a complete response when 1 chunk is poorly formatted + +""" + +import json +import os +import sys +from datetime import datetime +from unittest.mock import AsyncMock + +from pydantic.main import Model + +sys.path.insert( + 0, os.path.abspath("../..") +) # Adds the parent directory to the system path + + +import httpx +import pytest +from respx import MockRouter + +import litellm +from litellm import Choices, Message, ModelResponse, TextCompletionResponse, TextChoices + +from litellm.litellm_core_utils.litellm_logging import ( + _assemble_complete_response_from_streaming_chunks, +) + + +@pytest.mark.parametrize("is_async", [True, False]) +def test_assemble_complete_response_from_streaming_chunks_1(is_async): + """ + Test 1 - ModelResponse with 1 list of streaming chunks. Assert chunks are added to the streaming_chunks, after final chunk sent assert complete_streaming_response is not None + """ + + request_kwargs = { + "model": "test_model", + "messages": [{"role": "user", "content": "Hello, world!"}], + } + + list_streaming_chunks = [] + chunk = { + "id": "chatcmpl-9mWtyDnikZZoB75DyfUzWUxiiE2Pi", + "choices": [ + litellm.utils.StreamingChoices( + delta=litellm.utils.Delta( + content="hello in response", + function_call=None, + role=None, + tool_calls=None, + ), + index=0, + logprobs=None, + ) + ], + "created": 1721353246, + "model": "gpt-3.5-turbo", + "object": "chat.completion.chunk", + "system_fingerprint": None, + "usage": None, + } + chunk = litellm.ModelResponse(**chunk, stream=True) + complete_streaming_response = _assemble_complete_response_from_streaming_chunks( + result=chunk, + start_time=datetime.now(), + end_time=datetime.now(), + request_kwargs=request_kwargs, + streaming_chunks=list_streaming_chunks, + is_async=is_async, + ) + + # this is the 1st chunk - complete_streaming_response should be None + + print("list_streaming_chunks", list_streaming_chunks) + print("complete_streaming_response", complete_streaming_response) + assert complete_streaming_response is None + assert len(list_streaming_chunks) == 1 + assert list_streaming_chunks[0] == chunk + + # Add final chunk + chunk = { + "id": "chatcmpl-9mWtyDnikZZoB75DyfUzWUxiiE2Pi", + "choices": [ + litellm.utils.StreamingChoices( + finish_reason="stop", + delta=litellm.utils.Delta( + content="end of response", + function_call=None, + role=None, + tool_calls=None, + ), + index=0, + logprobs=None, + ) + ], + "created": 1721353246, + "model": "gpt-3.5-turbo", + "object": "chat.completion.chunk", + "system_fingerprint": None, + "usage": None, + } + chunk = litellm.ModelResponse(**chunk, stream=True) + complete_streaming_response = _assemble_complete_response_from_streaming_chunks( + result=chunk, + start_time=datetime.now(), + end_time=datetime.now(), + request_kwargs=request_kwargs, + streaming_chunks=list_streaming_chunks, + is_async=is_async, + ) + + print("list_streaming_chunks", list_streaming_chunks) + print("complete_streaming_response", complete_streaming_response) + + # this is the 2nd chunk - complete_streaming_response should not be None + assert complete_streaming_response is not None + assert len(list_streaming_chunks) == 2 + + assert isinstance(complete_streaming_response, ModelResponse) + assert isinstance(complete_streaming_response.choices[0], Choices) + + pass + + +@pytest.mark.parametrize("is_async", [True, False]) +def test_assemble_complete_response_from_streaming_chunks_2(is_async): + """ + Test 2 - TextCompletionResponse with 1 list of streaming chunks. Assert chunks are added to the streaming_chunks, after final chunk sent assert complete_streaming_response is not None + """ + + from litellm.utils import TextCompletionStreamWrapper + + _text_completion_stream_wrapper = TextCompletionStreamWrapper( + completion_stream=None, model="test_model" + ) + + request_kwargs = { + "model": "test_model", + "messages": [{"role": "user", "content": "Hello, world!"}], + } + + list_streaming_chunks = [] + chunk = { + "id": "chatcmpl-9mWtyDnikZZoB75DyfUzWUxiiE2Pi", + "choices": [ + litellm.utils.StreamingChoices( + delta=litellm.utils.Delta( + content="hello in response", + function_call=None, + role=None, + tool_calls=None, + ), + index=0, + logprobs=None, + ) + ], + "created": 1721353246, + "model": "gpt-3.5-turbo", + "object": "chat.completion.chunk", + "system_fingerprint": None, + "usage": None, + } + chunk = litellm.ModelResponse(**chunk, stream=True) + chunk = _text_completion_stream_wrapper.convert_to_text_completion_object(chunk) + + complete_streaming_response = _assemble_complete_response_from_streaming_chunks( + result=chunk, + start_time=datetime.now(), + end_time=datetime.now(), + request_kwargs=request_kwargs, + streaming_chunks=list_streaming_chunks, + is_async=is_async, + ) + + # this is the 1st chunk - complete_streaming_response should be None + + print("list_streaming_chunks", list_streaming_chunks) + print("complete_streaming_response", complete_streaming_response) + assert complete_streaming_response is None + assert len(list_streaming_chunks) == 1 + assert list_streaming_chunks[0] == chunk + + # Add final chunk + chunk = { + "id": "chatcmpl-9mWtyDnikZZoB75DyfUzWUxiiE2Pi", + "choices": [ + litellm.utils.StreamingChoices( + finish_reason="stop", + delta=litellm.utils.Delta( + content="end of response", + function_call=None, + role=None, + tool_calls=None, + ), + index=0, + logprobs=None, + ) + ], + "created": 1721353246, + "model": "gpt-3.5-turbo", + "object": "chat.completion.chunk", + "system_fingerprint": None, + "usage": None, + } + chunk = litellm.ModelResponse(**chunk, stream=True) + chunk = _text_completion_stream_wrapper.convert_to_text_completion_object(chunk) + complete_streaming_response = _assemble_complete_response_from_streaming_chunks( + result=chunk, + start_time=datetime.now(), + end_time=datetime.now(), + request_kwargs=request_kwargs, + streaming_chunks=list_streaming_chunks, + is_async=is_async, + ) + + print("list_streaming_chunks", list_streaming_chunks) + print("complete_streaming_response", complete_streaming_response) + + # this is the 2nd chunk - complete_streaming_response should not be None + assert complete_streaming_response is not None + assert len(list_streaming_chunks) == 2 + + assert isinstance(complete_streaming_response, TextCompletionResponse) + assert isinstance(complete_streaming_response.choices[0], TextChoices) + + pass + + +@pytest.mark.parametrize("is_async", [True, False]) +def test_assemble_complete_response_from_streaming_chunks_3(is_async): + + request_kwargs = { + "model": "test_model", + "messages": [{"role": "user", "content": "Hello, world!"}], + } + + list_streaming_chunks_1 = [] + list_streaming_chunks_2 = [] + + chunk = { + "id": "chatcmpl-9mWtyDnikZZoB75DyfUzWUxiiE2Pi", + "choices": [ + litellm.utils.StreamingChoices( + delta=litellm.utils.Delta( + content="hello in response", + function_call=None, + role=None, + tool_calls=None, + ), + index=0, + logprobs=None, + ) + ], + "created": 1721353246, + "model": "gpt-3.5-turbo", + "object": "chat.completion.chunk", + "system_fingerprint": None, + "usage": None, + } + chunk = litellm.ModelResponse(**chunk, stream=True) + complete_streaming_response = _assemble_complete_response_from_streaming_chunks( + result=chunk, + start_time=datetime.now(), + end_time=datetime.now(), + request_kwargs=request_kwargs, + streaming_chunks=list_streaming_chunks_1, + is_async=is_async, + ) + + # this is the 1st chunk - complete_streaming_response should be None + + print("list_streaming_chunks_1", list_streaming_chunks_1) + print("complete_streaming_response", complete_streaming_response) + assert complete_streaming_response is None + assert len(list_streaming_chunks_1) == 1 + assert list_streaming_chunks_1[0] == chunk + assert len(list_streaming_chunks_2) == 0 + + # now add a chunk to the 2nd list + + complete_streaming_response = _assemble_complete_response_from_streaming_chunks( + result=chunk, + start_time=datetime.now(), + end_time=datetime.now(), + request_kwargs=request_kwargs, + streaming_chunks=list_streaming_chunks_2, + is_async=is_async, + ) + + print("list_streaming_chunks_2", list_streaming_chunks_2) + print("complete_streaming_response", complete_streaming_response) + assert complete_streaming_response is None + assert len(list_streaming_chunks_2) == 1 + assert list_streaming_chunks_2[0] == chunk + assert len(list_streaming_chunks_1) == 1 + + # now add a chunk to the 1st list + + +@pytest.mark.parametrize("is_async", [True, False]) +def test_assemble_complete_response_from_streaming_chunks_4(is_async): + """ + Test 4 - build a complete response when 1 chunk is poorly formatted + + - Assert complete_streaming_response is None + - Assert list_streaming_chunks is not empty + """ + + request_kwargs = { + "model": "test_model", + "messages": [{"role": "user", "content": "Hello, world!"}], + } + + list_streaming_chunks = [] + + chunk = { + "id": "chatcmpl-9mWtyDnikZZoB75DyfUzWUxiiE2Pi", + "choices": [ + litellm.utils.StreamingChoices( + finish_reason="stop", + delta=litellm.utils.Delta( + content="end of response", + function_call=None, + role=None, + tool_calls=None, + ), + index=0, + logprobs=None, + ) + ], + "created": 1721353246, + "model": "gpt-3.5-turbo", + "object": "chat.completion.chunk", + "system_fingerprint": None, + "usage": None, + } + chunk = litellm.ModelResponse(**chunk, stream=True) + + # remove attribute id from chunk + del chunk.id + + complete_streaming_response = _assemble_complete_response_from_streaming_chunks( + result=chunk, + start_time=datetime.now(), + end_time=datetime.now(), + request_kwargs=request_kwargs, + streaming_chunks=list_streaming_chunks, + is_async=is_async, + ) + + print("complete_streaming_response", complete_streaming_response) + assert complete_streaming_response is None + + print("list_streaming_chunks", list_streaming_chunks) + + assert len(list_streaming_chunks) == 1