(refactor) use helper function _assemble_complete_response_from_streaming_chunks to assemble complete responses in caching and logging callbacks (#6220)

* (refactor) use _assemble_complete_response_from_streaming_chunks
* add unit test for test_assemble_complete_response_from_streaming_chunks_1
* fix assemble complete_streaming_response
* config add logging_testing
* add logging_coverage in codecov
* test test_assemble_complete_response_from_streaming_chunks_3
* add unit tests for _assemble_complete_response_from_streaming_chunks
* fix remove unused / junk function
* add test for streaming_chunks when error assembling
parent e9a46b992c · commit a69c670baa
9 changed files with 571 additions and 90 deletions
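The commit replaces the per-callback "append chunks, then run litellm.stream_chunk_builder on the final chunk" blocks with a single shared helper. As a condensed usage sketch, based on the helper's signature and the unit tests added further down in this diff (the make_chunk factory and the literal values are illustrative only, not part of the commit):

from datetime import datetime

import litellm
from litellm.litellm_core_utils.logging_utils import (
    _assemble_complete_response_from_streaming_chunks,
)

# Per-request accumulator; the helper appends every chunk it is handed to this list.
streaming_chunks = []


def make_chunk(content, finish_reason=None):
    # Minimal streaming chunk in the same shape the new unit tests construct.
    return litellm.ModelResponse(
        id="chatcmpl-test",
        choices=[
            litellm.utils.StreamingChoices(
                finish_reason=finish_reason,
                delta=litellm.utils.Delta(content=content),
                index=0,
            )
        ],
        created=1721353246,
        model="gpt-3.5-turbo",
        object="chat.completion.chunk",
        stream=True,
    )


complete = None
for chunk in [make_chunk("hello "), make_chunk("world", finish_reason="stop")]:
    complete = _assemble_complete_response_from_streaming_chunks(
        result=chunk,
        start_time=datetime.now(),
        end_time=datetime.now(),
        request_kwargs={"messages": [{"role": "user", "content": "hi"}]},
        streaming_chunks=streaming_chunks,  # mutated in place
        is_async=False,  # only changes the wording of the error log
    )

# `complete` stays None until the chunk carrying finish_reason arrives; the helper
# then runs litellm.stream_chunk_builder over the accumulated chunks.
print(complete)

The hunks below cover the new CI job, the refactored caching and logging call sites, the helper itself, and the new tests.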
@@ -328,6 +328,48 @@ jobs:
           paths:
             - llm_translation_coverage.xml
             - llm_translation_coverage
+  logging_testing:
+    docker:
+      - image: cimg/python:3.11
+        auth:
+          username: ${DOCKERHUB_USERNAME}
+          password: ${DOCKERHUB_PASSWORD}
+    working_directory: ~/project
+
+    steps:
+      - checkout
+      - run:
+          name: Install Dependencies
+          command: |
+            python -m pip install --upgrade pip
+            python -m pip install -r requirements.txt
+            pip install "pytest==7.3.1"
+            pip install "pytest-retry==1.6.3"
+            pip install "pytest-cov==5.0.0"
+            pip install "pytest-asyncio==0.21.1"
+            pip install "respx==0.21.1"
+      # Run pytest and generate JUnit XML report
+      - run:
+          name: Run tests
+          command: |
+            pwd
+            ls
+            python -m pytest -vv tests/logging_callback_tests --cov=litellm --cov-report=xml -x -s -v --junitxml=test-results/junit.xml --durations=5
+          no_output_timeout: 120m
+      - run:
+          name: Rename the coverage files
+          command: |
+            mv coverage.xml logging_coverage.xml
+            mv .coverage logging_coverage
+
+      # Store test results
+      - store_test_results:
+          path: test-results
+      - persist_to_workspace:
+          root: .
+          paths:
+            - logging_coverage.xml
+            - logging_coverage
   installing_litellm_on_python:
     docker:
       - image: circleci/python:3.8
@@ -769,7 +811,7 @@ jobs:
             python -m venv venv
             . venv/bin/activate
             pip install coverage
-            coverage combine llm_translation_coverage litellm_router_coverage local_testing_coverage litellm_assistants_api_coverage ui_endpoint_testing_coverage
+            coverage combine llm_translation_coverage logging_coverage litellm_router_coverage local_testing_coverage litellm_assistants_api_coverage ui_endpoint_testing_coverage
             coverage xml
       - codecov/upload:
           file: ./coverage.xml
@@ -1005,9 +1047,16 @@ workflows:
               only:
                 - main
                 - /litellm_.*/
+      - logging_testing:
+          filters:
+            branches:
+              only:
+                - main
+                - /litellm_.*/
       - upload-coverage:
           requires:
             - llm_translation_testing
+            - logging_testing
            - litellm_router_testing
            - local_testing
            - litellm_assistants_api_testing
@@ -1036,6 +1085,7 @@ workflows:
       - build_and_test
       - load_testing
       - llm_translation_testing
+      - logging_testing
       - litellm_router_testing
       - litellm_assistants_api_testing
       - ui_endpoint_testing
@@ -26,6 +26,9 @@ from litellm.caching.caching import (
     RedisSemanticCache,
     S3Cache,
 )
+from litellm.litellm_core_utils.logging_utils import (
+    _assemble_complete_response_from_streaming_chunks,
+)
 from litellm.types.rerank import RerankResponse
 from litellm.types.utils import (
     CallTypes,
@@ -517,28 +520,14 @@ class LLMCachingHandler:
         """
         complete_streaming_response: Optional[
             Union[ModelResponse, TextCompletionResponse]
-        ] = None
-        if (
-            processed_chunk.choices[0].finish_reason is not None
-        ):  # if it's the last chunk
-            self.async_streaming_chunks.append(processed_chunk)
-            try:
-                end_time: datetime.datetime = datetime.datetime.now()
-                complete_streaming_response = litellm.stream_chunk_builder(
-                    self.async_streaming_chunks,
-                    messages=self.request_kwargs.get("messages", None),
-                    start_time=self.start_time,
-                    end_time=end_time,
-                )
-            except Exception as e:
-                verbose_logger.exception(
-                    "Error occurred building stream chunk in success logging: {}".format(
-                        str(e)
-                    )
-                )
-                complete_streaming_response = None
-        else:
-            self.async_streaming_chunks.append(processed_chunk)
+        ] = _assemble_complete_response_from_streaming_chunks(
+            result=processed_chunk,
+            start_time=self.start_time,
+            end_time=datetime.datetime.now(),
+            request_kwargs=self.request_kwargs,
+            streaming_chunks=self.async_streaming_chunks,
+            is_async=True,
+        )

         # if a complete_streaming_response is assembled, add it to the cache
         if complete_streaming_response is not None:
@@ -12,9 +12,6 @@ import litellm
 from litellm._logging import verbose_logger
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.integrations.gcs_bucket.gcs_bucket_base import GCSBucketBase
-from litellm.litellm_core_utils.logging_utils import (
-    convert_litellm_response_object_to_dict,
-)
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
 from litellm.proxy._types import CommonProxyErrors, SpendLogsMetadata, SpendLogsPayload
 from litellm.types.utils import (
@@ -10,9 +10,6 @@ from pydantic import BaseModel, Field
 import litellm
 from litellm._logging import verbose_logger
 from litellm.integrations.custom_logger import CustomLogger
-from litellm.litellm_core_utils.logging_utils import (
-    convert_litellm_response_object_to_dict,
-)
 from litellm.llms.custom_httpx.http_handler import (
     get_async_httpx_client,
     httpxSpecialProvider,
@@ -86,6 +86,7 @@ from ..integrations.supabase import Supabase
 from ..integrations.traceloop import TraceloopLogger
 from ..integrations.weights_biases import WeightsBiasesLogger
 from .exception_mapping_utils import _get_response_headers
+from .logging_utils import _assemble_complete_response_from_streaming_chunks

 try:
     from ..proxy.enterprise.enterprise_callbacks.generic_api_callback import (
@@ -878,32 +879,24 @@ class Logging:
         # print(f"original response in success handler: {self.model_call_details['original_response']}")
         try:
             verbose_logger.debug(f"success callbacks: {litellm.success_callback}")

             ## BUILD COMPLETE STREAMED RESPONSE
-            complete_streaming_response = None
+            complete_streaming_response: Optional[
+                Union[ModelResponse, TextCompletionResponse]
+            ] = None
             if "complete_streaming_response" in self.model_call_details:
                 return  # break out of this.
-            if self.stream and isinstance(result, ModelResponse):
-                if (
-                    result.choices[0].finish_reason is not None
-                ):  # if it's the last chunk
-                    self.sync_streaming_chunks.append(result)
-                    # print_verbose(f"final set of received chunks: {self.sync_streaming_chunks}")
-                    try:
-                        complete_streaming_response = litellm.stream_chunk_builder(
-                            self.sync_streaming_chunks,
-                            messages=self.model_call_details.get("messages", None),
-                            start_time=start_time,
-                            end_time=end_time,
-                        )
-                    except Exception as e:
-                        verbose_logger.exception(
-                            "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while building complete streaming response in success logging {}".format(
-                                str(e)
-                            )
-                        )
-                        complete_streaming_response = None
-                else:
-                    self.sync_streaming_chunks.append(result)
+            if self.stream:
+                complete_streaming_response: Optional[
+                    Union[ModelResponse, TextCompletionResponse]
+                ] = _assemble_complete_response_from_streaming_chunks(
+                    result=result,
+                    start_time=start_time,
+                    end_time=end_time,
+                    request_kwargs=self.model_call_details,
+                    streaming_chunks=self.sync_streaming_chunks,
+                    is_async=False,
+                )
             _caching_complete_streaming_response: Optional[
                 Union[ModelResponse, TextCompletionResponse]
             ] = None
@@ -1495,29 +1488,23 @@ class Logging:
             start_time=start_time, end_time=end_time, result=result, cache_hit=cache_hit
         )
         ## BUILD COMPLETE STREAMED RESPONSE
-        complete_streaming_response = None
        if "async_complete_streaming_response" in self.model_call_details:
            return  # break out of this.
-        if self.stream:
-            if result.choices[0].finish_reason is not None:  # if it's the last chunk
-                self.streaming_chunks.append(result)
-                # verbose_logger.debug(f"final set of received chunks: {self.streaming_chunks}")
-                try:
-                    complete_streaming_response = litellm.stream_chunk_builder(
-                        self.streaming_chunks,
-                        messages=self.model_call_details.get("messages", None),
-                        start_time=start_time,
-                        end_time=end_time,
-                    )
-                except Exception as e:
-                    verbose_logger.exception(
-                        "Error occurred building stream chunk in success logging: {}".format(
-                            str(e)
-                        )
-                    )
-                    complete_streaming_response = None
-            else:
-                self.streaming_chunks.append(result)
+        complete_streaming_response: Optional[
+            Union[ModelResponse, TextCompletionResponse]
+        ] = None
+        if self.stream is True:
+            complete_streaming_response: Optional[
+                Union[ModelResponse, TextCompletionResponse]
+            ] = _assemble_complete_response_from_streaming_chunks(
+                result=result,
+                start_time=start_time,
+                end_time=end_time,
+                request_kwargs=self.model_call_details,
+                streaming_chunks=self.streaming_chunks,
+                is_async=True,
+            )
        if complete_streaming_response is not None:
            print_verbose("Async success callbacks: Got a complete streaming response")
@@ -1,4 +1,8 @@
-from typing import TYPE_CHECKING, Any, Optional, Union
+from datetime import datetime
+from typing import TYPE_CHECKING, Any, List, Optional, Union
+
+from litellm._logging import verbose_logger
+from litellm.types.utils import ModelResponse, TextCompletionResponse

 if TYPE_CHECKING:
     from litellm import ModelResponse as _ModelResponse
@@ -15,21 +19,6 @@ Helper utils used for logging callbacks
 """


-def convert_litellm_response_object_to_dict(response_obj: Any) -> dict:
-    """
-    Convert a LiteLLM response object to a dictionary
-
-    """
-    if isinstance(response_obj, dict):
-        return response_obj
-    for _type in litellm.ALL_LITELLM_RESPONSE_TYPES:
-        if isinstance(response_obj, _type):
-            return response_obj.model_dump()
-
-    # If it's not a LiteLLM type, return the object as is
-    return dict(response_obj)
-
-
 def convert_litellm_response_object_to_str(
     response_obj: Union[Any, LiteLLMModelResponse]
 ) -> Optional[str]:
@@ -46,3 +35,55 @@ def convert_litellm_response_object_to_str(
         return response_str

     return None
+
+
+def _assemble_complete_response_from_streaming_chunks(
+    result: Union[ModelResponse, TextCompletionResponse],
+    start_time: datetime,
+    end_time: datetime,
+    request_kwargs: dict,
+    streaming_chunks: List[Any],
+    is_async: bool,
+):
+    """
+    Assemble a complete response from a streaming chunks
+
+    - assemble a complete streaming response if result.choices[0].finish_reason is not None
+    - else append the chunk to the streaming_chunks
+
+
+    Args:
+        result: ModelResponse
+        start_time: datetime
+        end_time: datetime
+        request_kwargs: dict
+        streaming_chunks: List[Any]
+        is_async: bool
+
+    Returns:
+        Optional[Union[ModelResponse, TextCompletionResponse]]: Complete streaming response
+
+    """
+    complete_streaming_response: Optional[
+        Union[ModelResponse, TextCompletionResponse]
+    ] = None
+    if result.choices[0].finish_reason is not None:  # if it's the last chunk
+        streaming_chunks.append(result)
+        try:
+            complete_streaming_response = litellm.stream_chunk_builder(
+                chunks=streaming_chunks,
+                messages=request_kwargs.get("messages", None),
+                start_time=start_time,
+                end_time=end_time,
+            )
+        except Exception as e:
+            log_message = (
+                "Error occurred building stream chunk in {} success logging: {}".format(
+                    "async" if is_async else "sync", str(e)
+                )
+            )
+            verbose_logger.exception(log_message)
+            complete_streaming_response = None
+    else:
+        streaming_chunks.append(result)
+    return complete_streaming_response
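One behavior worth noting from the helper above: if litellm.stream_chunk_builder raises while assembling the final chunk, the helper logs the error (tagged "async" or "sync" via is_async) and returns None instead of raising, and the chunk is still appended to the accumulator. A minimal sketch mirroring Test 4 further below (the deliberately malformed chunk and literal values are illustrative only):

from datetime import datetime

import litellm
from litellm.litellm_core_utils.logging_utils import (
    _assemble_complete_response_from_streaming_chunks,
)

streaming_chunks = []
final_chunk = litellm.ModelResponse(
    choices=[
        litellm.utils.StreamingChoices(
            finish_reason="stop",
            delta=litellm.utils.Delta(content="end of response"),
            index=0,
        )
    ],
    model="gpt-3.5-turbo",
    object="chat.completion.chunk",
    stream=True,
)
del final_chunk.id  # deliberately malform the final chunk so assembly fails

complete = _assemble_complete_response_from_streaming_chunks(
    result=final_chunk,
    start_time=datetime.now(),
    end_time=datetime.now(),
    request_kwargs={"messages": [{"role": "user", "content": "hi"}]},
    streaming_chunks=streaming_chunks,
    is_async=True,  # only affects the wording of the logged error
)

print(complete)               # None: the error was swallowed and logged
print(len(streaming_chunks))  # 1: the chunk was still recorded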
@@ -280,6 +280,9 @@ class CompletionCustomHandler(

     async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
         try:
+            print(
+                "in async_log_success_event", kwargs, response_obj, start_time, end_time
+            )
             self.states.append("async_success")
             ## START TIME
             assert isinstance(start_time, datetime)
@@ -522,6 +525,7 @@ async def test_async_chat_azure_stream():
 @pytest.mark.asyncio
 async def test_async_chat_openai_stream_options():
     try:
+        litellm.set_verbose = True
         customHandler = CompletionCustomHandler()
         litellm.callbacks = [customHandler]
         with patch.object(
@@ -536,7 +540,7 @@ async def test_async_chat_openai_stream_options():

             async for chunk in response:
                 continue
-
+            print("mock client args list=", mock_client.await_args_list)
             mock_client.assert_awaited_once()
     except Exception as e:
         pytest.fail(f"An exception occurred: {str(e)}")
tests/logging_callback_tests/conftest.py (new file, 54 lines)
@@ -0,0 +1,54 @@
+# conftest.py
+
+import importlib
+import os
+import sys
+
+import pytest
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+import litellm
+
+
+@pytest.fixture(scope="function", autouse=True)
+def setup_and_teardown():
+    """
+    This fixture reloads litellm before every function. To speed up testing by removing callbacks being chained.
+    """
+    curr_dir = os.getcwd()  # Get the current working directory
+    sys.path.insert(
+        0, os.path.abspath("../..")
+    )  # Adds the project directory to the system path
+
+    import litellm
+    from litellm import Router
+
+    importlib.reload(litellm)
+    import asyncio
+
+    loop = asyncio.get_event_loop_policy().new_event_loop()
+    asyncio.set_event_loop(loop)
+    print(litellm)
+    # from litellm import Router, completion, aembedding, acompletion, embedding
+    yield
+
+    # Teardown code (executes after the yield point)
+    loop.close()  # Close the loop created earlier
+    asyncio.set_event_loop(None)  # Remove the reference to the loop
+
+
+def pytest_collection_modifyitems(config, items):
+    # Separate tests in 'test_amazing_proxy_custom_logger.py' and other tests
+    custom_logger_tests = [
+        item for item in items if "custom_logger" in item.parent.name
+    ]
+    other_tests = [item for item in items if "custom_logger" not in item.parent.name]
+
+    # Sort tests based on their names
+    custom_logger_tests.sort(key=lambda x: x.name)
+    other_tests.sort(key=lambda x: x.name)
+
+    # Reorder the items list
+    items[:] = custom_logger_tests + other_tests
@@ -0,0 +1,362 @@
+"""
+Testing for _assemble_complete_response_from_streaming_chunks
+
+- Test 1 - ModelResponse with 1 list of streaming chunks. Assert chunks are added to the streaming_chunks, after final chunk sent assert complete_streaming_response is not None
+- Test 2 - TextCompletionResponse with 1 list of streaming chunks. Assert chunks are added to the streaming_chunks, after final chunk sent assert complete_streaming_response is not None
+- Test 3 - Have multiple lists of streaming chunks, Assert that chunks are added to the correct list and that complete_streaming_response is None. After final chunk sent assert complete_streaming_response is not None
+- Test 4 - build a complete response when 1 chunk is poorly formatted
+
+"""
+
+import json
+import os
+import sys
+from datetime import datetime
+from unittest.mock import AsyncMock
+
+from pydantic.main import Model
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+
+
+import httpx
+import pytest
+from respx import MockRouter
+
+import litellm
+from litellm import Choices, Message, ModelResponse, TextCompletionResponse, TextChoices
+
+from litellm.litellm_core_utils.litellm_logging import (
+    _assemble_complete_response_from_streaming_chunks,
+)
+
+
+@pytest.mark.parametrize("is_async", [True, False])
+def test_assemble_complete_response_from_streaming_chunks_1(is_async):
+    """
+    Test 1 - ModelResponse with 1 list of streaming chunks. Assert chunks are added to the streaming_chunks, after final chunk sent assert complete_streaming_response is not None
+    """
+
+    request_kwargs = {
+        "model": "test_model",
+        "messages": [{"role": "user", "content": "Hello, world!"}],
+    }
+
+    list_streaming_chunks = []
+    chunk = {
+        "id": "chatcmpl-9mWtyDnikZZoB75DyfUzWUxiiE2Pi",
+        "choices": [
+            litellm.utils.StreamingChoices(
+                delta=litellm.utils.Delta(
+                    content="hello in response",
+                    function_call=None,
+                    role=None,
+                    tool_calls=None,
+                ),
+                index=0,
+                logprobs=None,
+            )
+        ],
+        "created": 1721353246,
+        "model": "gpt-3.5-turbo",
+        "object": "chat.completion.chunk",
+        "system_fingerprint": None,
+        "usage": None,
+    }
+    chunk = litellm.ModelResponse(**chunk, stream=True)
+    complete_streaming_response = _assemble_complete_response_from_streaming_chunks(
+        result=chunk,
+        start_time=datetime.now(),
+        end_time=datetime.now(),
+        request_kwargs=request_kwargs,
+        streaming_chunks=list_streaming_chunks,
+        is_async=is_async,
+    )
+
+    # this is the 1st chunk - complete_streaming_response should be None
+
+    print("list_streaming_chunks", list_streaming_chunks)
+    print("complete_streaming_response", complete_streaming_response)
+    assert complete_streaming_response is None
+    assert len(list_streaming_chunks) == 1
+    assert list_streaming_chunks[0] == chunk
+
+    # Add final chunk
+    chunk = {
+        "id": "chatcmpl-9mWtyDnikZZoB75DyfUzWUxiiE2Pi",
+        "choices": [
+            litellm.utils.StreamingChoices(
+                finish_reason="stop",
+                delta=litellm.utils.Delta(
+                    content="end of response",
+                    function_call=None,
+                    role=None,
+                    tool_calls=None,
+                ),
+                index=0,
+                logprobs=None,
+            )
+        ],
+        "created": 1721353246,
+        "model": "gpt-3.5-turbo",
+        "object": "chat.completion.chunk",
+        "system_fingerprint": None,
+        "usage": None,
+    }
+    chunk = litellm.ModelResponse(**chunk, stream=True)
+    complete_streaming_response = _assemble_complete_response_from_streaming_chunks(
+        result=chunk,
+        start_time=datetime.now(),
+        end_time=datetime.now(),
+        request_kwargs=request_kwargs,
+        streaming_chunks=list_streaming_chunks,
+        is_async=is_async,
+    )
+
+    print("list_streaming_chunks", list_streaming_chunks)
+    print("complete_streaming_response", complete_streaming_response)
+
+    # this is the 2nd chunk - complete_streaming_response should not be None
+    assert complete_streaming_response is not None
+    assert len(list_streaming_chunks) == 2
+
+    assert isinstance(complete_streaming_response, ModelResponse)
+    assert isinstance(complete_streaming_response.choices[0], Choices)
+
+    pass
+
+
+@pytest.mark.parametrize("is_async", [True, False])
+def test_assemble_complete_response_from_streaming_chunks_2(is_async):
+    """
+    Test 2 - TextCompletionResponse with 1 list of streaming chunks. Assert chunks are added to the streaming_chunks, after final chunk sent assert complete_streaming_response is not None
+    """
+
+    from litellm.utils import TextCompletionStreamWrapper
+
+    _text_completion_stream_wrapper = TextCompletionStreamWrapper(
+        completion_stream=None, model="test_model"
+    )
+
+    request_kwargs = {
+        "model": "test_model",
+        "messages": [{"role": "user", "content": "Hello, world!"}],
+    }
+
+    list_streaming_chunks = []
+    chunk = {
+        "id": "chatcmpl-9mWtyDnikZZoB75DyfUzWUxiiE2Pi",
+        "choices": [
+            litellm.utils.StreamingChoices(
+                delta=litellm.utils.Delta(
+                    content="hello in response",
+                    function_call=None,
+                    role=None,
+                    tool_calls=None,
+                ),
+                index=0,
+                logprobs=None,
+            )
+        ],
+        "created": 1721353246,
+        "model": "gpt-3.5-turbo",
+        "object": "chat.completion.chunk",
+        "system_fingerprint": None,
+        "usage": None,
+    }
+    chunk = litellm.ModelResponse(**chunk, stream=True)
+    chunk = _text_completion_stream_wrapper.convert_to_text_completion_object(chunk)
+
+    complete_streaming_response = _assemble_complete_response_from_streaming_chunks(
+        result=chunk,
+        start_time=datetime.now(),
+        end_time=datetime.now(),
+        request_kwargs=request_kwargs,
+        streaming_chunks=list_streaming_chunks,
+        is_async=is_async,
+    )
+
+    # this is the 1st chunk - complete_streaming_response should be None
+
+    print("list_streaming_chunks", list_streaming_chunks)
+    print("complete_streaming_response", complete_streaming_response)
+    assert complete_streaming_response is None
+    assert len(list_streaming_chunks) == 1
+    assert list_streaming_chunks[0] == chunk
+
+    # Add final chunk
+    chunk = {
+        "id": "chatcmpl-9mWtyDnikZZoB75DyfUzWUxiiE2Pi",
+        "choices": [
+            litellm.utils.StreamingChoices(
+                finish_reason="stop",
+                delta=litellm.utils.Delta(
+                    content="end of response",
+                    function_call=None,
+                    role=None,
+                    tool_calls=None,
+                ),
+                index=0,
+                logprobs=None,
+            )
+        ],
+        "created": 1721353246,
+        "model": "gpt-3.5-turbo",
+        "object": "chat.completion.chunk",
+        "system_fingerprint": None,
+        "usage": None,
+    }
+    chunk = litellm.ModelResponse(**chunk, stream=True)
+    chunk = _text_completion_stream_wrapper.convert_to_text_completion_object(chunk)
+    complete_streaming_response = _assemble_complete_response_from_streaming_chunks(
+        result=chunk,
+        start_time=datetime.now(),
+        end_time=datetime.now(),
+        request_kwargs=request_kwargs,
+        streaming_chunks=list_streaming_chunks,
+        is_async=is_async,
+    )
+
+    print("list_streaming_chunks", list_streaming_chunks)
+    print("complete_streaming_response", complete_streaming_response)
+
+    # this is the 2nd chunk - complete_streaming_response should not be None
+    assert complete_streaming_response is not None
+    assert len(list_streaming_chunks) == 2
+
+    assert isinstance(complete_streaming_response, TextCompletionResponse)
+    assert isinstance(complete_streaming_response.choices[0], TextChoices)
+
+    pass
+
+
+@pytest.mark.parametrize("is_async", [True, False])
+def test_assemble_complete_response_from_streaming_chunks_3(is_async):
+
+    request_kwargs = {
+        "model": "test_model",
+        "messages": [{"role": "user", "content": "Hello, world!"}],
+    }
+
+    list_streaming_chunks_1 = []
+    list_streaming_chunks_2 = []
+
+    chunk = {
+        "id": "chatcmpl-9mWtyDnikZZoB75DyfUzWUxiiE2Pi",
+        "choices": [
+            litellm.utils.StreamingChoices(
+                delta=litellm.utils.Delta(
+                    content="hello in response",
+                    function_call=None,
+                    role=None,
+                    tool_calls=None,
+                ),
+                index=0,
+                logprobs=None,
+            )
+        ],
+        "created": 1721353246,
+        "model": "gpt-3.5-turbo",
+        "object": "chat.completion.chunk",
+        "system_fingerprint": None,
+        "usage": None,
+    }
+    chunk = litellm.ModelResponse(**chunk, stream=True)
+    complete_streaming_response = _assemble_complete_response_from_streaming_chunks(
+        result=chunk,
+        start_time=datetime.now(),
+        end_time=datetime.now(),
+        request_kwargs=request_kwargs,
+        streaming_chunks=list_streaming_chunks_1,
+        is_async=is_async,
+    )
+
+    # this is the 1st chunk - complete_streaming_response should be None
+
+    print("list_streaming_chunks_1", list_streaming_chunks_1)
+    print("complete_streaming_response", complete_streaming_response)
+    assert complete_streaming_response is None
+    assert len(list_streaming_chunks_1) == 1
+    assert list_streaming_chunks_1[0] == chunk
+    assert len(list_streaming_chunks_2) == 0
+
+    # now add a chunk to the 2nd list
+
+    complete_streaming_response = _assemble_complete_response_from_streaming_chunks(
+        result=chunk,
+        start_time=datetime.now(),
+        end_time=datetime.now(),
+        request_kwargs=request_kwargs,
+        streaming_chunks=list_streaming_chunks_2,
+        is_async=is_async,
+    )
+
+    print("list_streaming_chunks_2", list_streaming_chunks_2)
+    print("complete_streaming_response", complete_streaming_response)
+    assert complete_streaming_response is None
+    assert len(list_streaming_chunks_2) == 1
+    assert list_streaming_chunks_2[0] == chunk
+    assert len(list_streaming_chunks_1) == 1
+
+    # now add a chunk to the 1st list
+
+
+@pytest.mark.parametrize("is_async", [True, False])
+def test_assemble_complete_response_from_streaming_chunks_4(is_async):
+    """
+    Test 4 - build a complete response when 1 chunk is poorly formatted
+
+    - Assert complete_streaming_response is None
+    - Assert list_streaming_chunks is not empty
+    """
+
+    request_kwargs = {
+        "model": "test_model",
+        "messages": [{"role": "user", "content": "Hello, world!"}],
+    }
+
+    list_streaming_chunks = []
+
+    chunk = {
+        "id": "chatcmpl-9mWtyDnikZZoB75DyfUzWUxiiE2Pi",
+        "choices": [
+            litellm.utils.StreamingChoices(
+                finish_reason="stop",
+                delta=litellm.utils.Delta(
+                    content="end of response",
+                    function_call=None,
+                    role=None,
+                    tool_calls=None,
+                ),
+                index=0,
+                logprobs=None,
+            )
+        ],
+        "created": 1721353246,
+        "model": "gpt-3.5-turbo",
+        "object": "chat.completion.chunk",
+        "system_fingerprint": None,
+        "usage": None,
+    }
+    chunk = litellm.ModelResponse(**chunk, stream=True)
+
+    # remove attribute id from chunk
+    del chunk.id
+
+    complete_streaming_response = _assemble_complete_response_from_streaming_chunks(
+        result=chunk,
+        start_time=datetime.now(),
+        end_time=datetime.now(),
+        request_kwargs=request_kwargs,
+        streaming_chunks=list_streaming_chunks,
+        is_async=is_async,
+    )
+
+    print("complete_streaming_response", complete_streaming_response)
+    assert complete_streaming_response is None
+
+    print("list_streaming_chunks", list_streaming_chunks)
+
+    assert len(list_streaming_chunks) == 1