fix(test_parallel_request_limiter.py): use mock responses for streaming

Krrish Dholakia 2024-02-08 21:45:24 -08:00
parent 1ef7ad3416
commit b9393fb769
5 changed files with 35 additions and 5 deletions

@@ -31,6 +31,7 @@ from litellm.utils import (
     get_llm_provider,
     get_api_key,
     mock_completion_streaming_obj,
+    async_mock_completion_streaming_obj,
     convert_to_model_response_object,
     token_counter,
     Usage,
@@ -307,6 +308,7 @@ def mock_completion(
     messages: List,
     stream: Optional[bool] = False,
     mock_response: str = "This is a mock request",
+    logging=None,
     **kwargs,
 ):
     """
@@ -335,6 +337,15 @@
         model_response = ModelResponse(stream=stream)
         if stream is True:
             # don't try to access stream object,
+            if kwargs.get("acompletion", False) == True:
+                return CustomStreamWrapper(
+                    completion_stream=async_mock_completion_streaming_obj(
+                        model_response, mock_response=mock_response, model=model
+                    ),
+                    model=model,
+                    custom_llm_provider="openai",
+                    logging_obj=logging,
+                )
             response = mock_completion_streaming_obj(
                 model_response, mock_response=mock_response, model=model
             )
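
The new branch above returns a CustomStreamWrapper built from async_mock_completion_streaming_obj whenever the mock call came in through acompletion. As a rough illustration of the pattern only (not the actual litellm.utils implementation, whose chunk shape may differ), the helper is an async generator that slices the mock string into small deltas:

    # Illustrative sketch only -- the real helper lives in litellm.utils.
    async def async_mock_completion_streaming_obj(model_response, mock_response, model):
        # Yield the mock text a few characters at a time so CustomStreamWrapper
        # has multiple chunks to hand back to the async caller.
        for i in range(0, len(mock_response), 3):
            completion_obj = {"role": "assistant", "content": mock_response[i : i + 3]}
            model_response.choices[0].delta = completion_obj
            yield model_response
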
@@ -717,7 +728,12 @@ def completion(
         )
         if mock_response:
             return mock_completion(
-                model, messages, stream=stream, mock_response=mock_response
+                model,
+                messages,
+                stream=stream,
+                mock_response=mock_response,
+                logging=logging,
+                acompletion=acompletion,
             )
         if custom_llm_provider == "azure":
             # azure configs
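
Taken together, completion() now forwards logging and acompletion into mock_completion, so the async streaming path can be driven purely from a mock. A minimal sketch of how a test might exercise it, assuming mock_response is forwarded through litellm.acompletion the way the diff wires it (model and message values are placeholders):

    import asyncio
    import litellm

    async def consume_mock_stream():
        # With stream=True and a mock_response, acompletion should hand back the
        # CustomStreamWrapper created in mock_completion instead of calling a provider.
        response = await litellm.acompletion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "hi"}],
            mock_response="This is a mock request",
            stream=True,
        )
        async for chunk in response:
            print(chunk)

    asyncio.run(consume_mock_stream())
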