diff --git a/litellm/main.py b/litellm/main.py
index 07d7be2ba..ecb6edd0d 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -487,14 +487,17 @@ def mock_completion(
             if kwargs.get("acompletion", False) == True:
                 return CustomStreamWrapper(
                     completion_stream=async_mock_completion_streaming_obj(
-                        model_response, mock_response=mock_response, model=model
+                        model_response, mock_response=mock_response, model=model, n=n
                     ),
                     model=model,
                     custom_llm_provider="openai",
                     logging_obj=logging,
                 )
             response = mock_completion_streaming_obj(
-                model_response, mock_response=mock_response, model=model
+                model_response,
+                mock_response=mock_response,
+                model=model,
+                n=n,
             )
             return response
         if n is None:
diff --git a/litellm/tests/test_mock_request.py b/litellm/tests/test_mock_request.py
index 6b58c94b2..48b054371 100644
--- a/litellm/tests/test_mock_request.py
+++ b/litellm/tests/test_mock_request.py
@@ -73,3 +73,22 @@ def test_mock_request_n_greater_than_1():
 
     except:
         traceback.print_exc()
+
+
+@pytest.mark.asyncio()
+async def test_async_mock_streaming_request_n_greater_than_1():
+    generator = await litellm.acompletion(
+        messages=[{"role": "user", "content": "Why is LiteLLM amazing?"}],
+        mock_response="LiteLLM is awesome",
+        stream=True,
+        model="gpt-3.5-turbo",
+        n=5,
+    )
+    complete_response = ""
+    async for chunk in generator:
+        print(chunk)
+        # complete_response += chunk["choices"][0]["delta"]["content"] or ""
+
+    # assert (
+    #     complete_response == "LiteLLM is awesome"
+    # ), f"Unexpected response got {complete_response}"
diff --git a/litellm/utils.py b/litellm/utils.py
index 1bc8bf771..cae86c6f8 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -9708,18 +9708,45 @@ class TextCompletionStreamWrapper:
             raise StopAsyncIteration
 
 
-def mock_completion_streaming_obj(model_response, mock_response, model):
+def mock_completion_streaming_obj(
+    model_response, mock_response, model, n: Optional[int] = None
+):
     for i in range(0, len(mock_response), 3):
-        completion_obj = {"role": "assistant", "content": mock_response[i : i + 3]}
-        model_response.choices[0].delta = completion_obj
+        completion_obj = Delta(role="assistant", content=mock_response[i : i + 3])
+        if n is None:
+            model_response.choices[0].delta = completion_obj
+        else:
+            _all_choices = []
+            for j in range(n):
+                _streaming_choice = litellm.utils.StreamingChoices(
+                    index=j,
+                    delta=litellm.utils.Delta(
+                        role="assistant", content=mock_response[i : i + 3]
+                    ),
+                )
+                _all_choices.append(_streaming_choice)
+            model_response.choices = _all_choices
         yield model_response
 
 
-async def async_mock_completion_streaming_obj(model_response, mock_response, model):
+async def async_mock_completion_streaming_obj(
+    model_response, mock_response, model, n: Optional[int] = None
+):
     for i in range(0, len(mock_response), 3):
         completion_obj = Delta(role="assistant", content=mock_response[i : i + 3])
-        model_response.choices[0].delta = completion_obj
-        model_response.choices[0].finish_reason = "stop"
+        if n is None:
+            model_response.choices[0].delta = completion_obj
+        else:
+            _all_choices = []
+            for j in range(n):
+                _streaming_choice = litellm.utils.StreamingChoices(
+                    index=j,
+                    delta=litellm.utils.Delta(
+                        role="assistant", content=mock_response[i : i + 3]
+                    ),
+                )
+                _all_choices.append(_streaming_choice)
+            model_response.choices = _all_choices
         yield model_response
 
 
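
For reference, a minimal usage sketch of the synchronous mock-streaming path touched above. It assumes litellm.completion accepts mock_response, stream, and n the same way the new async test does, and that, with this patch applied, each streamed chunk fans out to n StreamingChoices; the expectations in the comments are illustrative, not asserted by the patch itself.

import litellm

# Hypothetical example (not part of this patch): stream a mock response with n=3.
chunks = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Why is LiteLLM amazing?"}],
    mock_response="LiteLLM is awesome",
    stream=True,
    n=3,
)
for chunk in chunks:
    # After this patch, each chunk should carry 3 choices, each holding the same
    # 3-character slice of the mock response in its delta.
    print(len(chunk.choices), [choice.delta.content for choice in chunk.choices])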