From a230f5f6c53f0215fb66656d0b5d888bbc3f14ec Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Tue, 25 Jun 2024 10:50:47 -0700
Subject: [PATCH 1/3] feat - use n in mock completion

---
 litellm/llms/prompt_templates/factory.py | 17 ++++++++---------
 litellm/main.py                          | 17 +++++++++++++++--
 2 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py
index e359d36f4..a97d6812c 100644
--- a/litellm/llms/prompt_templates/factory.py
+++ b/litellm/llms/prompt_templates/factory.py
@@ -135,7 +135,7 @@ def convert_to_ollama_image(openai_image_url: str):
 
 
 def ollama_pt(
-    model, messages
+    model, messages
 ):  # https://github.com/ollama/ollama/blob/af4cf55884ac54b9e637cd71dadfe9b7a5685877/docs/modelfile.md#template
     if "instruct" in model:
         prompt = custom_prompt(
@@ -185,19 +185,18 @@ def ollama_pt(
                 function_name: str = call["function"]["name"]
                 arguments = json.loads(call["function"]["arguments"])
 
-                tool_calls.append({
-                    "id": call_id,
-                    "type": "function",
-                    "function": {
-                        "name": function_name,
-                        "arguments": arguments
+                tool_calls.append(
+                    {
+                        "id": call_id,
+                        "type": "function",
+                        "function": {"name": function_name, "arguments": arguments},
                     }
-                })
+                )
 
             prompt += f"### Assistant:\nTool Calls: {json.dumps(tool_calls, indent=2)}\n\n"
 
         elif "tool_call_id" in message:
-            prompt += f"### User:\n{message["content"]}\n\n"
+            prompt += f"### User:\n{message['content']}\n\n"
 
         elif content:
             prompt += f"### {role.capitalize()}:\n{content}\n\n"
diff --git a/litellm/main.py b/litellm/main.py
index 307659c8a..07d7be2ba 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -428,6 +428,7 @@ def mock_completion(
     model: str,
     messages: List,
     stream: Optional[bool] = False,
+    n: Optional[int] = None,
     mock_response: Union[str, Exception, dict] = "This is a mock request",
     mock_tool_calls: Optional[List] = None,
     logging=None,
@@ -496,8 +497,19 @@ def mock_completion(
             model_response, mock_response=mock_response, model=model
         )
         return response
-
-    model_response["choices"][0]["message"]["content"] = mock_response
+    if n is None:
+        model_response["choices"][0]["message"]["content"] = mock_response
+    else:
+        _all_choices = []
+        for i in range(n):
+            _choice = litellm.utils.Choices(
+                index=i,
+                message=litellm.utils.Message(
+                    content=mock_response, role="assistant"
+                ),
+            )
+            _all_choices.append(_choice)
+        model_response["choices"] = _all_choices
 
     model_response["created"] = int(time.time())
     model_response["model"] = model
@@ -944,6 +956,7 @@ def completion(
             model,
             messages,
             stream=stream,
+            n=n,
             mock_response=mock_response,
             mock_tool_calls=mock_tool_calls,
             logging=logging,

From 343e3f3e909c02f38533604ff998b3dbef79153e Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Tue, 25 Jun 2024 10:54:18 -0700
Subject: [PATCH 2/3] test - test_mock_request_n_greater_than_1

---
 litellm/tests/test_mock_request.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/litellm/tests/test_mock_request.py b/litellm/tests/test_mock_request.py
index 7d670feb5..6b58c94b2 100644
--- a/litellm/tests/test_mock_request.py
+++ b/litellm/tests/test_mock_request.py
@@ -58,3 +58,18 @@ async def test_async_mock_streaming_request():
     assert (
         complete_response == "LiteLLM is awesome"
     ), f"Unexpected response got {complete_response}"
+
+
+def test_mock_request_n_greater_than_1():
+    try:
+        model = "gpt-3.5-turbo"
+        messages = [{"role": "user", "content": "Hey, I'm a mock request"}]
+        response = litellm.mock_completion(model=model, messages=messages, n=5)
print("response: ", response) + + assert len(response.choices) == 5 + for choice in response.choices: + assert choice.message.content == "This is a mock request" + + except: + traceback.print_exc() From ccf1bbc5d75c310fc45816d4bc55a9aa5be925d1 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 25 Jun 2024 11:14:40 -0700 Subject: [PATCH 3/3] fix using mock completion --- litellm/main.py | 7 ++++-- litellm/tests/test_mock_request.py | 19 +++++++++++++++ litellm/utils.py | 39 +++++++++++++++++++++++++----- 3 files changed, 57 insertions(+), 8 deletions(-) diff --git a/litellm/main.py b/litellm/main.py index 07d7be2ba..ecb6edd0d 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -487,14 +487,17 @@ def mock_completion( if kwargs.get("acompletion", False) == True: return CustomStreamWrapper( completion_stream=async_mock_completion_streaming_obj( - model_response, mock_response=mock_response, model=model + model_response, mock_response=mock_response, model=model, n=n ), model=model, custom_llm_provider="openai", logging_obj=logging, ) response = mock_completion_streaming_obj( - model_response, mock_response=mock_response, model=model + model_response, + mock_response=mock_response, + model=model, + n=n, ) return response if n is None: diff --git a/litellm/tests/test_mock_request.py b/litellm/tests/test_mock_request.py index 6b58c94b2..48b054371 100644 --- a/litellm/tests/test_mock_request.py +++ b/litellm/tests/test_mock_request.py @@ -73,3 +73,22 @@ def test_mock_request_n_greater_than_1(): except: traceback.print_exc() + + +@pytest.mark.asyncio() +async def test_async_mock_streaming_request_n_greater_than_1(): + generator = await litellm.acompletion( + messages=[{"role": "user", "content": "Why is LiteLLM amazing?"}], + mock_response="LiteLLM is awesome", + stream=True, + model="gpt-3.5-turbo", + n=5, + ) + complete_response = "" + async for chunk in generator: + print(chunk) + # complete_response += chunk["choices"][0]["delta"]["content"] or "" + + # assert ( + # complete_response == "LiteLLM is awesome" + # ), f"Unexpected response got {complete_response}" diff --git a/litellm/utils.py b/litellm/utils.py index 1bc8bf771..cae86c6f8 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -9708,18 +9708,45 @@ class TextCompletionStreamWrapper: raise StopAsyncIteration -def mock_completion_streaming_obj(model_response, mock_response, model): +def mock_completion_streaming_obj( + model_response, mock_response, model, n: Optional[int] = None +): for i in range(0, len(mock_response), 3): - completion_obj = {"role": "assistant", "content": mock_response[i : i + 3]} - model_response.choices[0].delta = completion_obj + completion_obj = Delta(role="assistant", content=mock_response[i : i + 3]) + if n is None: + model_response.choices[0].delta = completion_obj + else: + _all_choices = [] + for j in range(n): + _streaming_choice = litellm.utils.StreamingChoices( + index=j, + delta=litellm.utils.Delta( + role="assistant", content=mock_response[i : i + 3] + ), + ) + _all_choices.append(_streaming_choice) + model_response.choices = _all_choices yield model_response -async def async_mock_completion_streaming_obj(model_response, mock_response, model): +async def async_mock_completion_streaming_obj( + model_response, mock_response, model, n: Optional[int] = None +): for i in range(0, len(mock_response), 3): completion_obj = Delta(role="assistant", content=mock_response[i : i + 3]) - model_response.choices[0].delta = completion_obj - model_response.choices[0].finish_reason = "stop" + if n is None: + 
+            model_response.choices[0].delta = completion_obj
+        else:
+            _all_choices = []
+            for j in range(n):
+                _streaming_choice = litellm.utils.StreamingChoices(
+                    index=j,
+                    delta=litellm.utils.Delta(
+                        role="assistant", content=mock_response[i : i + 3]
+                    ),
+                )
+                _all_choices.append(_streaming_choice)
+            model_response.choices = _all_choices
         yield model_response
 
 
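
Usage sketch (not part of the patch series above): a minimal snippet exercising the behavior these three patches add, assuming they are applied to a litellm checkout. The model name, prompt text, and n=3 below are illustrative values only.

import litellm

# Non-streaming: with n set, mock_completion builds n Choices entries,
# each carrying the same mock_response text (see PATCH 1/3).
response = litellm.mock_completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hey, I'm a mock request"}],
    n=3,
    mock_response="This is a mock request",
)
assert len(response.choices) == 3
for choice in response.choices:
    print(choice.index, choice.message.content)

# Streaming: each yielded chunk now carries n StreamingChoices entries
# instead of a single delta (see PATCH 3/3).
stream = litellm.mock_completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hey, I'm a mock request"}],
    n=3,
    mock_response="This is a mock request",
    stream=True,
)
for chunk in stream:
    print([c.delta.content for c in chunk.choices])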