From 473ec66b848a4e7c4b7fc785d55302f9515ddd68 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Tue, 28 May 2024 22:19:33 -0700
Subject: [PATCH 1/2] feat - router add abatch_completion

---
 litellm/router.py                             | 65 +++++++++++++++----
 litellm/tests/test_router_batch_completion.py | 44 +++++++++++++
 2 files changed, 96 insertions(+), 13 deletions(-)

diff --git a/litellm/router.py b/litellm/router.py
index e2ebea37f..a2a03da86 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -356,7 +356,8 @@ class Router:
             raise ValueError(f"Item '{fallback_dict}' is not a dictionary.")
         if len(fallback_dict) != 1:
             raise ValueError(
-                f"Dictionary '{fallback_dict}' must have exactly one key, but has {len(fallback_dict)} keys.")
+                f"Dictionary '{fallback_dict}' must have exactly one key, but has {len(fallback_dict)} keys."
+            )
 
     def routing_strategy_init(self, routing_strategy: str, routing_strategy_args: dict):
         if routing_strategy == "least-busy":
@@ -662,12 +663,17 @@ class Router:
             raise e
 
     async def abatch_completion(
-        self, models: List[str], messages: List[Dict[str, str]], **kwargs
+        self,
+        models: List[str],
+        messages: Union[List[Dict[str, str]], List[List[Dict[str, str]]]],
+        **kwargs,
     ):
         """
-        Async Batch Completion - Batch Process 1 request to multiple model_group on litellm.Router
-        Use this for sending the same request to N models
+        Async Batch Completion. Used for 2 scenarios:
+        1. Batch Process 1 request to N models on litellm.Router. Pass messages as List[Dict[str, str]] to use this
+        2. Batch Process N requests to M models on litellm.Router. Pass messages as List[List[Dict[str, str]]] to use this
         """
+
         ############## Helpers for async completion ##################
         async def _async_completion_no_exceptions(
             model: str, messages: List[Dict[str, str]], **kwargs
@@ -680,17 +686,50 @@ class Router:
             except Exception as e:
                 return e
 
-        _tasks = []
-        for model in models:
-            # add each task but if the task fails
-            _tasks.append(
-                _async_completion_no_exceptions(
-                    model=model, messages=messages, **kwargs
+        async def _async_completion_no_exceptions_return_idx(
+            model: str,
+            messages: List[Dict[str, str]],
+            idx: int,  # index of message this response corresponds to
+            **kwargs,
+        ):
+            """
+            Wrapper around self.acompletion that catches exceptions and returns them as a result
+            """
+            try:
+                return (
+                    await self.acompletion(model=model, messages=messages, **kwargs),
+                    idx,
                 )
-            )
+            except Exception as e:
+                return e, idx
 
-        response = await asyncio.gather(*_tasks)
-        return response
+        ############## Helpers for async completion ##################
+
+        if isinstance(messages, list) and all(isinstance(m, dict) for m in messages):
+            _tasks = []
+            for model in models:
+                # add each task; failures are returned as results rather than raised
+                _tasks.append(_async_completion_no_exceptions(model=model, messages=messages, **kwargs))  # type: ignore
+            response = await asyncio.gather(*_tasks)
+            return response
+        elif isinstance(messages, list) and all(isinstance(m, list) for m in messages):
+            _tasks = []
+            for idx, message in enumerate(messages):
+                for model in models:
+                    # one task per (request idx, model) pair
+                    _tasks.append(
+                        _async_completion_no_exceptions_return_idx(
+                            model=model, idx=idx, messages=message, **kwargs  # type: ignore
+                        )
+                    )
+            responses = await asyncio.gather(*_tasks)
+            final_responses: List[List[Any]] = [[] for _ in range(len(messages))]
+            for response in responses:
+                if isinstance(response, tuple):
+                    final_responses[response[1]].append(response[0])
+                else:
+                    final_responses[0].append(response)
+            return final_responses
 
     async def abatch_completion_one_model_multiple_requests(
         self, model: str, messages: List[List[Dict[str, str]]], **kwargs
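For reference, the N requests x M models branch above boils down to a standalone pattern: tag each task with the index of its originating request, gather everything concurrently with exceptions captured as results, then regroup by index. Below is a minimal runnable sketch of that pattern with stand-in coroutines; the model names and the `_fake_call` helper are illustrative only and not part of the patch.

```
import asyncio
from typing import Any, List


async def _fake_call(model: str, prompt: str) -> str:
    # Stand-in for Router.acompletion; one model fails to show error capture.
    if model == "bad-model":
        raise ValueError(f"no deployment for {model}")
    return f"{model}: {prompt}"


async def _no_exceptions_return_idx(model: str, prompt: str, idx: int):
    # Mirrors the helper in the hunk above: exceptions become results,
    # tagged with the index of the originating request for regrouping.
    try:
        return await _fake_call(model, prompt), idx
    except Exception as e:
        return e, idx


async def main():
    models = ["good-model", "bad-model"]
    prompts = ["first prompt", "second prompt"]
    # N requests x M models -> one task per (request, model) pair.
    tasks = [
        _no_exceptions_return_idx(model, prompt, idx)
        for idx, prompt in enumerate(prompts)
        for model in models
    ]
    flat = await asyncio.gather(*tasks)
    # Regroup the flat gather output into one list of M results per request.
    grouped: List[List[Any]] = [[] for _ in prompts]
    for result, idx in flat:
        grouped[idx].append(result)
    for idx, results in enumerate(grouped):
        print(idx, results)


asyncio.run(main())
```

Returning exceptions instead of raising keeps one failed deployment from cancelling the whole gather, which is why the patch's helpers swallow and return errors.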
diff --git a/litellm/tests/test_router_batch_completion.py b/litellm/tests/test_router_batch_completion.py
index f2873b18d..0925a3835 100644
--- a/litellm/tests/test_router_batch_completion.py
+++ b/litellm/tests/test_router_batch_completion.py
@@ -58,3 +58,47 @@ async def test_batch_completion_multiple_models():
 
     # assert both models are different
     assert models_in_responses[0] != models_in_responses[1]
+
+
+@pytest.mark.asyncio
+async def test_batch_completion_multiple_models_multiple_messages():
+    litellm.set_verbose = True
+
+    router = litellm.Router(
+        model_list=[
+            {
+                "model_name": "gpt-3.5-turbo",
+                "litellm_params": {
+                    "model": "gpt-3.5-turbo",
+                },
+            },
+            {
+                "model_name": "groq-llama",
+                "litellm_params": {
+                    "model": "groq/llama3-8b-8192",
+                },
+            },
+        ]
+    )
+
+    response = await router.abatch_completion(
+        models=["gpt-3.5-turbo", "groq-llama"],
+        messages=[
+            [{"role": "user", "content": "is litellm becoming a better product ?"}],
+            [{"role": "user", "content": "who is this"}],
+        ],
+        max_tokens=15,
+    )
+
+    print("response from batches =", response)
+    assert len(response) == 2
+    assert len(response[0]) == 2
+    assert isinstance(response[0][0], litellm.ModelResponse)
+
+    # models_in_responses = []
+    # for individual_response in response:
+    #     _model = individual_response["model"]
+    #     models_in_responses.append(_model)
+
+    # # assert both models are different
+    # assert models_in_responses[0] != models_in_responses[1]

From 9ab96e12ed002facaa57fc7fed7f10a8ee1c52a7 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Tue, 28 May 2024 22:27:09 -0700
Subject: [PATCH 2/2] fix - update abatch_completion docstring

---
 litellm/router.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/litellm/router.py b/litellm/router.py
index a2a03da86..9c9f81dc8 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -672,6 +672,29 @@ class Router:
         Async Batch Completion. Used for 2 scenarios:
         1. Batch Process 1 request to N models on litellm.Router. Pass messages as List[Dict[str, str]] to use this
         2. Batch Process N requests to M models on litellm.Router. Pass messages as List[List[Dict[str, str]]] to use this
+
+        Example Request for 1 request to N models:
+        ```
+        response = await router.abatch_completion(
+            models=["gpt-3.5-turbo", "groq-llama"],
+            messages=[
+                {"role": "user", "content": "is litellm becoming a better product ?"}
+            ],
+            max_tokens=15,
+        )
+        ```
+
+
+        Example Request for N requests to M models:
+        ```
+        response = await router.abatch_completion(
+            models=["gpt-3.5-turbo", "groq-llama"],
+            messages=[
+                [{"role": "user", "content": "is litellm becoming a better product ?"}],
+                [{"role": "user", "content": "who is this"}],
+            ],
+        )
+        ```
         """
 
         ############## Helpers for async completion ##################
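Taken together, the two commits give `router.abatch_completion` the call surface sketched below. This is a usage sketch assembled from the docstring examples and the test above, not code from the patch itself: it assumes provider credentials (e.g. OPENAI_API_KEY, GROQ_API_KEY) are set in the environment, and that successful results expose the usual litellm `choices[0].message.content` shape. Failed calls arrive inline as Exception objects rather than raising.

```
import asyncio

import litellm


async def main():
    # Same model_list as the test above.
    router = litellm.Router(
        model_list=[
            {"model_name": "gpt-3.5-turbo", "litellm_params": {"model": "gpt-3.5-turbo"}},
            {"model_name": "groq-llama", "litellm_params": {"model": "groq/llama3-8b-8192"}},
        ]
    )

    # Scenario 1: one request fanned out to N models -> flat list of N results.
    fan_out = await router.abatch_completion(
        models=["gpt-3.5-turbo", "groq-llama"],
        messages=[{"role": "user", "content": "is litellm becoming a better product ?"}],
        max_tokens=15,
    )
    print(len(fan_out))  # 2, one result per model

    # Scenario 2: N requests to M models -> list of N lists with M entries each.
    grid = await router.abatch_completion(
        models=["gpt-3.5-turbo", "groq-llama"],
        messages=[
            [{"role": "user", "content": "is litellm becoming a better product ?"}],
            [{"role": "user", "content": "who is this"}],
        ],
        max_tokens=15,
    )

    # Failures come back inline as Exception objects, so check before reading.
    for per_request in grid:
        for result in per_request:
            if isinstance(result, Exception):
                print("call failed:", result)
            else:
                print(result.choices[0].message.content)


asyncio.run(main())
```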