From 9156b7448aef8932517af744ec3bb20c4edc5f82 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Sat, 11 May 2024 13:08:16 -0700
Subject: [PATCH 1/7] feat - router async batch acompletion

---
 litellm/router.py | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/litellm/router.py b/litellm/router.py
index f0d94908e..7396dab20 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -605,6 +605,33 @@ class Router:
                 self.fail_calls[model_name] += 1
             raise e
 
+    async def abatch_completion(
+        self, models: List[str], messages: List[Dict[str, str]], **kwargs
+    ):
+
+        async def _async_completion_no_exceptions(
+            model: str, messages: List[Dict[str, str]], **kwargs
+        ):
+            """
+            Wrapper around self.acompletion that catches exceptions and returns them as a result
+            """
+            try:
+                return await self.acompletion(model=model, messages=messages, **kwargs)
+            except Exception as e:
+                return e
+
+        _tasks = []
+        for model in models:
+            # add one task per model; a failed call returns its exception instead of raising
+            _tasks.append(
+                _async_completion_no_exceptions(
+                    model=model, messages=messages, **kwargs
+                )
+            )
+
+        response = await asyncio.gather(*_tasks)
+        return response
+
     def image_generation(self, prompt: str, model: str, **kwargs):
         try:
             kwargs["model"] = model

From 6561e0838ea36695847d38f648b81c46d225ce08 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Sat, 11 May 2024 13:09:17 -0700
Subject: [PATCH 2/7] test - router.batch_acompletion

---
 litellm/tests/test_router_batch_completion.py | 60 +++++++++++++++++++
 1 file changed, 60 insertions(+)
 create mode 100644 litellm/tests/test_router_batch_completion.py

diff --git a/litellm/tests/test_router_batch_completion.py b/litellm/tests/test_router_batch_completion.py
new file mode 100644
index 000000000..f2873b18d
--- /dev/null
+++ b/litellm/tests/test_router_batch_completion.py
@@ -0,0 +1,60 @@
+#### What this tests ####
+# This tests the litellm router with batch completion
+
+import sys, os, time, openai
+import traceback, asyncio
+import pytest
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+import litellm
+from litellm import Router
+from litellm.router import Deployment, LiteLLM_Params, ModelInfo
+from concurrent.futures import ThreadPoolExecutor
+from collections import defaultdict
+from dotenv import load_dotenv
+import os, httpx
+
+load_dotenv()
+
+
+@pytest.mark.asyncio
+async def test_batch_completion_multiple_models():
+    litellm.set_verbose = True
+
+    router = litellm.Router(
+        model_list=[
+            {
+                "model_name": "gpt-3.5-turbo",
+                "litellm_params": {
+                    "model": "gpt-3.5-turbo",
+                },
+            },
+            {
+                "model_name": "groq-llama",
+                "litellm_params": {
+                    "model": "groq/llama3-8b-8192",
+                },
+            },
+        ]
+    )
+
+    response = await router.abatch_completion(
+        models=["gpt-3.5-turbo", "groq-llama"],
+        messages=[
+            {"role": "user", "content": "is litellm becoming a better product ?"}
+        ],
+        max_tokens=15,
+    )
+
+    print(response)
+    assert len(response) == 2
+
+    models_in_responses = []
+    for individual_response in response:
+        _model = individual_response["model"]
+        models_in_responses.append(_model)
+
+    # assert the two responses come from different models
+    assert models_in_responses[0] != models_in_responses[1]
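A minimal usage sketch of the new `Router.abatch_completion` method, mirroring the test above. The model groups and message are the illustrative values from the test (provider API keys are assumed to be set in the environment); because failed calls are returned rather than raised, each entry in the result list is either a completion or an exception object:

```python
import asyncio

import litellm

router = litellm.Router(
    model_list=[
        {"model_name": "gpt-3.5-turbo", "litellm_params": {"model": "gpt-3.5-turbo"}},
        {"model_name": "groq-llama", "litellm_params": {"model": "groq/llama3-8b-8192"}},
    ]
)


async def main():
    # fan one request out to both model groups; results come back in the same order as `models`
    responses = await router.abatch_completion(
        models=["gpt-3.5-turbo", "groq-llama"],
        messages=[{"role": "user", "content": "is litellm becoming a better product ?"}],
        max_tokens=15,
    )
    for model, resp in zip(["gpt-3.5-turbo", "groq-llama"], responses):
        # failed calls come back as exception objects instead of being raised
        if isinstance(resp, Exception):
            print(f"{model} failed: {resp}")
        else:
            print(f"{model}: {resp['choices'][0]['message']['content']}")


asyncio.run(main())
```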
From b8c7bbcb9f8d10103ebed09f9c73d6d49ca48024 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Sat, 11 May 2024 13:24:25 -0700
Subject: [PATCH 3/7] support batch /chat/completions on proxy

---
 litellm/proxy/proxy_server.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 46c132773..0ae498baa 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -3639,7 +3639,7 @@ async def chat_completion(
         ### MODEL ALIAS MAPPING ###
         # check if model name in model alias map
         # get the actual model name
-        if data["model"] in litellm.model_alias_map:
+        if isinstance(data["model"], str) and data["model"] in litellm.model_alias_map:
             data["model"] = litellm.model_alias_map[data["model"]]
 
         ## LOGGING OBJECT ## - initialize logging object for logging success/failure events for call
@@ -3673,6 +3673,9 @@ async def chat_completion(
         # skip router if user passed their key
         if "api_key" in data:
             tasks.append(litellm.acompletion(**data))
+        elif isinstance(data["model"], list) and llm_router is not None:
+            _models = data.pop("model")
+            tasks.append(llm_router.abatch_completion(models=_models, **data))
         elif "user_config" in data:
             # initialize a new router instance. make request using this Router
             router_config = data.pop("user_config")

From 31cb1be27929eec766447616942b5e7b70f2b61d Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Sat, 11 May 2024 13:24:59 -0700
Subject: [PATCH 4/7] edit dev config.yaml

---
 litellm/proxy/proxy_config.yaml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index b1cbf2e81..85634c5b8 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -4,6 +4,12 @@ model_list:
       model: openai/fake
       api_key: fake-key
       api_base: https://exampleopenaiendpoint-production.up.railway.app/
+  - model_name: llama3
+    litellm_params:
+      model: groq/llama3-8b-8192
+  - model_name: gpt-3.5-turbo
+    litellm_params:
+      model: gpt-3.5-turbo
   - model_name: "*"
     litellm_params:
       model: openai/*

From e1f94fcbbb3a51107fa14ef09d69132eff92e124 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Sat, 11 May 2024 13:32:30 -0700
Subject: [PATCH 5/7] test batch completions on litellm proxy

---
 tests/test_openai_endpoints.py | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/tests/test_openai_endpoints.py b/tests/test_openai_endpoints.py
index 38e87c254..7bc97ca59 100644
--- a/tests/test_openai_endpoints.py
+++ b/tests/test_openai_endpoints.py
@@ -4,6 +4,7 @@ import pytest
 import asyncio
 import aiohttp, openai
 from openai import OpenAI, AsyncOpenAI
+from typing import Optional, List, Union
 
 
 def response_header_check(response):
@@ -71,7 +72,7 @@ async def new_user(session):
     return await response.json()
 
 
-async def chat_completion(session, key, model="gpt-4"):
+async def chat_completion(session, key, model: Union[str, List] = "gpt-4"):
     url = "http://0.0.0.0:4000/chat/completions"
     headers = {
         "Authorization": f"Bearer {key}",
@@ -409,3 +410,27 @@ async def test_openai_wildcard_chat_completion():
 
         # call chat/completions with a model that the key was not created for + the model is not on the config.yaml
         await chat_completion(session=session, key=key, model="gpt-3.5-turbo-0125")
+
+
+@pytest.mark.asyncio
+async def test_batch_chat_completions():
+    """
+    - Make a chat completion call with `model` passed as a list of models
+
+    """
+    async with aiohttp.ClientSession() as session:
+
+        # call chat/completions with a list of models; the proxy fans the request out to both
+        response = await chat_completion(
+            session=session,
+            key="sk-1234",
+            model=[
+                "gpt-3.5-turbo",
+                "fake-openai-endpoint",
+            ],
+        )
+
+        print(f"response: {response}")
+
+        assert len(response) == 2
+        assert isinstance(response, list)
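For context, a rough sketch of exercising the new proxy behavior from Python, in the spirit of the test above. The proxy URL, the `sk-1234` master key, and the model group names are the illustrative values used in the test and dev config; it assumes a proxy running locally and `aiohttp` installed:

```python
import asyncio

import aiohttp


async def batch_chat_completion():
    async with aiohttp.ClientSession() as session:
        # pass `model` as a list so the proxy fans the request out via Router.abatch_completion
        async with session.post(
            "http://0.0.0.0:4000/chat/completions",
            headers={"Authorization": "Bearer sk-1234", "Content-Type": "application/json"},
            json={
                "model": ["gpt-3.5-turbo", "fake-openai-endpoint"],
                "max_tokens": 10,
                "messages": [{"role": "user", "content": "is litellm getting better"}],
            },
        ) as response:
            completions = await response.json()

    # the proxy returns one chat completion object per requested model group
    for completion in completions:
        print(completion["model"], completion["choices"][0]["message"]["content"])


asyncio.run(batch_chat_completion())
```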
From 5918ee543bd9d3523bfc4fdb6c6ceff4384f8a3e Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Sat, 11 May 2024 13:42:41 -0700
Subject: [PATCH 6/7] docs - batch completion litellm proxy

---
 docs/my-website/docs/proxy/user_keys.md | 84 +++++++++++++++++++++++++
 1 file changed, 84 insertions(+)

diff --git a/docs/my-website/docs/proxy/user_keys.md b/docs/my-website/docs/proxy/user_keys.md
index fa78b37c1..7aba832eb 100644
--- a/docs/my-website/docs/proxy/user_keys.md
+++ b/docs/my-website/docs/proxy/user_keys.md
@@ -365,6 +365,90 @@ curl --location 'http://0.0.0.0:4000/moderations' \
 
 ## Advanced
 
+### (BETA) Batch Completions - pass `model` as List
+
+Use this when you want to send one request to N models.
+
+#### Expected Request Format
+
+This request will be sent to both of the following model groups on the [litellm proxy config.yaml](https://docs.litellm.ai/docs/proxy/configs):
+- `model_name="llama3"`
+- `model_name="gpt-3.5-turbo"`
+
+```shell
+curl --location 'http://localhost:4000/chat/completions' \
+    --header 'Authorization: Bearer sk-1234' \
+    --header 'Content-Type: application/json' \
+    --data '{
+    "model": ["llama3", "gpt-3.5-turbo"],
+    "max_tokens": 10,
+    "user": "litellm2",
+    "messages": [
+        {
+        "role": "user",
+        "content": "is litellm getting better"
+        }
+    ]
+}'
+```
+
+
+#### Expected Response Format
+
+Get a list of responses, one per model, when `model` is passed as a list
+
+```json
+[
+  {
+    "id": "chatcmpl-3dbd5dd8-7c82-4ca3-bf1f-7c26f497cf2b",
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "message": {
+          "content": "The Elder Scrolls IV: Oblivion!\n\nReleased",
+          "role": "assistant"
+        }
+      }
+    ],
+    "created": 1715459876,
+    "model": "groq/llama3-8b-8192",
+    "object": "chat.completion",
+    "system_fingerprint": "fp_179b0f92c9",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 12,
+      "total_tokens": 22
+    }
+  },
+  {
+    "id": "chatcmpl-9NnldUfFLmVquFHSX4yAtjCw8PGei",
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "message": {
+          "content": "TES4 could refer to The Elder Scrolls IV:",
+          "role": "assistant"
+        }
+      }
+    ],
+    "created": 1715459877,
+    "model": "gpt-3.5-turbo-0125",
+    "object": "chat.completion",
+    "system_fingerprint": null,
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 9,
+      "total_tokens": 19
+    }
+  }
+]
+```
+
+
+
+
 ### Pass User LLM API Keys, Fallbacks
 
 Allow your end-users to pass their model list, api base, OpenAI API key (any LiteLLM supported provider) to make requests
From 62276fc22102aafbc9ea91d0f978a39b252ce75a Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Sat, 11 May 2024 13:45:32 -0700
Subject: [PATCH 7/7] docs link to litellm batch completions

---
 docs/my-website/docs/completion/batching.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/docs/my-website/docs/completion/batching.md b/docs/my-website/docs/completion/batching.md
index 05683b3dd..09f59f743 100644
--- a/docs/my-website/docs/completion/batching.md
+++ b/docs/my-website/docs/completion/batching.md
@@ -4,6 +4,12 @@ LiteLLM allows you to:
 * Send 1 completion call to many models: Return Fastest Response
 * Send 1 completion call to many models: Return All Responses
 
+:::info
+
+Trying to do batch completion on the LiteLLM Proxy? Go here: https://docs.litellm.ai/docs/proxy/user_keys#beta-batch-completions---pass-model-as-list
+
+:::
+
 ## Send multiple completion calls to 1 model
 
 In the batch_completion method, you provide a list of `messages` where each sub-list of messages is passed to `litellm.completion()`, allowing you to process multiple prompts efficiently in a single API call.
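The context lines above refer to the SDK-level `litellm.batch_completion` (many prompts, one model), which is the counterpart to the proxy-level batching added in this series. A minimal sketch of that call, assuming the signature described in batching.md (a model string plus a list of message lists); the prompts are illustrative:

```python
import litellm

# each inner list is a separate conversation; every conversation goes to the same model
responses = litellm.batch_completion(
    model="gpt-3.5-turbo",
    messages=[
        [{"role": "user", "content": "good morning?"}],
        [{"role": "user", "content": "what's the weather in SF?"}],
    ],
    max_tokens=10,
)

# one response per conversation, in the same order as `messages`
for resp in responses:
    print(resp["choices"][0]["message"]["content"])
```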