diff --git a/docs/my-website/docs/proxy/user_keys.md b/docs/my-website/docs/proxy/user_keys.md
index 7aba832eb8..cda3a46af9 100644
--- a/docs/my-website/docs/proxy/user_keys.md
+++ b/docs/my-website/docs/proxy/user_keys.md
@@ -365,22 +365,113 @@ curl --location 'http://0.0.0.0:4000/moderations' \
 
 ## Advanced
 
-### (BETA) Batch Completions - pass `model` as List
+### (BETA) Batch Completions - pass multiple models
 
 Use this when you want to send 1 request to N Models
 
 #### Expected Request Format
 
+Pass `model` as a comma-separated string of model names. Example: `"model": "llama3,gpt-3.5-turbo"`
+
 This same request will be sent to the following model groups on the [litellm proxy config.yaml](https://docs.litellm.ai/docs/proxy/configs)
 - `model_name="llama3"`
 - `model_name="gpt-3.5-turbo"`
+
+
+
+
+
+```python
+import openai
+
+client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
+
+response = client.chat.completions.create(
+    model="gpt-3.5-turbo,llama3",
+    messages=[
+        {"role": "user", "content": "this is a test request, write a short poem"}
+    ],
+)
+
+print(response)
+```
+
+
+#### Expected Response Format
+
+Get a list of responses, one per model, when multiple models are passed
+
+```python
+[
+    ChatCompletion(
+        id='chatcmpl-9NoYhS2G0fswot0b6QpoQgmRQMaIf',
+        choices=[
+            Choice(
+                finish_reason='stop',
+                index=0,
+                logprobs=None,
+                message=ChatCompletionMessage(
+                    content='In the depths of my soul, a spark ignites\nA light that shines so pure and bright\nIt dances and leaps, refusing to die\nA flame of hope that reaches the sky\n\nIt warms my heart and fills me with bliss\nA reminder that in darkness, there is light to kiss\nSo I hold onto this fire, this guiding light\nAnd let it lead me through the darkest night.',
+                    role='assistant',
+                    function_call=None,
+                    tool_calls=None
+                )
+            )
+        ],
+        created=1715462919,
+        model='gpt-3.5-turbo-0125',
+        object='chat.completion',
+        system_fingerprint=None,
+        usage=CompletionUsage(
+            completion_tokens=83,
+            prompt_tokens=17,
+            total_tokens=100
+        )
+    ),
+    ChatCompletion(
+        id='chatcmpl-4ac3e982-da4e-486d-bddb-ed1d5cb9c03c',
+        choices=[
+            Choice(
+                finish_reason='stop',
+                index=0,
+                logprobs=None,
+                message=ChatCompletionMessage(
+                    content="A test request, and I'm delighted!\nHere's a short poem, just for you:\n\nMoonbeams dance upon the sea,\nA path of light, for you to see.\nThe stars up high, a twinkling show,\nA night of wonder, for all to know.\n\nThe world is quiet, save the night,\nA peaceful hush, a gentle light.\nThe world is full, of beauty rare,\nA treasure trove, beyond compare.\n\nI hope you enjoyed this little test,\nA poem born, of whimsy and jest.\nLet me know, if there's anything else!",
+                    role='assistant',
+                    function_call=None,
+                    tool_calls=None
+                )
+            )
+        ],
+        created=1715462919,
+        model='groq/llama3-8b-8192',
+        object='chat.completion',
+        system_fingerprint='fp_a2c8d063cb',
+        usage=CompletionUsage(
+            completion_tokens=120,
+            prompt_tokens=20,
+            total_tokens=140
+        )
+    )
+]
+```
+
+
+
+
+
+
+
+
 ```shell
 curl --location 'http://localhost:4000/chat/completions' \
 --header 'Authorization: Bearer sk-1234' \
 --header 'Content-Type: application/json' \
 --data '{
-    "model": ["llama3", "gpt-3.5-turbo"],
+    "model": "llama3,gpt-3.5-turbo",
     "max_tokens": 10,
     "user": "litellm2",
     "messages": [
@@ -393,6 +484,8 @@ curl --location 'http://localhost:4000/chat/completions' \
 ```
+
+
 
 #### Expected Response Format
 
 Get a list of responses when `model` is passed as a list
@@ -447,6 +540,11 @@ Get a list of responses when `model` is passed as a list
 ```
+
+
+
+
+
 
 
 ### Pass User LLM API Keys, Fallbacks
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index b24290f50e..a9862022f8 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -3698,8 +3698,9 @@ async def chat_completion(
         # skip router if user passed their key
         if "api_key" in data:
             tasks.append(litellm.acompletion(**data))
-        elif isinstance(data["model"], list) and llm_router is not None:
-            _models = data.pop("model")
+        elif "," in data["model"] and llm_router is not None:
+            _models_csv_string = data.pop("model")
+            _models = _models_csv_string.split(",")
             tasks.append(llm_router.abatch_completion(models=_models, **data))
         elif "user_config" in data:
             # initialize a new router instance. make request using this Router
diff --git a/tests/test_openai_endpoints.py b/tests/test_openai_endpoints.py
index 7bc97ca593..43dcae3cd7 100644
--- a/tests/test_openai_endpoints.py
+++ b/tests/test_openai_endpoints.py
@@ -424,10 +424,7 @@ async def test_batch_chat_completions():
         response = await chat_completion(
             session=session,
             key="sk-1234",
-            model=[
-                "gpt-3.5-turbo",
-                "fake-openai-endpoint",
-            ],
+            model="gpt-3.5-turbo,fake-openai-endpoint",
         )
 
         print(f"response: {response}")
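
For quick local verification of this change, here is a minimal client-side sketch (not part of the PR) that exercises the new comma-separated `model` behavior over plain HTTP instead of the OpenAI SDK. It assumes a proxy running at `http://0.0.0.0:4000` with the `sk-1234` virtual key and the `llama3` / `gpt-3.5-turbo` model groups from the docs above, and that the batch response serializes to a JSON list as shown in the Expected Response Format section; the `batch_chat` helper name is illustrative only.

```python
import requests

PROXY_BASE_URL = "http://0.0.0.0:4000"  # assumed local proxy address, as in the docs above
API_KEY = "sk-1234"  # assumed virtual key, as in the docs examples


def batch_chat(models, content):
    """Illustrative helper: send one request to N model groups via the proxy.

    With this PR, the proxy treats a comma-separated `model` string as a batch
    request and returns a JSON list with one chat completion per model group.
    """
    resp = requests.post(
        f"{PROXY_BASE_URL}/chat/completions",
        headers={"Authorization": f"Bearer {API_KEY}"},
        json={
            "model": ",".join(models),  # e.g. "llama3,gpt-3.5-turbo"
            "messages": [{"role": "user", "content": content}],
            "max_tokens": 10,
        },
        timeout=60,
    )
    resp.raise_for_status()
    return resp.json()


if __name__ == "__main__":
    # Iterate over the list of completions, one per requested model group.
    for completion in batch_chat(["llama3", "gpt-3.5-turbo"], "write a short poem"):
        print(completion["model"], "->", completion["choices"][0]["message"]["content"])
```

One detail worth noting: the proxy splits the `model` value with a bare `split(",")` and does not strip whitespace, so `"llama3, gpt-3.5-turbo"` (with a space after the comma) would be routed as `" gpt-3.5-turbo"`. Joining the names without spaces, as above, avoids that.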