Merge pull request #3587 from BerriAI/litellm_proxy_use_batch_completions_model_csv

[Feat] Use csv values for proxy batch completions (OpenAI Python compatible)
Ishaan Jaff 2024-05-13 07:55:12 -07:00 committed by GitHub
commit e82c00622e
3 changed files with 104 additions and 8 deletions

@@ -365,22 +365,113 @@ curl --location 'http://0.0.0.0:4000/moderations' \
## Advanced
### (BETA) Batch Completions - pass multiple models
Use this when you want to send 1 request to N models.
#### Expected Request Format
Pass `model` as a string of comma-separated model names, e.g. `"model": "llama3,gpt-3.5-turbo"`.
This same request will be sent to the following model groups on the [litellm proxy config.yaml](https://docs.litellm.ai/docs/proxy/configs) (a minimal example config is sketched after this list):
- `model_name="llama3"`
- `model_name="gpt-3.5-turbo"`
<Tabs>
<TabItem value="openai-py" label="OpenAI Python SDK">
```python
import openai
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
response = client.chat.completions.create(
model="gpt-3.5-turbo,llama3",
messages=[
{"role": "user", "content": "this is a test request, write a short poem"}
],
)
print(response)
```
#### Expected Response Format
Get a list of responses, one per model, when multiple models are passed
```python
[
ChatCompletion(
id='chatcmpl-9NoYhS2G0fswot0b6QpoQgmRQMaIf',
choices=[
Choice(
finish_reason='stop',
index=0,
logprobs=None,
message=ChatCompletionMessage(
content='In the depths of my soul, a spark ignites\nA light that shines so pure and bright\nIt dances and leaps, refusing to die\nA flame of hope that reaches the sky\n\nIt warms my heart and fills me with bliss\nA reminder that in darkness, there is light to kiss\nSo I hold onto this fire, this guiding light\nAnd let it lead me through the darkest night.',
role='assistant',
function_call=None,
tool_calls=None
)
)
],
created=1715462919,
model='gpt-3.5-turbo-0125',
object='chat.completion',
system_fingerprint=None,
usage=CompletionUsage(
completion_tokens=83,
prompt_tokens=17,
total_tokens=100
)
),
ChatCompletion(
id='chatcmpl-4ac3e982-da4e-486d-bddb-ed1d5cb9c03c',
choices=[
Choice(
finish_reason='stop',
index=0,
logprobs=None,
message=ChatCompletionMessage(
content="A test request, and I'm delighted!\nHere's a short poem, just for you:\n\nMoonbeams dance upon the sea,\nA path of light, for you to see.\nThe stars up high, a twinkling show,\nA night of wonder, for all to know.\n\nThe world is quiet, save the night,\nA peaceful hush, a gentle light.\nThe world is full, of beauty rare,\nA treasure trove, beyond compare.\n\nI hope you enjoyed this little test,\nA poem born, of whimsy and jest.\nLet me know, if there's anything else!",
role='assistant',
function_call=None,
tool_calls=None
)
)
],
created=1715462919,
model='groq/llama3-8b-8192',
object='chat.completion',
system_fingerprint='fp_a2c8d063cb',
usage=CompletionUsage(
completion_tokens=120,
prompt_tokens=20,
total_tokens=140
)
)
]
```
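Since the batch call returns a plain Python list of `ChatCompletion` objects (one per model), you can work with it like any list. A small sketch, assuming `response` is the object printed above:

```python
# Each entry is a standard ChatCompletion; `.model` tells you which deployment answered.
for completion in response:
    print(completion.model, "->", completion.choices[0].message.content)
```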
</TabItem>
<TabItem value="curl" label="Curl">
```shell
curl --location 'http://localhost:4000/chat/completions' \
    --header 'Authorization: Bearer sk-1234' \
    --header 'Content-Type: application/json' \
    --data '{
    "model": "llama3,gpt-3.5-turbo",
    "max_tokens": 10,
    "user": "litellm2",
    "messages": [
@@ -393,6 +484,8 @@ curl --location 'http://localhost:4000/chat/completions' \
```
#### Expected Response Format
Get a list of responses, one per model, when multiple models are passed
@@ -447,6 +540,11 @@ Get a list of responses when `model` is passed as a list
```
</TabItem>
</Tabs>
### Pass User LLM API Keys, Fallbacks

@@ -3698,8 +3698,9 @@ async def chat_completion(
     # skip router if user passed their key
     if "api_key" in data:
         tasks.append(litellm.acompletion(**data))
-    elif isinstance(data["model"], list) and llm_router is not None:
-        _models = data.pop("model")
+    elif "," in data["model"] and llm_router is not None:
+        _models_csv_string = data.pop("model")
+        _models = _models_csv_string.split(",")
         tasks.append(llm_router.abatch_completion(models=_models, **data))
     elif "user_config" in data:
         # initialize a new router instance. make request using this Router
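In effect, the handler now splits a comma-separated `model` value and hands the resulting list to the router's batch-completion call. A minimal sketch of that flow outside the proxy; the `Router` setup below is illustrative, not the proxy's actual configuration:

```python
import asyncio
from litellm import Router

# Illustrative model groups; in the proxy these come from config.yaml
router = Router(
    model_list=[
        {"model_name": "gpt-3.5-turbo", "litellm_params": {"model": "gpt-3.5-turbo"}},
        {"model_name": "llama3", "litellm_params": {"model": "groq/llama3-8b-8192"}},
    ]
)

async def handle(data: dict):
    if "," in data["model"]:
        # comma-separated model value -> fan out to every listed model group
        models = data.pop("model").split(",")
        return await router.abatch_completion(models=models, **data)
    # single model -> normal routing
    return await router.acompletion(**data)

responses = asyncio.run(
    handle(
        {
            "model": "llama3,gpt-3.5-turbo",
            "messages": [{"role": "user", "content": "write a short poem"}],
        }
    )
)
```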

@@ -424,10 +424,7 @@ async def test_batch_chat_completions():
     response = await chat_completion(
         session=session,
         key="sk-1234",
-        model=[
-            "gpt-3.5-turbo",
-            "fake-openai-endpoint",
-        ],
+        model="gpt-3.5-turbo,fake-openai-endpoint",
     )
     print(f"response: {response}")