(docs) router

This commit is contained in:
ishaan-jaff 2023-11-29 18:08:00 -08:00
parent 305faab542
commit f299120394

View file

@ -69,17 +69,18 @@ print(response)
- `router.text_completion()` - completion calls in the old OpenAI `/v1/completions` endpoint format
### Advanced
#### Routing Strategies - Shuffle, Rate Limit Aware
#### Routing Strategies - Weighted Pick, Rate Limit Aware
Router provides 2 strategies for routing your calls across multiple deployments:
<Tabs>
<TabItem value="simple-shuffle" label="Simple Shuffle">
<TabItem value="simple-shuffle" label="Weighted Pick">
**Default** Randomly picks a deployment to route a call too.
**Default** Picks a deployment based on the provided **Requests per minute (rpm) or Tokens per minute (tpm)**
```python
from litellm import Router
import asyncio
model_list = [{ # list of model deployments
"model_name": "gpt-3.5-turbo", # model alias
@ -87,7 +88,8 @@ model_list = [{ # list of model deployments
"model": "azure/chatgpt-v-2", # actual model name
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE")
"api_base": os.getenv("AZURE_API_BASE"),
"rpm": 900, # requests per minute for this API
}
}, {
"model_name": "gpt-3.5-turbo",
@ -95,24 +97,29 @@ model_list = [{ # list of model deployments
"model": "azure/chatgpt-functioncalling",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE")
"api_base": os.getenv("AZURE_API_BASE"),
"rpm": 10,
}
}, {
"model_name": "gpt-3.5-turbo",
"litellm_params": { # params for litellm completion/embedding call
"model": "gpt-3.5-turbo",
"api_key": os.getenv("OPENAI_API_KEY"),
"rpm": 10,
}
}]
# init router
router = Router(model_list=model_list, routing_strategy="simple-shuffle")
async def router_acompletion():
response = await router.acompletion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "Hey, how's it going?"}]
)
print(response)
return response
response = await router.acompletion(model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "Hey, how's it going?"}])
print(response)
asyncio.run(router_acompletion())
```
</TabItem>
<TabItem value="usage-based" label="Rate-Limit Aware">
@ -163,7 +170,7 @@ router = Router(model_list=model_list,
redis_host=os.environ["REDIS_HOST"],
redis_password=os.environ["REDIS_PASSWORD"],
redis_port=os.environ["REDIS_PORT"],
routing_strategy="usage-based-routing")
routing_strategy="simple-shuffle")
response = await router.acompletion(model="gpt-3.5-turbo",