forked from phoenix/litellm-mirror
(docs) router
This commit is contained in:
parent
305faab542
commit
f299120394
1 changed files with 19 additions and 12 deletions
|
@ -69,17 +69,18 @@ print(response)
|
|||
- `router.text_completion()` - completion calls in the old OpenAI `/v1/completions` endpoint format
|
||||
|
||||
### Advanced
|
||||
#### Routing Strategies - Shuffle, Rate Limit Aware
|
||||
#### Routing Strategies - Weighted Pick, Rate Limit Aware
|
||||
|
||||
Router provides 2 strategies for routing your calls across multiple deployments:
|
||||
|
||||
<Tabs>
|
||||
<TabItem value="simple-shuffle" label="Simple Shuffle">
|
||||
<TabItem value="simple-shuffle" label="Weighted Pick">
|
||||
|
||||
**Default** Randomly picks a deployment to route a call too.
|
||||
**Default** Picks a deployment based on the provided **Requests per minute (rpm) or Tokens per minute (tpm)**
|
||||
|
||||
```python
|
||||
from litellm import Router
|
||||
import asyncio
|
||||
|
||||
model_list = [{ # list of model deployments
|
||||
"model_name": "gpt-3.5-turbo", # model alias
|
||||
|
@ -87,7 +88,8 @@ model_list = [{ # list of model deployments
|
|||
"model": "azure/chatgpt-v-2", # actual model name
|
||||
"api_key": os.getenv("AZURE_API_KEY"),
|
||||
"api_version": os.getenv("AZURE_API_VERSION"),
|
||||
"api_base": os.getenv("AZURE_API_BASE")
|
||||
"api_base": os.getenv("AZURE_API_BASE"),
|
||||
"rpm": 900, # requests per minute for this API
|
||||
}
|
||||
}, {
|
||||
"model_name": "gpt-3.5-turbo",
|
||||
|
@ -95,24 +97,29 @@ model_list = [{ # list of model deployments
|
|||
"model": "azure/chatgpt-functioncalling",
|
||||
"api_key": os.getenv("AZURE_API_KEY"),
|
||||
"api_version": os.getenv("AZURE_API_VERSION"),
|
||||
"api_base": os.getenv("AZURE_API_BASE")
|
||||
"api_base": os.getenv("AZURE_API_BASE"),
|
||||
"rpm": 10,
|
||||
}
|
||||
}, {
|
||||
"model_name": "gpt-3.5-turbo",
|
||||
"litellm_params": { # params for litellm completion/embedding call
|
||||
"model": "gpt-3.5-turbo",
|
||||
"api_key": os.getenv("OPENAI_API_KEY"),
|
||||
"rpm": 10,
|
||||
}
|
||||
}]
|
||||
|
||||
|
||||
# init router
|
||||
router = Router(model_list=model_list, routing_strategy="simple-shuffle")
|
||||
async def router_acompletion():
|
||||
response = await router.acompletion(
|
||||
model="gpt-3.5-turbo",
|
||||
messages=[{"role": "user", "content": "Hey, how's it going?"}]
|
||||
)
|
||||
print(response)
|
||||
return response
|
||||
|
||||
|
||||
response = await router.acompletion(model="gpt-3.5-turbo",
|
||||
messages=[{"role": "user", "content": "Hey, how's it going?"}])
|
||||
|
||||
print(response)
|
||||
asyncio.run(router_acompletion())
|
||||
```
|
||||
</TabItem>
|
||||
<TabItem value="usage-based" label="Rate-Limit Aware">
|
||||
|
@ -163,7 +170,7 @@ router = Router(model_list=model_list,
|
|||
redis_host=os.environ["REDIS_HOST"],
|
||||
redis_password=os.environ["REDIS_PASSWORD"],
|
||||
redis_port=os.environ["REDIS_PORT"],
|
||||
routing_strategy="usage-based-routing")
|
||||
routing_strategy="simple-shuffle")
|
||||
|
||||
|
||||
response = await router.acompletion(model="gpt-3.5-turbo",
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue