forked from phoenix/litellm-mirror
docs(routing.md): add weight-based shuffling to docs
This commit is contained in:
parent
94db4ec830
commit
7f1531006c
2 changed files with 103 additions and 4 deletions
|
@ -315,6 +315,33 @@ router_settings:
|
|||
|
||||
If `rpm` or `tpm` is not provided, it randomly picks a deployment.
|
||||
|
||||
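You can also set a `weight` param on each deployment to control how often it gets picked relative to the others (e.g. weights of 9 and 1 pick the first deployment about 90% of the time).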
|
||||
|
||||
<Tabs>
|
||||
<TabItem value="rpm" label="RPM-based shuffling">
|
||||
|
||||
##### **LiteLLM Proxy Config.yaml**
|
||||
|
||||
```yaml
|
||||
model_list:
|
||||
- model_name: gpt-3.5-turbo
|
||||
litellm_params:
|
||||
model: azure/chatgpt-v-2
|
||||
api_key: os.environ/AZURE_API_KEY
|
||||
api_version: os.environ/AZURE_API_VERSION
|
||||
api_base: os.environ/AZURE_API_BASE
|
||||
rpm: 900
|
||||
- model_name: gpt-3.5-turbo
|
||||
litellm_params:
|
||||
model: azure/chatgpt-functioncalling
|
||||
api_key: os.environ/AZURE_API_KEY
|
||||
api_version: os.environ/AZURE_API_VERSION
|
||||
api_base: os.environ/AZURE_API_BASE
|
||||
rpm: 10
|
||||
```
|
||||
|
||||
##### **Python SDK**
|
||||
|
||||
```python
|
||||
from litellm import Router
|
||||
import asyncio
|
||||
|
@ -337,12 +364,68 @@ model_list = [{ # list of model deployments
|
|||
"api_base": os.getenv("AZURE_API_BASE"),
|
||||
"rpm": 10,
|
||||
}
|
||||
},]
|
||||
|
||||
# init router
|
||||
router = Router(model_list=model_list, routing_strategy="simple-shuffle")
|
||||
async def router_acompletion():
|
||||
response = await router.acompletion(
|
||||
model="gpt-3.5-turbo",
|
||||
messages=[{"role": "user", "content": "Hey, how's it going?"}]
|
||||
)
|
||||
print(response)
|
||||
return response
|
||||
|
||||
asyncio.run(router_acompletion())
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
<TabItem value="weight" label="Weight-based shuffling">
|
||||
|
||||
##### **LiteLLM Proxy Config.yaml**
|
||||
|
||||
```yaml
|
||||
model_list:
|
||||
- model_name: gpt-3.5-turbo
|
||||
litellm_params:
|
||||
model: azure/chatgpt-v-2
|
||||
api_key: os.environ/AZURE_API_KEY
|
||||
api_version: os.environ/AZURE_API_VERSION
|
||||
api_base: os.environ/AZURE_API_BASE
|
||||
weight: 9
|
||||
- model_name: gpt-3.5-turbo
|
||||
litellm_params:
|
||||
model: azure/chatgpt-functioncalling
|
||||
api_key: os.environ/AZURE_API_KEY
|
||||
api_version: os.environ/AZURE_API_VERSION
|
||||
api_base: os.environ/AZURE_API_BASE
|
||||
weight: 1
|
||||
```
|
||||
|
||||
|
||||
##### **Python SDK**
|
||||
|
||||
```python
|
||||
from litellm import Router
|
||||
import asyncio
|
||||
|
||||
model_list = [{
|
||||
"model_name": "gpt-3.5-turbo", # model alias
|
||||
"litellm_params": {
|
||||
"model": "azure/chatgpt-v-2", # actual model name
|
||||
"api_key": os.getenv("AZURE_API_KEY"),
|
||||
"api_version": os.getenv("AZURE_API_VERSION"),
|
||||
"api_base": os.getenv("AZURE_API_BASE"),
|
||||
"weight": 9, # pick this 90% of the time
|
||||
}
|
||||
}, {
|
||||
"model_name": "gpt-3.5-turbo",
|
||||
"litellm_params": { # params for litellm completion/embedding call
|
||||
"model": "gpt-3.5-turbo",
|
||||
"api_key": os.getenv("OPENAI_API_KEY"),
|
||||
"rpm": 10,
|
||||
"litellm_params": {
|
||||
"model": "azure/chatgpt-functioncalling",
|
||||
"api_key": os.getenv("AZURE_API_KEY"),
|
||||
"api_version": os.getenv("AZURE_API_VERSION"),
|
||||
"api_base": os.getenv("AZURE_API_BASE"),
|
||||
"weight": 1,
|
||||
}
|
||||
}]
|
||||
|
||||
|
@ -358,6 +441,10 @@ async def router_acompletion():
|
|||
|
||||
asyncio.run(router_acompletion())
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
</TabItem>
|
||||
<TabItem value="usage-based" label="Rate-Limit Aware">
|
||||
|
||||
|
|
|
@ -305,6 +305,18 @@
|
|||
"supports_function_calling": true,
|
||||
"source": "OpenAI needs to add pricing for this ft model, will be updated when added by OpenAI. Defaulting to base model pricing"
|
||||
},
|
||||
"ft:gpt-4o-2024-08-06": {
|
||||
"max_tokens": 16384,
|
||||
"max_input_tokens": 128000,
|
||||
"max_output_tokens": 16384,
|
||||
"input_cost_per_token": 0.00000375,
|
||||
"output_cost_per_token": 0.000015,
|
||||
"litellm_provider": "openai",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_parallel_function_calling": true,
|
||||
"supports_vision": true
|
||||
},
|
||||
"ft:gpt-4o-mini-2024-07-18": {
|
||||
"max_tokens": 16384,
|
||||
"max_input_tokens": 128000,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue