diff --git a/docs/my-website/docs/routing.md b/docs/my-website/docs/routing.md
index b24a0f63e..651c07395 100644
--- a/docs/my-website/docs/routing.md
+++ b/docs/my-website/docs/routing.md
@@ -315,6 +315,33 @@ router_settings:
If `rpm` or `tpm` is not provided, it picks a deployment at random.
+You can also set a `weight` param, to control how often each deployment gets picked. Both the rpm-based and weight-based setups are shown below.
+
+##### **LiteLLM Proxy Config.yaml**
+
+```yaml
+model_list:
+  - model_name: gpt-3.5-turbo
+    litellm_params:
+      model: azure/chatgpt-v-2
+      api_key: os.environ/AZURE_API_KEY
+      api_version: os.environ/AZURE_API_VERSION
+      api_base: os.environ/AZURE_API_BASE
+      rpm: 900
+  - model_name: gpt-3.5-turbo
+    litellm_params:
+      model: azure/chatgpt-functioncalling
+      api_key: os.environ/AZURE_API_KEY
+      api_version: os.environ/AZURE_API_VERSION
+      api_base: os.environ/AZURE_API_BASE
+      rpm: 10
+```
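+
+With `simple-shuffle`, traffic is split in proportion to each deployment's `rpm`, so the 900-rpm deployment above should receive roughly 99% of requests.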
+
+##### **Python SDK**
+
```python
from litellm import Router
import asyncio
+import os
@@ -337,12 +364,68 @@ model_list = [{ # list of model deployments
"api_base": os.getenv("AZURE_API_BASE"),
"rpm": 10,
}
+},]
+
+# init router with the simple-shuffle strategy
+router = Router(model_list=model_list, routing_strategy="simple-shuffle")
+
+async def router_acompletion():
+    response = await router.acompletion(
+        model="gpt-3.5-turbo",
+        messages=[{"role": "user", "content": "Hey, how's it going?"}]
+    )
+    print(response)
+    return response
+
+asyncio.run(router_acompletion())
+```
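+
+The selection itself behaves like a weighted random choice. A minimal sketch of the idea in plain Python (not LiteLLM's internal code; the `api_base` values are hypothetical placeholders):
+
+```python
+import random
+from collections import Counter
+
+# Two hypothetical deployments with the same rpm values as above
+deployments = [
+    {"api_base": "https://azure-1.example.com", "rpm": 900},
+    {"api_base": "https://azure-2.example.com", "rpm": 10},
+]
+
+# Pick 10,000 times, weighting each deployment by its rpm
+picks = Counter(
+    random.choices(
+        [d["api_base"] for d in deployments],
+        weights=[d["rpm"] for d in deployments],
+        k=10_000,
+    )
+)
+print(picks)  # the 900-rpm deployment should win ~99% of the picks
+```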
+
+The examples below show the same two deployments weighted explicitly with `weight` instead of `rpm`:
+
+##### **LiteLLM Proxy Config.yaml**
+
+```yaml
+model_list:
+  - model_name: gpt-3.5-turbo
+    litellm_params:
+      model: azure/chatgpt-v-2
+      api_key: os.environ/AZURE_API_KEY
+      api_version: os.environ/AZURE_API_VERSION
+      api_base: os.environ/AZURE_API_BASE
+      weight: 9
+  - model_name: gpt-3.5-turbo
+    litellm_params:
+      model: azure/chatgpt-functioncalling
+      api_key: os.environ/AZURE_API_KEY
+      api_version: os.environ/AZURE_API_VERSION
+      api_base: os.environ/AZURE_API_BASE
+      weight: 1
+```
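+
+Here the weights are normalized over their total (9 + 1 = 10), so the first deployment should serve ~90% of requests and the second ~10%.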
+
+
+##### **Python SDK**
+
+```python
+from litellm import Router
+import asyncio
+import os
+
+model_list = [{
+    "model_name": "gpt-3.5-turbo",  # model alias
+    "litellm_params": {
+        "model": "azure/chatgpt-v-2",  # actual model name
+        "api_key": os.getenv("AZURE_API_KEY"),
+        "api_version": os.getenv("AZURE_API_VERSION"),
+        "api_base": os.getenv("AZURE_API_BASE"),
+        "weight": 9,  # pick this deployment 90% of the time
+    }
}, {
    "model_name": "gpt-3.5-turbo",
-    "litellm_params": { # params for litellm completion/embedding call
-        "model": "gpt-3.5-turbo",
-        "api_key": os.getenv("OPENAI_API_KEY"),
-        "rpm": 10,
+    "litellm_params": {
+        "model": "azure/chatgpt-functioncalling",
+        "api_key": os.getenv("AZURE_API_KEY"),
+        "api_version": os.getenv("AZURE_API_VERSION"),
+        "api_base": os.getenv("AZURE_API_BASE"),
+        "weight": 1,
    }
}]
@@ -358,6 +441,10 @@ async def router_acompletion():
asyncio.run(router_acompletion())
```
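+
+To sanity-check the split, you can count which deployment served each call. A hedged sketch, reusing the `router` from the example above and assuming recent LiteLLM versions expose the chosen deployment's id via `response._hidden_params["model_id"]`:
+
+```python
+import asyncio
+from collections import Counter
+
+async def sample_distribution(n: int = 100):
+    counts = Counter()
+    for _ in range(n):
+        resp = await router.acompletion(
+            model="gpt-3.5-turbo",
+            messages=[{"role": "user", "content": "ping"}],
+        )
+        # model_id identifies the deployment that handled the request
+        counts[resp._hidden_params.get("model_id")] += 1
+    print(counts)  # expect roughly a 9:1 split with the weights above
+
+asyncio.run(sample_distribution())
+```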
+
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index 9b12fa2d4..19f905be0 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -305,6 +305,18 @@
"supports_function_calling": true,
"source": "OpenAI needs to add pricing for this ft model, will be updated when added by OpenAI. Defaulting to base model pricing"
},
+ "ft:gpt-4o-2024-08-06": {
+ "max_tokens": 16384,
+ "max_input_tokens": 128000,
+ "max_output_tokens": 16384,
+ "input_cost_per_token": 0.00000375,
+ "output_cost_per_token": 0.000015,
+ "litellm_provider": "openai",
+ "mode": "chat",
+ "supports_function_calling": true,
+ "supports_parallel_function_calling": true,
+ "supports_vision": true
+ },
"ft:gpt-4o-mini-2024-07-18": {
"max_tokens": 16384,
"max_input_tokens": 128000,