diff --git a/docs/my-website/docs/routing.md b/docs/my-website/docs/routing.md
index b24a0f63e..651c07395 100644
--- a/docs/my-website/docs/routing.md
+++ b/docs/my-website/docs/routing.md
@@ -315,6 +315,33 @@ router_settings:
 
 If `rpm` or `tpm` is not provided, it randomly picks a deployment
 
+You can also set a `weight` param, to control how often each deployment gets picked relative to the others.
+
+
+
+
+##### **LiteLLM Proxy Config.yaml**
+
+```yaml
+model_list:
+  - model_name: gpt-3.5-turbo
+    litellm_params:
+      model: azure/chatgpt-v-2
+      api_key: os.environ/AZURE_API_KEY
+      api_version: os.environ/AZURE_API_VERSION
+      api_base: os.environ/AZURE_API_BASE
+      rpm: 900
+  - model_name: gpt-3.5-turbo
+    litellm_params:
+      model: azure/chatgpt-functioncalling
+      api_key: os.environ/AZURE_API_KEY
+      api_version: os.environ/AZURE_API_VERSION
+      api_base: os.environ/AZURE_API_BASE
+      rpm: 10
+```
+
+##### **Python SDK**
+
 ```python
 from litellm import Router
 import asyncio
@@ -337,12 +364,69 @@ model_list = [{ # list of model deployments
 	"api_base": os.getenv("AZURE_API_BASE"),
 	"rpm": 10,
 	}
+},]
+
+# init router
+router = Router(model_list=model_list, routing_strategy="simple-shuffle")
+async def router_acompletion():
+	response = await router.acompletion(
+		model="gpt-3.5-turbo",
+		messages=[{"role": "user", "content": "Hey, how's it going?"}]
+	)
+	print(response)
+	return response
+
+asyncio.run(router_acompletion())
+```
+
+
+
+
+##### **LiteLLM Proxy Config.yaml**
+
+```yaml
+model_list:
+  - model_name: gpt-3.5-turbo
+    litellm_params:
+      model: azure/chatgpt-v-2
+      api_key: os.environ/AZURE_API_KEY
+      api_version: os.environ/AZURE_API_VERSION
+      api_base: os.environ/AZURE_API_BASE
+      weight: 9
+  - model_name: gpt-3.5-turbo
+    litellm_params:
+      model: azure/chatgpt-functioncalling
+      api_key: os.environ/AZURE_API_KEY
+      api_version: os.environ/AZURE_API_VERSION
+      api_base: os.environ/AZURE_API_BASE
+      weight: 1
+```
+
+
+##### **Python SDK**
+
+```python
+from litellm import Router
+import asyncio
+import os
+
+model_list = [{
+	"model_name": "gpt-3.5-turbo", # model alias
+	"litellm_params": {
+		"model": "azure/chatgpt-v-2", # actual model name
+		"api_key": os.getenv("AZURE_API_KEY"),
+		"api_version": os.getenv("AZURE_API_VERSION"),
+		"api_base": os.getenv("AZURE_API_BASE"),
+		"weight": 9, # pick this 90% of the time
+	}
 }, {
 	"model_name": "gpt-3.5-turbo",
-	"litellm_params": { # params for litellm completion/embedding call
-		"model": "gpt-3.5-turbo",
-		"api_key": os.getenv("OPENAI_API_KEY"),
-		"rpm": 10,
+	"litellm_params": {
+		"model": "azure/chatgpt-functioncalling",
+		"api_key": os.getenv("AZURE_API_KEY"),
+		"api_version": os.getenv("AZURE_API_VERSION"),
+		"api_base": os.getenv("AZURE_API_BASE"),
+		"weight": 1,
 	}
 }]
@@ -358,6 +442,10 @@ async def router_acompletion():
 
 asyncio.run(router_acompletion())
 ```
+
+
+
+
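For intuition on the docs added above: under `simple-shuffle`, the `weight` param behaves like a weighted random draw over the deployment list, so weights of 9 and 1 mean the first deployment is picked roughly 90% of the time. A minimal sketch of that selection idea, not litellm's internal routing code; `pick_deployment` is a hypothetical helper, and the fallback from `weight` to `rpm` mirrors how the docs describe the strategy:

```python
import random

def pick_deployment(model_list: list) -> dict:
    # Hypothetical helper: weighted random choice over deployments.
    # Falls back to `rpm`, then to 1, when `weight` is not set.
    weights = [
        d["litellm_params"].get("weight", d["litellm_params"].get("rpm", 1))
        for d in model_list
    ]
    return random.choices(model_list, weights=weights, k=1)[0]

deployments = [
    {"model_name": "gpt-3.5-turbo",
     "litellm_params": {"model": "azure/chatgpt-v-2", "weight": 9}},
    {"model_name": "gpt-3.5-turbo",
     "litellm_params": {"model": "azure/chatgpt-functioncalling", "weight": 1}},
]

# Over many draws, azure/chatgpt-v-2 is chosen ~90% of the time.
picks = [pick_deployment(deployments)["litellm_params"]["model"] for _ in range(10_000)]
print(picks.count("azure/chatgpt-v-2") / len(picks))  # ~0.9
```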
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index 9b12fa2d4..19f905be0 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -305,6 +305,18 @@
         "supports_function_calling": true,
         "source": "OpenAI needs to add pricing for this ft model, will be updated when added by OpenAI. Defaulting to base model pricing"
     },
+    "ft:gpt-4o-2024-08-06": {
+        "max_tokens": 16384,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 0.00000375,
+        "output_cost_per_token": 0.000015,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true
+    },
     "ft:gpt-4o-mini-2024-07-18": {
         "max_tokens": 16384,
         "max_input_tokens": 128000,
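As a sanity check on the new `ft:gpt-4o-2024-08-06` entry, its per-token rates work out to $3.75 per 1M input tokens and $15 per 1M output tokens. A quick sketch of the arithmetic litellm's cost tracking performs with these fields; `estimate_cost` is an illustrative helper, not a litellm API:

```python
# Rates from the "ft:gpt-4o-2024-08-06" entry added above.
INPUT_COST_PER_TOKEN = 0.00000375   # $3.75 per 1M input tokens
OUTPUT_COST_PER_TOKEN = 0.000015    # $15.00 per 1M output tokens

def estimate_cost(prompt_tokens: int, completion_tokens: int) -> float:
    # Cost = input tokens * input rate + output tokens * output rate.
    return (prompt_tokens * INPUT_COST_PER_TOKEN
            + completion_tokens * OUTPUT_COST_PER_TOKEN)

# e.g. a call with 1,000 prompt tokens and 500 completion tokens:
print(f"${estimate_cost(1_000, 500):.6f}")  # $0.011250
```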