diff --git a/docs/my-website/docs/routing.md b/docs/my-website/docs/routing.md
new file mode 100644
index 000000000..f99c20f32
--- /dev/null
+++ b/docs/my-website/docs/routing.md
@@ -0,0 +1,50 @@
+# Load-Balance Router
+
+Use this if you're trying to load-balance across multiple deployments.
+
+```shell
+pip install litellm
+```
+
+```python
+import os
+from litellm import Router
+
+model_list = [{ # list of model deployments
+    "model_name": "gpt-3.5-turbo", # openai model name
+    "litellm_params": { # params for litellm completion/embedding call
+        "model": "azure/chatgpt-v-2",
+        "api_key": os.getenv("AZURE_API_KEY"),
+        "api_version": os.getenv("AZURE_API_VERSION"),
+        "api_base": os.getenv("AZURE_API_BASE")
+    },
+    "tpm": 240000,
+    "rpm": 1800
+}, {
+    "model_name": "gpt-3.5-turbo", # openai model name
+    "litellm_params": { # params for litellm completion/embedding call
+        "model": "azure/chatgpt-functioncalling",
+        "api_key": os.getenv("AZURE_API_KEY"),
+        "api_version": os.getenv("AZURE_API_VERSION"),
+        "api_base": os.getenv("AZURE_API_BASE")
+    },
+    "tpm": 240000,
+    "rpm": 1800
+}, {
+    "model_name": "gpt-3.5-turbo", # openai model name
+    "litellm_params": { # params for litellm completion/embedding call
+        "model": "gpt-3.5-turbo",
+        "api_key": os.getenv("OPENAI_API_KEY"),
+    },
+    "tpm": 1000000,
+    "rpm": 9000
+}]
+
+router = Router(model_list=model_list)
+
+# openai.ChatCompletion.create replacement
+response = router.completion(model="gpt-3.5-turbo",
+                             messages=[{"role": "user", "content": "Hey, how's it going?"}])
+
+print(response)
+```
\ No newline at end of file
diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js
index c405d68ee..f3bd13255 100644
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@@ -83,7 +83,7 @@ const sidebars = {
     },
     "proxy_server",
     "budget_manager",
-    "rate_limit_manager",
+    "routing",
    "set_keys",
     "completion/token_usage",
     {
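For context on the `tpm`/`rpm` fields in the `model_list` above: they give each deployment a per-minute token and request budget that a router can use when deciding where to send a call. The sketch below is a conceptual illustration of capacity-aware selection only, not litellm's actual routing implementation; `pick_deployment` and `request_counts` are hypothetical names introduced here.

```python
# Conceptual sketch only; litellm's real routing logic lives in the library.
from collections import defaultdict

request_counts = defaultdict(int)  # hypothetical per-deployment requests sent this minute

def pick_deployment(model_name, model_list):
    """Pick the deployment for `model_name` with the most remaining RPM headroom."""
    candidates = [
        (i, d) for i, d in enumerate(model_list) if d["model_name"] == model_name
    ]
    if not candidates:
        raise ValueError(f"no deployment found for {model_name}")
    # Prefer the deployment whose per-minute request budget is least consumed.
    index, deployment = max(
        candidates, key=lambda pair: pair[1]["rpm"] - request_counts[pair[0]]
    )
    request_counts[index] += 1
    return deployment
```

Under this toy policy, with the `model_list` above, the OpenAI deployment (rpm 9000) would absorb traffic until its remaining headroom drops to that of the Azure deployments (rpm 1800), after which requests spread across all three.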