From f2991203941095109707c47265090bcfb0588965 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Wed, 29 Nov 2023 18:08:00 -0800 Subject: [PATCH] (docs) router --- docs/my-website/docs/routing.md | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/docs/my-website/docs/routing.md b/docs/my-website/docs/routing.md index 03d95e9ef2..ddaea36885 100644 --- a/docs/my-website/docs/routing.md +++ b/docs/my-website/docs/routing.md @@ -69,17 +69,18 @@ print(response) - `router.text_completion()` - completion calls in the old OpenAI `/v1/completions` endpoint format ### Advanced -#### Routing Strategies - Shuffle, Rate Limit Aware +#### Routing Strategies - Weighted Pick, Rate Limit Aware Router provides 2 strategies for routing your calls across multiple deployments: - + -**Default** Randomly picks a deployment to route a call too. +**Default** Picks a deployment based on the provided **Requests per minute (rpm) or Tokens per minute (tpm)** ```python from litellm import Router +import asyncio model_list = [{ # list of model deployments "model_name": "gpt-3.5-turbo", # model alias @@ -87,7 +88,8 @@ model_list = [{ # list of model deployments "model": "azure/chatgpt-v-2", # actual model name "api_key": os.getenv("AZURE_API_KEY"), "api_version": os.getenv("AZURE_API_VERSION"), - "api_base": os.getenv("AZURE_API_BASE") + "api_base": os.getenv("AZURE_API_BASE"), + "rpm": 900, # requests per minute for this API } }, { "model_name": "gpt-3.5-turbo", @@ -95,24 +97,29 @@ model_list = [{ # list of model deployments "model": "azure/chatgpt-functioncalling", "api_key": os.getenv("AZURE_API_KEY"), "api_version": os.getenv("AZURE_API_VERSION"), - "api_base": os.getenv("AZURE_API_BASE") + "api_base": os.getenv("AZURE_API_BASE"), + "rpm": 10, } }, { "model_name": "gpt-3.5-turbo", "litellm_params": { # params for litellm completion/embedding call "model": "gpt-3.5-turbo", "api_key": os.getenv("OPENAI_API_KEY"), + "rpm": 10, } }] - +# init router router = Router(model_list=model_list, routing_strategy="simple-shuffle") +async def router_acompletion(): + response = await router.acompletion( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Hey, how's it going?"}] + ) + print(response) + return response - -response = await router.acompletion(model="gpt-3.5-turbo", - messages=[{"role": "user", "content": "Hey, how's it going?"}]) - -print(response) +asyncio.run(router_acompletion()) ``` @@ -163,7 +170,7 @@ router = Router(model_list=model_list, redis_host=os.environ["REDIS_HOST"], redis_password=os.environ["REDIS_PASSWORD"], redis_port=os.environ["REDIS_PORT"], - routing_strategy="usage-based-routing") + routing_strategy="simple-shuffle") response = await router.acompletion(model="gpt-3.5-turbo",