(docs) router

2025-04-24 18:24:20 +00:00 · 2023-11-29 18:08:00 -08:00 · 2023-11-29 18:08:00 -08:00 · f299120394
commit f299120394
parent 305faab542
1 changed files with 19 additions and 12 deletions
--- a/docs/my-website/docs/routing.md
+++ b/docs/my-website/docs/routing.md
@ -69,17 +69,18 @@ print(response)
 - `router.text_completion()` - completion calls in the old OpenAI `/v1/completions` endpoint format

 ### Advanced
-#### Routing Strategies - Shuffle, Rate Limit Aware
+#### Routing Strategies - Weighted Pick, Rate Limit Aware

 Router provides 2 strategies for routing your calls across multiple deployments: 

 <Tabs>
-<TabItem value="simple-shuffle" label="Simple Shuffle">
+<TabItem value="simple-shuffle" label="Weighted Pick">

-**Default** Randomly picks a deployment to route a call too.
+**Default** Picks a deployment based on the provided **Requests per minute (rpm) or Tokens per minute (tpm)**

 ```python
 from litellm import Router 
+import asyncio

 model_list = [{ # list of model deployments 
 	"model_name": "gpt-3.5-turbo", # model alias 
@ -87,7 +88,8 @@ model_list = [{ # list of model deployments
 		"model": "azure/chatgpt-v-2", # actual model name
 		"api_key": os.getenv("AZURE_API_KEY"),
 		"api_version": os.getenv("AZURE_API_VERSION"),
-		"api_base": os.getenv("AZURE_API_BASE")
+		"api_base": os.getenv("AZURE_API_BASE"),
+		"rpm": 900,			# requests per minute for this API
 	}
 }, {
    "model_name": "gpt-3.5-turbo", 
@ -95,24 +97,29 @@ model_list = [{ # list of model deployments
 		"model": "azure/chatgpt-functioncalling", 
 		"api_key": os.getenv("AZURE_API_KEY"),
 		"api_version": os.getenv("AZURE_API_VERSION"),
-		"api_base": os.getenv("AZURE_API_BASE")
+		"api_base": os.getenv("AZURE_API_BASE"),
+		"rpm": 10,
 	}
 }, {
    "model_name": "gpt-3.5-turbo", 
 	"litellm_params": { # params for litellm completion/embedding call 
 		"model": "gpt-3.5-turbo", 
 		"api_key": os.getenv("OPENAI_API_KEY"),
+		"rpm": 10,
 	}
 }]

-
+# init router
 router = Router(model_list=model_list, routing_strategy="simple-shuffle")
+async def router_acompletion():
+	response = await router.acompletion(
+		model="gpt-3.5-turbo", 
+		messages=[{"role": "user", "content": "Hey, how's it going?"}]
+	)
+	print(response)
+	return response

-
-response = await router.acompletion(model="gpt-3.5-turbo", 
-				messages=[{"role": "user", "content": "Hey, how's it going?"}])
-
-print(response)
+asyncio.run(router_acompletion())
 ```
 </TabItem>
 <TabItem value="usage-based" label="Rate-Limit Aware">
@ -163,7 +170,7 @@ router = Router(model_list=model_list,
                redis_host=os.environ["REDIS_HOST"], 
 				redis_password=os.environ["REDIS_PASSWORD"], 
 				redis_port=os.environ["REDIS_PORT"], 
-                routing_strategy="usage-based-routing")
+                routing_strategy="simple-shuffle")


 response = await router.acompletion(model="gpt-3.5-turbo",