docs(routing.md): add timeouts per model
parent 9988a39169
commit 265f5ef6da
1 changed file with 31 additions and 0 deletions
@@ -302,6 +302,7 @@ asyncio.run(router_acompletion())

The timeout set on the router applies to the entire length of the call, and is passed down to the completion() call level as well.

**Global Timeouts**

```python
from litellm import Router
@@ -313,6 +314,36 @@ router = Router(model_list=model_list,
print(response)
```
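The hunk boundary above elides the middle of this example. For reference, a self-contained sketch of a global timeout, reusing the same Azure deployment config shown elsewhere on this page (the 30-second value is an assumption, not the elided line):

```python
import asyncio
import os
from litellm import Router

model_list = [{
    "model_name": "gpt-3.5-turbo",
    "litellm_params": {
        "model": "azure/chatgpt-v-2",
        "api_key": os.getenv("AZURE_API_KEY"),
        "api_version": os.getenv("AZURE_API_VERSION"),
        "api_base": os.getenv("AZURE_API_BASE"),
    }
}]

# a router-wide timeout caps the entire call and is passed down to each
# underlying completion() call (the 30s value here is assumed)
router = Router(model_list=model_list, timeout=30)

async def router_acompletion():
    response = await router.acompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hey, how's it going?"}]
    )
    print(response)

asyncio.run(router_acompletion())
```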
**Timeouts per model**

```python
from litellm import Router
import asyncio
import os

model_list = [{
    "model_name": "gpt-3.5-turbo",
    "litellm_params": {
        "model": "azure/chatgpt-v-2",
        "api_key": os.getenv("AZURE_API_KEY"),
        "api_version": os.getenv("AZURE_API_VERSION"),
        "api_base": os.getenv("AZURE_API_BASE"),
        "timeout": 300,  # sets a 5-minute timeout
        "stream_timeout": 30  # sets a 30s timeout for streaming calls
    }
}]

# init router
router = Router(model_list=model_list, routing_strategy="least-busy")

async def router_acompletion():
    response = await router.acompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hey, how's it going?"}]
    )
    print(response)
    return response

asyncio.run(router_acompletion())
```
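Since the config above sets both `timeout` and `stream_timeout`, here is a quick usage sketch of a streaming call, which the example above describes as governed by `stream_timeout` (30s) rather than the general `timeout`. It reuses the `router` defined just above; the prompt and iteration pattern are illustrative:

```python
async def stream_example():
    # stream=True makes this call subject to the per-model "stream_timeout"
    response = await router.acompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hey, how's it going?"}],
        stream=True
    )
    # streamed responses arrive as async-iterable chunks
    async for chunk in response:
        print(chunk)

asyncio.run(stream_example())
```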
### Cooldowns

Set the limit for how many calls a model is allowed to fail within a minute before it is cooled down for a minute.
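A minimal sketch of this, assuming the Router exposes an `allowed_fails` parameter for the failure threshold (the parameter name and value are assumptions; check the Router signature in your litellm version):

```python
from litellm import Router

# assumed parameter: allowed_fails — how many failures per minute a
# deployment may accumulate before being cooled down for a minute
router = Router(model_list=model_list, allowed_fails=1)
```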