From e96d97d9e58dc26568d2b53afbe2abaa97a2913d Mon Sep 17 00:00:00 2001
From: CLARKBENHAM
Date: Mon, 8 Apr 2024 21:31:21 -0700
Subject: [PATCH] remove formatting changes

---
 docs/my-website/docs/routing.md | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/docs/my-website/docs/routing.md b/docs/my-website/docs/routing.md
index 404c72e44..85649eb6b 100644
--- a/docs/my-website/docs/routing.md
+++ b/docs/my-website/docs/routing.md
@@ -117,7 +117,10 @@ import asyncio
 model_list = [{ ... }]
 
 # init router
-router = Router(model_list=model_list, routing_strategy="latency-based-routing") # 👈 set routing strategy
+router = Router(model_list=model_list,
+                routing_strategy="latency-based-routing", # 👈 set routing strategy
+                enable_pre_call_checks=True, # enables router rate limits for concurrent calls
+                )
 
 ## CALL 1+2
 tasks = []
@@ -257,8 +260,9 @@
 router = Router(model_list=model_list,
                 redis_host=os.environ["REDIS_HOST"],
                 redis_password=os.environ["REDIS_PASSWORD"],
                 redis_port=os.environ["REDIS_PORT"],
-                routing_strategy="usage-based-routing")
-
+                routing_strategy="usage-based-routing",
+                enable_pre_call_checks=True, # enables router rate limits for concurrent calls
+                )
 response = await router.acompletion(model="gpt-3.5-turbo",
                 messages=[{"role": "user", "content": "Hey, how's it going?"}]
@@ -555,7 +559,11 @@ router = Router(model_list: Optional[list] = None,
 
 ## Pre-Call Checks (Context Window)
 
-Enable pre-call checks to filter out deployments with context window limit < messages for a call.
+Enable pre-call checks to filter out:
+1. deployments with a context window smaller than the message tokens for a call.
+2. deployments that have exceeded their rate limits when making concurrent calls (e.g. `asyncio.gather(*[
+   router.acompletion(model="gpt-3.5-turbo", messages=m) for m in list_of_messages
+   ])`).
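
For reference, below is a minimal sketch of the end-to-end pattern these doc changes describe: a Router created with pre-call checks enabled, then hit with concurrent `acompletion` calls via `asyncio.gather`. The `rpm` limit, the model name, and the `OPENAI_API_KEY` environment variable are illustrative assumptions, not part of the patch; the flag name `enable_pre_call_checks` follows the LiteLLM Router API.

```python
# Hypothetical sketch of the concurrent-call pattern documented above.
# Assumes OPENAI_API_KEY is set; the rpm value is an illustrative rate limit.
import asyncio
import os

from litellm import Router

model_list = [
    {
        "model_name": "gpt-3.5-turbo",
        "litellm_params": {
            "model": "gpt-3.5-turbo",
            "api_key": os.getenv("OPENAI_API_KEY"),
            "rpm": 100,  # illustrative per-deployment rate limit
        },
    }
]

router = Router(
    model_list=model_list,
    routing_strategy="usage-based-routing",
    enable_pre_call_checks=True,  # skip deployments that are over their rate limits
)

async def main():
    list_of_messages = [
        [{"role": "user", "content": f"Hey, how's it going? ({i})"}] for i in range(5)
    ]
    # Fire the calls concurrently; pre-call checks filter out deployments
    # that have already exceeded their rate limits.
    responses = await asyncio.gather(
        *[router.acompletion(model="gpt-3.5-turbo", messages=m) for m in list_of_messages]
    )
    for r in responses:
        print(r.choices[0].message.content)

asyncio.run(main())
```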