From fbfb86f1e64cdc3f802c0a732316f3a136261a3b Mon Sep 17 00:00:00 2001
From: Daniel Hnyk
Date: Mon, 10 Mar 2025 10:24:29 +0100
Subject: [PATCH] better explain behavior of usage-based-routing-v2

---
 docs/my-website/docs/routing.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/my-website/docs/routing.md b/docs/my-website/docs/routing.md
index 0ad28b24f4..a5ef24a3f2 100644
--- a/docs/my-website/docs/routing.md
+++ b/docs/my-website/docs/routing.md
@@ -163,9 +163,9 @@ Router provides 4 strategies for routing your calls across multiple deployments:
 
 **Filters out deployment if tpm/rpm limit exceeded** - If you pass in the deployment's tpm/rpm limits.
 
-Routes to **deployment with lowest TPM usage** for that minute.
+Routes to **deployment with lowest TPM usage** for that minute. If two deployments have the same usage, one is chosen at random. A higher-limit deployment is not automatically favored up front, but if usage spikes, the smaller-limit deployment may hit its cap and be filtered out, so the higher-limit deployment stays in the pool longer.
 
-In production, we use Redis to track usage (TPM/RPM) across multiple deployments. This implementation uses **async redis calls** (redis.incr and redis.mget).
+In production, we use Redis to track usage (TPM/RPM) across multiple deployments. This implementation uses **async redis calls** (`redis.incr` and `redis.mget`).
 
 For Azure, [you get 6 RPM per 1000 TPM](https://stackoverflow.com/questions/77368844/what-is-the-request-per-minute-rate-limit-for-azure-openai-models-for-gpt-3-5-tu)
 
@@ -1639,4 +1639,4 @@ class RouterGeneralSettings(BaseModel):
     pass_through_all_models: bool = Field(
         default=False
     )  # if passed a model not llm_router model list, pass through the request to litellm.acompletion/embedding
-```
\ No newline at end of file
+```
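
As context for the paragraph changed above, here is a minimal sketch of a router configured with this strategy, following the `usage-based-routing-v2` examples elsewhere in routing.md; the deployment names, TPM/RPM limits, and Redis settings are illustrative assumptions, not part of this patch:

```python
import asyncio
import os

from litellm import Router

# Two deployments in one model group, with different rate limits (illustrative values).
model_list = [
    {
        "model_name": "gpt-3.5-turbo",  # model group alias
        "litellm_params": {
            "model": "azure/chatgpt-v-2",
            "api_key": os.getenv("AZURE_API_KEY"),
            "api_base": os.getenv("AZURE_API_BASE"),
            "api_version": os.getenv("AZURE_API_VERSION"),
            "tpm": 100000,  # higher-limit deployment
            "rpm": 600,
        },
    },
    {
        "model_name": "gpt-3.5-turbo",
        "litellm_params": {
            "model": "gpt-3.5-turbo",
            "api_key": os.getenv("OPENAI_API_KEY"),
            "tpm": 1000,  # smaller-limit deployment; filtered out once its cap is hit
            "rpm": 6,
        },
    },
]

router = Router(
    model_list=model_list,
    routing_strategy="usage-based-routing-v2",
    # Redis shares the per-minute TPM/RPM counters across router instances.
    redis_host=os.getenv("REDIS_HOST", "localhost"),
    redis_port=int(os.getenv("REDIS_PORT", "6379")),
    redis_password=os.getenv("REDIS_PASSWORD"),
    enable_pre_call_checks=True,  # filter out deployments that exceeded their limits
)


async def main():
    # Each call goes to the deployment with the lowest TPM usage this minute;
    # ties between equally-used deployments are broken randomly.
    response = await router.acompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hey, how's it going?"}],
    )
    print(response)


asyncio.run(main())
```

Under light traffic both deployments are picked roughly equally; once the 1000-TPM deployment hits its cap within a minute it is excluded, which is the behavior the rewritten sentence spells out.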