diff --git a/docs/my-website/docs/routing.md b/docs/my-website/docs/routing.md index 167e909166..ec692147b8 100644 --- a/docs/my-website/docs/routing.md +++ b/docs/my-website/docs/routing.md @@ -167,7 +167,7 @@ Routes to **deployment with lowest TPM usage** for that minute. In production, we use Redis to track usage (TPM/RPM) across multiple deployments. This implementation uses **async redis calls** (redis.incr and redis.mget). -For Azure, your RPM = TPM/6. +For Azure, [you get 6 RPM per 1000 TPM](https://stackoverflow.com/questions/77368844/what-is-the-request-per-minute-rate-limit-for-azure-openai-models-for-gpt-3-5-tu). diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index a33e2b601f..8a39410e78 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -228,12 +228,12 @@ const sidebars = { }, ], }, + "routing", "scheduler", { type: "category", label: "🚅 LiteLLM Python SDK", items: [ - "routing", "set_keys", "completion/token_usage", "sdk_custom_pricing",