From 32a0ae5eb2052268b9d0c8c96ac47af3ed76cbad Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 20 Nov 2023 12:52:10 -0800 Subject: [PATCH] Update README.md --- README.md | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/README.md b/README.md index efb3c4600..86fd31bbc 100644 --- a/README.md +++ b/README.md @@ -79,6 +79,44 @@ for chunk in result: print(chunk['choices'][0]['delta']) ``` +# Router - load balancing ([Docs](https://docs.litellm.ai/docs/routing)) +LiteLLM allows you to load balance between multiple deployments (Azure, OpenAI). It picks the deployment which is below rate-limit and has the least number of tokens used. +```python +from litellm import Router + +model_list = [{ # list of model deployments + "model_name": "gpt-3.5-turbo", # model alias + "litellm_params": { # params for litellm completion/embedding call + "model": "azure/chatgpt-v-2", # actual model name + "api_key": os.getenv("AZURE_API_KEY"), + "api_version": os.getenv("AZURE_API_VERSION"), + "api_base": os.getenv("AZURE_API_BASE") + } +}, { + "model_name": "gpt-3.5-turbo", + "litellm_params": { # params for litellm completion/embedding call + "model": "azure/chatgpt-functioncalling", + "api_key": os.getenv("AZURE_API_KEY"), + "api_version": os.getenv("AZURE_API_VERSION"), + "api_base": os.getenv("AZURE_API_BASE") + } +}, { + "model_name": "gpt-3.5-turbo", + "litellm_params": { # params for litellm completion/embedding call + "model": "gpt-3.5-turbo", + "api_key": os.getenv("OPENAI_API_KEY"), + } +}] + +router = Router(model_list=model_list) + +# openai.ChatCompletion.create replacement +response = await router.acompletion(model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Hey, how's it going?"}]) + +print(response) +``` + +## OpenAI Proxy - ([Docs](https://docs.litellm.ai/docs/simple_proxy)) **If you want to use non-openai models in an openai code base**, you can use litellm proxy. 
Create a server to call 100+ LLMs (Huggingface/Bedrock/TogetherAI/etc) in the OpenAI ChatCompletions & Completions format