From 32a0ae5eb2052268b9d0c8c96ac47af3ed76cbad Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 20 Nov 2023 12:52:10 -0800 Subject: [PATCH] Update README.md --- README.md | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/README.md b/README.md index efb3c4600..86fd31bbc 100644 --- a/README.md +++ b/README.md @@ -79,6 +79,44 @@ for chunk in result: print(chunk['choices'][0]['delta']) ``` +# Router - load balancing ([Docs](https://docs.litellm.ai/docs/routing)) +LiteLLM allows you to load balance between multiple deployments (Azure, OpenAI). It picks the deployment which is below rate-limit and has the least number of tokens used. +```python +from litellm import Router + +model_list = [{ # list of model deployments + "model_name": "gpt-3.5-turbo", # model alias + "litellm_params": { # params for litellm completion/embedding call + "model": "azure/chatgpt-v-2", # actual model name + "api_key": os.getenv("AZURE_API_KEY"), + "api_version": os.getenv("AZURE_API_VERSION"), + "api_base": os.getenv("AZURE_API_BASE") + } +}, { + "model_name": "gpt-3.5-turbo", + "litellm_params": { # params for litellm completion/embedding call + "model": "azure/chatgpt-functioncalling", + "api_key": os.getenv("AZURE_API_KEY"), + "api_version": os.getenv("AZURE_API_VERSION"), + "api_base": os.getenv("AZURE_API_BASE") + } +}, { + "model_name": "gpt-3.5-turbo", + "litellm_params": { # params for litellm completion/embedding call + "model": "gpt-3.5-turbo", + "api_key": os.getenv("OPENAI_API_KEY"), + } +}] + +router = Router(model_list=model_list) + +# openai.ChatCompletion.create replacement +response = await router.acompletion(model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Hey, how's it going?"}]) + +print(response) +``` + +## OpenAI Proxy - ([Docs](https://docs.litellm.ai/docs/simple_proxy)) **If you want to use non-openai models in an openai code base**, you can use litellm proxy. 
Create a server to call 100+ LLMs (Huggingface/Bedrock/TogetherAI/etc) in the OpenAI ChatCompletions & Completions format