From c02f222b208bc75019e4c34d78891fbb155229af Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Sat, 2 Dec 2023 15:38:59 -0800
Subject: [PATCH] Update README.md

---
 README.md | 39 +--------------------------------------
 1 file changed, 1 insertion(+), 38 deletions(-)

diff --git a/README.md b/README.md
index aa720ebc7..7fad2d9da 100644
--- a/README.md
+++ b/README.md
@@ -77,45 +77,8 @@ for part in response:
     print(part.choices[0].delta.content or "")
 ```
 
-# Router - load balancing([Docs](https://docs.litellm.ai/docs/routing))
-LiteLLM allows you to load balance between multiple deployments (Azure, OpenAI). It picks the deployment which is below rate-limit and has the least amount of tokens used.
-```python
-from litellm import Router
-
-model_list = [{ # list of model deployments
-    "model_name": "gpt-3.5-turbo", # model alias
-    "litellm_params": { # params for litellm completion/embedding call
-        "model": "azure/chatgpt-v-2", # actual model name
-        "api_key": os.getenv("AZURE_API_KEY"),
-        "api_version": os.getenv("AZURE_API_VERSION"),
-        "api_base": os.getenv("AZURE_API_BASE")
-    }
-}, {
-    "model_name": "gpt-3.5-turbo",
-    "litellm_params": { # params for litellm completion/embedding call
-        "model": "azure/chatgpt-functioncalling",
-        "api_key": os.getenv("AZURE_API_KEY"),
-        "api_version": os.getenv("AZURE_API_VERSION"),
-        "api_base": os.getenv("AZURE_API_BASE")
-    }
-}, {
-    "model_name": "gpt-3.5-turbo",
-    "litellm_params": { # params for litellm completion/embedding call
-        "model": "gpt-3.5-turbo",
-        "api_key": os.getenv("OPENAI_API_KEY"),
-    }
-}]
-
-router = Router(model_list=model_list)
-
-# openai.ChatCompletion.create replacement
-response = router.completion(model="gpt-3.5-turbo",
-                             messages=[{"role": "user", "content": "Hey, how's it going?"}])
-
-print(response)
-```
-
 ## OpenAI Proxy - ([Docs](https://docs.litellm.ai/docs/simple_proxy))
+
 LiteLLM Proxy manages:
 * Calling 100+ LLMs Huggingface/Bedrock/TogetherAI/etc. in the OpenAI ChatCompletions & Completions format
 * Load balancing - between Multiple Models + Deployments of the same model LiteLLM proxy can handle 1k+ requests/second during load tests
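
The proxy section kept by this patch says requests are accepted in the OpenAI ChatCompletions & Completions format. As a minimal sketch (not part of the patch itself), the call below assumes a LiteLLM proxy is already running locally; the base URL, port, and model alias are placeholder assumptions, not values taken from this repository.

```python
# Minimal sketch: calling a locally running LiteLLM proxy through the
# OpenAI Python SDK (v1+). The base_url, port, and model alias below are
# placeholder assumptions, not values defined by this patch.
from openai import OpenAI

client = OpenAI(
    api_key="anything",              # the proxy holds the provider credentials
    base_url="http://0.0.0.0:8000",  # assumed local proxy address
)

response = client.chat.completions.create(
    model="gpt-3.5-turbo",  # model alias assumed to be configured on the proxy
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
print(response.choices[0].message.content)
```

Because the proxy speaks the OpenAI wire format, existing OpenAI-based client code should only need a `base_url` change to route through the proxy's load balancing.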