From 4d06c296e3373fc1f74e6c0d6ff05105095c196c Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 28 Nov 2023 16:21:16 -0800 Subject: [PATCH] (router) re use client across requests --- litellm/router.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/litellm/router.py b/litellm/router.py index d996c02c5a..2c170ee4bc 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -232,9 +232,9 @@ class Router: data["model"] = original_model_string[:index_of_model_id] else: data["model"] = original_model_string - model_client = deployment["client"] + model_client = deployment["async_client"] self.total_calls[original_model_string] +=1 - response = await litellm.acompletion(**{**data, "messages": messages, "caching": self.cache_responses, "azure_client": model_client, **kwargs}) + response = await litellm.acompletion(**{**data, "messages": messages, "caching": self.cache_responses, "client": model_client, **kwargs}) self.success_calls[original_model_string] +=1 return response except Exception as e: @@ -801,15 +801,28 @@ class Router: self.model_list = model_list # we add api_base/api_key each model so load balancing between azure/gpt on api_base1 and api_base2 works for model in self.model_list: + if "azure" in model["litellm_params"]["model"]: + model["async_client"] = openai.AsyncAzureOpenAI( + api_key= model["litellm_params"]["api_key"], + azure_endpoint = model["litellm_params"]["api_base"] + ) + model["client"] = openai.AzureOpenAI( + api_key= model["litellm_params"]["api_key"], + azure_endpoint = model["litellm_params"]["api_base"] + ) + elif model["litellm_params"]["model"] in litellm.open_ai_chat_completion_models: + model["async_client"] = openai.AsyncOpenAI( + api_key= model["litellm_params"]["api_key"], + base_url = model["litellm_params"]["api_base"] + ) + model["client"] = openai.OpenAI( + api_key= model["litellm_params"]["api_key"], + base_url = model["litellm_params"]["api_base"] + ) model_id = "" for key in 
model["litellm_params"]: model_id+= str(model["litellm_params"][key]) model["litellm_params"]["model"] += "-ModelID-" + model_id - - model["client"] = openai.AsyncAzureOpenAI( - api_key= model["litellm_params"]["api_key"], - azure_endpoint = model["litellm_params"]["api_base"] - ) self.model_names = [m["model_name"] for m in model_list] def get_model_names(self):