diff --git a/litellm/llms/azure.py b/litellm/llms/azure.py
index b763a7c95..5d73b9435 100644
--- a/litellm/llms/azure.py
+++ b/litellm/llms/azure.py
@@ -812,7 +812,7 @@ class AzureChatCompletion(BaseLLM):
         azure_client_params: dict,
         api_key: str,
         input: list,
-        client=None,
+        client: Optional[AsyncAzureOpenAI] = None,
         logging_obj=None,
         timeout=None,
     ):
@@ -911,6 +911,7 @@ class AzureChatCompletion(BaseLLM):
                     model_response=model_response,
                     azure_client_params=azure_client_params,
                     timeout=timeout,
+                    client=client,
                 )
                 return response
             if client is None:
diff --git a/litellm/llms/openai.py b/litellm/llms/openai.py
index 55a0d97da..7d14fa450 100644
--- a/litellm/llms/openai.py
+++ b/litellm/llms/openai.py
@@ -996,11 +996,11 @@ class OpenAIChatCompletion(BaseLLM):
         self,
         input: list,
         data: dict,
-        model_response: ModelResponse,
+        model_response: litellm.utils.EmbeddingResponse,
         timeout: float,
         api_key: Optional[str] = None,
         api_base: Optional[str] = None,
-        client=None,
+        client: Optional[AsyncOpenAI] = None,
         max_retries=None,
         logging_obj=None,
     ):
@@ -1039,9 +1039,9 @@ class OpenAIChatCompletion(BaseLLM):
         input: list,
         timeout: float,
         logging_obj,
+        model_response: litellm.utils.EmbeddingResponse,
         api_key: Optional[str] = None,
         api_base: Optional[str] = None,
-        model_response: Optional[litellm.utils.EmbeddingResponse] = None,
         optional_params=None,
         client=None,
         aembedding=None,
@@ -1062,7 +1062,17 @@ class OpenAIChatCompletion(BaseLLM):
             )
 
             if aembedding is True:
-                response = self.aembedding(data=data, input=input, logging_obj=logging_obj, model_response=model_response, api_base=api_base, api_key=api_key, timeout=timeout, client=client, max_retries=max_retries)  # type: ignore
+                response = self.aembedding(
+                    data=data,
+                    input=input,
+                    logging_obj=logging_obj,
+                    model_response=model_response,
+                    api_base=api_base,
+                    api_key=api_key,
+                    timeout=timeout,
+                    client=client,
+                    max_retries=max_retries,
+                )
                 return response
 
             openai_client = self._get_openai_client(
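
For context, here is a minimal usage sketch of what this change enables: the diff threads a caller-supplied `client` through to the async embedding paths, so one pre-configured `AsyncAzureOpenAI` client can be reused across calls instead of a fresh client being constructed per request. The endpoint, key, and deployment names below are hypothetical, and whether your litellm version forwards `client=` end-to-end through the public `aembedding` entrypoint is an assumption inferred from these signatures, not confirmed by the diff itself.

# Hedged usage sketch (not part of the patch above). Assumes litellm's
# public aembedding() accepts and forwards a client= kwarg, as the patched
# internal signatures suggest. All endpoint/key/deployment values are
# placeholders for illustration only.
import asyncio

from openai import AsyncAzureOpenAI

import litellm


async def main():
    # Build one async Azure client up front so connection pooling and
    # auth configuration are shared across embedding calls.
    azure_client = AsyncAzureOpenAI(
        azure_endpoint="https://my-resource.openai.azure.com",  # placeholder
        api_key="my-azure-api-key",  # placeholder
        api_version="2023-07-01-preview",
    )
    # Passing client= is what the new Optional[AsyncAzureOpenAI] annotation
    # and the added client=client forwarding in azure.py make possible.
    response = await litellm.aembedding(
        model="azure/my-embedding-deployment",  # placeholder deployment
        input=["hello world"],
        client=azure_client,
    )
    # litellm returns an EmbeddingResponse (the type the diff now requires
    # for model_response rather than ModelResponse).
    print(response.data[0]["embedding"][:5])


asyncio.run(main())

The `model_response` parameter change in openai.py follows the same reasoning: the embedding path always produces an `EmbeddingResponse`, so making the parameter required and correctly typed (rather than `Optional` with a `ModelResponse` annotation) lets the type checker catch mismatches that the old `# type: ignore` was suppressing.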