diff --git a/litellm/llms/azure.py b/litellm/llms/azure.py
index 6a217bc2c6..7f268c25a5 100644
--- a/litellm/llms/azure.py
+++ b/litellm/llms/azure.py
@@ -799,6 +799,7 @@ class AzureChatCompletion(BaseLLM):
         optional_params: dict,
         model_response: TranscriptionResponse,
         timeout: float,
+        max_retries: int,
         api_key: Optional[str] = None,
         api_base: Optional[str] = None,
         api_version: Optional[str] = None,
@@ -817,8 +818,6 @@ class AzureChatCompletion(BaseLLM):
             "timeout": timeout,
         }
 
-        max_retries = optional_params.pop("max_retries", None)
-
         azure_client_params = select_azure_base_url_or_endpoint(
             azure_client_params=azure_client_params
         )
diff --git a/litellm/llms/openai.py b/litellm/llms/openai.py
index ecc8d5f703..921ffdb208 100644
--- a/litellm/llms/openai.py
+++ b/litellm/llms/openai.py
@@ -785,10 +785,10 @@ class OpenAIChatCompletion(BaseLLM):
         optional_params: dict,
         model_response: TranscriptionResponse,
         timeout: float,
+        max_retries: int,
         api_key: Optional[str] = None,
         api_base: Optional[str] = None,
         client=None,
-        max_retries=None,
         logging_obj=None,
         atranscription: bool = False,
     ):
diff --git a/litellm/main.py b/litellm/main.py
index 1fcf0d5d32..1fca06fa59 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -3553,6 +3553,7 @@
     api_key: Optional[str] = None,
     api_base: Optional[str] = None,
     api_version: Optional[str] = None,
+    max_retries: Optional[int] = None,
     litellm_logging_obj=None,
     custom_llm_provider=None,
     **kwargs,
@@ -3568,6 +3569,8 @@
     proxy_server_request = kwargs.get("proxy_server_request", None)
     model_info = kwargs.get("model_info", None)
     metadata = kwargs.get("metadata", {})
+    if max_retries is None:
+        max_retries = openai.DEFAULT_MAX_RETRIES
 
     model_response = litellm.utils.TranscriptionResponse()
 
@@ -3611,6 +3614,7 @@
             api_key=api_key,
             api_version=api_version,
             azure_ad_token=azure_ad_token,
+            max_retries=max_retries,
         )
     elif custom_llm_provider == "openai":
         response = openai_chat_completions.audio_transcriptions(
@@ -3621,6 +3625,7 @@
             atranscription=atranscription,
             timeout=timeout,
             logging_obj=litellm_logging_obj,
+            max_retries=max_retries,
         )
     return response
 