diff --git a/litellm/llms/openai.py b/litellm/llms/openai.py
index 1ca7e1710..549f89da8 100644
--- a/litellm/llms/openai.py
+++ b/litellm/llms/openai.py
@@ -772,9 +772,13 @@ class OpenAIChatCompletion(BaseLLM):
         input: Optional[list] = None,
         prompt: Optional[str] = None,
         organization: Optional[str] = None,
+        api_base: Optional[str] = None,
     ):
         client = AsyncOpenAI(
-            api_key=api_key, timeout=timeout, organization=organization
+            api_key=api_key,
+            timeout=timeout,
+            organization=organization,
+            base_url=api_base,
         )
         if model is None and mode != "image_generation":
             raise Exception("model is not set")
@@ -870,9 +874,9 @@ class OpenAITextCompletion(BaseLLM):
             if "model" in response_object:
                 model_response_object.model = response_object["model"]

-            model_response_object._hidden_params[
-                "original_response"
-            ] = response_object  # track original response, if users make a litellm.text_completion() request, we can return the original response
+            model_response_object._hidden_params["original_response"] = (
+                response_object  # track original response, if users make a litellm.text_completion() request, we can return the original response
+            )
             return model_response_object
         except Exception as e:
             raise e
diff --git a/litellm/main.py b/litellm/main.py
index bb53739db..33fad52cc 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -3358,12 +3358,15 @@ async def ahealth_check(
         or default_timeout
     )

+    api_base = model_params.get("api_base") or get_secret("OPENAI_API_BASE")
+
     response = await openai_chat_completions.ahealth_check(
         model=model,
         messages=model_params.get(
             "messages", None
         ),  # Replace with your actual messages list
         api_key=api_key,
+        api_base=api_base,
         timeout=timeout,
         mode=mode,
         prompt=prompt,
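
For context: the change threads `api_base` from `ahealth_check`'s `model_params` (falling back to the `OPENAI_API_BASE` secret) through to the `AsyncOpenAI` client's `base_url`, so health checks can target OpenAI-compatible endpoints other than the default. Below is a minimal usage sketch, assuming `litellm.ahealth_check` accepts a `model_params` dict and a `mode` keyword as the call site in `litellm/main.py` suggests; the model name and endpoint URL are placeholders:

```python
import asyncio

import litellm


async def main():
    # Sketch only: per the diff, "api_base" is read from model_params and,
    # if absent, falls back to the OPENAI_API_BASE secret/env var.
    result = await litellm.ahealth_check(
        model_params={
            "model": "gpt-3.5-turbo",  # placeholder model name
            "api_base": "http://localhost:8000/v1",  # placeholder OpenAI-compatible endpoint
            "messages": [{"role": "user", "content": "ping"}],
        },
        mode="chat",
    )
    print(result)


asyncio.run(main())
```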