diff --git a/docs/my-website/docs/proxy/health.md b/docs/my-website/docs/proxy/health.md
index 4cd119e484..f0b797329e 100644
--- a/docs/my-website/docs/proxy/health.md
+++ b/docs/my-website/docs/proxy/health.md
@@ -12,7 +12,7 @@ The proxy exposes:
 #### Request
 Make a GET Request to `/health` on the proxy
 ```shell
-curl --location 'http://0.0.0.0:8000/health'
+curl --location 'http://0.0.0.0:8000/health' -H "Authorization: Bearer sk-1234"
 ```
 
 You can also run `litellm -health` it makes a `get` request to `http://0.0.0.0:8000/health` for you
diff --git a/litellm/llms/openai.py b/litellm/llms/openai.py
index 1ca7e1710f..549f89da83 100644
--- a/litellm/llms/openai.py
+++ b/litellm/llms/openai.py
@@ -772,9 +772,13 @@ class OpenAIChatCompletion(BaseLLM):
         input: Optional[list] = None,
         prompt: Optional[str] = None,
         organization: Optional[str] = None,
+        api_base: Optional[str] = None,
     ):
         client = AsyncOpenAI(
-            api_key=api_key, timeout=timeout, organization=organization
+            api_key=api_key,
+            timeout=timeout,
+            organization=organization,
+            base_url=api_base,
         )
         if model is None and mode != "image_generation":
             raise Exception("model is not set")
@@ -870,9 +874,9 @@ class OpenAITextCompletion(BaseLLM):
             if "model" in response_object:
                 model_response_object.model = response_object["model"]
 
-            model_response_object._hidden_params[
-                "original_response"
-            ] = response_object  # track original response, if users make a litellm.text_completion() request, we can return the original response
+            model_response_object._hidden_params["original_response"] = (
+                response_object  # track original response, if users make a litellm.text_completion() request, we can return the original response
+            )
             return model_response_object
         except Exception as e:
             raise e
diff --git a/litellm/main.py b/litellm/main.py
index bb53739dbb..33fad52cce 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -3358,12 +3358,15 @@ async def ahealth_check(
             or default_timeout
         )
 
+        api_base = model_params.get("api_base") or get_secret("OPENAI_API_BASE")
+
         response = await openai_chat_completions.ahealth_check(
             model=model,
             messages=model_params.get(
                 "messages", None
             ),  # Replace with your actual messages list
             api_key=api_key,
+            api_base=api_base,
             timeout=timeout,
             mode=mode,
             prompt=prompt,