From bf403dc02eafdcdde29aa6bfa25758694ecea293 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Sat, 24 Feb 2024 18:38:39 -0800
Subject: [PATCH 1/2] (docs) health

---
 docs/my-website/docs/proxy/health.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/my-website/docs/proxy/health.md b/docs/my-website/docs/proxy/health.md
index 4cd119e484..f0b797329e 100644
--- a/docs/my-website/docs/proxy/health.md
+++ b/docs/my-website/docs/proxy/health.md
@@ -12,7 +12,7 @@ The proxy exposes:
 #### Request
 Make a GET Request to `/health` on the proxy
 ```shell
-curl --location 'http://0.0.0.0:8000/health'
+curl --location 'http://0.0.0.0:8000/health' -H "Authorization: Bearer sk-1234"
 ```
 
 You can also run `litellm -health` it makes a `get` request to `http://0.0.0.0:8000/health` for you

From c315c18695fd7fde3f57c37401a557b968f21f3e Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Sat, 24 Feb 2024 18:39:20 -0800
Subject: [PATCH 2/2] (fix) use api_base in health checks

---
 litellm/llms/openai.py | 12 ++++++++----
 litellm/main.py        |  3 +++
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/litellm/llms/openai.py b/litellm/llms/openai.py
index 1ca7e1710f..549f89da83 100644
--- a/litellm/llms/openai.py
+++ b/litellm/llms/openai.py
@@ -772,9 +772,13 @@ class OpenAIChatCompletion(BaseLLM):
         input: Optional[list] = None,
         prompt: Optional[str] = None,
         organization: Optional[str] = None,
+        api_base: Optional[str] = None,
     ):
         client = AsyncOpenAI(
-            api_key=api_key, timeout=timeout, organization=organization
+            api_key=api_key,
+            timeout=timeout,
+            organization=organization,
+            base_url=api_base,
         )
         if model is None and mode != "image_generation":
             raise Exception("model is not set")
@@ -870,9 +874,9 @@ class OpenAITextCompletion(BaseLLM):
             if "model" in response_object:
                 model_response_object.model = response_object["model"]
 
-            model_response_object._hidden_params[
-                "original_response"
-            ] = response_object  # track original response, if users make a litellm.text_completion() request, we can return the original response
+            model_response_object._hidden_params["original_response"] = (
+                response_object  # track original response, if users make a litellm.text_completion() request, we can return the original response
+            )
             return model_response_object
         except Exception as e:
             raise e
diff --git a/litellm/main.py b/litellm/main.py
index bb53739dbb..33fad52cce 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -3358,12 +3358,15 @@ async def ahealth_check(
             or default_timeout
         )
 
+        api_base = model_params.get("api_base") or get_secret("OPENAI_API_BASE")
+
         response = await openai_chat_completions.ahealth_check(
             model=model,
             messages=model_params.get(
                 "messages", None
             ),  # Replace with your actual messages list
             api_key=api_key,
+            api_base=api_base,
             timeout=timeout,
             mode=mode,
             prompt=prompt,
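
Review notes on the substantive change in PATCH 2/2: the OpenAI health check now threads `api_base` through to the `AsyncOpenAI` client, so proxied or self-hosted OpenAI-compatible endpoints are the ones actually probed, rather than the SDK's default URL (passing `base_url=None` preserves the old default-endpoint behavior). Below is a minimal, self-contained sketch of that code path, assuming the `openai` v1 Python SDK; the function name `ahealth_check_sketch` and the `models.retrieve` probe are illustrative stand-ins, not litellm's real implementation, which also handles the `mode`, `prompt`, and image-generation branches elided here.

```python
# Sketch only: mirrors the client construction from the openai.py hunk above.
from typing import Optional

from openai import AsyncOpenAI


async def ahealth_check_sketch(
    api_key: Optional[str],
    model: str,
    api_base: Optional[str] = None,  # the parameter this patch adds
    timeout: float = 10.0,
    organization: Optional[str] = None,
) -> dict:
    # base_url=None lets the SDK fall back to its default endpoint
    # (or the OPENAI_BASE_URL env var), so callers that never pass
    # api_base keep the pre-patch behavior.
    client = AsyncOpenAI(
        api_key=api_key,
        timeout=timeout,
        organization=organization,
        base_url=api_base,
    )
    # Any cheap authenticated call proves the endpoint is reachable and
    # the key is valid; litellm's real check varies the request by mode.
    await client.models.retrieve(model)
    return {"status": "healthy"}
```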
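
The `main.py` hunk establishes a precedence order for resolving that base URL: an explicit `api_base` in the model's params wins, then the `OPENAI_API_BASE` secret, then `None`, which defers to the SDK default. A hypothetical standalone version of that resolution, with `os.environ` standing in for litellm's `get_secret` lookup:

```python
import os
from typing import Optional


def resolve_api_base(model_params: dict) -> Optional[str]:
    # 1. an explicit api_base on the model entry wins
    # 2. otherwise the OPENAI_API_BASE secret / environment variable
    # 3. otherwise None, deferring to the OpenAI SDK's default URL
    return model_params.get("api_base") or os.environ.get("OPENAI_API_BASE")


# e.g. a model routed through a private gateway (hypothetical URL):
params = {"model": "gpt-3.5-turbo", "api_base": "https://gateway.example/v1"}
assert resolve_api_base(params) == "https://gateway.example/v1"
```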
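
Taken together, a caller-level usage might look like the following. This assumes the function is reachable as `litellm.ahealth_check` and keeps the `model_params` / `mode` shape visible in the diff; treat the exact signature as an assumption, since only a fragment of the function appears above.

```python
import asyncio

import litellm


async def main() -> None:
    # model_params mirrors one entry from a proxy model list; the gateway
    # URL is a made-up example of the api_base this patch starts honoring.
    status = await litellm.ahealth_check(
        model_params={
            "model": "gpt-3.5-turbo",
            "api_key": "sk-...",  # placeholder
            "api_base": "https://gateway.example/v1",
            "messages": [{"role": "user", "content": "ping"}],
        },
        mode="chat",
    )
    print(status)


asyncio.run(main())
```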