Merge pull request #2188 from BerriAI/litellm_fix_health_checks

[Fix] Fix health check when API base set for OpenAI compatible models
2024-02-24 18:47:13 -08:00 · 2024-02-24 18:47:13 -08:00 · c8daf61592
commit c8daf61592
parent ebd0354f1d c315c18695
3 changed files with 12 additions and 5 deletions
--- a/docs/my-website/docs/proxy/health.md
+++ b/docs/my-website/docs/proxy/health.md
@@ -12,7 +12,7 @@ The proxy exposes:
 #### Request
 Make a GET Request to `/health` on the proxy
 ```shell
-curl --location 'http://0.0.0.0:8000/health'
+curl --location 'http://0.0.0.0:8000/health' -H "Authorization: Bearer sk-1234"
 ```

 You can also run `litellm -health`; it makes a `GET` request to `http://0.0.0.0:8000/health` for you
--- a/litellm/llms/openai.py
+++ b/litellm/llms/openai.py
@@ -772,9 +772,13 @@ class OpenAIChatCompletion(BaseLLM):
        input: Optional[list] = None,
        prompt: Optional[str] = None,
        organization: Optional[str] = None,
+        api_base: Optional[str] = None,
    ):
        client = AsyncOpenAI(
-            api_key=api_key, timeout=timeout, organization=organization
+            api_key=api_key,
+            timeout=timeout,
+            organization=organization,
+            base_url=api_base,
        )
        if model is None and mode != "image_generation":
            raise Exception("model is not set")
@@ -870,9 +874,9 @@ class OpenAITextCompletion(BaseLLM):
            if "model" in response_object:
                model_response_object.model = response_object["model"]

-            model_response_object._hidden_params[
-                "original_response"
-            ] = response_object  # track original response, if users make a litellm.text_completion() request, we can return the original response
+            model_response_object._hidden_params["original_response"] = (
+                response_object  # track original response, if users make a litellm.text_completion() request, we can return the original response
+            )
            return model_response_object
        except Exception as e:
            raise e
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -3358,12 +3358,15 @@ async def ahealth_check(
                or default_timeout
            )

+            api_base = model_params.get("api_base") or get_secret("OPENAI_API_BASE")
+
            response = await openai_chat_completions.ahealth_check(
                model=model,
                messages=model_params.get(
                    "messages", None
                ),  # Replace with your actual messages list
                api_key=api_key,
+                api_base=api_base,
                timeout=timeout,
                mode=mode,
                prompt=prompt,