Merge pull request #2188 from BerriAI/litellm_fix_health_checks

[Fix] Fix health check when API base set for OpenAI compatible models
This commit is contained in:
Ishaan Jaff 2024-02-24 18:47:13 -08:00 committed by GitHub
commit c8daf61592
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 12 additions and 5 deletions

View file

@@ -12,7 +12,7 @@ The proxy exposes:
#### Request #### Request
Make a GET Request to `/health` on the proxy Make a GET Request to `/health` on the proxy
```shell ```shell
curl --location 'http://0.0.0.0:8000/health' curl --location 'http://0.0.0.0:8000/health' -H "Authorization: Bearer sk-1234"
``` ```
You can also run `litellm -health` it makes a `get` request to `http://0.0.0.0:8000/health` for you You can also run `litellm -health` it makes a `get` request to `http://0.0.0.0:8000/health` for you

View file

@@ -772,9 +772,13 @@ class OpenAIChatCompletion(BaseLLM):
input: Optional[list] = None, input: Optional[list] = None,
prompt: Optional[str] = None, prompt: Optional[str] = None,
organization: Optional[str] = None, organization: Optional[str] = None,
api_base: Optional[str] = None,
): ):
client = AsyncOpenAI( client = AsyncOpenAI(
api_key=api_key, timeout=timeout, organization=organization api_key=api_key,
timeout=timeout,
organization=organization,
base_url=api_base,
) )
if model is None and mode != "image_generation": if model is None and mode != "image_generation":
raise Exception("model is not set") raise Exception("model is not set")
@@ -870,9 +874,9 @@ class OpenAITextCompletion(BaseLLM):
if "model" in response_object: if "model" in response_object:
model_response_object.model = response_object["model"] model_response_object.model = response_object["model"]
model_response_object._hidden_params[ model_response_object._hidden_params["original_response"] = (
"original_response" response_object # track original response, if users make a litellm.text_completion() request, we can return the original response
] = response_object # track original response, if users make a litellm.text_completion() request, we can return the original response )
return model_response_object return model_response_object
except Exception as e: except Exception as e:
raise e raise e

View file

@@ -3358,12 +3358,15 @@ async def ahealth_check(
or default_timeout or default_timeout
) )
api_base = model_params.get("api_base") or get_secret("OPENAI_API_BASE")
response = await openai_chat_completions.ahealth_check( response = await openai_chat_completions.ahealth_check(
model=model, model=model,
messages=model_params.get( messages=model_params.get(
"messages", None "messages", None
), # Replace with your actual messages list ), # Replace with your actual messages list
api_key=api_key, api_key=api_key,
api_base=api_base,
timeout=timeout, timeout=timeout,
mode=mode, mode=mode,
prompt=prompt, prompt=prompt,