feat(azure.py): support health checking azure deployments

Fixes https://github.com/BerriAI/litellm/issues/5279
Krrish Dholakia 2024-08-22 16:11:14 -07:00
parent 5959cf0b50
commit b7f42c96a4
4 changed files with 42 additions and 3 deletions

View file

@@ -115,6 +115,39 @@ model_list:
      mode: audio_speech
```
+
+### Batch Models (Azure Only)
+
+For Azure models deployed as 'batch' models, set `mode: batch`.
+
+```yaml
+model_list:
+  - model_name: "batch-gpt-4o-mini"
+    litellm_params:
+      model: "azure/gpt-4o-mini"
+      api_key: os.environ/AZURE_API_KEY
+      api_base: os.environ/AZURE_API_BASE
+    model_info:
+      mode: batch
+```
+
+Expected Response
+
+```bash
+{
+    "healthy_endpoints": [
+        {
+            "api_base": "https://...",
+            "model": "azure/gpt-4o-mini",
+            "x-ms-region": "East US"
+        }
+    ],
+    "unhealthy_endpoints": [],
+    "healthy_count": 1,
+    "unhealthy_count": 0
+}
+```
+
## Background Health Checks

You can run model health checks in the background, to prevent each model from being queried too frequently via `/health`.
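For reference, switching this on looks like the sketch below; litellm documents `background_health_checks` and `health_check_interval` under `general_settings`, and the 300-second interval here is an illustrative value, not a recommendation.

```yaml
general_settings:
  background_health_checks: true   # run health checks in the background
  health_check_interval: 300       # seconds between checks (illustrative value)
```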
@@ -244,3 +277,4 @@ curl -X POST 'http://localhost:4000/chat/completions' \
  }
  '
```
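To reproduce the "Expected Response" shown above against a running proxy, you can query the `/health` endpoint directly. A minimal sketch, assuming the proxy listens on localhost:4000 and `sk-1234` is your master key:

```bash
# Probe every configured deployment; batch models are checked via the batches API.
curl -X GET 'http://localhost:4000/health' \
  -H 'Authorization: Bearer sk-1234'
```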

View file

@@ -1970,6 +1970,8 @@ class AzureChatCompletion(BaseLLM):
                input=prompt,  # type: ignore
                voice="alloy",
            )
+        elif mode == "batch":
+            completion = await client.batches.with_raw_response.list(limit=1)  # type: ignore
        else:
            raise Exception("mode not set")
        response = {}
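A minimal standalone sketch of what this new branch checks, using the OpenAI SDK's async Azure client directly. The function name, API version, and environment variable names are illustrative assumptions, not litellm internals.

```python
import asyncio
import os

from openai import AsyncAzureOpenAI


async def check_batch_deployment() -> dict:
    # Build the same kind of async Azure client the health check path uses.
    client = AsyncAzureOpenAI(
        api_key=os.environ["AZURE_API_KEY"],
        azure_endpoint=os.environ["AZURE_API_BASE"],
        api_version="2024-05-01-preview",  # assumed batch-capable API version
    )
    # Listing a single batch is a cheap, authenticated call: if it succeeds,
    # the deployment can serve batch requests.
    raw = await client.batches.with_raw_response.list(limit=1)
    # with_raw_response exposes HTTP headers, e.g. Azure's x-ms-region.
    return {"x-ms-region": raw.headers.get("x-ms-region")}


if __name__ == "__main__":
    print(asyncio.run(check_batch_deployment()))
```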

View file

@@ -4825,7 +4825,7 @@ def speech(
async def ahealth_check(
    model_params: dict,
    mode: Optional[
-        Literal["completion", "embedding", "image_generation", "chat"]
+        Literal["completion", "embedding", "image_generation", "chat", "batch"]
    ] = None,
    prompt: Optional[str] = None,
    input: Optional[List] = None,
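As a usage sketch, the new mode can be exercised through `ahealth_check` without the proxy. This assumes the function is importable from the package root (it is defined in `litellm/main.py`) and uses illustrative credentials from the environment; `model_params` mirrors `litellm_params` from the proxy config.

```python
import asyncio
import os

from litellm import ahealth_check


async def main():
    # mode="batch" triggers the new batches.list(limit=1) probe on Azure.
    response = await ahealth_check(
        model_params={
            "model": "azure/gpt-4o-mini",
            "api_key": os.environ["AZURE_API_KEY"],
            "api_base": os.environ["AZURE_API_BASE"],
        },
        mode="batch",
    )
    print(response)


asyncio.run(main())
```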

View file

@@ -1,4 +1,7 @@
model_list:
-  - model_name: "*"
+  - model_name: "batch-gpt-4o-mini"
    litellm_params:
-      model: "*"
+      model: "azure/gpt-4o-mini"
+      api_key: os.environ/AZURE_API_KEY
+      api_base: os.environ/AZURE_API_BASE
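To try the change end to end, a sketch assuming this file is saved as `config.yaml`:

```bash
# Start the proxy with the batch deployment configured above, then hit /health
# (see the curl example earlier) to get the healthy/unhealthy endpoint summary.
litellm --config config.yaml
```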