feat(azure.py): support health checking azure deployments

Fixes https://github.com/BerriAI/litellm/issues/5279
This commit is contained in:
Krrish Dholakia 2024-08-22 16:11:14 -07:00
parent 63cd94c32a
commit d7d3eee349
4 changed files with 42 additions and 3 deletions

View file

@ -115,6 +115,39 @@ model_list:
mode: audio_speech
```
### Batch Models (Azure Only)
For Azure models deployed as 'batch' models, set `mode: batch`.
```yaml
model_list:
- model_name: "batch-gpt-4o-mini"
litellm_params:
model: "azure/gpt-4o-mini"
api_key: os.environ/AZURE_API_KEY
api_base: os.environ/AZURE_API_BASE
model_info:
mode: batch
```
Expected Response
```json
{
"healthy_endpoints": [
{
"api_base": "https://...",
"model": "azure/gpt-4o-mini",
"x-ms-region": "East US"
}
],
"unhealthy_endpoints": [],
"healthy_count": 1,
"unhealthy_count": 0
}
```
## Background Health Checks
You can enable background health checks for your models, which prevents each model from being queried too frequently via `/health`.
@ -244,3 +277,4 @@ curl -X POST 'http://localhost:4000/chat/completions' \
}
'
```

View file

@ -1970,6 +1970,8 @@ class AzureChatCompletion(BaseLLM):
input=prompt, # type: ignore
voice="alloy",
)
elif mode == "batch":
completion = await client.batches.with_raw_response.list(limit=1) # type: ignore
else:
raise Exception("mode not set")
response = {}

View file

@ -4825,7 +4825,7 @@ def speech(
async def ahealth_check(
model_params: dict,
mode: Optional[
Literal["completion", "embedding", "image_generation", "chat"]
Literal["completion", "embedding", "image_generation", "chat", "batch"]
] = None,
prompt: Optional[str] = None,
input: Optional[List] = None,

View file

@ -1,4 +1,7 @@
model_list:
- model_name: "*"
- model_name: "batch-gpt-4o-mini"
litellm_params:
model: "*"
model: "azure/gpt-4o-mini"
api_key: os.environ/AZURE_API_KEY
api_base: os.environ/AZURE_API_BASE