mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 18:54:30 +00:00
feat(azure.py): support health checking azure deployments
Fixes https://github.com/BerriAI/litellm/issues/5279
This commit is contained in:
parent
63cd94c32a
commit
d7d3eee349
4 changed files with 42 additions and 3 deletions
|
@ -115,6 +115,39 @@ model_list:
|
||||||
mode: audio_speech
|
mode: audio_speech
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Batch Models (Azure Only)
|
||||||
|
|
||||||
|
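For Azure models deployed as 'batch' models, set `mode: batch` under `model_info`. The health check then lists batches on the deployment instead of sending a completion request.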
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: "batch-gpt-4o-mini"
|
||||||
|
litellm_params:
|
||||||
|
model: "azure/gpt-4o-mini"
|
||||||
|
api_key: os.environ/AZURE_API_KEY
|
||||||
|
api_base: os.environ/AZURE_API_BASE
|
||||||
|
model_info:
|
||||||
|
mode: batch
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected Response
|
||||||
|
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"healthy_endpoints": [
|
||||||
|
{
|
||||||
|
"api_base": "https://...",
|
||||||
|
"model": "azure/gpt-4o-mini",
|
||||||
|
"x-ms-region": "East US"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"unhealthy_endpoints": [],
|
||||||
|
"healthy_count": 1,
|
||||||
|
"unhealthy_count": 0
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
## Background Health Checks
|
## Background Health Checks
|
||||||
|
|
||||||
You can run model health checks in the background, so that each model is not queried too frequently via `/health`.
|
You can run model health checks in the background, so that each model is not queried too frequently via `/health`.
|
||||||
|
@ -244,3 +277,4 @@ curl -X POST 'http://localhost:4000/chat/completions' \
|
||||||
}
|
}
|
||||||
'
|
'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
|
@ -1970,6 +1970,8 @@ class AzureChatCompletion(BaseLLM):
|
||||||
input=prompt, # type: ignore
|
input=prompt, # type: ignore
|
||||||
voice="alloy",
|
voice="alloy",
|
||||||
)
|
)
|
||||||
|
elif mode == "batch":
|
||||||
|
completion = await client.batches.with_raw_response.list(limit=1) # type: ignore
|
||||||
else:
|
else:
|
||||||
raise Exception("mode not set")
|
raise Exception("mode not set")
|
||||||
response = {}
|
response = {}
|
||||||
|
|
|
@ -4825,7 +4825,7 @@ def speech(
|
||||||
async def ahealth_check(
|
async def ahealth_check(
|
||||||
model_params: dict,
|
model_params: dict,
|
||||||
mode: Optional[
|
mode: Optional[
|
||||||
Literal["completion", "embedding", "image_generation", "chat"]
|
Literal["completion", "embedding", "image_generation", "chat", "batch"]
|
||||||
] = None,
|
] = None,
|
||||||
prompt: Optional[str] = None,
|
prompt: Optional[str] = None,
|
||||||
input: Optional[List] = None,
|
input: Optional[List] = None,
|
||||||
|
|
|
@ -1,4 +1,7 @@
|
||||||
model_list:
|
model_list:
|
||||||
- model_name: "*"
|
- model_name: "batch-gpt-4o-mini"
|
||||||
litellm_params:
|
litellm_params:
|
||||||
model: "*"
|
model: "azure/gpt-4o-mini"
|
||||||
|
api_key: os.environ/AZURE_API_KEY
|
||||||
|
api_base: os.environ/AZURE_API_BASE
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue