feat(azure.py): support health checking azure deployments

Fixes https://github.com/BerriAI/litellm/issues/5279
This commit is contained in:
Krrish Dholakia 2024-08-22 16:11:14 -07:00
parent 63cd94c32a
commit d7d3eee349
4 changed files with 42 additions and 3 deletions

View file

@ -115,6 +115,39 @@ model_list:
mode: audio_speech
```
### Batch Models (Azure Only)
For Azure models deployed as 'batch' models, set `mode: batch`.
```yaml
model_list:
- model_name: "batch-gpt-4o-mini"
litellm_params:
model: "azure/gpt-4o-mini"
api_key: os.environ/AZURE_API_KEY
api_base: os.environ/AZURE_API_BASE
model_info:
mode: batch
```
Expected Response
```json
{
"healthy_endpoints": [
{
"api_base": "https://...",
"model": "azure/gpt-4o-mini",
"x-ms-region": "East US"
}
],
"unhealthy_endpoints": [],
"healthy_count": 1,
"unhealthy_count": 0
}
```
## Background Health Checks
You can enable background health checks for your models, which prevents each model from being queried too frequently via `/health`.
@ -244,3 +277,4 @@ curl -X POST 'http://localhost:4000/chat/completions' \
}
'
```

View file

@ -1970,6 +1970,8 @@ class AzureChatCompletion(BaseLLM):
input=prompt, # type: ignore
voice="alloy",
)
elif mode == "batch":
completion = await client.batches.with_raw_response.list(limit=1) # type: ignore
else:
raise Exception("mode not set")
response = {}

View file

@ -4825,7 +4825,7 @@ def speech(
async def ahealth_check(
model_params: dict,
mode: Optional[
Literal["completion", "embedding", "image_generation", "chat"]
Literal["completion", "embedding", "image_generation", "chat", "batch"]
] = None,
prompt: Optional[str] = None,
input: Optional[List] = None,

View file

@ -1,4 +1,7 @@
model_list:
- model_name: "*"
- model_name: "batch-gpt-4o-mini"
litellm_params:
model: "*"
model: "azure/gpt-4o-mini"
api_key: os.environ/AZURE_API_KEY
api_base: os.environ/AZURE_API_BASE