diff --git a/docs/my-website/docs/proxy/health.md b/docs/my-website/docs/proxy/health.md
index 632702b914..e7ff69aeb3 100644
--- a/docs/my-website/docs/proxy/health.md
+++ b/docs/my-website/docs/proxy/health.md
@@ -115,6 +115,39 @@ model_list:
       mode: audio_speech
 ```
 
+### Batch Models (Azure Only)
+
+For Azure models deployed as `batch` models, set `mode: batch`.
+
+```yaml
+model_list:
+  - model_name: "batch-gpt-4o-mini"
+    litellm_params:
+      model: "azure/gpt-4o-mini"
+      api_key: os.environ/AZURE_API_KEY
+      api_base: os.environ/AZURE_API_BASE
+    model_info:
+      mode: batch
+```
+
+Expected Response
+
+
+```json
+{
+    "healthy_endpoints": [
+        {
+            "api_base": "https://...",
+            "model": "azure/gpt-4o-mini",
+            "x-ms-region": "East US"
+        }
+    ],
+    "unhealthy_endpoints": [],
+    "healthy_count": 1,
+    "unhealthy_count": 0
+}
+```
+
 ## Background Health Checks
 
 You can enable model health checks being run in the background, to prevent each model from being queried too frequently via `/health`.
@@ -244,3 +277,4 @@ curl -X POST 'http://localhost:4000/chat/completions' \
 }
 '
 ```
+
diff --git a/litellm/llms/azure.py b/litellm/llms/azure.py
index a54bef3192..e235187801 100644
--- a/litellm/llms/azure.py
+++ b/litellm/llms/azure.py
@@ -1970,6 +1970,8 @@ class AzureChatCompletion(BaseLLM):
                     input=prompt,  # type: ignore
                     voice="alloy",
                 )
+            elif mode == "batch":
+                completion = await client.batches.with_raw_response.list(limit=1)  # type: ignore
             else:
                 raise Exception("mode not set")
             response = {}
diff --git a/litellm/main.py b/litellm/main.py
index 28054537cf..49436f1537 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -4825,7 +4825,7 @@ def speech(
 async def ahealth_check(
     model_params: dict,
     mode: Optional[
-        Literal["completion", "embedding", "image_generation", "chat"]
+        Literal["completion", "embedding", "image_generation", "chat", "batch"]
     ] = None,
     prompt: Optional[str] = None,
     input: Optional[List] = None,
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 96a0242a8e..50a6d993ec 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -1,4 +1,7 @@
 model_list:
-  - model_name: "*"
+  - model_name: "batch-gpt-4o-mini"
     litellm_params:
-      model: "*"
+      model: "azure/gpt-4o-mini"
+      api_key: os.environ/AZURE_API_KEY
+      api_base: os.environ/AZURE_API_BASE
+
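
For reviewers: a minimal sketch (not part of this PR) of exercising the new `mode="batch"` literal programmatically via `litellm.ahealth_check`, whose signature this PR extends in `litellm/main.py`. The deployment name and environment variables are placeholders taken from the docs example above; the Azure `api_version` is assumed to default sensibly.

```python
# Sketch only: calls litellm.ahealth_check with the new "batch" mode.
# "azure/gpt-4o-mini" and the env var names are placeholder values.
import asyncio
import os

import litellm


async def main() -> None:
    result = await litellm.ahealth_check(
        model_params={
            "model": "azure/gpt-4o-mini",
            "api_key": os.environ["AZURE_API_KEY"],
            "api_base": os.environ["AZURE_API_BASE"],
        },
        mode="batch",  # routes to client.batches.with_raw_response.list(limit=1), per azure.py above
    )
    print(result)  # a dict; see the azure.py handler above for what a healthy check returns


asyncio.run(main())
```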
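And a sketch of checking the documented behavior end-to-end against a locally running proxy, using the `/health` endpoint that `health.md` describes. The URL and master key below are hypothetical local-dev values, not part of this change.

```python
# Sketch only: queries the proxy /health endpoint documented in health.md.
# http://localhost:4000 and sk-1234 are hypothetical local-dev values.
import httpx

resp = httpx.get(
    "http://localhost:4000/health",
    headers={"Authorization": "Bearer sk-1234"},
)
resp.raise_for_status()
data = resp.json()
# The batch deployment should appear under healthy_endpoints, as in the docs example.
print(data["healthy_count"], data["unhealthy_count"])
```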