(Bug Fix) Add health check support for realtime models (#7453)

* add mode: realtime * add _realtime_health_check * test_realtime_health_check * azure _realtime_health_check * _realtime_health_check * Realtime Models * fix code quality
2025-04-25 18:54:30 +00:00 · 2024-12-28 18:15:00 -08:00 · 2024-12-28 18:15:00 -08:00 · 4e65722a00
commit 4e65722a00
parent 5c1e8b60d4
6 changed files with 110 additions and 1 deletions
--- a/docs/my-website/docs/proxy/health.md
+++ b/docs/my-website/docs/proxy/health.md
@ -168,6 +168,20 @@ Expected Response
 }
 ```

+### Realtime Models 
+
+To run realtime health checks, specify the mode as "realtime" in your config for the relevant model.
+
+```yaml
+model_list:
+  - model_name: openai/gpt-4o-realtime-audio
+    litellm_params:
+      model: openai/gpt-4o-realtime-audio
+      api_key: os.environ/OPENAI_API_KEY
+    model_info:
+      mode: realtime
+```
+
 ## Background Health Checks 

 You can enable model health checks being run in the background, to prevent each model from being queried too frequently via `/health`. 
--- a/litellm/llms/azure/azure.py
+++ b/litellm/llms/azure/azure.py
@ -1585,6 +1585,18 @@ class AzureChatCompletion(BaseLLM):
            )
        elif mode == "batch":
            completion = await client.batches.with_raw_response.list(limit=1)  # type: ignore
+        elif mode == "realtime":
+            from litellm.realtime_api.main import _realtime_health_check
+
+            # create a websocket connection
+            await _realtime_health_check(
+                model=model or "",
+                api_key=api_key,
+                api_base=api_base,
+                api_version=api_version,
+                custom_llm_provider="azure",
+            )
+            return {}
        else:
            raise Exception("mode not set")
        response = {}
--- a/litellm/llms/openai/openai.py
+++ b/litellm/llms/openai/openai.py
@ -1374,6 +1374,17 @@ class OpenAIChatCompletion(BaseLLM):
                input=prompt,  # type: ignore
                voice="alloy",
            )
+        elif mode == "realtime":
+            from litellm.realtime_api.main import _realtime_health_check
+
+            # create a websocket connection
+            await _realtime_health_check(
+                model=model or "",
+                api_key=api_key,
+                api_base=api_base or "https://api.openai.com/",
+                custom_llm_provider="openai",
+            )
+            return {}
        else:
            raise ValueError("mode not set, passed in mode: " + mode)
        response = {}
--- a/litellm/main.py
+++ b/litellm/main.py
@ -5153,7 +5153,13 @@ async def ahealth_check(  # noqa: PLR0915
    model_params: dict,
    mode: Optional[
        Literal[
-            "completion", "embedding", "image_generation", "chat", "batch", "rerank"
+            "completion",
+            "embedding",
+            "image_generation",
+            "chat",
+            "batch",
+            "rerank",
+            "realtime",
        ]
    ] = None,
    prompt: Optional[str] = None,
--- a/litellm/realtime_api/main.py
+++ b/litellm/realtime_api/main.py
@ -114,3 +114,45 @@ async def _arealtime(
        )
    else:
        raise ValueError(f"Unsupported model: {model}")
+
+
+async def _realtime_health_check(
+    model: str,
+    api_base: str,
+    custom_llm_provider: str,
+    api_key: Optional[str],
+    api_version: Optional[str] = None,
+):
+    """
+    Health check for realtime API - tries connection to the realtime API websocket
+
+    Args:
+        model: str - model name
+        api_base: str - api base
+        api_version: Optional[str] - api version
+        api_key: str - api key
+        custom_llm_provider: str - custom llm provider
+
+    Returns:
+        bool - True if connection is successful, False otherwise
+    Raises:
+        Exception - if the connection is not successful
+    """
+    import websockets
+
+    url: Optional[str] = None
+    if custom_llm_provider == "azure":
+        url = azure_realtime._construct_url(
+            api_base=api_base,
+            model=model,
+            api_version=api_version or "2024-10-01-preview",
+        )
+    elif custom_llm_provider == "openai":
+        url = openai_realtime._construct_url(api_base=api_base, model=model)
+    async with websockets.connect(  # type: ignore
+        url,
+        extra_headers={
+            "api-key": api_key,  # type: ignore
+        },
+    ):
+        return True
--- a/tests/local_testing/test_health_check.py
+++ b/tests/local_testing/test_health_check.py
@ -152,3 +152,27 @@ async def test_cohere_rerank_health_check():
    assert "error" not in response

    print(response)
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "model", ["azure/gpt-4o-realtime-preview", "openai/gpt-4o-realtime-preview"]
+)
+async def test_realtime_health_check(model):
+    """
+    Test Health Check with Valid models passes
+
+    """
+    model_params = {
+        "model": model,
+    }
+    if model == "azure/gpt-4o-realtime-preview":
+        model_params["api_base"] = os.getenv("AZURE_REALTIME_API_BASE")
+        model_params["api_key"] = os.getenv("AZURE_REALTIME_API_KEY")
+        model_params["api_version"] = os.getenv("AZURE_REALTIME_API_VERSION")
+    response = await litellm.ahealth_check(
+        model_params=model_params,
+        mode="realtime",
+    )
+    print(response)
+    assert response == {}