(Bug Fix) Add health check support for realtime models (#7453)

* add mode: realtime

* add _realtime_health_check

* test_realtime_health_check

* azure _realtime_health_check

* _realtime_health_check

* Realtime Models

* fix code quality
This commit is contained in:
Ishaan Jaff 2024-12-28 18:15:00 -08:00 committed by GitHub
parent 5c1e8b60d4
commit 4e65722a00
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 110 additions and 1 deletions

View file

@ -168,6 +168,20 @@ Expected Response
}
```
### Realtime Models
To run health checks against realtime models, set `mode: "realtime"` in the `model_info` section of the relevant model in your config.
```yaml
model_list:
- model_name: openai/gpt-4o-realtime-audio
litellm_params:
model: openai/gpt-4o-realtime-audio
api_key: os.environ/OPENAI_API_KEY
model_info:
mode: realtime
```
## Background Health Checks
You can enable background health checks for your models, to prevent each model from being queried too frequently via `/health`.

View file

@ -1585,6 +1585,18 @@ class AzureChatCompletion(BaseLLM):
)
elif mode == "batch":
completion = await client.batches.with_raw_response.list(limit=1) # type: ignore
elif mode == "realtime":
from litellm.realtime_api.main import _realtime_health_check
# create a websocket connection
await _realtime_health_check(
model=model or "",
api_key=api_key,
api_base=api_base,
api_version=api_version,
custom_llm_provider="azure",
)
return {}
else:
raise Exception("mode not set")
response = {}

View file

@ -1374,6 +1374,17 @@ class OpenAIChatCompletion(BaseLLM):
input=prompt, # type: ignore
voice="alloy",
)
elif mode == "realtime":
from litellm.realtime_api.main import _realtime_health_check
# create a websocket connection
await _realtime_health_check(
model=model or "",
api_key=api_key,
api_base=api_base or "https://api.openai.com/",
custom_llm_provider="openai",
)
return {}
else:
raise ValueError("mode not set, passed in mode: " + mode)
response = {}

View file

@ -5153,7 +5153,13 @@ async def ahealth_check( # noqa: PLR0915
model_params: dict,
mode: Optional[
Literal[
"completion", "embedding", "image_generation", "chat", "batch", "rerank"
"completion",
"embedding",
"image_generation",
"chat",
"batch",
"rerank",
"realtime",
]
] = None,
prompt: Optional[str] = None,

View file

@ -114,3 +114,45 @@ async def _arealtime(
)
else:
raise ValueError(f"Unsupported model: {model}")
async def _realtime_health_check(
    model: str,
    api_base: str,
    custom_llm_provider: str,
    api_key: Optional[str],
    api_version: Optional[str] = None,
):
    """
    Health check for the realtime API - attempts a websocket connection to the
    provider's realtime endpoint.

    Args:
        model: str - model name
        api_base: str - api base
        custom_llm_provider: str - custom llm provider ("azure" or "openai")
        api_key: Optional[str] - api key
        api_version: Optional[str] - api version (used for Azure only;
            defaults to "2024-10-01-preview" when not provided)

    Returns:
        bool - True if the websocket connection is successful

    Raises:
        ValueError - if custom_llm_provider is not "azure" or "openai"
        Exception - if the websocket connection attempt fails
    """
    import websockets

    url: Optional[str] = None
    if custom_llm_provider == "azure":
        url = azure_realtime._construct_url(
            api_base=api_base,
            model=model,
            api_version=api_version or "2024-10-01-preview",
        )
    elif custom_llm_provider == "openai":
        url = openai_realtime._construct_url(api_base=api_base, model=model)
    else:
        # Fail fast with a clear error rather than passing url=None to
        # websockets.connect, which would raise an opaque TypeError.
        raise ValueError(
            f"Unsupported custom_llm_provider for realtime health check: {custom_llm_provider}"
        )
    # NOTE(review): "api-key" is Azure's auth header; OpenAI's realtime API
    # typically expects "Authorization: Bearer <key>" — confirm this works
    # for the openai provider path.
    async with websockets.connect(  # type: ignore
        url,
        extra_headers={
            "api-key": api_key,  # type: ignore
        },
    ):
        return True

View file

@ -152,3 +152,27 @@ async def test_cohere_rerank_health_check():
assert "error" not in response
print(response)
@pytest.mark.asyncio
@pytest.mark.parametrize(
    "model", ["azure/gpt-4o-realtime-preview", "openai/gpt-4o-realtime-preview"]
)
async def test_realtime_health_check(model):
    """
    Test Health Check with Valid models passes
    """
    model_params = {"model": model}
    if model == "azure/gpt-4o-realtime-preview":
        # Azure needs explicit endpoint credentials pulled from the environment.
        azure_env_keys = {
            "api_base": "AZURE_REALTIME_API_BASE",
            "api_key": "AZURE_REALTIME_API_KEY",
            "api_version": "AZURE_REALTIME_API_VERSION",
        }
        model_params.update(
            {param: os.getenv(env_var) for param, env_var in azure_env_keys.items()}
        )
    response = await litellm.ahealth_check(
        model_params=model_params,
        mode="realtime",
    )
    print(response)
    assert response == {}