OpenAI /v1/realtime api support (#6047)

* feat(azure/realtime): initial working commit for proxy azure openai realtime endpoint support

Adds support for passing /v1/realtime calls via the litellm proxy.
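
A minimal client sketch of what that enables (not part of the commit): it assumes the proxy is listening on localhost:4000 (litellm's default port), that "sk-1234" is a valid proxy key, and that the model in the query string names a configured realtime deployment.

    # Hypothetical client, for illustration only.
    import asyncio
    import json
    import websockets  # the dependency this PR adds to requirements.txt

    async def main():
        uri = "ws://localhost:4000/v1/realtime?model=gpt-4o-realtime-preview-2024-10-01"
        # `extra_headers` is the keyword on websockets releases current at the
        # time of this commit; newer releases renamed it to `additional_headers`.
        async with websockets.connect(
            uri, extra_headers={"Authorization": "Bearer sk-1234"}
        ) as ws:
            await ws.send(json.dumps({"type": "response.create"}))
            print(await ws.recv())

    asyncio.run(main())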

* feat(realtime_api/main.py): abstraction for handling openai realtime api calls

* feat(router.py): add `arealtime()` endpoint in router for realtime api calls

Allows using `model_list` in the proxy for realtime calls as well.
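
As a sketch of the `model_list` side (the alias and pinned model below are illustrative, not from the commit):

    # Hypothetical Router setup; names and the model version are placeholders.
    from litellm import Router

    router = Router(
        model_list=[
            {
                "model_name": "realtime",  # alias clients request
                "litellm_params": {
                    "model": "openai/gpt-4o-realtime-preview-2024-10-01",
                    "api_key": "os.environ/OPENAI_API_KEY",
                },
            }
        ]
    )
    # A realtime call against "realtime" then picks the lowest-TPM/RPM
    # deployment, exactly as the _arealtime implementation below does.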

* fix: make realtime api a private function

The structure might change based on feedback; make that clear to users.

* build(requirements.txt): add `websockets` to requirements.txt

* feat(openai/realtime): add openai /v1/realtime api support
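
For reference, a hedged sketch of calling the new entrypoint directly. litellm._arealtime is private, and every keyword below other than model is an assumption inferred from how the router forwards litellm_params in the diff, not a confirmed signature.

    # Sketch only: the surface of litellm._arealtime may change (see the note
    # above about the structure not being final).
    import litellm

    async def bridge(client_ws):
        await litellm._arealtime(
            model="openai/gpt-4o-realtime-preview-2024-10-01",
            websocket=client_ws,  # assumed: the upgraded client connection being proxied
            api_key="<OPENAI_API_KEY>",  # placeholder credential
        )
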
Commit f9d0bcc5a1 (parent 130842537f), authored by Krish Dholakia on 2024-10-03 17:11:22 -04:00 and committed via GitHub.
11 changed files with 350 additions and 7 deletions


@@ -612,6 +612,7 @@ class Router:
         self, model: str, messages: List[Dict[str, str]], **kwargs
     ) -> Union[ModelResponse, CustomStreamWrapper]:
         model_name = None
+        traceback.print_stack()
         try:
             # pick the one that is available (lowest TPM/RPM)
             deployment = self.get_available_deployment(
@@ -1800,6 +1801,40 @@ class Router:
             self.fail_calls[model_name] += 1
             raise e
 
+    async def _arealtime(self, model: str, **kwargs):
+        messages = [{"role": "user", "content": "dummy-text"}]
+        try:
+            kwargs["num_retries"] = kwargs.get("num_retries", self.num_retries)
+            kwargs.get("request_timeout", self.timeout)
+            kwargs.setdefault("metadata", {}).update({"model_group": model})
+            # pick the one that is available (lowest TPM/RPM)
+            deployment = await self.async_get_available_deployment(
+                model=model,
+                messages=messages,
+                specific_deployment=kwargs.pop("specific_deployment", None),
+            )
+            data = deployment["litellm_params"].copy()
+            for k, v in self.default_litellm_params.items():
+                if (
+                    k not in kwargs
+                ):  # prioritize model-specific params > default router params
+                    kwargs[k] = v
+                elif k == "metadata":
+                    kwargs[k].update(v)
+
+            return await litellm._arealtime(**{**data, "caching": self.cache_responses, **kwargs})  # type: ignore
+        except Exception as e:
+            traceback.print_exc()
+            if self.num_retries > 0:
+                kwargs["model"] = model
+                kwargs["messages"] = messages
+                kwargs["original_function"] = self._arealtime
+                return self.function_with_retries(**kwargs)
+            else:
+                raise e
+
     def text_completion(
         self,
         model: str,
@@ -1813,7 +1848,7 @@ class Router:
         try:
             kwargs["model"] = model
             kwargs["prompt"] = prompt
-            kwargs["original_function"] = self._acompletion
+            kwargs["original_function"] = self.text_completion
             kwargs["num_retries"] = kwargs.get("num_retries", self.num_retries)
             kwargs.get("request_timeout", self.timeout)
             kwargs.setdefault("metadata", {}).update({"model_group": model})
@@ -1840,7 +1875,7 @@ class Router:
             if self.num_retries > 0:
                 kwargs["model"] = model
                 kwargs["messages"] = messages
-                kwargs["original_function"] = self.completion
+                kwargs["original_function"] = self.text_completion
                 return self.function_with_retries(**kwargs)
             else:
                 raise e