OpenAI /v1/realtime api support (#6047)

* feat(azure/realtime): initial working commit for proxy azure openai realtime endpoint support

Adds support for passing /v1/realtime calls via the litellm proxy.
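
A minimal client sketch of what that enables (not part of the commit): it assumes the proxy is listening on localhost:4000 (litellm's default port), that "sk-1234" is a valid proxy key, and that the model in the query string names a configured realtime deployment.

    # Hypothetical client, for illustration only.
    import asyncio
    import json
    import websockets  # the dependency this PR adds to requirements.txt

    async def main():
        uri = "ws://localhost:4000/v1/realtime?model=gpt-4o-realtime-preview-2024-10-01"
        # `extra_headers` is the keyword on websockets releases current at the
        # time of this commit; newer releases renamed it to `additional_headers`.
        async with websockets.connect(
            uri, extra_headers={"Authorization": "Bearer sk-1234"}
        ) as ws:
            await ws.send(json.dumps({"type": "response.create"}))
            print(await ws.recv())

    asyncio.run(main())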

* feat(realtime_api/main.py): abstraction for handling openai realtime api calls

* feat(router.py): add `arealtime()` endpoint in router for realtime api calls

Allows using `model_list` in the proxy for realtime calls as well.
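
As a sketch of the `model_list` side (the alias and pinned model below are illustrative, not from the commit):

    # Hypothetical Router setup; names and the model version are placeholders.
    from litellm import Router

    router = Router(
        model_list=[
            {
                "model_name": "realtime",  # alias clients request
                "litellm_params": {
                    "model": "openai/gpt-4o-realtime-preview-2024-10-01",
                    "api_key": "os.environ/OPENAI_API_KEY",
                },
            }
        ]
    )
    # A realtime call against "realtime" then picks the lowest-TPM/RPM
    # deployment, exactly as the _arealtime implementation below does.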

* fix: make realtime api a private function

The structure might change based on feedback; make that clear to users.

* build(requirements.txt): add `websockets` to requirements.txt

* feat(openai/realtime): add openai /v1/realtime api support
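
For reference, a hedged sketch of calling the new entrypoint directly. litellm._arealtime is private, and every keyword below other than model is an assumption inferred from how the router forwards litellm_params in the diff, not a confirmed signature.

    # Sketch only: the surface of litellm._arealtime may change (see the note
    # above about the structure not being final).
    import litellm

    async def bridge(client_ws):
        await litellm._arealtime(
            model="openai/gpt-4o-realtime-preview-2024-10-01",
            websocket=client_ws,  # assumed: the upgraded client connection being proxied
            api_key="<OPENAI_API_KEY>",  # placeholder credential
        )
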
Commit f9d0bcc5a1 (parent 130842537f), authored by Krish Dholakia on 2024-10-03 17:11:22 -04:00 and committed via GitHub.
11 changed files with 350 additions and 7 deletions


@@ -612,6 +612,7 @@ class Router:
         self, model: str, messages: List[Dict[str, str]], **kwargs
     ) -> Union[ModelResponse, CustomStreamWrapper]:
         model_name = None
+        traceback.print_stack()
         try:
             # pick the one that is available (lowest TPM/RPM)
             deployment = self.get_available_deployment(
@@ -1800,6 +1801,40 @@ class Router:
             self.fail_calls[model_name] += 1
             raise e
 
+    async def _arealtime(self, model: str, **kwargs):
+        messages = [{"role": "user", "content": "dummy-text"}]
+        try:
+            kwargs["num_retries"] = kwargs.get("num_retries", self.num_retries)
+            kwargs.get("request_timeout", self.timeout)
+            kwargs.setdefault("metadata", {}).update({"model_group": model})
+            # pick the one that is available (lowest TPM/RPM)
+            deployment = await self.async_get_available_deployment(
+                model=model,
+                messages=messages,
+                specific_deployment=kwargs.pop("specific_deployment", None),
+            )
+            data = deployment["litellm_params"].copy()
+            for k, v in self.default_litellm_params.items():
+                if (
+                    k not in kwargs
+                ):  # prioritize model-specific params > default router params
+                    kwargs[k] = v
+                elif k == "metadata":
+                    kwargs[k].update(v)
+
+            return await litellm._arealtime(**{**data, "caching": self.cache_responses, **kwargs})  # type: ignore
+        except Exception as e:
+            traceback.print_exc()
+            if self.num_retries > 0:
+                kwargs["model"] = model
+                kwargs["messages"] = messages
+                kwargs["original_function"] = self._arealtime
+                return self.function_with_retries(**kwargs)
+            else:
+                raise e
+
     def text_completion(
         self,
         model: str,
@@ -1813,7 +1848,7 @@ class Router:
         try:
             kwargs["model"] = model
             kwargs["prompt"] = prompt
-            kwargs["original_function"] = self._acompletion
+            kwargs["original_function"] = self.text_completion
             kwargs["num_retries"] = kwargs.get("num_retries", self.num_retries)
             kwargs.get("request_timeout", self.timeout)
             kwargs.setdefault("metadata", {}).update({"model_group": model})
@@ -1840,7 +1875,7 @@ class Router:
             if self.num_retries > 0:
                 kwargs["model"] = model
                 kwargs["messages"] = messages
-                kwargs["original_function"] = self.completion
+                kwargs["original_function"] = self.text_completion
                 return self.function_with_retries(**kwargs)
             else:
                 raise e