OpenAI /v1/realtime api support (#6047)

* feat(azure/realtime): initial working commit for proxy azure openai realtime endpoint support. Adds support for passing /v1/realtime calls via the litellm proxy.
* feat(realtime_api/main.py): abstraction for handling openai realtime api calls
* feat(router.py): add `arealtime()` endpoint in router for realtime api calls. Allows using `model_list` in the proxy for realtime as well.
* fix: make the realtime api a private function. The structure might change based on feedback, so make that clear to users.
* build(requirements.txt): add websockets to requirements.txt
* feat(openai/realtime): add openai /v1/realtime api support
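The client-facing effect of these changes is easiest to see over a raw websocket. A minimal sketch, assuming a litellm proxy listening on localhost:4000 with a realtime-capable model named `gpt-4o-realtime-preview` in its `model_list` (the port, model name, and payload are illustrative, and auth headers are omitted):

```python
# hypothetical client for a litellm proxy exposing /v1/realtime (this PR);
# uses the `websockets` package that the PR adds to requirements.txt
import asyncio
import json

import websockets


async def main():
    # illustrative URL and model name; auth headers depend on your proxy config
    uri = "ws://localhost:4000/v1/realtime?model=gpt-4o-realtime-preview"
    async with websockets.connect(uri) as ws:
        # request a text-only session, then print events the proxy relays back
        await ws.send(
            json.dumps({"type": "session.update", "session": {"modalities": ["text"]}})
        )
        async for event in ws:
            print(event)


asyncio.run(main())
```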
This commit is contained in:
parent 130842537f
commit f9d0bcc5a1
11 changed files with 350 additions and 7 deletions
litellm/router.py

```diff
@@ -612,6 +612,7 @@ class Router:
         self, model: str, messages: List[Dict[str, str]], **kwargs
     ) -> Union[ModelResponse, CustomStreamWrapper]:
         model_name = None
+        traceback.print_stack()
         try:
             # pick the one that is available (lowest TPM/RPM)
             deployment = self.get_available_deployment(
```
```diff
@@ -1800,6 +1801,40 @@ class Router:
             self.fail_calls[model_name] += 1
             raise e
 
+    async def _arealtime(self, model: str, **kwargs):
+        messages = [{"role": "user", "content": "dummy-text"}]
+        try:
+            kwargs["num_retries"] = kwargs.get("num_retries", self.num_retries)
+            kwargs.get("request_timeout", self.timeout)
+            kwargs.setdefault("metadata", {}).update({"model_group": model})
+
+            # pick the one that is available (lowest TPM/RPM)
+            deployment = await self.async_get_available_deployment(
+                model=model,
+                messages=messages,
+                specific_deployment=kwargs.pop("specific_deployment", None),
+            )
+
+            data = deployment["litellm_params"].copy()
+            for k, v in self.default_litellm_params.items():
+                if (
+                    k not in kwargs
+                ):  # prioritize model-specific params > default router params
+                    kwargs[k] = v
+                elif k == "metadata":
+                    kwargs[k].update(v)
+
+            return await litellm._arealtime(**{**data, "caching": self.cache_responses, **kwargs})  # type: ignore
+        except Exception as e:
+            traceback.print_exc()
+            if self.num_retries > 0:
+                kwargs["model"] = model
+                kwargs["messages"] = messages
+                kwargs["original_function"] = self._arealtime
+                return self.function_with_retries(**kwargs)
+            else:
+                raise e
+
     def text_completion(
         self,
         model: str,
```
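The parameter-merge loop in `_arealtime` encodes a precedence rule: per-call kwargs win over router-wide `default_litellm_params`, except `metadata`, which is merged rather than replaced. A self-contained sketch of the same rule, with made-up values:

```python
# standalone illustration of the merge precedence used in _arealtime above
default_litellm_params = {"request_timeout": 600, "metadata": {"team": "infra"}}
kwargs = {"request_timeout": 30, "metadata": {"model_group": "gpt-4o-realtime-preview"}}

for k, v in default_litellm_params.items():
    if k not in kwargs:  # prioritize model-specific params > default router params
        kwargs[k] = v
    elif k == "metadata":
        kwargs[k].update(v)

print(kwargs)
# {'request_timeout': 30,
#  'metadata': {'model_group': 'gpt-4o-realtime-preview', 'team': 'infra'}}
```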
```diff
@@ -1813,7 +1848,7 @@ class Router:
         try:
             kwargs["model"] = model
             kwargs["prompt"] = prompt
-            kwargs["original_function"] = self._acompletion
+            kwargs["original_function"] = self.text_completion
             kwargs["num_retries"] = kwargs.get("num_retries", self.num_retries)
             kwargs.get("request_timeout", self.timeout)
             kwargs.setdefault("metadata", {}).update({"model_group": model})
```
```diff
@@ -1840,7 +1875,7 @@ class Router:
             if self.num_retries > 0:
                 kwargs["model"] = model
                 kwargs["messages"] = messages
-                kwargs["original_function"] = self.completion
+                kwargs["original_function"] = self.text_completion
                 return self.function_with_retries(**kwargs)
             else:
                 raise e
```
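These last two hunks fix the retry path for `text_completion`: `function_with_retries` re-invokes whatever callable is stored under `original_function`, so pointing it at `self._acompletion` or `self.completion` made a failed text completion retry through the chat-completion path instead. A stripped-down sketch of that dispatch pattern (simplified, not litellm's actual implementation):

```python
# simplified retry dispatcher: whatever is stored as original_function is
# what gets re-invoked, so storing the wrong callable retries the wrong API
def function_with_retries(num_retries: int, **kwargs):
    original_function = kwargs.pop("original_function")
    for attempt in range(num_retries + 1):
        try:
            return original_function(**kwargs)
        except Exception:
            if attempt == num_retries:
                raise
```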