feat(main.py): support openai tts endpoint

Closes https://github.com/BerriAI/litellm/issues/3094
2025-04-26 19:24:27 +00:00 · 2024-05-30 14:28:28 -07:00 · 2024-05-30 14:28:28 -07:00 · 1e89a1f56e
commit 1e89a1f56e
parent 741bfb9cef
5 changed files with 322 additions and 3 deletions
--- a/litellm/main.py
+++ b/litellm/main.py
@ -91,6 +91,12 @@ import tiktoken
 from concurrent.futures import ThreadPoolExecutor
 from typing import Callable, List, Optional, Dict, Union, Mapping
 from .caching import enable_cache, disable_cache, update_cache
+from .types.llms.openai import (
+    StreamedBinaryAPIResponse,
+    ResponseContextManager,
+    AsyncResponseContextManager,
+    AsyncStreamedBinaryAPIResponse,
+)

 encoding = tiktoken.get_encoding("cl100k_base")
 from litellm.utils import (
@ -4163,6 +4169,134 @@ def transcription(
    return response


+def aspeech(
+    *args, **kwargs
+) -> AsyncResponseContextManager[AsyncStreamedBinaryAPIResponse]:
+    """
+    Calls openai tts endpoints.
+    """
+    loop = asyncio.get_event_loop()
+    model = args[0] if len(args) > 0 else kwargs["model"]
+    ### PASS ARGS TO Image Generation ###
+    kwargs["aspeech"] = True
+    custom_llm_provider = kwargs.get("custom_llm_provider", None)
+    try:
+        # # Use a partial function to pass your keyword arguments
+        # func = partial(speech, *args, **kwargs)
+
+        # # Add the context to the function
+        # ctx = contextvars.copy_context()
+        # func_with_context = partial(ctx.run, func)
+
+        # _, custom_llm_provider, _, _ = get_llm_provider(
+        #     model=model, api_base=kwargs.get("api_base", None)
+        # )
+
+        # # Await normally
+        # init_response = await loop.run_in_executor(None, func_with_context)
+        # if asyncio.iscoroutine(init_response):
+        #     response = await init_response
+        # else:
+        #     # Call the synchronous function using run_in_executor
+        #     response = await loop.run_in_executor(None, func_with_context)
+        return speech(*args, **kwargs)  # type: ignore
+    except Exception as e:
+        custom_llm_provider = custom_llm_provider or "openai"
+        raise exception_type(
+            model=model,
+            custom_llm_provider=custom_llm_provider,
+            original_exception=e,
+            completion_kwargs=args,
+            extra_kwargs=kwargs,
+        )
+
+
+def speech(
+    model: str,
+    input: str,
+    voice: str,
+    optional_params: dict,
+    api_key: Optional[str],
+    api_base: Optional[str],
+    organization: Optional[str],
+    project: Optional[str],
+    max_retries: int,
+    timeout: Optional[Union[float, httpx.Timeout]] = None,
+    response_format: Optional[str] = None,
+    speed: Optional[int] = None,
+    client=None,
+    headers: Optional[dict] = None,
+    custom_llm_provider: Optional[str] = None,
+    aspeech: Optional[bool] = None,
+) -> ResponseContextManager[StreamedBinaryAPIResponse]:
+
+    model, custom_llm_provider, dynamic_api_key, api_base = get_llm_provider(model=model, custom_llm_provider=custom_llm_provider, api_base=api_base)  # type: ignore
+
+    optional_params = {}
+    if response_format is not None:
+        optional_params["response_format"] = response_format
+    if speed is not None:
+        optional_params["speed"] = speed
+
+    if timeout is None:
+        timeout = litellm.request_timeout
+
+    response: Optional[ResponseContextManager[StreamedBinaryAPIResponse]] = None
+    if custom_llm_provider == "openai":
+        api_base = (
+            api_base  # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
+            or litellm.api_base
+            or get_secret("OPENAI_API_BASE")
+            or "https://api.openai.com/v1"
+        )  # type: ignore
+        # set API KEY
+        api_key = (
+            api_key
+            or litellm.api_key  # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there
+            or litellm.openai_key
+            or get_secret("OPENAI_API_KEY")
+        )  # type: ignore
+
+        organization = (
+            organization
+            or litellm.organization
+            or get_secret("OPENAI_ORGANIZATION")
+            or None  # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105
+        )  # type: ignore
+
+        project = (
+            project
+            or litellm.project
+            or get_secret("OPENAI_PROJECT")
+            or None  # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105
+        )  # type: ignore
+
+        headers = headers or litellm.headers
+
+        response = openai_chat_completions.audio_speech(
+            model=model,
+            input=input,
+            voice=voice,
+            optional_params=optional_params,
+            api_key=api_key,
+            api_base=api_base,
+            organization=organization,
+            project=project,
+            max_retries=max_retries,
+            timeout=timeout,
+            client=client,  # pass AsyncOpenAI, OpenAI client
+            aspeech=aspeech,
+        )
+
+    if response is None:
+        raise Exception(
+            "Unable to map the custom llm provider={} to a known provider={}.".format(
+                custom_llm_provider, litellm.provider_list
+            )
+        )
+    return response
+
+
 ##### Health Endpoints #######################