From a67cbf47f6a905e69f85ee1b27289bb48a25be2b Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Thu, 30 May 2024 14:28:28 -0700 Subject: [PATCH 1/3] feat(main.py): support openai tts endpoint Closes https://github.com/BerriAI/litellm/issues/3094 --- litellm/__init__.py | 3 +- litellm/llms/openai.py | 89 +++++++++++++++++++ litellm/main.py | 134 +++++++++++++++++++++++++++++ litellm/tests/test_audio_speech.py | 91 ++++++++++++++++++++ litellm/types/llms/openai.py | 8 +- 5 files changed, 322 insertions(+), 3 deletions(-) create mode 100644 litellm/tests/test_audio_speech.py diff --git a/litellm/__init__.py b/litellm/__init__.py index 026eeb833..9fa801318 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -227,7 +227,7 @@ default_team_settings: Optional[List] = None max_user_budget: Optional[float] = None max_end_user_budget: Optional[float] = None #### RELIABILITY #### -request_timeout: Optional[float] = 6000 +request_timeout: float = 6000 num_retries: Optional[int] = None # per model endpoint default_fallbacks: Optional[List] = None fallbacks: Optional[List] = None @@ -304,6 +304,7 @@ api_base = None headers = None api_version = None organization = None +project = None config_path = None ####### COMPLETION MODELS ################### open_ai_chat_completion_models: List = [] diff --git a/litellm/llms/openai.py b/litellm/llms/openai.py index 84d9c773f..4f1649651 100644 --- a/litellm/llms/openai.py +++ b/litellm/llms/openai.py @@ -26,6 +26,7 @@ import litellm from .prompt_templates.factory import prompt_factory, custom_prompt from openai import OpenAI, AsyncOpenAI from ..types.llms.openai import * +import openai class OpenAIError(Exception): @@ -1180,6 +1181,94 @@ class OpenAIChatCompletion(BaseLLM): ) raise e + def audio_speech( + self, + model: str, + input: str, + voice: str, + optional_params: dict, + api_key: Optional[str], + api_base: Optional[str], + organization: Optional[str], + project: Optional[str], + max_retries: int, + timeout: Union[float, httpx.Timeout], + aspeech: Optional[bool] = None, + client=None, + ) -> ResponseContextManager[StreamedBinaryAPIResponse]: + + if aspeech is not None and aspeech == True: + return self.async_audio_speech( + model=model, + input=input, + voice=voice, + optional_params=optional_params, + api_key=api_key, + api_base=api_base, + organization=organization, + project=project, + max_retries=max_retries, + timeout=timeout, + client=client, + ) # type: ignore + + if client is None: + openai_client = OpenAI( + api_key=api_key, + base_url=api_base, + organization=organization, + project=project, + http_client=litellm.client_session, + timeout=timeout, + max_retries=max_retries, + ) + else: + openai_client = client + + response = openai_client.audio.speech.with_streaming_response.create( + model="tts-1", + voice="alloy", + input="the quick brown fox jumped over the lazy dogs", + **optional_params, + ) + return response + + def async_audio_speech( + self, + model: str, + input: str, + voice: str, + optional_params: dict, + api_key: Optional[str], + api_base: Optional[str], + organization: Optional[str], + project: Optional[str], + max_retries: int, + timeout: Union[float, httpx.Timeout], + client=None, + ) -> AsyncResponseContextManager[AsyncStreamedBinaryAPIResponse]: + + if client is None: + openai_client = AsyncOpenAI( + api_key=api_key, + base_url=api_base, + organization=organization, + project=project, + http_client=litellm.aclient_session, + timeout=timeout, + max_retries=max_retries, + ) + else: + openai_client = client + + response = 
openai_client.audio.speech.with_streaming_response.create( + model="tts-1", + voice="alloy", + input="the quick brown fox jumped over the lazy dogs", + **optional_params, + ) + return response + async def ahealth_check( self, model: Optional[str], diff --git a/litellm/main.py b/litellm/main.py index d50694697..458e9bd8c 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -91,6 +91,12 @@ import tiktoken from concurrent.futures import ThreadPoolExecutor from typing import Callable, List, Optional, Dict, Union, Mapping from .caching import enable_cache, disable_cache, update_cache +from .types.llms.openai import ( + StreamedBinaryAPIResponse, + ResponseContextManager, + AsyncResponseContextManager, + AsyncStreamedBinaryAPIResponse, +) encoding = tiktoken.get_encoding("cl100k_base") from litellm.utils import ( @@ -4163,6 +4169,134 @@ def transcription( return response +def aspeech( + *args, **kwargs +) -> AsyncResponseContextManager[AsyncStreamedBinaryAPIResponse]: + """ + Calls openai tts endpoints. + """ + loop = asyncio.get_event_loop() + model = args[0] if len(args) > 0 else kwargs["model"] + ### PASS ARGS TO Image Generation ### + kwargs["aspeech"] = True + custom_llm_provider = kwargs.get("custom_llm_provider", None) + try: + # # Use a partial function to pass your keyword arguments + # func = partial(speech, *args, **kwargs) + + # # Add the context to the function + # ctx = contextvars.copy_context() + # func_with_context = partial(ctx.run, func) + + # _, custom_llm_provider, _, _ = get_llm_provider( + # model=model, api_base=kwargs.get("api_base", None) + # ) + + # # Await normally + # init_response = await loop.run_in_executor(None, func_with_context) + # if asyncio.iscoroutine(init_response): + # response = await init_response + # else: + # # Call the synchronous function using run_in_executor + # response = await loop.run_in_executor(None, func_with_context) + return speech(*args, **kwargs) # type: ignore + except Exception as e: + custom_llm_provider = custom_llm_provider or "openai" + raise exception_type( + model=model, + custom_llm_provider=custom_llm_provider, + original_exception=e, + completion_kwargs=args, + extra_kwargs=kwargs, + ) + + +def speech( + model: str, + input: str, + voice: str, + optional_params: dict, + api_key: Optional[str], + api_base: Optional[str], + organization: Optional[str], + project: Optional[str], + max_retries: int, + timeout: Optional[Union[float, httpx.Timeout]] = None, + response_format: Optional[str] = None, + speed: Optional[int] = None, + client=None, + headers: Optional[dict] = None, + custom_llm_provider: Optional[str] = None, + aspeech: Optional[bool] = None, +) -> ResponseContextManager[StreamedBinaryAPIResponse]: + + model, custom_llm_provider, dynamic_api_key, api_base = get_llm_provider(model=model, custom_llm_provider=custom_llm_provider, api_base=api_base) # type: ignore + + optional_params = {} + if response_format is not None: + optional_params["response_format"] = response_format + if speed is not None: + optional_params["speed"] = speed + + if timeout is None: + timeout = litellm.request_timeout + + response: Optional[ResponseContextManager[StreamedBinaryAPIResponse]] = None + if custom_llm_provider == "openai": + api_base = ( + api_base # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there + or litellm.api_base + or get_secret("OPENAI_API_BASE") + or "https://api.openai.com/v1" + ) # type: ignore + # set API KEY + api_key = ( + api_key + or litellm.api_key # for 
deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there + or litellm.openai_key + or get_secret("OPENAI_API_KEY") + ) # type: ignore + + organization = ( + organization + or litellm.organization + or get_secret("OPENAI_ORGANIZATION") + or None # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105 + ) # type: ignore + + project = ( + project + or litellm.project + or get_secret("OPENAI_PROJECT") + or None # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105 + ) # type: ignore + + headers = headers or litellm.headers + + response = openai_chat_completions.audio_speech( + model=model, + input=input, + voice=voice, + optional_params=optional_params, + api_key=api_key, + api_base=api_base, + organization=organization, + project=project, + max_retries=max_retries, + timeout=timeout, + client=client, # pass AsyncOpenAI, OpenAI client + aspeech=aspeech, + ) + + if response is None: + raise Exception( + "Unable to map the custom llm provider={} to a known provider={}.".format( + custom_llm_provider, litellm.provider_list + ) + ) + return response + + ##### Health Endpoints ####################### diff --git a/litellm/tests/test_audio_speech.py b/litellm/tests/test_audio_speech.py new file mode 100644 index 000000000..9cc6269de --- /dev/null +++ b/litellm/tests/test_audio_speech.py @@ -0,0 +1,91 @@ +# What is this? +## unit tests for openai tts endpoint + +import sys, os, asyncio, time, random, uuid +import traceback +from dotenv import load_dotenv + +load_dotenv() +import os + +sys.path.insert( + 0, os.path.abspath("../..") +) # Adds the parent directory to the system path +import pytest +import litellm, openai +from pathlib import Path + + +@pytest.mark.parametrize("sync_mode", [True, False]) +@pytest.mark.asyncio +async def test_audio_speech_openai(sync_mode): + + speech_file_path = Path(__file__).parent / "speech.mp3" + openai_chat_completions = litellm.OpenAIChatCompletion() + if sync_mode: + with openai_chat_completions.audio_speech( + model="tts-1", + voice="alloy", + input="the quick brown fox jumped over the lazy dogs", + api_base=None, + api_key=None, + organization=None, + project=None, + max_retries=1, + timeout=600, + client=None, + optional_params={}, + ) as response: + response.stream_to_file(speech_file_path) + else: + async with openai_chat_completions.async_audio_speech( + model="tts-1", + voice="alloy", + input="the quick brown fox jumped over the lazy dogs", + api_base=None, + api_key=None, + organization=None, + project=None, + max_retries=1, + timeout=600, + client=None, + optional_params={}, + ) as response: + speech = await response.parse() + + +@pytest.mark.parametrize("sync_mode", [True, False]) +@pytest.mark.asyncio +async def test_audio_speech_litellm(sync_mode): + speech_file_path = Path(__file__).parent / "speech.mp3" + + if sync_mode: + with litellm.speech( + model="openai/tts-1", + voice="alloy", + input="the quick brown fox jumped over the lazy dogs", + api_base=None, + api_key=None, + organization=None, + project=None, + max_retries=1, + timeout=600, + client=None, + optional_params={}, + ) as response: + response.stream_to_file(speech_file_path) + else: + async with litellm.aspeech( + model="openai/tts-1", + voice="alloy", + input="the quick brown fox jumped over the lazy dogs", + api_base=None, + api_key=None, + organization=None, + project=None, + max_retries=1, + timeout=600, + client=None, + 
optional_params={}, + ) as response: + await response.stream_to_file(speech_file_path) diff --git a/litellm/types/llms/openai.py b/litellm/types/llms/openai.py index 57c199b61..2ea72a76e 100644 --- a/litellm/types/llms/openai.py +++ b/litellm/types/llms/openai.py @@ -8,7 +8,6 @@ from typing import ( ) from typing_extensions import override, Required, Dict from pydantic import BaseModel - from openai.types.beta.threads.message_content import MessageContent from openai.types.beta.threads.message import Message as OpenAIMessage from openai.types.beta.thread_create_params import ( @@ -21,7 +20,12 @@ from openai.pagination import SyncCursorPage from os import PathLike from openai.types import FileObject, Batch from openai._legacy_response import HttpxBinaryResponseContent - +from openai._response import ( + StreamedBinaryAPIResponse, + ResponseContextManager, + AsyncStreamedBinaryAPIResponse, + AsyncResponseContextManager, +) from typing import TypedDict, List, Optional, Tuple, Mapping, IO FileContent = Union[IO[bytes], bytes, PathLike] From 93166cdabf4661568c85081ec92958e719081782 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Thu, 30 May 2024 16:41:06 -0700 Subject: [PATCH 2/3] fix(openai.py): fix openai response for `/audio/speech` endpoint --- litellm/llms/openai.py | 23 ++-- litellm/main.py | 64 +++++------ litellm/proxy/_super_secret_config.yaml | 34 +----- litellm/proxy/proxy_server.py | 140 ++++++++++++++++++++++++ litellm/router.py | 78 +++++++++++++ litellm/tests/test_audio_speech.py | 93 ++++++++-------- litellm/types/llms/openai.py | 6 - 7 files changed, 311 insertions(+), 127 deletions(-) diff --git a/litellm/llms/openai.py b/litellm/llms/openai.py index 4f1649651..e68a50347 100644 --- a/litellm/llms/openai.py +++ b/litellm/llms/openai.py @@ -1195,7 +1195,7 @@ class OpenAIChatCompletion(BaseLLM): timeout: Union[float, httpx.Timeout], aspeech: Optional[bool] = None, client=None, - ) -> ResponseContextManager[StreamedBinaryAPIResponse]: + ) -> HttpxBinaryResponseContent: if aspeech is not None and aspeech == True: return self.async_audio_speech( @@ -1225,15 +1225,15 @@ class OpenAIChatCompletion(BaseLLM): else: openai_client = client - response = openai_client.audio.speech.with_streaming_response.create( - model="tts-1", - voice="alloy", - input="the quick brown fox jumped over the lazy dogs", + response = openai_client.audio.speech.create( + model=model, + voice=voice, # type: ignore + input=input, **optional_params, ) return response - def async_audio_speech( + async def async_audio_speech( self, model: str, input: str, @@ -1246,7 +1246,7 @@ class OpenAIChatCompletion(BaseLLM): max_retries: int, timeout: Union[float, httpx.Timeout], client=None, - ) -> AsyncResponseContextManager[AsyncStreamedBinaryAPIResponse]: + ) -> HttpxBinaryResponseContent: if client is None: openai_client = AsyncOpenAI( @@ -1261,12 +1261,13 @@ class OpenAIChatCompletion(BaseLLM): else: openai_client = client - response = openai_client.audio.speech.with_streaming_response.create( - model="tts-1", - voice="alloy", - input="the quick brown fox jumped over the lazy dogs", + response = await openai_client.audio.speech.create( + model=model, + voice=voice, # type: ignore + input=input, **optional_params, ) + return response async def ahealth_check( diff --git a/litellm/main.py b/litellm/main.py index 458e9bd8c..2a07ae3c8 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -91,12 +91,7 @@ import tiktoken from concurrent.futures import ThreadPoolExecutor from typing import Callable, List, Optional, Dict, 
Union, Mapping from .caching import enable_cache, disable_cache, update_cache -from .types.llms.openai import ( - StreamedBinaryAPIResponse, - ResponseContextManager, - AsyncResponseContextManager, - AsyncStreamedBinaryAPIResponse, -) +from .types.llms.openai import HttpxBinaryResponseContent encoding = tiktoken.get_encoding("cl100k_base") from litellm.utils import ( @@ -4169,9 +4164,7 @@ def transcription( return response -def aspeech( - *args, **kwargs -) -> AsyncResponseContextManager[AsyncStreamedBinaryAPIResponse]: +async def aspeech(*args, **kwargs) -> HttpxBinaryResponseContent: """ Calls openai tts endpoints. """ @@ -4181,25 +4174,25 @@ def aspeech( kwargs["aspeech"] = True custom_llm_provider = kwargs.get("custom_llm_provider", None) try: - # # Use a partial function to pass your keyword arguments - # func = partial(speech, *args, **kwargs) + # Use a partial function to pass your keyword arguments + func = partial(speech, *args, **kwargs) - # # Add the context to the function - # ctx = contextvars.copy_context() - # func_with_context = partial(ctx.run, func) + # Add the context to the function + ctx = contextvars.copy_context() + func_with_context = partial(ctx.run, func) - # _, custom_llm_provider, _, _ = get_llm_provider( - # model=model, api_base=kwargs.get("api_base", None) - # ) + _, custom_llm_provider, _, _ = get_llm_provider( + model=model, api_base=kwargs.get("api_base", None) + ) - # # Await normally - # init_response = await loop.run_in_executor(None, func_with_context) - # if asyncio.iscoroutine(init_response): - # response = await init_response - # else: - # # Call the synchronous function using run_in_executor - # response = await loop.run_in_executor(None, func_with_context) - return speech(*args, **kwargs) # type: ignore + # Await normally + init_response = await loop.run_in_executor(None, func_with_context) + if asyncio.iscoroutine(init_response): + response = await init_response + else: + # Call the synchronous function using run_in_executor + response = await loop.run_in_executor(None, func_with_context) + return response # type: ignore except Exception as e: custom_llm_provider = custom_llm_provider or "openai" raise exception_type( @@ -4215,12 +4208,12 @@ def speech( model: str, input: str, voice: str, - optional_params: dict, - api_key: Optional[str], - api_base: Optional[str], - organization: Optional[str], - project: Optional[str], - max_retries: int, + api_key: Optional[str] = None, + api_base: Optional[str] = None, + organization: Optional[str] = None, + project: Optional[str] = None, + max_retries: Optional[int] = None, + metadata: Optional[dict] = None, timeout: Optional[Union[float, httpx.Timeout]] = None, response_format: Optional[str] = None, speed: Optional[int] = None, @@ -4228,7 +4221,8 @@ def speech( headers: Optional[dict] = None, custom_llm_provider: Optional[str] = None, aspeech: Optional[bool] = None, -) -> ResponseContextManager[StreamedBinaryAPIResponse]: + **kwargs, +) -> HttpxBinaryResponseContent: model, custom_llm_provider, dynamic_api_key, api_base = get_llm_provider(model=model, custom_llm_provider=custom_llm_provider, api_base=api_base) # type: ignore @@ -4236,12 +4230,14 @@ def speech( if response_format is not None: optional_params["response_format"] = response_format if speed is not None: - optional_params["speed"] = speed + optional_params["speed"] = speed # type: ignore if timeout is None: timeout = litellm.request_timeout - response: Optional[ResponseContextManager[StreamedBinaryAPIResponse]] = None + if max_retries is None: + 
max_retries = litellm.num_retries or openai.DEFAULT_MAX_RETRIES + response: Optional[HttpxBinaryResponseContent] = None if custom_llm_provider == "openai": api_base = ( api_base # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there diff --git a/litellm/proxy/_super_secret_config.yaml b/litellm/proxy/_super_secret_config.yaml index 95cf05e71..9c9982428 100644 --- a/litellm/proxy/_super_secret_config.yaml +++ b/litellm/proxy/_super_secret_config.yaml @@ -1,31 +1,3 @@ -general_settings: - alert_to_webhook_url: - budget_alerts: https://hooks.slack.com/services/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 - daily_reports: https://hooks.slack.com/services/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 - db_exceptions: https://hooks.slack.com/services/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 - llm_exceptions: https://hooks.slack.com/services/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 - llm_requests_hanging: https://hooks.slack.com/services/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 - llm_too_slow: https://hooks.slack.com/services/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 - outage_alerts: https://hooks.slack.com/services/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 - alert_types: - - llm_exceptions - - llm_too_slow - - llm_requests_hanging - - budget_alerts - - db_exceptions - - daily_reports - - spend_reports - - cooldown_deployment - - new_model_added - - outage_alerts - alerting: - - slack - database_connection_pool_limit: 100 - database_connection_timeout: 60 - health_check_interval: 300 - ui_access_mode: all -# litellm_settings: -# json_logs: true model_list: - litellm_params: api_base: http://0.0.0.0:8080 @@ -52,10 +24,8 @@ model_list: api_version: '2023-05-15' model: azure/chatgpt-v-2 model_name: gpt-3.5-turbo -- model_name: mistral +- model_name: tts litellm_params: - model: azure/mistral-large-latest - api_base: https://Mistral-large-nmefg-serverless.eastus2.inference.ai.azure.com/v1/ - api_key: zEJhgmw1FAKk0XzPWoLEg7WU1cXbWYYn + model: openai/tts-1 router_settings: enable_pre_call_checks: true diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 56aa1b35e..e2a3425f2 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -79,6 +79,9 @@ def generate_feedback_box(): import litellm +from litellm.types.llms.openai import ( + HttpxBinaryResponseContent, +) from litellm.proxy.utils import ( PrismaClient, DBClient, @@ -4875,6 +4878,143 @@ async def image_generation( ) +@router.post( + "/v1/audio/speech", + dependencies=[Depends(user_api_key_auth)], + tags=["audio"], +) +@router.post( + "/audio/speech", + dependencies=[Depends(user_api_key_auth)], + tags=["audio"], +) +async def audio_speech( + request: Request, + fastapi_response: Response, + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), +): + """ + Same params as: + + https://platform.openai.com/docs/api-reference/audio/createSpeech + """ + global proxy_logging_obj + data: Dict = {} + try: + # Use orjson to parse JSON data, orjson speeds up requests significantly + body = await request.body() + data = orjson.loads(body) + + # Include original request and headers in the data + data["proxy_server_request"] = { # type: ignore + "url": str(request.url), + "method": request.method, + "headers": dict(request.headers), + "body": copy.copy(data), # use copy instead of deepcopy + } + + if data.get("user", None) is None and user_api_key_dict.user_id is not None: + data["user"] = 
user_api_key_dict.user_id + + if user_model: + data["model"] = user_model + + if "metadata" not in data: + data["metadata"] = {} + data["metadata"]["user_api_key"] = user_api_key_dict.api_key + data["metadata"]["user_api_key_metadata"] = user_api_key_dict.metadata + _headers = dict(request.headers) + _headers.pop( + "authorization", None + ) # do not store the original `sk-..` api key in the db + data["metadata"]["headers"] = _headers + data["metadata"]["user_api_key_alias"] = getattr( + user_api_key_dict, "key_alias", None + ) + data["metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id + data["metadata"]["user_api_key_team_id"] = getattr( + user_api_key_dict, "team_id", None + ) + data["metadata"]["global_max_parallel_requests"] = general_settings.get( + "global_max_parallel_requests", None + ) + data["metadata"]["user_api_key_team_alias"] = getattr( + user_api_key_dict, "team_alias", None + ) + data["metadata"]["endpoint"] = str(request.url) + + ### TEAM-SPECIFIC PARAMS ### + if user_api_key_dict.team_id is not None: + team_config = await proxy_config.load_team_config( + team_id=user_api_key_dict.team_id + ) + if len(team_config) == 0: + pass + else: + team_id = team_config.pop("team_id", None) + data["metadata"]["team_id"] = team_id + data = { + **team_config, + **data, + } # add the team-specific configs to the completion call + + router_model_names = llm_router.model_names if llm_router is not None else [] + + ### CALL HOOKS ### - modify incoming data / reject request before calling the model + data = await proxy_logging_obj.pre_call_hook( + user_api_key_dict=user_api_key_dict, data=data, call_type="image_generation" + ) + + ## ROUTE TO CORRECT ENDPOINT ## + # skip router if user passed their key + if "api_key" in data: + response = await litellm.aspeech(**data) + elif ( + llm_router is not None and data["model"] in router_model_names + ): # model in router model list + response = await llm_router.aspeech(**data) + elif ( + llm_router is not None and data["model"] in llm_router.deployment_names + ): # model in router deployments, calling a specific deployment on the router + response = await llm_router.aspeech(**data, specific_deployment=True) + elif ( + llm_router is not None + and llm_router.model_group_alias is not None + and data["model"] in llm_router.model_group_alias + ): # model set in model_group_alias + response = await llm_router.aspeech( + **data + ) # ensure this goes the llm_router, router will do the correct alias mapping + elif ( + llm_router is not None + and data["model"] not in router_model_names + and llm_router.default_deployment is not None + ): # model in router deployments, calling a specific deployment on the router + response = await llm_router.aspeech(**data) + elif user_model is not None: # `litellm --model ` + response = await litellm.aspeech(**data) + else: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail={ + "error": "audio_speech: Invalid model name passed in model=" + + data.get("model", "") + }, + ) + + # Printing each chunk size + async def generate(_response: HttpxBinaryResponseContent): + _generator = await _response.aiter_bytes(chunk_size=1024) + async for chunk in _generator: + yield chunk + + return StreamingResponse(generate(response), media_type="audio/mpeg") + + except Exception as e: + traceback.print_exc() + raise e + + @router.post( "/v1/audio/transcriptions", dependencies=[Depends(user_api_key_auth)], diff --git a/litellm/router.py b/litellm/router.py index d7535a83a..4474864c3 100644 --- 
a/litellm/router.py +++ b/litellm/router.py @@ -1202,6 +1202,84 @@ class Router: self.fail_calls[model_name] += 1 raise e + async def aspeech(self, model: str, input: str, voice: str, **kwargs): + """ + Example Usage: + + ``` + from litellm import Router + client = Router(model_list = [ + { + "model_name": "tts", + "litellm_params": { + "model": "tts-1", + }, + }, + ]) + + async with client.aspeech( + model="tts", + voice="alloy", + input="the quick brown fox jumped over the lazy dogs", + api_base=None, + api_key=None, + organization=None, + project=None, + max_retries=1, + timeout=600, + client=None, + optional_params={}, + ) as response: + response.stream_to_file(speech_file_path) + + ``` + """ + try: + kwargs["input"] = input + kwargs["voice"] = voice + + deployment = await self.async_get_available_deployment( + model=model, + messages=[{"role": "user", "content": "prompt"}], + specific_deployment=kwargs.pop("specific_deployment", None), + ) + kwargs.setdefault("metadata", {}).update( + { + "deployment": deployment["litellm_params"]["model"], + "model_info": deployment.get("model_info", {}), + } + ) + kwargs["model_info"] = deployment.get("model_info", {}) + data = deployment["litellm_params"].copy() + model_name = data["model"] + for k, v in self.default_litellm_params.items(): + if ( + k not in kwargs + ): # prioritize model-specific params > default router params + kwargs[k] = v + elif k == "metadata": + kwargs[k].update(v) + + potential_model_client = self._get_client( + deployment=deployment, kwargs=kwargs, client_type="async" + ) + # check if provided keys == client keys # + dynamic_api_key = kwargs.get("api_key", None) + if ( + dynamic_api_key is not None + and potential_model_client is not None + and dynamic_api_key != potential_model_client.api_key + ): + model_client = None + else: + model_client = potential_model_client + + response = await litellm.aspeech(**data, **kwargs) + + return response + except Exception as e: + raise e + async def amoderation(self, model: str, input: str, **kwargs): try: kwargs["model"] = model diff --git a/litellm/tests/test_audio_speech.py b/litellm/tests/test_audio_speech.py index 9cc6269de..dde196d9c 100644 --- a/litellm/tests/test_audio_speech.py +++ b/litellm/tests/test_audio_speech.py @@ -16,51 +16,13 @@ import litellm, openai from pathlib import Path -@pytest.mark.parametrize("sync_mode", [True, False]) -@pytest.mark.asyncio -async def test_audio_speech_openai(sync_mode): - - speech_file_path = Path(__file__).parent / "speech.mp3" - openai_chat_completions = litellm.OpenAIChatCompletion() - if sync_mode: - with openai_chat_completions.audio_speech( - model="tts-1", - voice="alloy", - input="the quick brown fox jumped over the lazy dogs", - api_base=None, - api_key=None, - organization=None, - project=None, - max_retries=1, - timeout=600, - client=None, - optional_params={}, - ) as response: - response.stream_to_file(speech_file_path) - else: - async with openai_chat_completions.async_audio_speech( - model="tts-1", - voice="alloy", - input="the quick brown fox jumped over the lazy dogs", - api_base=None, - api_key=None, - organization=None, - project=None, - max_retries=1, - timeout=600, - client=None, - optional_params={}, - ) as response: - speech = await response.parse() - - @pytest.mark.parametrize("sync_mode", [True, False]) @pytest.mark.asyncio async def test_audio_speech_litellm(sync_mode): speech_file_path = Path(__file__).parent / "speech.mp3" if sync_mode: - with litellm.speech( + response = litellm.speech( model="openai/tts-1", 
voice="alloy", input="the quick brown fox jumped over the lazy dogs", @@ -72,10 +34,13 @@ async def test_audio_speech_litellm(sync_mode): timeout=600, client=None, optional_params={}, - ) as response: - response.stream_to_file(speech_file_path) + ) + + from litellm.llms.openai import HttpxBinaryResponseContent + + assert isinstance(response, HttpxBinaryResponseContent) else: - async with litellm.aspeech( + response = await litellm.aspeech( model="openai/tts-1", voice="alloy", input="the quick brown fox jumped over the lazy dogs", @@ -87,5 +52,45 @@ async def test_audio_speech_litellm(sync_mode): timeout=600, client=None, optional_params={}, - ) as response: - await response.stream_to_file(speech_file_path) + ) + + from litellm.llms.openai import HttpxBinaryResponseContent + + assert isinstance(response, HttpxBinaryResponseContent) + + +@pytest.mark.parametrize("mode", ["iterator"]) # "file", +@pytest.mark.asyncio +async def test_audio_speech_router(mode): + speech_file_path = Path(__file__).parent / "speech.mp3" + + from litellm import Router + + client = Router( + model_list=[ + { + "model_name": "tts", + "litellm_params": { + "model": "openai/tts-1", + }, + }, + ] + ) + + response = await client.aspeech( + model="tts", + voice="alloy", + input="the quick brown fox jumped over the lazy dogs", + api_base=None, + api_key=None, + organization=None, + project=None, + max_retries=1, + timeout=600, + client=None, + optional_params={}, + ) + + from litellm.llms.openai import HttpxBinaryResponseContent + + assert isinstance(response, HttpxBinaryResponseContent) diff --git a/litellm/types/llms/openai.py b/litellm/types/llms/openai.py index 2ea72a76e..964ac9c9b 100644 --- a/litellm/types/llms/openai.py +++ b/litellm/types/llms/openai.py @@ -20,12 +20,6 @@ from openai.pagination import SyncCursorPage from os import PathLike from openai.types import FileObject, Batch from openai._legacy_response import HttpxBinaryResponseContent -from openai._response import ( - StreamedBinaryAPIResponse, - ResponseContextManager, - AsyncStreamedBinaryAPIResponse, - AsyncResponseContextManager, -) from typing import TypedDict, List, Optional, Tuple, Mapping, IO FileContent = Union[IO[bytes], bytes, PathLike] From d65b7fe01b1737eb75beb4868e93fa1695ce8c09 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Thu, 30 May 2024 16:57:11 -0700 Subject: [PATCH 3/3] fix(main.py): add logging to audio_transcription calls --- litellm/main.py | 2 ++ .../out/{404.html => 404/index.html} | 0 .../{model_hub.html => model_hub/index.html} | 0 litellm/proxy/proxy_server.py | 28 ++++++++++++++++++- litellm/utils.py | 8 ++++++ 5 files changed, 37 insertions(+), 1 deletion(-) rename litellm/proxy/_experimental/out/{404.html => 404/index.html} (100%) rename litellm/proxy/_experimental/out/{model_hub.html => model_hub/index.html} (100%) diff --git a/litellm/main.py b/litellm/main.py index 2a07ae3c8..525a39d68 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -4164,6 +4164,7 @@ def transcription( return response +@client async def aspeech(*args, **kwargs) -> HttpxBinaryResponseContent: """ Calls openai tts endpoints. 
@@ -4204,6 +4205,7 @@ async def aspeech(*args, **kwargs) -> HttpxBinaryResponseContent: ) +@client def speech( model: str, input: str, diff --git a/litellm/proxy/_experimental/out/404.html b/litellm/proxy/_experimental/out/404/index.html similarity index 100% rename from litellm/proxy/_experimental/out/404.html rename to litellm/proxy/_experimental/out/404/index.html diff --git a/litellm/proxy/_experimental/out/model_hub.html b/litellm/proxy/_experimental/out/model_hub/index.html similarity index 100% rename from litellm/proxy/_experimental/out/model_hub.html rename to litellm/proxy/_experimental/out/model_hub/index.html diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index e2a3425f2..2bd08fb89 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -5002,13 +5002,39 @@ async def audio_speech( }, ) + ### ALERTING ### + data["litellm_status"] = "success" # used for alerting + + ### RESPONSE HEADERS ### + hidden_params = getattr(response, "_hidden_params", {}) or {} + model_id = hidden_params.get("model_id", None) or "" + cache_key = hidden_params.get("cache_key", None) or "" + api_base = hidden_params.get("api_base", None) or "" + # Printing each chunk size async def generate(_response: HttpxBinaryResponseContent): _generator = await _response.aiter_bytes(chunk_size=1024) async for chunk in _generator: yield chunk - return StreamingResponse(generate(response), media_type="audio/mpeg") + custom_headers = get_custom_headers( + user_api_key_dict=user_api_key_dict, + model_id=model_id, + cache_key=cache_key, + api_base=api_base, + version=version, + model_region=getattr(user_api_key_dict, "allowed_model_region", ""), + fastest_response_batch_completion=None, + ) + + selected_data_generator = select_data_generator( + response=response, + user_api_key_dict=user_api_key_dict, + request_data=data, + ) + return StreamingResponse( + generate(response), media_type="audio/mpeg", headers=custom_headers + ) except Exception as e: traceback.print_exc() diff --git a/litellm/utils.py b/litellm/utils.py index 95d9160ef..b48ab9b15 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -1136,6 +1136,8 @@ class CallTypes(Enum): amoderation = "amoderation" atranscription = "atranscription" transcription = "transcription" + aspeech = "aspeech" + speech = "speech" # Logging function -> log the exact model details + what's being sent | Non-BlockingP @@ -3005,6 +3007,10 @@ def function_setup( ): _file_name: BinaryIO = args[1] if len(args) > 1 else kwargs["file"] messages = "audio_file" + elif ( + call_type == CallTypes.aspeech.value or call_type == CallTypes.speech.value + ): + messages = kwargs.get("input", "speech") stream = True if "stream" in kwargs and kwargs["stream"] == True else False logging_obj = Logging( model=model, @@ -3346,6 +3352,8 @@ def client(original_function): return result elif "atranscription" in kwargs and kwargs["atranscription"] == True: return result + elif "aspeech" in kwargs and kwargs["aspeech"] == True: + return result ### POST-CALL RULES ### post_call_processing(original_response=result, model=model or None)
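A minimal end-to-end sketch of the `litellm.speech` / `litellm.aspeech` surface added in this series, pieced together from `litellm/tests/test_audio_speech.py` and the router docstring above; it assumes `OPENAI_API_KEY` is exported and the `speech.mp3` output path is purely illustrative:

```python
# Sketch of the new TTS entry points (PATCH 1-2/3); assumes OPENAI_API_KEY is set.
import asyncio
from pathlib import Path

import litellm
from litellm.llms.openai import HttpxBinaryResponseContent

speech_file_path = Path("speech.mp3")  # illustrative output path

# Sync: litellm.speech() returns the OpenAI SDK's raw binary response object
response = litellm.speech(
    model="openai/tts-1",
    voice="alloy",
    input="the quick brown fox jumped over the lazy dogs",
)
response.stream_to_file(speech_file_path)


# Async: litellm.aspeech() runs the same call in an executor and returns the same type
async def main() -> None:
    aresponse = await litellm.aspeech(
        model="openai/tts-1",
        voice="alloy",
        input="the quick brown fox jumped over the lazy dogs",
    )
    assert isinstance(aresponse, HttpxBinaryResponseContent)


asyncio.run(main())
```

After PATCH 2/3 both entry points return `HttpxBinaryResponseContent` rather than a streaming context manager, which is what lets the proxy re-stream the body itself via `aiter_bytes` in the `/audio/speech` route.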
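For the proxy route added in PATCH 2/3, the OpenAI SDK can be pointed at the litellm proxy directly; the URL, port, and `sk-1234` key below are illustrative assumptions, and `tts` is the `model_name` alias added to `_super_secret_config.yaml` in this series:

```python
# Sketch: calling the new /audio/speech proxy route (also mounted at /v1/audio/speech)
# with the OpenAI SDK. base_url, port, and api_key are illustrative; start the proxy
# with the config from this series so the "tts" alias maps to openai/tts-1.
from pathlib import Path

from openai import OpenAI

client = OpenAI(base_url="http://0.0.0.0:4000", api_key="sk-1234")

response = client.audio.speech.create(
    model="tts",
    voice="alloy",
    input="the quick brown fox jumped over the lazy dogs",
)
response.stream_to_file(Path("proxy_speech.mp3"))
```

The route streams the bytes back as `audio/mpeg`, so the SDK-side response can be written to disk the same way as the direct `litellm.speech` call.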