diff --git a/litellm/llms/azure.py b/litellm/llms/azure.py
index fe10cc017c..11bf5c3f0d 100644
--- a/litellm/llms/azure.py
+++ b/litellm/llms/azure.py
@@ -42,6 +42,7 @@ from ..types.llms.openai import (
     AsyncAssistantEventHandler,
     AsyncAssistantStreamManager,
     AsyncCursorPage,
+    HttpxBinaryResponseContent,
     MessageData,
     OpenAICreateThreadParamsMessage,
     OpenAIMessage,
@@ -414,6 +415,49 @@ class AzureChatCompletion(BaseLLM):
             headers["Authorization"] = f"Bearer {azure_ad_token}"
         return headers

+    def _get_sync_azure_client(
+        self,
+        api_version: Optional[str],
+        api_base: Optional[str],
+        api_key: Optional[str],
+        azure_ad_token: Optional[str],
+        model: str,
+        max_retries: int,
+        timeout: Union[float, httpx.Timeout],
+        client: Optional[Any],
+        client_type: Literal["sync", "async"],
+    ):
+        # init AzureOpenAI Client
+        azure_client_params = {
+            "api_version": api_version,
+            "azure_endpoint": api_base,
+            "azure_deployment": model,
+            "http_client": litellm.client_session,
+            "max_retries": max_retries,
+            "timeout": timeout,
+        }
+        azure_client_params = select_azure_base_url_or_endpoint(
+            azure_client_params=azure_client_params
+        )
+        if api_key is not None:
+            azure_client_params["api_key"] = api_key
+        elif azure_ad_token is not None:
+            if azure_ad_token.startswith("oidc/"):
+                azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
+            azure_client_params["azure_ad_token"] = azure_ad_token
+        if client is None:
+            if client_type == "sync":
+                azure_client = AzureOpenAI(**azure_client_params)  # type: ignore
+            elif client_type == "async":
+                azure_client = AsyncAzureOpenAI(**azure_client_params)  # type: ignore
+        else:
+            azure_client = client
+            if api_version is not None and isinstance(azure_client._custom_query, dict):
+                # set api_version to version passed by user
+                azure_client._custom_query.setdefault("api-version", api_version)
+
+        return azure_client
+
     def completion(
         self,
         model: str,
@@ -1256,6 +1300,96 @@ class AzureChatCompletion(BaseLLM):
             )
             raise e

+    def audio_speech(
+        self,
+        model: str,
+        input: str,
+        voice: str,
+        optional_params: dict,
+        api_key: Optional[str],
+        api_base: Optional[str],
+        api_version: Optional[str],
+        organization: Optional[str],
+        max_retries: int,
+        timeout: Union[float, httpx.Timeout],
+        azure_ad_token: Optional[str] = None,
+        aspeech: Optional[bool] = None,
+        client=None,
+    ) -> HttpxBinaryResponseContent:
+
+        max_retries = optional_params.pop("max_retries", 2)
+
+        if aspeech is not None and aspeech is True:
+            return self.async_audio_speech(
+                model=model,
+                input=input,
+                voice=voice,
+                optional_params=optional_params,
+                api_key=api_key,
+                api_base=api_base,
+                api_version=api_version,
+                azure_ad_token=azure_ad_token,
+                max_retries=max_retries,
+                timeout=timeout,
+                client=client,
+            )  # type: ignore
+
+        azure_client: AzureOpenAI = self._get_sync_azure_client(
+            api_base=api_base,
+            api_version=api_version,
+            api_key=api_key,
+            azure_ad_token=azure_ad_token,
+            model=model,
+            max_retries=max_retries,
+            timeout=timeout,
+            client=client,
+            client_type="sync",
+        )  # type: ignore
+
+        response = azure_client.audio.speech.create(
+            model=model,
+            voice=voice,  # type: ignore
+            input=input,
+            **optional_params,
+        )
+        return response
+
+    async def async_audio_speech(
+        self,
+        model: str,
+        input: str,
+        voice: str,
+        optional_params: dict,
+        api_key: Optional[str],
+        api_base: Optional[str],
+        api_version: Optional[str],
+        azure_ad_token: Optional[str],
+        max_retries: int,
+        timeout: Union[float, httpx.Timeout],
+        client=None,
+    ) -> HttpxBinaryResponseContent:
+
+        azure_client: AsyncAzureOpenAI = self._get_sync_azure_client(
+            api_base=api_base,
+            api_version=api_version,
+            api_key=api_key,
+            azure_ad_token=azure_ad_token,
+            model=model,
+            max_retries=max_retries,
+            timeout=timeout,
+            client=client,
+            client_type="async",
+        )  # type: ignore
+
+        response = await azure_client.audio.speech.create(
+            model=model,
+            voice=voice,  # type: ignore
+            input=input,
+            **optional_params,
+        )
+
+        return response
+
     def get_headers(
         self,
         model: Optional[str],
diff --git a/litellm/main.py b/litellm/main.py
index 318d0b7fe1..69ce61fab1 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -4410,6 +4410,7 @@ def speech(
     voice: str,
     api_key: Optional[str] = None,
     api_base: Optional[str] = None,
+    api_version: Optional[str] = None,
     organization: Optional[str] = None,
     project: Optional[str] = None,
     max_retries: Optional[int] = None,
@@ -4483,6 +4484,45 @@ def speech(
             client=client,  # pass AsyncOpenAI, OpenAI client
             aspeech=aspeech,
         )
+    elif custom_llm_provider == "azure":
+        # azure configs
+        api_base = api_base or litellm.api_base or get_secret("AZURE_API_BASE")  # type: ignore
+
+        api_version = (
+            api_version or litellm.api_version or get_secret("AZURE_API_VERSION")
+        )  # type: ignore
+
+        api_key = (
+            api_key
+            or litellm.api_key
+            or litellm.azure_key
+            or get_secret("AZURE_OPENAI_API_KEY")
+            or get_secret("AZURE_API_KEY")
+        )  # type: ignore
+
+        azure_ad_token: Optional[str] = optional_params.get("extra_body", {}).pop(  # type: ignore
+            "azure_ad_token", None
+        ) or get_secret(
+            "AZURE_AD_TOKEN"
+        )
+
+        headers = headers or litellm.headers
+
+        response = azure_chat_completions.audio_speech(
+            model=model,
+            input=input,
+            voice=voice,
+            optional_params=optional_params,
+            api_key=api_key,
+            api_base=api_base,
+            api_version=api_version,
+            azure_ad_token=azure_ad_token,
+            organization=organization,
+            max_retries=max_retries,
+            timeout=timeout,
+            client=client,  # pass AsyncOpenAI, OpenAI client
+            aspeech=aspeech,
+        )

     if response is None:
         raise Exception(
diff --git a/litellm/proxy/_super_secret_config.yaml b/litellm/proxy/_super_secret_config.yaml
index 2060f61ca4..c570c08cf7 100644
--- a/litellm/proxy/_super_secret_config.yaml
+++ b/litellm/proxy/_super_secret_config.yaml
@@ -18,7 +18,6 @@ model_list:
     api_key: os.environ/PREDIBASE_API_KEY
     tenant_id: os.environ/PREDIBASE_TENANT_ID
     max_new_tokens: 256
-
 # - litellm_params:
 #     api_base: https://my-endpoint-europe-berri-992.openai.azure.com/
 #     api_key: os.environ/AZURE_EUROPE_API_KEY
diff --git a/litellm/tests/test_audio_speech.py b/litellm/tests/test_audio_speech.py
index dde196d9cc..285334f7ef 100644
--- a/litellm/tests/test_audio_speech.py
+++ b/litellm/tests/test_audio_speech.py
@@ -1,8 +1,14 @@
 # What is this?
 ## unit tests for openai tts endpoint
-import sys, os, asyncio, time, random, uuid
+import asyncio
+import os
+import random
+import sys
+import time
 import traceback
+import uuid
+

 from dotenv import load_dotenv

 load_dotenv()
@@ -11,23 +17,40 @@ import os
 sys.path.insert(
     0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
-import pytest
-import litellm, openai
 from pathlib import Path

+import openai
+import pytest

-@pytest.mark.parametrize("sync_mode", [True, False])
+import litellm
+
+
+@pytest.mark.parametrize(
+    "sync_mode",
+    [True, False],
+)
+@pytest.mark.parametrize(
+    "model, api_key, api_base",
+    [
+        (
+            "azure/azure-tts",
+            os.getenv("AZURE_SWEDEN_API_KEY"),
+            os.getenv("AZURE_SWEDEN_API_BASE"),
+        ),
+        ("openai/tts-1", os.getenv("OPENAI_API_KEY"), None),
+    ],
+)  # ,
 @pytest.mark.asyncio
-async def test_audio_speech_litellm(sync_mode):
+async def test_audio_speech_litellm(sync_mode, model, api_base, api_key):
     speech_file_path = Path(__file__).parent / "speech.mp3"

     if sync_mode:
         response = litellm.speech(
-            model="openai/tts-1",
+            model=model,
             voice="alloy",
             input="the quick brown fox jumped over the lazy dogs",
-            api_base=None,
-            api_key=None,
+            api_base=api_base,
+            api_key=api_key,
             organization=None,
             project=None,
             max_retries=1,
@@ -41,11 +64,11 @@ async def test_audio_speech_litellm(sync_mode):
         assert isinstance(response, HttpxBinaryResponseContent)
     else:
         response = await litellm.aspeech(
-            model="openai/tts-1",
+            model=model,
             voice="alloy",
             input="the quick brown fox jumped over the lazy dogs",
-            api_base=None,
-            api_key=None,
+            api_base=api_base,
+            api_key=api_key,
             organization=None,
             project=None,
             max_retries=1,
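For reviewers, here is a minimal usage sketch of the Azure TTS path this diff wires up (the `custom_llm_provider == "azure"` branch in `litellm.speech`). The deployment name `azure/azure-tts` mirrors the parametrized test above, and the `AZURE_*` environment variables are placeholders, not values required by the API:

```python
import os

import litellm

# Sketch only: "azure/azure-tts" is a placeholder Azure deployment name and the
# AZURE_* env vars are illustrative; substitute your own endpoint, key, and version.
response = litellm.speech(
    model="azure/azure-tts",  # the "azure/" prefix routes to the new Azure branch
    voice="alloy",
    input="the quick brown fox jumped over the lazy dogs",
    api_base=os.getenv("AZURE_API_BASE"),
    api_key=os.getenv("AZURE_API_KEY"),
    api_version=os.getenv("AZURE_API_VERSION"),  # new `api_version` kwarg added by this diff
)

# The returned HttpxBinaryResponseContent can be written straight to disk.
response.stream_to_file("speech.mp3")
```

The async path is the same call via `litellm.aspeech(...)`, which sets `aspeech=True` and lands in `async_audio_speech` above.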