Docs: add usage guide for Vertex AI text-to-speech (TTS)

This commit is contained in:
Ishaan Jaff 2024-08-23 17:57:49 -07:00
parent 225ff8432d
commit 8fada93fff
4 changed files with 36 additions and 25 deletions

View file

@ -1812,9 +1812,9 @@ response.stream_to_file(speech_file_path)
1. Add model to config.yaml 1. Add model to config.yaml
```yaml ```yaml
model_list: model_list:
- model_name: multimodalembedding@001 - model_name: vertex-tts
litellm_params: litellm_params:
model: vertex_ai/multimodalembedding@001 model: vertex_ai/ # Vertex AI does not support passing a `model` param - so passing `model=vertex_ai/` is the only required param
vertex_project: "adroit-crow-413218" vertex_project: "adroit-crow-413218"
vertex_location: "us-central1" vertex_location: "us-central1"
vertex_credentials: adroit-crow-413218-a956eef1a2a8.json vertex_credentials: adroit-crow-413218-a956eef1a2a8.json
@ -1837,23 +1837,14 @@ import openai
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000") client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
# # request sent to model set on litellm proxy, `litellm --model` # see supported values for "voice" on vertex here:
response = client.embeddings.create( # https://console.cloud.google.com/vertex-ai/generative/speech/text-to-speech
model="multimodalembedding@001", response = client.audio.speech.create(
input = None, model = "vertex-tts",
extra_body = { input="the quick brown fox jumped over the lazy dogs",
"instances": [ voice={'languageCode': 'en-US', 'name': 'en-US-Studio-O'}
{
"image": {
"bytesBase64Encoded": "base64"
},
"text": "this is a unicorn",
},
],
}
) )
print("response from proxy", response)
print(response)
``` ```
</TabItem> </TabItem>

View file

@ -54,7 +54,7 @@ class VertexTextToSpeechAPI(VertexLLM):
timeout: Union[float, httpx.Timeout], timeout: Union[float, httpx.Timeout],
model: str, model: str,
input: str, input: str,
voice: Optional[str] = None, voice: Optional[dict] = None,
_is_async: Optional[bool] = False, _is_async: Optional[bool] = False,
optional_params: Optional[dict] = None, optional_params: Optional[dict] = None,
**kwargs, **kwargs,
@ -87,7 +87,9 @@ class VertexTextToSpeechAPI(VertexLLM):
vertex_input = VertexInput(text=input) vertex_input = VertexInput(text=input)
# required param # required param
optional_params = optional_params or {} optional_params = optional_params or {}
if "voice" in optional_params: if voice is not None:
vertex_voice = VertexVoice(**voice)
elif "voice" in optional_params:
vertex_voice = VertexVoice(**optional_params["voice"]) vertex_voice = VertexVoice(**optional_params["voice"])
else: else:
# use defaults to not fail the request # use defaults to not fail the request

View file

@ -4699,7 +4699,7 @@ async def aspeech(*args, **kwargs) -> HttpxBinaryResponseContent:
def speech( def speech(
model: str, model: str,
input: str, input: str,
voice: Optional[str] = None, voice: Optional[Union[str, dict]] = None,
api_key: Optional[str] = None, api_key: Optional[str] = None,
api_base: Optional[str] = None, api_base: Optional[str] = None,
api_version: Optional[str] = None, api_version: Optional[str] = None,
@ -4735,9 +4735,9 @@ def speech(
logging_obj = kwargs.get("litellm_logging_obj", None) logging_obj = kwargs.get("litellm_logging_obj", None)
response: Optional[HttpxBinaryResponseContent] = None response: Optional[HttpxBinaryResponseContent] = None
if custom_llm_provider == "openai": if custom_llm_provider == "openai":
if voice is None: if voice is None or not (isinstance(voice, str)):
raise litellm.BadRequestError( raise litellm.BadRequestError(
message="'voice' is required for OpenAI TTS", message="'voice' is required to be passed as a string for OpenAI TTS",
model=model, model=model,
llm_provider=custom_llm_provider, llm_provider=custom_llm_provider,
) )
@ -4787,9 +4787,9 @@ def speech(
) )
elif custom_llm_provider == "azure": elif custom_llm_provider == "azure":
# azure configs # azure configs
if voice is None: if voice is None or not (isinstance(voice, str)):
raise litellm.BadRequestError( raise litellm.BadRequestError(
message="'voice' is required for Azure TTS", message="'voice' is required to be passed as a string for Azure TTS",
model=model, model=model,
llm_provider=custom_llm_provider, llm_provider=custom_llm_provider,
) )
@ -4849,6 +4849,13 @@ def speech(
vertex_credentials = generic_optional_params.vertex_credentials or get_secret( vertex_credentials = generic_optional_params.vertex_credentials or get_secret(
"VERTEXAI_CREDENTIALS" "VERTEXAI_CREDENTIALS"
) )
if voice is not None and not isinstance(voice, dict):
raise litellm.BadRequestError(
message=f"'voice' is required to be passed as a dict for Vertex AI TTS, passed in voice={voice}",
model=model,
llm_provider=custom_llm_provider,
)
response = vertex_text_to_speech.audio_speech( response = vertex_text_to_speech.audio_speech(
_is_async=aspeech, _is_async=aspeech,
vertex_credentials=vertex_credentials, vertex_credentials=vertex_credentials,

View file

@ -0,0 +1,11 @@
# Example client: send a text-to-speech request to a LiteLLM proxy's
# OpenAI-compatible /audio/speech endpoint, routed to the Vertex AI TTS
# deployment configured on the proxy under the model name "vertex-tts".
import openai
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
# Request is sent to the model configured on the LiteLLM proxy (`litellm --model`).
response = client.audio.speech.create(
model="vertex-tts",
input="the quick brown fox jumped over the lazy dogs",
# Vertex AI TTS expects `voice` as a dict ({languageCode, name}) rather than the
# OpenAI-style voice string — the `type: ignore` silences the SDK's `str` hint.
voice={"languageCode": "en-US", "name": "en-US-Studio-O"},  # type: ignore
)
print("response from proxy", response)  # noqa