Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-27 03:34:10 +00:00)
(feat) Support audio, modalities params (#6304)
* add audio, modalities param
* add test for gpt audio models
* add get_supported_openai_params for GPT audio models
* add supported params for audio
* test_audio_output_from_model
* bump openai to openai==1.52.0
* bump openai on pyproject
* fix audio test
* fix test mock_chat_response
* handle audio for Message
* fix handling audio for OAI compatible API endpoints
* fix linting
* fix mock dbrx test
This commit is contained in:
parent e35fc3203e
commit 13e0b3f626

15 changed files with 290 additions and 23 deletions
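For context, a minimal sketch of what the two new parameters enable through litellm's OpenAI-compatible interface (the model name and the voice/format values are assumptions, not taken from this diff):

import litellm

# Sketch: request text plus audio output from a GPT audio model.
# "gpt-4o-audio-preview", the voice, and the format are assumed values.
response = litellm.completion(
    model="gpt-4o-audio-preview",
    messages=[{"role": "user", "content": "Say hello."}],
    modalities=["text", "audio"],               # new param in this commit
    audio={"voice": "alloy", "format": "wav"},  # new param in this commit
)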
@@ -147,6 +147,8 @@ from .llms.vertex_ai_and_google_ai_studio.vertex_embeddings.embedding_handler im
 from .llms.watsonx import IBMWatsonXAI
 from .types.llms.openai import (
     ChatCompletionAssistantMessage,
+    ChatCompletionAudioParam,
+    ChatCompletionModality,
     ChatCompletionUserMessage,
     HttpxBinaryResponseContent,
 )
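The two newly imported names come from the openai SDK this commit pins (openai==1.52.0). A rough sketch of their shape, an assumption to be checked against openai.types.chat rather than a definitive definition:

from typing import Literal
from typing_extensions import TypedDict

# Assumed shape of the openai SDK types imported above.
ChatCompletionModality = Literal["text", "audio"]

class ChatCompletionAudioParam(TypedDict):
    voice: str   # e.g. "alloy"; the SDK constrains this to a Literal of voice names
    format: str  # e.g. "wav"; the SDK constrains this to a Literal of encodings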
@@ -287,6 +289,8 @@ async def acompletion(
     stop=None,
     max_tokens: Optional[int] = None,
     max_completion_tokens: Optional[int] = None,
+    modalities: Optional[List[ChatCompletionModality]] = None,
+    audio: Optional[ChatCompletionAudioParam] = None,
     presence_penalty: Optional[float] = None,
     frequency_penalty: Optional[float] = None,
     logit_bias: Optional[dict] = None,
@@ -327,6 +331,8 @@ async def acompletion(
         stop(string/list, optional): - Up to 4 sequences where the LLM API will stop generating further tokens.
         max_tokens (integer, optional): The maximum number of tokens in the generated completion (default is infinity).
         max_completion_tokens (integer, optional): An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
+        modalities (List[ChatCompletionModality], optional): Output types that you would like the model to generate for this request. You can use `["text", "audio"]`
+        audio (ChatCompletionAudioParam, optional): Parameters for audio output. Required when audio output is requested with modalities: ["audio"]
         presence_penalty (float, optional): It is used to penalize new tokens based on their existence in the text so far.
         frequency_penalty: It is used to penalize new tokens based on their frequency in the text so far.
         logit_bias (dict, optional): Used to modify the probability of specific tokens appearing in the completion.
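The async entry point takes the same two parameters; a minimal usage sketch, again assuming an audio-capable model name:

import asyncio
import litellm

async def main() -> None:
    # Sketch: same new params on the async path.
    response = await litellm.acompletion(
        model="gpt-4o-audio-preview",  # assumed model name
        messages=[{"role": "user", "content": "Read this aloud."}],
        modalities=["text", "audio"],
        audio={"voice": "alloy", "format": "wav"},
    )
    print(response.choices[0].message)

asyncio.run(main())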
@@ -366,6 +372,8 @@ async def acompletion(
         "stop": stop,
         "max_tokens": max_tokens,
         "max_completion_tokens": max_completion_tokens,
+        "modalities": modalities,
+        "audio": audio,
         "presence_penalty": presence_penalty,
         "frequency_penalty": frequency_penalty,
         "logit_bias": logit_bias,
@@ -670,6 +678,8 @@ def completion( # type: ignore # noqa: PLR0915
     stop=None,
     max_completion_tokens: Optional[int] = None,
     max_tokens: Optional[int] = None,
+    modalities: Optional[List[ChatCompletionModality]] = None,
+    audio: Optional[ChatCompletionAudioParam] = None,
     presence_penalty: Optional[float] = None,
     frequency_penalty: Optional[float] = None,
     logit_bias: Optional[dict] = None,
@@ -712,6 +722,8 @@ def completion( # type: ignore # noqa: PLR0915
         stop(string/list, optional): - Up to 4 sequences where the LLM API will stop generating further tokens.
         max_tokens (integer, optional): The maximum number of tokens in the generated completion (default is infinity).
         max_completion_tokens (integer, optional): An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
+        modalities (List[ChatCompletionModality], optional): Output types that you would like the model to generate for this request. You can use `["text", "audio"]`
+        audio (ChatCompletionAudioParam, optional): Parameters for audio output. Required when audio output is requested with modalities: ["audio"]
         presence_penalty (float, optional): It is used to penalize new tokens based on their existence in the text so far.
         frequency_penalty: It is used to penalize new tokens based on their frequency in the text so far.
         logit_bias (dict, optional): Used to modify the probability of specific tokens appearing in the completion.
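The commit message's "handle audio for Message" item implies the assistant message can now carry an audio payload. A hedged sketch of consuming it, assuming the OpenAI response shape where message.audio.data is base64-encoded audio and message.audio.transcript is its text:

import base64

# Sketch: write the returned audio to disk (response from a completion()
# call like the one above; attribute names assume the OpenAI shape).
message = response.choices[0].message
if getattr(message, "audio", None) is not None:
    with open("reply.wav", "wb") as f:
        f.write(base64.b64decode(message.audio.data))
    print("transcript:", message.audio.transcript)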
@@ -816,6 +828,8 @@ def completion( # type: ignore # noqa: PLR0915
         "stream_options",
         "stop",
         "max_completion_tokens",
+        "modalities",
+        "audio",
         "max_tokens",
         "presence_penalty",
         "frequency_penalty",
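Per the commit message, get_supported_openai_params now reports these params for GPT audio models; a sketch of checking support before forwarding them (the model name is an assumption):

import litellm

# Sketch: confirm the target model advertises the new params.
supported = litellm.get_supported_openai_params(model="gpt-4o-audio-preview")
if supported and "audio" in supported and "modalities" in supported:
    print("model accepts audio output params")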
@@ -975,6 +989,8 @@ def completion( # type: ignore # noqa: PLR0915
         stop=stop,
         max_tokens=max_tokens,
         max_completion_tokens=max_completion_tokens,
+        modalities=modalities,
+        audio=audio,
         presence_penalty=presence_penalty,
         frequency_penalty=frequency_penalty,
         logit_bias=logit_bias,
@@ -1515,7 +1531,7 @@ def completion( # type: ignore # noqa: PLR0915
 
     ## COMPLETION CALL
     try:
-        if litellm.OpenAIO1Config().is_model_o1_reasoning_model(model=model):
+        if litellm.openAIO1Config.is_model_o1_reasoning_model(model=model):
             response = openai_o1_chat_completions.completion(
                 model=model,
                 messages=messages,
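The last hunk swaps an inline OpenAIO1Config() construction for what appears to be a module-level singleton. A sketch of the assumed pattern (names mirror the diff; the method body is hypothetical):

class OpenAIO1Config:
    def is_model_o1_reasoning_model(self, model: str) -> bool:
        # Hypothetical check; the real implementation lives in litellm.
        return "o1" in model

# Built once at import time and reused per request, as litellm.openAIO1Config.
openAIO1Config = OpenAIO1Config()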
|
Loading…
Add table
Add a link
Reference in a new issue