Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 10:44:24 +00:00)
(feat) add Predicted Outputs for OpenAI (#6594)

* bump openai to openai==1.54.0
* add 'prediction' param
* testing fix: bedrock deprecated cohere.command-text-v14
* test test_openai_prediction_param.py
* test test_openai_prediction_param_with_caching
* doc Predicted Outputs
parent 57b1bb5e06
commit c047d51cc8

12 changed files with 362 additions and 13 deletions
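For orientation, a minimal sketch of what the new param enables, assuming an OpenAI API key is configured; the model choice and prompt are illustrative, not from this commit:

    import litellm

    # Predicted Outputs: hand the provider the bulk of the expected response
    # so it can validate tokens instead of generating them from scratch.
    code = "def hello():\n    print('hello world')\n"

    response = litellm.completion(
        model="gpt-4o-mini",  # assumed model; Predicted Outputs is an OpenAI feature
        messages=[{"role": "user", "content": "Rename the function to greet:\n" + code}],
        prediction={"type": "content", "content": code},  # the param this commit adds
    )
    print(response.choices[0].message.content)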
@@ -162,6 +162,7 @@ from .types.llms.openai import (
     ChatCompletionAssistantMessage,
     ChatCompletionAudioParam,
     ChatCompletionModality,
+    ChatCompletionPredictionContentParam,
     ChatCompletionUserMessage,
     HttpxBinaryResponseContent,
 )
@@ -304,6 +305,7 @@ async def acompletion(
     max_tokens: Optional[int] = None,
     max_completion_tokens: Optional[int] = None,
     modalities: Optional[List[ChatCompletionModality]] = None,
+    prediction: Optional[ChatCompletionPredictionContentParam] = None,
     audio: Optional[ChatCompletionAudioParam] = None,
     presence_penalty: Optional[float] = None,
     frequency_penalty: Optional[float] = None,
@@ -346,6 +348,7 @@ async def acompletion(
         max_tokens (integer, optional): The maximum number of tokens in the generated completion (default is infinity).
         max_completion_tokens (integer, optional): An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
         modalities (List[ChatCompletionModality], optional): Output types that you would like the model to generate for this request. You can use `["text", "audio"]`
+        prediction (ChatCompletionPredictionContentParam, optional): Configuration for a Predicted Output, which can greatly improve response times when large parts of the model response are known ahead of time. This is most common when you are regenerating a file with only minor changes to most of the content.
         audio (ChatCompletionAudioParam, optional): Parameters for audio output. Required when audio output is requested with modalities: ["audio"]
         presence_penalty (float, optional): It is used to penalize new tokens based on their existence in the text so far.
         frequency_penalty: It is used to penalize new tokens based on their frequency in the text so far.
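Since this hunk documents the async entry point, a short hedged sketch of the same call through acompletion; the prompt and model are assumptions, not from the diff:

    import asyncio
    import litellm

    async def main():
        draft = "# MyLib v1.0.0\nMIT License\n"
        response = await litellm.acompletion(
            model="gpt-4o-mini",  # assumed
            messages=[{"role": "user", "content": "Bump the version to 1.0.1:\n" + draft}],
            prediction={"type": "content", "content": draft},  # mostly-unchanged draft
        )
        print(response.choices[0].message.content)

    asyncio.run(main())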
@@ -387,6 +390,7 @@ async def acompletion(
         "max_tokens": max_tokens,
         "max_completion_tokens": max_completion_tokens,
         "modalities": modalities,
+        "prediction": prediction,
         "audio": audio,
         "presence_penalty": presence_penalty,
         "frequency_penalty": frequency_penalty,
@@ -693,6 +697,7 @@ def completion( # type: ignore # noqa: PLR0915
     max_completion_tokens: Optional[int] = None,
     max_tokens: Optional[int] = None,
     modalities: Optional[List[ChatCompletionModality]] = None,
+    prediction: Optional[ChatCompletionPredictionContentParam] = None,
     audio: Optional[ChatCompletionAudioParam] = None,
     presence_penalty: Optional[float] = None,
     frequency_penalty: Optional[float] = None,
@@ -737,6 +742,7 @@ def completion( # type: ignore # noqa: PLR0915
         max_tokens (integer, optional): The maximum number of tokens in the generated completion (default is infinity).
         max_completion_tokens (integer, optional): An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
         modalities (List[ChatCompletionModality], optional): Output types that you would like the model to generate for this request. You can use `["text", "audio"]`
+        prediction (ChatCompletionPredictionContentParam, optional): Configuration for a Predicted Output, which can greatly improve response times when large parts of the model response are known ahead of time. This is most common when you are regenerating a file with only minor changes to most of the content.
         audio (ChatCompletionAudioParam, optional): Parameters for audio output. Required when audio output is requested with modalities: ["audio"]
         presence_penalty (float, optional): It is used to penalize new tokens based on their existence in the text so far.
         frequency_penalty: It is used to penalize new tokens based on their frequency in the text so far.
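The docstring's file-regeneration case, sketched with the TypedDict this diff imports; the file name and edit request are invented for illustration:

    import litellm
    from litellm.types.llms.openai import ChatCompletionPredictionContentParam

    with open("settings.py") as f:  # hypothetical file being regenerated
        original = f.read()

    # Most of the file should come back unchanged, so it doubles as the prediction.
    prediction: ChatCompletionPredictionContentParam = {
        "type": "content",
        "content": original,
    }

    response = litellm.completion(
        model="gpt-4o",  # assumed
        messages=[{"role": "user", "content": "Set DEBUG = False:\n" + original}],
        prediction=prediction,
    )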
@@ -843,6 +849,7 @@ def completion( # type: ignore # noqa: PLR0915
         "stop",
         "max_completion_tokens",
         "modalities",
+        "prediction",
         "audio",
         "max_tokens",
         "presence_penalty",
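This list gates which OpenAI-style kwargs litellm recognizes; one hedged way to check whether a provider reports the new param at runtime (helper behavior may differ across versions):

    import litellm

    supported = litellm.get_supported_openai_params(
        model="gpt-4o", custom_llm_provider="openai"
    )
    print("prediction" in supported)  # expected True after this commit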
@@ -994,6 +1001,7 @@ def completion( # type: ignore # noqa: PLR0915
         max_tokens=max_tokens,
         max_completion_tokens=max_completion_tokens,
         modalities=modalities,
+        prediction=prediction,
         audio=audio,
         presence_penalty=presence_penalty,
         frequency_penalty=frequency_penalty,
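The commit's test_openai_prediction_param_with_caching pairs the new param with litellm's cache; a rough sketch of that flow, assuming the in-memory Cache backend:

    import litellm
    from litellm.caching import Cache

    litellm.cache = Cache()  # assumed in-memory default backend

    kwargs = dict(
        model="gpt-4o-mini",  # assumed
        messages=[{"role": "user", "content": "Say hi"}],
        prediction={"type": "content", "content": "hi"},
        caching=True,
    )
    first = litellm.completion(**kwargs)   # goes to the API
    second = litellm.completion(**kwargs)  # identical call; may be served from cache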