[Feat] Add max_completion_tokens param (#5691)

* add max_completion_tokens

* add max_completion_tokens

* add max_completion_tokens support for OpenAI models

* add max_completion_tokens param

* add max_completion_tokens for bedrock converse models

* add test for converse maxTokens

* fix openai o1 param mapping test

* move test optional params

* add max_completion_tokens for anthropic api

* fix conftest

* add max_completion_tokens for vertex ai partner models

* add max_completion_tokens for fireworks ai

* add max_completion_tokens for hf rest api

* add test for param mapping

* add param mapping for vertex, gemini + testing

* predibase is the most unstable and unusable llm api in prod, can't handle our ci/cd

* add max_completion_tokens to openai supported params

* fix fireworks ai param mapping
Ishaan Jaff 2024-09-14 14:57:01 -07:00 committed by GitHub
parent 415a3ede9e
commit 85acdb9193
31 changed files with 591 additions and 35 deletions
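
With this change, callers can pass max_completion_tokens through litellm's standard entry points and have it mapped to whatever each provider's API expects. A minimal usage sketch; the model name and prompt are placeholders, not part of this commit:

    import litellm

    # max_completion_tokens is an upper bound on generated tokens, including
    # reasoning tokens (mirroring the OpenAI parameter of the same name).
    response = litellm.completion(
        model="gpt-4o-mini",  # placeholder model name
        messages=[{"role": "user", "content": "Write a haiku about CI/CD."}],
        max_completion_tokens=64,
    )
    print(response.choices[0].message.content)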


@@ -264,6 +264,7 @@ async def acompletion(
     stream_options: Optional[dict] = None,
     stop=None,
     max_tokens: Optional[int] = None,
+    max_completion_tokens: Optional[int] = None,
     presence_penalty: Optional[float] = None,
     frequency_penalty: Optional[float] = None,
     logit_bias: Optional[dict] = None,
@@ -303,6 +304,7 @@ async def acompletion(
         stream_options (dict, optional): A dictionary containing options for the streaming response. Only use this if stream is True.
         stop(string/list, optional): - Up to 4 sequences where the LLM API will stop generating further tokens.
         max_tokens (integer, optional): The maximum number of tokens in the generated completion (default is infinity).
+        max_completion_tokens (integer, optional): An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
         presence_penalty (float, optional): It is used to penalize new tokens based on their existence in the text so far.
         frequency_penalty: It is used to penalize new tokens based on their frequency in the text so far.
         logit_bias (dict, optional): Used to modify the probability of specific tokens appearing in the completion.
@@ -341,6 +343,7 @@ async def acompletion(
         "stream_options": stream_options,
         "stop": stop,
         "max_tokens": max_tokens,
+        "max_completion_tokens": max_completion_tokens,
         "presence_penalty": presence_penalty,
         "frequency_penalty": frequency_penalty,
         "logit_bias": logit_bias,
@@ -633,6 +636,7 @@ def completion(
     stream: Optional[bool] = None,
     stream_options: Optional[dict] = None,
     stop=None,
+    max_completion_tokens: Optional[int] = None,
     max_tokens: Optional[int] = None,
     presence_penalty: Optional[float] = None,
     frequency_penalty: Optional[float] = None,
@@ -675,6 +679,7 @@ def completion(
         stream_options (dict, optional): A dictionary containing options for the streaming response. Only set this when you set stream: true.
         stop(string/list, optional): - Up to 4 sequences where the LLM API will stop generating further tokens.
         max_tokens (integer, optional): The maximum number of tokens in the generated completion (default is infinity).
+        max_completion_tokens (integer, optional): An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
         presence_penalty (float, optional): It is used to penalize new tokens based on their existence in the text so far.
         frequency_penalty: It is used to penalize new tokens based on their frequency in the text so far.
         logit_bias (dict, optional): Used to modify the probability of specific tokens appearing in the completion.
@@ -759,6 +764,7 @@ def completion(
         "stream",
         "stream_options",
         "stop",
+        "max_completion_tokens",
         "max_tokens",
         "presence_penalty",
         "frequency_penalty",
@@ -917,6 +923,7 @@ def completion(
         stream_options=stream_options,
         stop=stop,
         max_tokens=max_tokens,
+        max_completion_tokens=max_completion_tokens,
         presence_penalty=presence_penalty,
         frequency_penalty=frequency_penalty,
         logit_bias=logit_bias,
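
For providers whose native APIs have no max_completion_tokens field (per the commits above: Anthropic, Bedrock Converse, Vertex AI partner models, Fireworks AI, the Hugging Face REST API, Gemini), the parameter is translated into each provider's own max-token setting; the "param mapping" tests added in this PR cover that translation. The helper below is purely illustrative, not code from this commit, and only sketches the kind of mapping the per-provider configs perform:

    from typing import Any, Dict, Optional

    def map_max_completion_tokens(
        provider: str, max_completion_tokens: Optional[int]
    ) -> Dict[str, Any]:
        """Illustrative only: translate the OpenAI-style param to a provider field."""
        if max_completion_tokens is None:
            return {}
        if provider == "openai":
            # Newer OpenAI models (e.g. o1) accept max_completion_tokens directly.
            return {"max_completion_tokens": max_completion_tokens}
        if provider == "bedrock_converse":
            # Bedrock Converse takes its limit as inferenceConfig.maxTokens.
            return {"inferenceConfig": {"maxTokens": max_completion_tokens}}
        if provider in ("anthropic", "fireworks_ai", "vertex_ai_partner"):
            # These APIs expose a max_tokens-style limit.
            return {"max_tokens": max_completion_tokens}
        # Other providers use their own field names (e.g. Gemini's maxOutputTokens,
        # Hugging Face's max_new_tokens); default to max_tokens in this sketch.
        return {"max_tokens": max_completion_tokens}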