feat - add stream_options support in litellm

This commit is contained in:
Ishaan Jaff 2024-05-08 21:25:40 -07:00
parent b5db045624
commit edb10198ef

View file

@ -187,6 +187,7 @@ async def acompletion(
top_p: Optional[float] = None, top_p: Optional[float] = None,
n: Optional[int] = None, n: Optional[int] = None,
stream: Optional[bool] = None, stream: Optional[bool] = None,
stream_options: Optional[dict] = None,
stop=None, stop=None,
max_tokens: Optional[int] = None, max_tokens: Optional[int] = None,
presence_penalty: Optional[float] = None, presence_penalty: Optional[float] = None,
@ -223,6 +224,7 @@ async def acompletion(
top_p (float, optional): The top-p parameter for nucleus sampling (default is 1.0). top_p (float, optional): The top-p parameter for nucleus sampling (default is 1.0).
n (int, optional): The number of completions to generate (default is 1). n (int, optional): The number of completions to generate (default is 1).
stream (bool, optional): If True, return a streaming response (default is False). stream (bool, optional): If True, return a streaming response (default is False).
stream_options (dict, optional): A dictionary containing options for the streaming response. Only use this if stream is True.
stop(string/list, optional): - Up to 4 sequences where the LLM API will stop generating further tokens. stop(string/list, optional): - Up to 4 sequences where the LLM API will stop generating further tokens.
max_tokens (integer, optional): The maximum number of tokens in the generated completion (default is infinity). max_tokens (integer, optional): The maximum number of tokens in the generated completion (default is infinity).
presence_penalty (float, optional): It is used to penalize new tokens based on their existence in the text so far. presence_penalty (float, optional): It is used to penalize new tokens based on their existence in the text so far.
@ -260,6 +262,7 @@ async def acompletion(
"top_p": top_p, "top_p": top_p,
"n": n, "n": n,
"stream": stream, "stream": stream,
"stream_options": stream_options,
"stop": stop, "stop": stop,
"max_tokens": max_tokens, "max_tokens": max_tokens,
"presence_penalty": presence_penalty, "presence_penalty": presence_penalty,
@ -457,6 +460,7 @@ def completion(
top_p: Optional[float] = None, top_p: Optional[float] = None,
n: Optional[int] = None, n: Optional[int] = None,
stream: Optional[bool] = None, stream: Optional[bool] = None,
stream_options: Optional[dict] = None,
stop=None, stop=None,
max_tokens: Optional[int] = None, max_tokens: Optional[int] = None,
presence_penalty: Optional[float] = None, presence_penalty: Optional[float] = None,
@ -496,6 +500,7 @@ def completion(
top_p (float, optional): The top-p parameter for nucleus sampling (default is 1.0). top_p (float, optional): The top-p parameter for nucleus sampling (default is 1.0).
n (int, optional): The number of completions to generate (default is 1). n (int, optional): The number of completions to generate (default is 1).
stream (bool, optional): If True, return a streaming response (default is False). stream (bool, optional): If True, return a streaming response (default is False).
stream_options (dict, optional): A dictionary containing options for the streaming response. Only set this when stream is True.
stop(string/list, optional): - Up to 4 sequences where the LLM API will stop generating further tokens. stop(string/list, optional): - Up to 4 sequences where the LLM API will stop generating further tokens.
max_tokens (integer, optional): The maximum number of tokens in the generated completion (default is infinity). max_tokens (integer, optional): The maximum number of tokens in the generated completion (default is infinity).
presence_penalty (float, optional): It is used to penalize new tokens based on their existence in the text so far. presence_penalty (float, optional): It is used to penalize new tokens based on their existence in the text so far.
@ -573,6 +578,7 @@ def completion(
"top_p", "top_p",
"n", "n",
"stream", "stream",
"stream_options",
"stop", "stop",
"max_tokens", "max_tokens",
"presence_penalty", "presence_penalty",
@ -783,6 +789,7 @@ def completion(
top_p=top_p, top_p=top_p,
n=n, n=n,
stream=stream, stream=stream,
stream_options=stream_options,
stop=stop, stop=stop,
max_tokens=max_tokens, max_tokens=max_tokens,
presence_penalty=presence_penalty, presence_penalty=presence_penalty,