From edb10198efd70b9161fbf2981aad957d185af43e Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Wed, 8 May 2024 21:25:40 -0700
Subject: [PATCH] feat - add stream_options support litellm

---
 litellm/main.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/litellm/main.py b/litellm/main.py
index bff9886ac..d6d276653 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -187,6 +187,7 @@ async def acompletion(
     top_p: Optional[float] = None,
     n: Optional[int] = None,
     stream: Optional[bool] = None,
+    stream_options: Optional[dict] = None,
     stop=None,
     max_tokens: Optional[int] = None,
     presence_penalty: Optional[float] = None,
@@ -223,6 +224,7 @@ async def acompletion(
         top_p (float, optional): The top-p parameter for nucleus sampling (default is 1.0).
         n (int, optional): The number of completions to generate (default is 1).
         stream (bool, optional): If True, return a streaming response (default is False).
+        stream_options (dict, optional): A dictionary containing options for the streaming response. Only use this if stream is True.
         stop(string/list, optional): - Up to 4 sequences where the LLM API will stop generating further tokens.
         max_tokens (integer, optional): The maximum number of tokens in the generated completion (default is infinity).
         presence_penalty (float, optional): It is used to penalize new tokens based on their existence in the text so far.
@@ -260,6 +262,7 @@ async def acompletion(
         "top_p": top_p,
         "n": n,
         "stream": stream,
+        "stream_options": stream_options,
         "stop": stop,
         "max_tokens": max_tokens,
         "presence_penalty": presence_penalty,
@@ -457,6 +460,7 @@ def completion(
     top_p: Optional[float] = None,
     n: Optional[int] = None,
     stream: Optional[bool] = None,
+    stream_options: Optional[dict] = None,
     stop=None,
     max_tokens: Optional[int] = None,
     presence_penalty: Optional[float] = None,
@@ -496,6 +500,7 @@ def completion(
         top_p (float, optional): The top-p parameter for nucleus sampling (default is 1.0).
         n (int, optional): The number of completions to generate (default is 1).
         stream (bool, optional): If True, return a streaming response (default is False).
+        stream_options (dict, optional): A dictionary containing options for the streaming response. Only set this when you set stream: true.
         stop(string/list, optional): - Up to 4 sequences where the LLM API will stop generating further tokens.
         max_tokens (integer, optional): The maximum number of tokens in the generated completion (default is infinity).
         presence_penalty (float, optional): It is used to penalize new tokens based on their existence in the text so far.
@@ -573,6 +578,7 @@ def completion(
         "top_p",
         "n",
         "stream",
+        "stream_options",
         "stop",
         "max_tokens",
         "presence_penalty",
@@ -783,6 +789,7 @@ def completion(
         top_p=top_p,
         n=n,
         stream=stream,
+        stream_options=stream_options,
         stop=stop,
         max_tokens=max_tokens,
         presence_penalty=presence_penalty,
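
A minimal usage sketch of the new parameter (not part of the patch): with this
change, stream_options is accepted by both completion() and acompletion() and
passed through alongside stream. The {"include_usage": True} shape below
follows the OpenAI Chat Completions convention and is an assumption here; the
model name and messages are placeholders.

    import litellm

    # stream_options is only meaningful when stream=True; the dict shape is
    # assumed to mirror OpenAI's stream_options (e.g. include_usage).
    response = litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hello!"}],
        stream=True,
        stream_options={"include_usage": True},
    )

    for chunk in response:
        # Each chunk is a streamed delta; with include_usage set, the final
        # chunk is expected to report token usage for the whole request.
        print(chunk)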