feat - add stream_options support to litellm

Ishaan Jaff 2024-05-08 21:25:40 -07:00
parent b5db045624
commit edb10198ef


@@ -187,6 +187,7 @@ async def acompletion(
     top_p: Optional[float] = None,
     n: Optional[int] = None,
     stream: Optional[bool] = None,
+    stream_options: Optional[dict] = None,
     stop=None,
     max_tokens: Optional[int] = None,
     presence_penalty: Optional[float] = None,
@@ -223,6 +224,7 @@ async def acompletion(
         top_p (float, optional): The top-p parameter for nucleus sampling (default is 1.0).
         n (int, optional): The number of completions to generate (default is 1).
         stream (bool, optional): If True, return a streaming response (default is False).
+        stream_options (dict, optional): A dictionary containing options for the streaming response. Only use this if stream is True.
         stop(string/list, optional): - Up to 4 sequences where the LLM API will stop generating further tokens.
         max_tokens (integer, optional): The maximum number of tokens in the generated completion (default is infinity).
         presence_penalty (float, optional): It is used to penalize new tokens based on their existence in the text so far.
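For context, a minimal sketch of how the new parameter might be used with `acompletion`. The `include_usage` key follows the OpenAI Chat Completions `stream_options` schema; the model name is illustrative and credentials are assumed to be configured via environment variables:

```python
import asyncio

import litellm


async def main():
    # Request a streaming completion; stream_options follows the OpenAI
    # Chat Completions schema, where include_usage asks the API to append
    # a final chunk carrying token usage for the whole stream.
    response = await litellm.acompletion(
        model="gpt-3.5-turbo",  # illustrative model; requires an API key
        messages=[{"role": "user", "content": "Say hello"}],
        stream=True,
        stream_options={"include_usage": True},
    )
    async for chunk in response:
        print(chunk)


asyncio.run(main())
```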
@@ -260,6 +262,7 @@ async def acompletion(
         "top_p": top_p,
         "n": n,
         "stream": stream,
+        "stream_options": stream_options,
         "stop": stop,
         "max_tokens": max_tokens,
         "presence_penalty": presence_penalty,
@@ -457,6 +460,7 @@ def completion(
     top_p: Optional[float] = None,
     n: Optional[int] = None,
     stream: Optional[bool] = None,
+    stream_options: Optional[dict] = None,
     stop=None,
     max_tokens: Optional[int] = None,
     presence_penalty: Optional[float] = None,
@@ -496,6 +500,7 @@ def completion(
         top_p (float, optional): The top-p parameter for nucleus sampling (default is 1.0).
         n (int, optional): The number of completions to generate (default is 1).
         stream (bool, optional): If True, return a streaming response (default is False).
+        stream_options (dict, optional): A dictionary containing options for the streaming response. Only set this when you set stream: true.
         stop(string/list, optional): - Up to 4 sequences where the LLM API will stop generating further tokens.
         max_tokens (integer, optional): The maximum number of tokens in the generated completion (default is infinity).
         presence_penalty (float, optional): It is used to penalize new tokens based on their existence in the text so far.
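The synchronous `completion` gains the same parameter; a minimal sketch of the sync call under the same assumptions (illustrative model name, credentials from the environment):

```python
import litellm

# stream_options is only meaningful together with stream=True.
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Say hello"}],
    stream=True,
    stream_options={"include_usage": True},
)
for chunk in response:
    print(chunk)
```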
@@ -573,6 +578,7 @@ def completion(
         "top_p",
         "n",
         "stream",
+        "stream_options",
         "stop",
         "max_tokens",
         "presence_penalty",
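The hunk above registers "stream_options" in what appears to be a whitelist of recognized OpenAI-style parameter names. A hypothetical sketch of the kind of kwarg-splitting such a list enables (the helper name and the shortened list are illustrative, not litellm's actual code):

```python
# Hypothetical illustration: a whitelist like the one above lets the
# library separate OpenAI-style params from litellm/provider-specific kwargs.
OPENAI_PARAMS = {
    "temperature", "top_p", "n", "stream", "stream_options",
    "stop", "max_tokens", "presence_penalty",
}


def split_kwargs(kwargs: dict) -> tuple[dict, dict]:
    openai_kwargs = {k: v for k, v in kwargs.items() if k in OPENAI_PARAMS}
    other_kwargs = {k: v for k, v in kwargs.items() if k not in OPENAI_PARAMS}
    return openai_kwargs, other_kwargs


# Without "stream_options" in the whitelist, it would land in other_kwargs
# and never reach the provider call.
openai_kwargs, other_kwargs = split_kwargs(
    {"stream": True, "stream_options": {"include_usage": True}}
)
```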
@@ -783,6 +789,7 @@ def completion(
         top_p=top_p,
         n=n,
         stream=stream,
+        stream_options=stream_options,
         stop=stop,
         max_tokens=max_tokens,
         presence_penalty=presence_penalty,
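With the parameter now forwarded alongside the other sampling options, a stream requested with `include_usage` should end in a usage-bearing chunk. Per the OpenAI schema that final chunk carries `usage` and an empty `choices` list, though the exact chunk shape litellm yields is an assumption here:

```python
import litellm

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Count to three"}],
    stream=True,
    stream_options={"include_usage": True},
)

for chunk in response:
    # Intermediate chunks carry content deltas; the final chunk is the
    # one carrying aggregate token usage for the whole stream.
    usage = getattr(chunk, "usage", None)
    if usage is not None:
        print("prompt_tokens:", usage.prompt_tokens)
        print("completion_tokens:", usage.completion_tokens)
        print("total_tokens:", usage.total_tokens)
```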