streaming fixes

Krrish Dholakia 2023-09-15 15:49:20 -07:00
parent fa441e9a3e
commit 7b19d62564
12 changed files with 14 additions and 8 deletions

BIN  dist/litellm-0.1.647-py3-none-any.whl  (vendored, binary file not shown)
BIN  dist/litellm-0.1.647.tar.gz            (vendored, binary file not shown)
BIN  dist/litellm-0.1.648-py3-none-any.whl  (vendored, binary file not shown)
BIN  dist/litellm-0.1.648.tar.gz            (vendored, binary file not shown)
BIN  dist/litellm-0.1.649-py3-none-any.whl  (vendored, binary file not shown)
BIN  dist/litellm-0.1.649.tar.gz            (vendored, binary file not shown)
BIN  dist/litellm-0.1.650-py3-none-any.whl  (vendored, binary file not shown)
BIN  dist/litellm-0.1.650.tar.gz            (vendored, binary file not shown)


@@ -116,9 +116,11 @@ def test_openai_chat_completion_call():
         complete_response = ""
         start_time = time.time()
         for chunk in response:
-            chunk_time = time.time()
-            print(f"time since initial request: {chunk_time - start_time:.5f}")
             print(chunk)
+            if chunk["choices"][0]["finish_reason"]:
+                break
+            # if chunk["choices"][0]["delta"]["role"] != "assistant":
+            #     raise Exception("invalid role")
             if "content" in chunk["choices"][0]["delta"]:
                 complete_response += chunk["choices"][0]["delta"]["content"]
                 print(f'complete_chunk: {complete_response}')
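
For orientation, here is a minimal, self-contained sketch of the consumption pattern this test now exercises; the model name and prompt are illustrative, and only litellm.completion plus the chunk fields already shown in the diff are assumed.

import litellm

# Illustrative sketch: stream a completion, stop once the provider reports
# a finish_reason, and read delta["content"] defensively since not every
# chunk is guaranteed to carry it.
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "say hello"}],
    stream=True,
)

complete_response = ""
for chunk in response:
    if chunk["choices"][0]["finish_reason"]:
        break
    delta = chunk["choices"][0]["delta"]
    if "content" in delta:
        complete_response += delta["content"]

print(complete_response)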


@@ -89,10 +89,12 @@ class Message(OpenAIObject):
         self.logprobs = logprobs
 
 class Delta(OpenAIObject):
-    def __init__(self, content=" ", logprobs=None, role="assistant", **params):
+    def __init__(self, content="<special_litellm_token>", logprobs=None, role=None, **params):
         super(Delta, self).__init__(**params)
-        self.content = content
-        self.role = role
+        if content != "<special_litellm_token>":
+            self.content = content
+        if role:
+            self.role = role
 
 
 class Choices(OpenAIObject):
@@ -2501,9 +2503,11 @@ class CustomStreamWrapper:
             threading.Thread(target=self.logging_obj.success_handler, args=(completion_obj,)).start()
             # return this for all models
             model_response = ModelResponse(stream=True)
-            model_response.choices[0].delta.content = completion_obj["content"]
+            model_response.choices[0].delta = {
+                "content": completion_obj["content"],
+            }
             if "role" in completion_obj:
-                model_response.choices[0].delta.role = completion_obj["role"]
+                model_response.choices[0].delta = completion_obj
             return model_response
         except StopIteration:
             raise StopIteration
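
The practical effect of the Delta change is that a streamed chunk's delta no longer always carries both a role and a content key, so downstream code should not index them unconditionally. A minimal helper along these lines (a sketch, not part of this commit) keeps client loops working whether the wrapper assigned a plain dict or a Delta object:

def delta_text(chunk) -> str:
    # Return the text carried by a streaming chunk, or "" when the chunk is
    # role-only or a final finish_reason chunk without a content key.
    delta = chunk["choices"][0]["delta"]
    if "content" in delta:
        return delta["content"] or ""
    return ""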


@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.647"
+version = "0.1.650"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"