diff --git a/dist/litellm-0.1.651.tar.gz b/dist/litellm-0.1.651.tar.gz
new file mode 100644
index 000000000..ba1bfbb4f
Binary files /dev/null and b/dist/litellm-0.1.651.tar.gz differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index b1f1c387f..7e2dcf510 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py
index 8728d4031..3d162ff26 100644
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@@ -123,7 +123,7 @@ def test_openai_chat_completion_call():
             #     raise Exception("invalid role")
             if "content" in chunk["choices"][0]["delta"]:
                 complete_response += chunk["choices"][0]["delta"]["content"]
-            print(f'complete_chunk: {complete_response}')
+            # print(f'complete_chunk: {complete_response}')
         if complete_response.strip() == "":
             raise Exception("Empty response received")
     except:
diff --git a/litellm/utils.py b/litellm/utils.py
index 7e688c558..220ec8278 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -2453,6 +2453,8 @@ class CustomStreamWrapper:
 
     def __next__(self):
         try:
+            # return this for all models
+            model_response = ModelResponse(stream=True)
             completion_obj = {"content": ""}  # default to role being assistant
             if self.model in litellm.anthropic_models:
                 chunk = next(self.completion_stream)
@@ -2497,21 +2499,23 @@
                 completion_obj["content"] = self.handle_cohere_chunk(chunk)
             else:  # openai chat/azure models
                 chunk = next(self.completion_stream)
-                completion_obj = chunk["choices"][0]["delta"]
+                model_response = chunk
+                # LOGGING
+                threading.Thread(target=self.logging_obj.success_handler, args=(completion_obj,)).start()
+                return model_response
             # LOGGING
             threading.Thread(target=self.logging_obj.success_handler, args=(completion_obj,)).start()
-            # return this for all models
-            model_response = ModelResponse(stream=True)
-            model_response.choices[0].delta = {
-                "content": completion_obj["content"],
-            }
-            if "role" in completion_obj:
-                model_response.choices[0].delta = completion_obj
+
+            if model_response.choices[0].delta.content == "":
+                model_response.choices[0].delta = {
+                    "content": completion_obj["content"],
+                }
             return model_response
         except StopIteration:
             raise StopIteration
         except Exception as e:
+            print(e)
             model_response = ModelResponse(stream=True)
             model_response.choices[0].finish_reason = "stop"
             return model_response
diff --git a/pyproject.toml b/pyproject.toml
index 90a294583..9bd13f4a7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.650"
+version = "0.1.651"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"