diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index ac9c4d897e..c4098f31b1 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py
index 43308aa168..7a20c6cb2e 100644
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@@ -40,7 +40,8 @@ def test_completion_cohere_stream():
         # Add any assertions here to check the response
         for chunk in response:
             print(f"chunk: {chunk}")
-            complete_response += chunk["choices"][0]["delta"]["content"]
+            if "content" in chunk["choices"][0]["delta"]:
+                complete_response += chunk["choices"][0]["delta"]["content"]
         if complete_response == "":
             raise Exception("Empty response received")
         print(f"completion_response: {complete_response}")
@@ -79,7 +80,8 @@ def test_openai_text_completion_call():
         for chunk in response:
             chunk_time = time.time()
             print(f"chunk: {chunk}")
-            complete_response += chunk["choices"][0]["delta"]["content"]
+            if "content" in chunk["choices"][0]["delta"]:
+                complete_response += chunk["choices"][0]["delta"]["content"]
         if complete_response == "":
             raise Exception("Empty response received")
     except:
@@ -98,15 +100,15 @@ def ai21_completion_call():
         for chunk in response:
             chunk_time = time.time()
             print(f"time since initial request: {chunk_time - start_time:.5f}")
-            print(chunk["choices"][0]["delta"])
-            complete_response += chunk["choices"][0]["delta"]["content"]
+            print(chunk)
+            if "content" in chunk["choices"][0]["delta"]:
+                complete_response += chunk["choices"][0]["delta"]["content"]
         if complete_response == "":
             raise Exception("Empty response received")
     except:
         print(f"error occurred: {traceback.format_exc()}")
         pass
 
-ai21_completion_call()
 # test on openai completion call
 def test_openai_chat_completion_call():
     try:
@@ -118,14 +120,16 @@ def test_openai_chat_completion_call():
         for chunk in response:
             chunk_time = time.time()
             print(f"time since initial request: {chunk_time - start_time:.5f}")
-            print(chunk["choices"][0]["delta"])
-            complete_response += chunk["choices"][0]["delta"]["content"]
+            print(chunk)
+            if "content" in chunk["choices"][0]["delta"]:
+                complete_response += chunk["choices"][0]["delta"]["content"]
         if complete_response == "":
             raise Exception("Empty response received")
     except:
         print(f"error occurred: {traceback.format_exc()}")
         pass
 
+# test_openai_chat_completion_call()
 async def completion_call():
     try:
         response = completion(
@@ -139,7 +143,8 @@ async def completion_call():
             chunk_time = time.time()
             print(f"time since initial request: {chunk_time - start_time:.5f}")
             print(chunk["choices"][0]["delta"])
-            complete_response += chunk["choices"][0]["delta"]["content"]
+            if "content" in chunk["choices"][0]["delta"]:
+                complete_response += chunk["choices"][0]["delta"]["content"]
         if complete_response == "":
             raise Exception("Empty response received")
     except:
@@ -205,6 +210,8 @@ def test_together_ai_completion_call_replit():
         )
         if complete_response == "":
             raise Exception("Empty response received")
+    except KeyError as e:
+        pass
     except:
         print(f"error occurred: {traceback.format_exc()}")
         pass
@@ -232,6 +239,8 @@ def test_together_ai_completion_call_starcoder():
         print(complete_response)
         if complete_response == "":
             raise Exception("Empty response received")
+    except KeyError as e:
+        pass
     except:
         print(f"error occurred: {traceback.format_exc()}")
         pass
@@ -281,6 +290,8 @@ async def ai21_async_completion_call():
             complete_response += chunk["choices"][0]["delta"]["content"]
         if complete_response == "":
             raise Exception("Empty response received")
+    except KeyError as e:
+        pass
     except:
         print(f"error occurred: {traceback.format_exc()}")
         pass
\ No newline at end of file
diff --git a/litellm/utils.py b/litellm/utils.py
index e3547dc1c6..20206461a1 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -103,7 +103,7 @@ class Choices(OpenAIObject):
         self.message = message
 
 class StreamingChoices(OpenAIObject):
-    def __init__(self, finish_reason="stop", index=0, delta=Delta(), **params):
+    def __init__(self, finish_reason=None, index=0, delta: Optional[Delta]={}, **params):
         super(StreamingChoices, self).__init__(**params)
         self.finish_reason = finish_reason
         self.index = index
@@ -2493,7 +2493,10 @@ class CustomStreamWrapper:
                 model_response.choices[0].delta = completion_obj
             return model_response
         except Exception as e:
-            raise StopIteration
+            model_response = ModelResponse(stream=True)
+            model_response.choices[0].finish_reason = "stop"
+            return model_response
+            # raise StopIteration
 
     async def __anext__(self):
         try:
diff --git a/pyproject.toml b/pyproject.toml
index dd424cb157..5e62451466 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.632"
+version = "0.1.633"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"