diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index ea63bbb384..b26f9b7a7d 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py
index d37cc1c4ef..c577567b77 100644
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@@ -82,7 +82,7 @@ def ai21_completion_call():
         print(f"error occurred: {traceback.format_exc()}")
         pass
 
-
+ai21_completion_call()
 # test on openai completion call
 def test_openai_chat_completion_call():
     try:
@@ -122,7 +122,7 @@ async def completion_call():
         print(f"error occurred: {traceback.format_exc()}")
         pass
 
-asyncio.run(completion_call())
+# asyncio.run(completion_call())
 
 # # test on azure completion call
 # try:
diff --git a/litellm/utils.py b/litellm/utils.py
index e28e9e29aa..d91723eeda 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -87,6 +87,12 @@ class Message(OpenAIObject):
         self.role = role
         self.logprobs = logprobs
 
+class Delta(OpenAIObject):
+    def __init__(self, content="default", logprobs=None, **params):
+        super(Delta, self).__init__(**params)
+        self.content = content
+        self.logprobs = logprobs
+
 
 class Choices(OpenAIObject):
     def __init__(self, finish_reason="stop", index=0, message=Message(), **params):
@@ -95,11 +101,20 @@ class Choices(OpenAIObject):
         self.index = index
         self.message = message
 
+class StreamingChoices(OpenAIObject):
+    def __init__(self, finish_reason="stop", index=0, delta=Delta(), **params):
+        super(StreamingChoices, self).__init__(**params)
+        self.finish_reason = finish_reason
+        self.index = index
+        self.delta = delta
 
 class ModelResponse(OpenAIObject):
-    def __init__(self, choices=None, created=None, model=None, usage=None, **params):
+    def __init__(self, choices=None, created=None, model=None, usage=None, stream=False, **params):
         super(ModelResponse, self).__init__(**params)
-        self.choices = self.choices = choices if choices else [Choices(message=Message())]
+        if stream:
+            self.choices = self.choices = choices if choices else [StreamingChoices()]
+        else:
+            self.choices = self.choices = choices if choices else [Choices()]
         self.created = created
         self.model = model
         self.usage = (
@@ -2274,7 +2289,7 @@ class CustomStreamWrapper:
 
     def __next__(self):
         try:
-            completion_obj = {"role": "assistant", "content": ""}
+            completion_obj = {"content": ""}
             if self.model in litellm.anthropic_models:
                 chunk = next(self.completion_stream)
                 completion_obj["content"] = self.handle_anthropic_chunk(chunk)
@@ -2315,19 +2330,12 @@ class CustomStreamWrapper:
                 return chunk # open ai returns finish_reason, we should just return the openai chunk
                 #completion_obj["content"] = self.handle_openai_chat_completion_chunk(chunk)
 
-
             # LOGGING
             threading.Thread(target=self.logging_obj.success_handler, args=(completion_obj,)).start()
             # return this for all models
-            return {
-                "choices":
-                    [
-                        {
-                            "delta": completion_obj,
-                            "finish_reason": None
-                        },
-                    ]
-            }
+            model_response = ModelResponse(stream=True)
+            model_response.choices[0].delta = completion_obj
+            return model_response
         except Exception as e:
             raise StopIteration
 
diff --git a/pyproject.toml b/pyproject.toml
index 9f73e68f10..6347dbbe89 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.609"
+version = "0.1.610"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
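Note (not part of the patch): with this change, each chunk yielded by CustomStreamWrapper for the non-OpenAI providers handled in __next__ (e.g. the Anthropic branch shown above) is a ModelResponse built with stream=True, whose choices[0].delta is set to the plain chunk dict; OpenAI/Azure chunks are still returned unmodified. A minimal consumption sketch under those assumptions follows; the model name is chosen only for illustration.

    import litellm

    response = litellm.completion(
        model="claude-instant-1",  # illustrative non-OpenAI model routed through CustomStreamWrapper
        messages=[{"role": "user", "content": "Hey, how's it going?"}],
        stream=True,
    )

    for chunk in response:
        # choices[0].delta is assigned the plain dict {"content": "..."} in __next__,
        # so it is read here with dict-style access rather than as a Delta attribute.
        delta = chunk.choices[0].delta
        print(delta.get("content", ""), end="", flush=True)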