diff --git a/litellm/main.py b/litellm/main.py
index 9ab017bbbd..859486ae6f 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -195,7 +195,9 @@ def completion(
             response = openai.ChatCompletion.create(
                 engine=model, messages=messages, **optional_params
             )
-
+            if "stream" in optional_params and optional_params["stream"] == True:
+                response = CustomStreamWrapper(response, model)
+                return response
             ## LOGGING
             logging.post_call(
                 input=messages,
@@ -251,6 +253,9 @@ def completion(
             response = openai.ChatCompletion.create(
                 model=model, messages=messages, **optional_params
             )
+            if "stream" in optional_params and optional_params["stream"] == True:
+                response = CustomStreamWrapper(response, model)
+                return response
             ## LOGGING
             logging.post_call(
                 input=messages,
diff --git a/litellm/utils.py b/litellm/utils.py
index 8a31e36eae..3ef075a180 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -1623,6 +1623,9 @@ class CustomStreamWrapper:
         elif self.model in litellm.open_ai_text_completion_models:
             chunk = next(self.completion_stream)
             completion_obj["content"] = self.handle_openai_text_completion_chunk(chunk)
+        else:  # openai chat/azure models
+            chunk = next(self.completion_stream)
+            completion_obj["content"] = chunk["choices"][0]["delta"].get("content", "")
         # LOGGING
         self.logging_obj(completion_obj["content"])
         # return this for all models
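
For reference, a minimal sketch of how the `stream=True` path added above might be consumed by a caller. This assumes `litellm.completion` is the public entry point and that `CustomStreamWrapper` yields OpenAI-style chunks exposing a `choices[0]["delta"]` payload; the exact chunk shape is an assumption, not something this diff confirms.

```python
# Hypothetical usage of the streaming branch added in this diff.
# Assumes litellm is installed and OPENAI_API_KEY is set in the
# environment; the chunk shape (choices[0]["delta"]) mirrors
# OpenAI's streaming format and is assumed here.
import litellm

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Say hello"}],
    stream=True,  # triggers the CustomStreamWrapper branch above
)

for chunk in response:
    # Each iteration pulls one incremental delta from the stream.
    delta = chunk["choices"][0]["delta"]
    print(delta.get("content", ""), end="", flush=True)
```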