diff --git a/litellm/main.py b/litellm/main.py
index 8d8930ca49..3b3539ee1a 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -1000,9 +1000,11 @@ def completion(
             logging.pre_call(
                 input=prompt, api_key=None, additional_args={"api_base": api_base, "custom_prompt_dict": litellm.custom_prompt_dict}
             )
-            if kwargs.get('acompletion', False) == True:
-                async_generator = ollama.async_get_ollama_response_stream(api_base, model, prompt, optional_params)
-                return async_generator
+            if kwargs.get('acompletion', False) == True:
+                if optional_params.get("stream", False) == True:
+                    # assume all ollama responses are streamed
+                    async_generator = ollama.async_get_ollama_response_stream(api_base, model, prompt, optional_params)
+                    return async_generator
             generator = ollama.get_ollama_response_stream(api_base, model, prompt, optional_params)
             if optional_params.get("stream", False) == True: