fix(utils.py): fix streaming chunks to not return role, unless set

Krrish Dholakia 2024-02-01 09:53:33 -08:00
parent 76f053f85b
commit d46df34ff5
2 changed files with 7 additions and 6 deletions
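
The utils.py half of the change, not shown in the excerpt below, is where the commit title applies: when litellm assembles streaming chunks, the delta should carry a `role` field only when the chunk actually sets one, rather than stamping `role: assistant` onto every chunk. A minimal sketch of that behavior — the helper name and dict shapes here are hypothetical, not litellm's actual code:

def build_delta(chunk: dict) -> dict:
    """Build a streaming delta carrying only the keys the chunk set."""
    delta = {}
    if chunk.get("content") is not None:
        delta["content"] = chunk["content"]
    if chunk.get("role"):  # omit "role" entirely unless it was set
        delta["role"] = chunk["role"]
    return delta

print(build_delta({"content": "Hello"}))                    # {'content': 'Hello'}
print(build_delta({"role": "assistant", "content": "Hi"}))  # role included only here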

litellm/llms/ollama.py

@@ -145,8 +145,8 @@ def get_ollama_response(
         ):  # completion(top_k=3) > cohere_config(top_k=3) <- allows for dynamic variables to be passed in
             optional_params[k] = v
 
-    optional_params["stream"] = optional_params.get("stream", False)
-    data = {"model": model, "prompt": prompt, **optional_params}
+    stream = optional_params.pop("stream", False)
+    data = {"model": model, "prompt": prompt, "options": optional_params}
     ## LOGGING
     logging_obj.pre_call(
         input=None,
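
This hunk pops `stream` into a local variable and nests the remaining parameters under an `options` key. Ollama's /api/generate endpoint reads sampling parameters such as `temperature` and `top_k` from the `options` object, while `stream` is a top-level field, so splatting everything into the top level (the old `**optional_params`) left tuning parameters where Ollama does not apply them. A rough sketch of the payload shape the endpoint expects (model name and values are placeholders):

import requests

# Placeholder model and values; the point is where each field lives.
data = {
    "model": "llama2",
    "prompt": "Why is the sky blue?",
    "stream": False,               # top-level field on /api/generate
    "options": {                   # sampling parameters belong under "options"
        "temperature": 0.7,
        "top_k": 3,
    },
}
resp = requests.post("http://localhost:11434/api/generate", json=data, timeout=600)
print(resp.json()["response"])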
@@ -159,7 +159,7 @@ def get_ollama_response(
         },
     )
     if acompletion is True:
-        if optional_params.get("stream", False) == True:
+        if stream == True:
             response = ollama_async_streaming(
                 url=url,
                 data=data,
@@ -176,7 +176,7 @@ def get_ollama_response(
                 logging_obj=logging_obj,
             )
             return response
-    elif optional_params.get("stream", False) == True:
+    elif stream == True:
         return ollama_completion_stream(url=url, data=data, logging_obj=logging_obj)
 
     response = requests.post(url=f"{url}", json=data, timeout=litellm.request_timeout)
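
With `stream` popped up front, the local flag now drives the whole dispatch: async plus streaming goes to `ollama_async_streaming`, the async non-streaming branch (which falls between the hunks shown) returns through `return response`, synchronous streaming goes to `ollama_completion_stream`, and everything else falls through to a blocking `requests.post`. Re-reading the flag via `optional_params.get("stream", False)` would no longer work here, since `pop` has already removed it — which is also what keeps `stream` out of the nested `options` payload. A simplified, runnable sketch of the branching (handler names taken from the diff; strings stand in for the real calls):

def dispatch(acompletion: bool, stream: bool) -> str:
    if acompletion:
        return "ollama_async_streaming" if stream else "async non-streaming path"
    if stream:
        return "ollama_completion_stream"
    return "blocking requests.post"

for acompletion in (True, False):
    for stream in (True, False):
        print(acompletion, stream, "->", dispatch(acompletion, stream))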
@@ -254,7 +254,7 @@ async def ollama_async_streaming(url, data, model_response, encoding, logging_obj):
         ) as response:
             if response.status_code != 200:
                 raise OllamaError(
-                    status_code=response.status_code, message=response.text
+                    status_code=response.status_code, message=await response.aread()
                 )
 
             streamwrapper = litellm.CustomStreamWrapper(
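
The final hunk fixes error reporting in the async streaming path. Inside an httpx streaming context the body is not loaded eagerly, so accessing `response.text` before the body has been read raises `httpx.ResponseNotRead`; `await response.aread()` reads the body explicitly and returns its bytes. A standalone sketch of the pattern (URL and payload are placeholders):

import asyncio
import httpx

async def stream_or_raise(url: str, payload: dict) -> None:
    async with httpx.AsyncClient() as client:
        async with client.stream("POST", url, json=payload) as response:
            if response.status_code != 200:
                # response.text would raise httpx.ResponseNotRead here:
                # a streamed body is only available after an explicit read.
                body = await response.aread()
                raise RuntimeError(f"HTTP {response.status_code}: {body.decode()}")
            async for line in response.aiter_lines():
                print(line)

asyncio.run(stream_or_raise(
    "http://localhost:11434/api/generate",
    {"model": "llama2", "prompt": "hi", "stream": True},
))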