From ef684ff178f6b22c1fea9e50fbea65f58e2a1172 Mon Sep 17 00:00:00 2001
From: Ben Browning
Date: Wed, 9 Apr 2025 15:22:52 -0400
Subject: [PATCH] Fix openai_completion tests for ollama

When called via the OpenAI API, ollama is responding with briefer
responses than when called via its native API. This adjusts the
prompting for its OpenAI calls to ask it to be more verbose.
---
 llama_stack/providers/remote/inference/ollama/ollama.py | 3 +++
 tests/integration/inference/test_openai_completion.py   | 8 ++++++--
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/llama_stack/providers/remote/inference/ollama/ollama.py
index 1fbc9e747..cdd41e372 100644
--- a/llama_stack/providers/remote/inference/ollama/ollama.py
+++ b/llama_stack/providers/remote/inference/ollama/ollama.py
@@ -348,6 +348,9 @@ class OllamaInferenceAdapter(
         top_p: Optional[float] = None,
         user: Optional[str] = None,
     ) -> OpenAICompletion:
+        if not isinstance(prompt, str):
+            raise ValueError("Ollama does not support non-string prompts for completion")
+
         model_obj = await self._get_model(model)
         params = {
             k: v
diff --git a/tests/integration/inference/test_openai_completion.py b/tests/integration/inference/test_openai_completion.py
index fe368b20f..78df64af0 100644
--- a/tests/integration/inference/test_openai_completion.py
+++ b/tests/integration/inference/test_openai_completion.py
@@ -53,9 +53,11 @@ def openai_client(client_with_models, text_model_id):
 def test_openai_completion_non_streaming(openai_client, text_model_id, test_case):
     tc = TestCase(test_case)
 
+    # ollama needs more verbose prompting for some reason here...
+    prompt = "Respond to this question and explain your answer. " + tc["content"]
     response = openai_client.completions.create(
         model=text_model_id,
-        prompt=tc["content"],
+        prompt=prompt,
         stream=False,
     )
     assert len(response.choices) > 0
@@ -72,9 +74,11 @@ def test_openai_completion_streaming(openai_client, text_model_id, test_case):
     tc = TestCase(test_case)
 
+    # ollama needs more verbose prompting for some reason here...
+    prompt = "Respond to this question and explain your answer. " + tc["content"]
     response = openai_client.completions.create(
         model=text_model_id,
-        prompt=tc["content"],
+        prompt=prompt,
         stream=True,
         max_tokens=50,
     )
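
For anyone who wants to reproduce the behavior outside pytest, here is a
minimal standalone sketch of the same prompt tweak, driven through the
OpenAI client against a running Llama Stack server. The _verbose_prompt
helper name, base_url, api_key, and model id are illustrative
placeholders, not part of this patch; adjust them to match your
deployment and whichever ollama model you have registered.

    # Standalone sketch of the prompt tweak used in the tests above.
    # NOTE: base_url, api_key, and model are placeholder values; adjust
    # them to match your Llama Stack deployment and registered model.
    from openai import OpenAI


    def _verbose_prompt(content: str) -> str:
        # ollama gives terser completions over the OpenAI API than over
        # its native API, so ask it explicitly for a longer answer.
        return "Respond to this question and explain your answer. " + content


    client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")

    # Note the adapter change above also means `prompt` must be a single
    # string; a list-of-strings prompt is now rejected by the ollama
    # provider with a ValueError on the server side.
    response = client.completions.create(
        model="llama3.2:3b-instruct-fp16",
        prompt=_verbose_prompt("Which planet do humans live on?"),
        stream=False,
    )
    print(response.choices[0].text)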