Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-08-03 09:21:45 +00:00)
Fix openai_completion tests for ollama
When called via the OpenAI API, ollama responds with briefer completions than when called via its native API. This adjusts the prompting in the OpenAI-path tests to ask it to be more verbose.
This commit is contained in:
commit ef684ff178 (parent 52b4766949)
2 changed files with 9 additions and 2 deletions
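For context, here is a minimal sketch of how a test might exercise ollama through the OpenAI-compatible completions endpoint and why the extra instruction helps. The base URL, port, and model id below are assumptions for illustration, not values taken from this commit.

```python
from openai import OpenAI

# Assumed local endpoint exposing the OpenAI-compatible API in front of ollama;
# adjust base_url and model to match your deployment.
client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")

question = "Which planet do humans live on?"  # illustrative question

# Called through the OpenAI API, ollama tends to answer tersely, which can
# make length or substring assertions in tests flaky.
terse = client.completions.create(model="llama3.2:3b", prompt=question, stream=False)

# The workaround in this commit: ask it to explain, so the reply is longer.
verbose = client.completions.create(
    model="llama3.2:3b",
    prompt="Respond to this question and explain your answer. " + question,
    stream=False,
)

print(len(terse.choices[0].text), len(verbose.choices[0].text))
```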
@@ -348,6 +348,9 @@ class OllamaInferenceAdapter(
         top_p: Optional[float] = None,
         user: Optional[str] = None,
     ) -> OpenAICompletion:
+        if not isinstance(prompt, str):
+            raise ValueError("Ollama does not support non-string prompts for completion")
+
         model_obj = await self._get_model(model)
         params = {
             k: v
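The guard added above matters because the OpenAI /v1/completions spec also allows list and token-array prompts, which this adapter does not handle. A small illustrative sketch of the rejection path, using the same client and fixtures as the tests below; how the error surfaces on the client side depends on how the server maps the ValueError:

```python
# Illustrative only: a non-string prompt is rejected by the adapter above.
try:
    openai_client.completions.create(
        model=text_model_id,            # fixture value, as in the tests below
        prompt=["not", "a", "string"],  # list prompt -> rejected by the adapter
        stream=False,
    )
except Exception as err:
    # Expect an error mentioning "Ollama does not support non-string prompts";
    # the exact exception type depends on how the server wraps the ValueError.
    print(err)
```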
@@ -53,9 +53,11 @@ def openai_client(client_with_models, text_model_id):
 def test_openai_completion_non_streaming(openai_client, text_model_id, test_case):
     tc = TestCase(test_case)
 
+    # ollama needs more verbose prompting for some reason here...
+    prompt = "Respond to this question and explain your answer. " + tc["content"]
     response = openai_client.completions.create(
         model=text_model_id,
-        prompt=tc["content"],
+        prompt=prompt,
         stream=False,
     )
     assert len(response.choices) > 0
@@ -72,9 +74,11 @@ def test_openai_completion_non_streaming(openai_client, text_model_id, test_case):
 def test_openai_completion_streaming(openai_client, text_model_id, test_case):
     tc = TestCase(test_case)
 
+    # ollama needs more verbose prompting for some reason here...
+    prompt = "Respond to this question and explain your answer. " + tc["content"]
     response = openai_client.completions.create(
         model=text_model_id,
-        prompt=tc["content"],
+        prompt=prompt,
         stream=True,
         max_tokens=50,
     )
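For the streaming variant, the response is an iterator of completion chunks. A sketch of how the prefixed prompt would typically be consumed; the model id, fixtures, and prompt text follow the test above, and nothing here is added by this commit:

```python
# Sketch: consume the streamed completion produced by the prefixed prompt.
stream = openai_client.completions.create(
    model=text_model_id,
    prompt="Respond to this question and explain your answer. " + tc["content"],
    stream=True,
    max_tokens=50,
)
chunks = [chunk.choices[0].text for chunk in stream if chunk.choices]
content = "".join(chunks)
assert len(content) > 0  # a more verbose prompt should yield non-trivial output
```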