From ef684ff178f6b22c1fea9e50fbea65f58e2a1172 Mon Sep 17 00:00:00 2001
From: Ben Browning
Date: Wed, 9 Apr 2025 15:22:52 -0400
Subject: [PATCH] Fix openai_completion tests for ollama

When called via the OpenAI API, ollama is responding with briefer
responses than when called via its native API. This adjusts the
prompting for its OpenAI calls to ask it to be more verbose.
---
 llama_stack/providers/remote/inference/ollama/ollama.py | 3 +++
 tests/integration/inference/test_openai_completion.py   | 8 ++++++--
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/llama_stack/providers/remote/inference/ollama/ollama.py
index 1fbc9e747..cdd41e372 100644
--- a/llama_stack/providers/remote/inference/ollama/ollama.py
+++ b/llama_stack/providers/remote/inference/ollama/ollama.py
@@ -348,6 +348,9 @@ class OllamaInferenceAdapter(
         top_p: Optional[float] = None,
         user: Optional[str] = None,
     ) -> OpenAICompletion:
+        if not isinstance(prompt, str):
+            raise ValueError("Ollama does not support non-string prompts for completion")
+
         model_obj = await self._get_model(model)
         params = {
             k: v
diff --git a/tests/integration/inference/test_openai_completion.py b/tests/integration/inference/test_openai_completion.py
index fe368b20f..78df64af0 100644
--- a/tests/integration/inference/test_openai_completion.py
+++ b/tests/integration/inference/test_openai_completion.py
@@ -53,9 +53,11 @@ def openai_client(client_with_models, text_model_id):
 def test_openai_completion_non_streaming(openai_client, text_model_id, test_case):
     tc = TestCase(test_case)
 
+    # ollama needs more verbose prompting for some reason here...
+    prompt = "Respond to this question and explain your answer. " + tc["content"]
     response = openai_client.completions.create(
         model=text_model_id,
-        prompt=tc["content"],
+        prompt=prompt,
         stream=False,
     )
     assert len(response.choices) > 0
@@ -72,9 +74,11 @@ def test_openai_completion_streaming(openai_client, text_model_id, test_case):
     tc = TestCase(test_case)
 
+    # ollama needs more verbose prompting for some reason here...
+    prompt = "Respond to this question and explain your answer. " + tc["content"]
     response = openai_client.completions.create(
         model=text_model_id,
-        prompt=tc["content"],
+        prompt=prompt,
         stream=True,
         max_tokens=50,
     )
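
For anyone who wants to reproduce the behavior outside pytest, here is a
minimal standalone sketch of the same prompt tweak, driven through the
OpenAI client against a running Llama Stack server. The _verbose_prompt
helper name, base_url, api_key, and model id are illustrative
placeholders, not part of this patch; adjust them to match your
deployment and whichever ollama model you have registered.

    # Standalone sketch of the prompt tweak used in the tests above.
    # NOTE: base_url, api_key, and model are placeholder values; adjust
    # them to match your Llama Stack deployment and registered model.
    from openai import OpenAI


    def _verbose_prompt(content: str) -> str:
        # ollama gives terser completions over the OpenAI API than over
        # its native API, so ask it explicitly for a longer answer.
        return "Respond to this question and explain your answer. " + content


    client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")

    # Note the adapter change above also means `prompt` must be a single
    # string; a list-of-strings prompt is now rejected by the ollama
    # provider with a ValueError on the server side.
    response = client.completions.create(
        model="llama3.2:3b-instruct-fp16",
        prompt=_verbose_prompt("Which planet do humans live on?"),
        stream=False,
    )
    print(response.choices[0].text)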