diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/llama_stack/providers/remote/inference/ollama/ollama.py
index 1fbc9e747..cdd41e372 100644
--- a/llama_stack/providers/remote/inference/ollama/ollama.py
+++ b/llama_stack/providers/remote/inference/ollama/ollama.py
@@ -348,6 +348,9 @@ class OllamaInferenceAdapter(
         top_p: Optional[float] = None,
         user: Optional[str] = None,
     ) -> OpenAICompletion:
+        if not isinstance(prompt, str):
+            raise ValueError("Ollama does not support non-string prompts for completion")
+
         model_obj = await self._get_model(model)
         params = {
             k: v
diff --git a/tests/integration/inference/test_openai_completion.py b/tests/integration/inference/test_openai_completion.py
index fe368b20f..78df64af0 100644
--- a/tests/integration/inference/test_openai_completion.py
+++ b/tests/integration/inference/test_openai_completion.py
@@ -53,9 +53,11 @@ def openai_client(client_with_models, text_model_id):
 def test_openai_completion_non_streaming(openai_client, text_model_id, test_case):
     tc = TestCase(test_case)
 
+    # ollama needs more verbose prompting for some reason here...
+    prompt = "Respond to this question and explain your answer. " + tc["content"]
     response = openai_client.completions.create(
         model=text_model_id,
-        prompt=tc["content"],
+        prompt=prompt,
         stream=False,
     )
     assert len(response.choices) > 0
@@ -72,9 +74,11 @@ def test_openai_completion_non_streaming(openai_client, text_model_id, test_case):
 def test_openai_completion_streaming(openai_client, text_model_id, test_case):
     tc = TestCase(test_case)
 
+    # ollama needs more verbose prompting for some reason here...
+    prompt = "Respond to this question and explain your answer. " + tc["content"]
     response = openai_client.completions.create(
         model=text_model_id,
-        prompt=tc["content"],
+        prompt=prompt,
         stream=True,
         max_tokens=50,
     )