diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/llama_stack/providers/remote/inference/ollama/ollama.py
index 88f985f3a..2de5a994e 100644
--- a/llama_stack/providers/remote/inference/ollama/ollama.py
+++ b/llama_stack/providers/remote/inference/ollama/ollama.py
@@ -236,6 +236,7 @@ class OllamaInferenceAdapter(Inference, ModelsProtocolPrivate):
             tool_prompt_format=tool_prompt_format,
             stream=stream,
             logprobs=logprobs,
+            response_format=response_format,
         )
         if stream:
             return self._stream_chat_completion(request)
@@ -279,6 +280,14 @@ class OllamaInferenceAdapter(Inference, ModelsProtocolPrivate):
             )
             input_dict["raw"] = True
 
+        if fmt := request.response_format:
+            if fmt.type == "json_schema":
+                input_dict["format"] = fmt.json_schema
+            elif fmt.type == "grammar":
+                raise NotImplementedError("Grammar response format is not supported")
+            else:
+                raise ValueError(f"Unknown response format type: {fmt.type}")
+
         return {
             "model": request.model,
             **input_dict,
diff --git a/llama_stack/providers/tests/inference/test_text_inference.py b/llama_stack/providers/tests/inference/test_text_inference.py
index 2eeda0dbf..fd93857a3 100644
--- a/llama_stack/providers/tests/inference/test_text_inference.py
+++ b/llama_stack/providers/tests/inference/test_text_inference.py
@@ -210,6 +210,7 @@ class TestInference:
         provider = inference_impl.routing_table.get_provider_impl(inference_model)
         if provider.__provider_spec__.provider_type not in (
             "inline::meta-reference",
+            "remote::ollama",
             "remote::tgi",
             "remote::together",
             "remote::fireworks",
@@ -272,6 +273,7 @@ class TestInference:
         provider = inference_impl.routing_table.get_provider_impl(inference_model)
         if provider.__provider_spec__.provider_type not in (
             "inline::meta-reference",
+            "remote::ollama",
             "remote::fireworks",
             "remote::tgi",
             "remote::together",
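
For illustration, here is a minimal standalone sketch of the behavior the second hunk adds, using a hypothetical stand-in for the request's `response_format` object (the real response-format types come from `llama_stack.apis.inference`): a `json_schema` format is forwarded to Ollama as its `format` parameter, a `grammar` format is rejected as unsupported, and any other type raises an error.

```python
from dataclasses import dataclass, field
from typing import Any, Dict, Optional


@dataclass
class StubJsonSchemaFormat:
    # Hypothetical stand-in for the json_schema response format type;
    # the real class lives in llama_stack.apis.inference.
    json_schema: Dict[str, Any] = field(default_factory=dict)
    type: str = "json_schema"


def apply_response_format(input_dict: Dict[str, Any], response_format: Optional[Any]) -> Dict[str, Any]:
    # Mirrors the block added to the Ollama adapter: json_schema formats are
    # passed through as Ollama's "format" field, grammar formats are unsupported.
    if fmt := response_format:
        if fmt.type == "json_schema":
            input_dict["format"] = fmt.json_schema
        elif fmt.type == "grammar":
            raise NotImplementedError("Grammar response format is not supported")
        else:
            raise ValueError(f"Unknown response format type: {fmt.type}")
    return input_dict


schema = {"type": "object", "properties": {"name": {"type": "string"}}, "required": ["name"]}
params = apply_response_format({"raw": True}, StubJsonSchemaFormat(json_schema=schema))
assert params["format"] == schema  # the schema is forwarded to Ollama unchanged
```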