Add JSON structured outputs to Ollama

Aidan Do 2024-12-22 15:47:39 +11:00
parent bae197c37e
commit da82fb22f6
2 changed files with 15 additions and 4 deletions
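
For context: with this change, a chat completion request that carries a JSON-schema response_format is threaded through the Ollama adapter and handed to Ollama's structured-output support. A minimal sketch of the request shape, with dict-style arguments; only the response_format fields ("type", "json_schema") are taken from the diff below, and the surrounding names and values are illustrative:

    # Illustrative request shape; only response_format's structure comes
    # from this commit, the rest is an assumed example.
    answer_schema = {
        "type": "object",
        "properties": {
            "name": {"type": "string"},
            "year_born": {"type": "string"},
        },
        "required": ["name", "year_born"],
    }
    request_kwargs = dict(
        model="llama3.1:8b-instruct-fp16",
        messages=[{"role": "user", "content": "Who was Michael Jordan? Answer in JSON."}],
        response_format={"type": "json_schema", "json_schema": answer_schema},
    )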

@@ -48,10 +48,10 @@ model_aliases = [
         "llama3.1:8b-instruct-fp16",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
-    build_model_alias_with_just_provider_model_id(
-        "llama3.1:8b",
-        CoreModelId.llama3_1_8b_instruct.value,
-    ),
+    # build_model_alias_with_just_provider_model_id(
+    #     "llama3.1:8b",
+    #     CoreModelId.llama3_1_8b_instruct.value,
+    # ),
     build_model_alias(
         "llama3.1:70b-instruct-fp16",
         CoreModelId.llama3_1_70b_instruct.value,
@@ -214,6 +214,7 @@ class OllamaInferenceAdapter(Inference, ModelsProtocolPrivate):
             tool_prompt_format=tool_prompt_format,
             stream=stream,
             logprobs=logprobs,
+            response_format=response_format,
         )
         if stream:
             return self._stream_chat_completion(request)
@@ -257,6 +258,14 @@ class OllamaInferenceAdapter(Inference, ModelsProtocolPrivate):
             )
             input_dict["raw"] = True
+        if fmt := request.response_format:
+            if fmt.type == "json_schema":
+                input_dict["format"] = fmt.json_schema
+            elif fmt.type == "grammar":
+                raise NotImplementedError("Grammar response format is not supported")
+            else:
+                raise ValueError(f"Unknown response format type: {fmt.type}")
         return {
             "model": request.model,
             **input_dict,
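
The "format" key populated above maps onto Ollama's structured-outputs parameter. A standalone sketch of the same dispatch, runnable without llama-stack; the SimpleNamespace stand-in and the helper name are illustrative:

    from types import SimpleNamespace

    def response_format_to_ollama_params(response_format) -> dict:
        # Mirrors the dispatch added in _get_params above: a JSON schema is
        # passed through as Ollama's "format" field, while grammar-constrained
        # decoding is rejected as unsupported.
        params = {}
        if fmt := response_format:
            if fmt.type == "json_schema":
                params["format"] = fmt.json_schema
            elif fmt.type == "grammar":
                raise NotImplementedError("Grammar response format is not supported")
            else:
                raise ValueError(f"Unknown response format type: {fmt.type}")
        return params

    # Illustrative stand-in for the request's response_format object:
    fmt = SimpleNamespace(type="json_schema", json_schema={"type": "object"})
    assert response_format_to_ollama_params(fmt) == {"format": {"type": "object"}}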

@@ -191,6 +191,7 @@ class TestInference:
         provider = inference_impl.routing_table.get_provider_impl(inference_model)
         if provider.__provider_spec__.provider_type not in (
             "inline::meta-reference",
+            "remote::ollama",
             "remote::tgi",
             "remote::together",
             "remote::fireworks",
@@ -253,6 +254,7 @@ class TestInference:
         provider = inference_impl.routing_table.get_provider_impl(inference_model)
         if provider.__provider_spec__.provider_type not in (
             "inline::meta-reference",
+            "remote::ollama",
             "remote::fireworks",
             "remote::tgi",
             "remote::together",
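
Adding "remote::ollama" to these tuples opts the Ollama provider into the structured-output tests instead of skipping it. A hedged sketch of the gating pattern, assuming the surrounding test skips providers outside the tuple (the pytest.skip call itself is an assumption, not shown in the diff):

    import pytest

    SUPPORTS_STRUCTURED_OUTPUT = (
        "inline::meta-reference",
        "remote::ollama",
        "remote::fireworks",
        "remote::tgi",
        "remote::together",
    )

    def maybe_skip_structured_output(provider_type: str) -> None:
        # Skip providers that don't implement structured output yet;
        # the skip message is illustrative.
        if provider_type not in SUPPORTS_STRUCTURED_OUTPUT:
            pytest.skip(f"{provider_type} doesn't support structured output yet")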