Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-08-02 08:44:44 +00:00)
Responses API: Finish wiring up function tool calls

This finishes the plumbing for function tool calls and adds a basic
verification test (which passes locally against Llama 4 Scout on vllm).

Signed-off-by: Ben Browning <bbrownin@redhat.com>

commit 924213a689 (parent 1990df2c50)
6 changed files with 148 additions and 6 deletions
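For context, here is a rough sketch of the flow this change enables, modeled
on the verification test added below. The base URL, API key, and model name
are placeholders, not part of this commit:

    from openai import OpenAI

    # Any OpenAI client pointed at a Llama Stack server should work the same
    # way; the URL and model below are just examples.
    client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")

    response = client.responses.create(
        model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
        input="What's the weather like in San Francisco?",
        tools=[
            {
                "type": "function",
                "name": "get_weather",
                "description": "Get current temperature for a given location.",
                "parameters": {
                    "type": "object",
                    "properties": {"location": {"type": "string"}},
                    "required": ["location"],
                    "additionalProperties": False,
                },
            }
        ],
        stream=False,
    )

    # Instead of a final text answer, the model's tool call comes back as a
    # "function_call" output item for the caller to execute.
    call = response.output[0]
    print(call.type, call.name, call.arguments)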
docs/_static/llama-stack-spec.html (vendored): 41 changed lines
@@ -6920,16 +6920,55 @@
             },
             {
               "$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
+            },
+            {
+              "$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall"
             }
           ],
           "discriminator": {
             "propertyName": "type",
             "mapping": {
               "message": "#/components/schemas/OpenAIResponseMessage",
-              "web_search_call": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
+              "web_search_call": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall",
+              "function_call": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall"
             }
           }
         },
+        "OpenAIResponseOutputMessageFunctionToolCall": {
+          "type": "object",
+          "properties": {
+            "arguments": {
+              "type": "string"
+            },
+            "call_id": {
+              "type": "string"
+            },
+            "name": {
+              "type": "string"
+            },
+            "type": {
+              "type": "string",
+              "const": "function_call",
+              "default": "function_call"
+            },
+            "id": {
+              "type": "string"
+            },
+            "status": {
+              "type": "string"
+            }
+          },
+          "additionalProperties": false,
+          "required": [
+            "arguments",
+            "call_id",
+            "name",
+            "type",
+            "id",
+            "status"
+          ],
+          "title": "OpenAIResponseOutputMessageFunctionToolCall"
+        },
         "OpenAIResponseObjectStream": {
           "oneOf": [
             {
docs/_static/llama-stack-spec.yaml (vendored): 29 changed lines
@@ -4833,11 +4833,40 @@ components:
     oneOf:
       - $ref: '#/components/schemas/OpenAIResponseMessage'
       - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+      - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
     discriminator:
       propertyName: type
       mapping:
         message: '#/components/schemas/OpenAIResponseMessage'
         web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+        function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+  "OpenAIResponseOutputMessageFunctionToolCall":
+    type: object
+    properties:
+      arguments:
+        type: string
+      call_id:
+        type: string
+      name:
+        type: string
+      type:
+        type: string
+        const: function_call
+        default: function_call
+      id:
+        type: string
+      status:
+        type: string
+    additionalProperties: false
+    required:
+      - arguments
+      - call_id
+      - name
+      - type
+      - id
+      - status
+    title: >-
+      OpenAIResponseOutputMessageFunctionToolCall
   OpenAIResponseObjectStream:
     oneOf:
       - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
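For reference, an output item conforming to the schema added in both spec
files might look like this (a hand-written illustration, not taken from the
diff; "arguments" carries the raw JSON string produced by the model):

    function_call_item = {
        "type": "function_call",
        "id": "fc_0123456789ab",
        "call_id": "call_abc123",
        "name": "get_weather",
        "arguments": '{"location": "San Francisco, USA"}',
        "status": "completed",
    }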
@@ -77,8 +77,20 @@ class OpenAIResponseOutputMessageWebSearchToolCall(BaseModel):
     type: Literal["web_search_call"] = "web_search_call"
 
 
+@json_schema_type
+class OpenAIResponseOutputMessageFunctionToolCall(BaseModel):
+    arguments: str
+    call_id: str
+    name: str
+    type: Literal["function_call"] = "function_call"
+    id: str
+    status: str
+
+
 OpenAIResponseOutput = Annotated[
-    OpenAIResponseMessage | OpenAIResponseOutputMessageWebSearchToolCall,
+    OpenAIResponseMessage
+    | OpenAIResponseOutputMessageWebSearchToolCall
+    | OpenAIResponseOutputMessageFunctionToolCall,
     Field(discriminator="type"),
 ]
 register_schema(OpenAIResponseOutput, name="OpenAIResponseOutput")
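A quick round-trip sketch of the new model and the widened union (a local
illustration, assuming pydantic v2, which these field declarations imply;
the example values are made up):

    from pydantic import TypeAdapter

    from llama_stack.apis.agents.openai_responses import (
        OpenAIResponseOutput,
        OpenAIResponseOutputMessageFunctionToolCall,
    )

    item = OpenAIResponseOutputMessageFunctionToolCall(
        arguments='{"location": "San Francisco, USA"}',
        call_id="call_abc123",
        name="get_weather",
        id="fc_0123456789ab",
        status="completed",
    )

    # "type" defaults to "function_call", so the discriminated union can
    # route a plain dict back to the right class.
    parsed = TypeAdapter(OpenAIResponseOutput).validate_python(item.model_dump())
    assert isinstance(parsed, OpenAIResponseOutputMessageFunctionToolCall)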
@@ -17,6 +17,7 @@ from llama_stack.apis.agents.openai_responses import (
     OpenAIResponseInputMessageContentImage,
     OpenAIResponseInputMessageContentText,
     OpenAIResponseInputTool,
+    OpenAIResponseInputToolFunction,
     OpenAIResponseMessage,
     OpenAIResponseObject,
     OpenAIResponseObjectStream,
@@ -24,6 +25,7 @@ from llama_stack.apis.agents.openai_responses import (
     OpenAIResponseObjectStreamResponseCreated,
     OpenAIResponseOutput,
     OpenAIResponseOutputMessageContentOutputText,
+    OpenAIResponseOutputMessageFunctionToolCall,
     OpenAIResponseOutputMessageWebSearchToolCall,
     OpenAIResponsePreviousResponseWithInputItems,
 )
@@ -221,10 +223,28 @@ class OpenAIResponsesImpl:
             chat_response = OpenAIChatCompletion(**chat_response.model_dump())
 
         output_messages: list[OpenAIResponseOutput] = []
-        if chat_response.choices[0].message.tool_calls:
-            output_messages.extend(
-                await self._execute_tool_and_return_final_output(model, stream, chat_response, messages, temperature)
-            )
+        # TODO: should we check more than choices[0] here?
+        if chat_response.choices[0].message.tool_calls and tools:
+            # TODO: Should we support a mix of custom and builtin tools?
+            # in other words, should we check for more than tools[0]?
+            if isinstance(tools[0], OpenAIResponseInputToolFunction):
+                choice = chat_response.choices[0]
+                for tool_call in choice.message.tool_calls:
+                    output_messages.append(
+                        OpenAIResponseOutputMessageFunctionToolCall(
+                            arguments=tool_call.function.arguments or "",
+                            call_id=tool_call.id,
+                            name=tool_call.function.name or "",
+                            id=f"fc_{uuid.uuid4()}",
+                            status="completed",
+                        )
+                    )
+            else:
+                output_messages.extend(
+                    await self._execute_tool_and_return_final_output(
+                        model, stream, chat_response, messages, temperature
+                    )
+                )
         else:
             output_messages.extend(await _openai_choices_to_output_messages(chat_response.choices))
         response = OpenAIResponseObject(
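Note that in the new branch the server does not execute custom function
tools; it returns the function_call item for the caller to run (builtin
tools still take the _execute_tool_and_return_final_output path). A minimal
client-side dispatch might look like the following sketch; get_weather is a
hypothetical local function, and sending its result back in a follow-up turn
is not wired up by this commit:

    import json

    def get_weather(location: str) -> str:
        # Hypothetical local implementation of the custom tool.
        return f"70 degrees and foggy in {location}"

    def handle_output_item(item) -> None:
        # Dispatch on the "type" discriminator the server now emits.
        if item.type == "function_call":
            args = json.loads(item.arguments or "{}")
            print(f"{item.name}({args}) -> {get_weather(**args)}")
        elif item.type == "message":
            print(item.content)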
@@ -31,6 +31,26 @@ test_response_web_search:
         search_context_size: "low"
       output: "128"
 
+test_response_custom_tool:
+  test_name: test_response_custom_tool
+  test_params:
+    case:
+      - case_id: "sf_weather"
+        input: "What's the weather like in San Francisco?"
+        tools:
+          - type: function
+            name: get_weather
+            description: Get current temperature for a given location.
+            parameters:
+              additionalProperties: false
+              properties:
+                location:
+                  description: "City and country e.g. Bogot\xE1, Colombia"
+                  type: string
+              required:
+                - location
+              type: object
+
 test_response_image:
   test_name: test_response_image
   test_params:
@@ -124,6 +124,28 @@ def test_response_non_streaming_web_search(request, openai_client, model, provider, verification_config, case):
     assert case["output"].lower() in response.output_text.lower().strip()
 
 
+@pytest.mark.parametrize(
+    "case",
+    responses_test_cases["test_response_custom_tool"]["test_params"]["case"],
+    ids=case_id_generator,
+)
+def test_response_non_streaming_custom_tool(request, openai_client, model, provider, verification_config, case):
+    test_name_base = get_base_test_name(request)
+    if should_skip_test(verification_config, provider, model, test_name_base):
+        pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
+
+    response = openai_client.responses.create(
+        model=model,
+        input=case["input"],
+        tools=case["tools"],
+        stream=False,
+    )
+
+    assert len(response.output) == 1
+    assert response.output[0].type == "function_call"
+    assert response.output[0].status == "completed"
+    assert response.output[0].name == "get_weather"
+
+
 @pytest.mark.parametrize(
     "case",
     responses_test_cases["test_response_image"]["test_params"]["case"],