feat: Add metadata field to request and response (#4237)

This change adds an optional metadata field to the OpenAI-compatible request and response objects. Fixes: #3564 Signed-off-by: Abhishek Bongale <abhishekbongale@outlook.com> Co-authored-by: Ashwin Bharambe <ashwin.bharambe@gmail.com>
2025-12-03 01:48:05 +00:00 · 2025-12-01 18:48:53 +00:00 · 2025-12-01 18:48:53 +00:00 · 618c03405c
commit 618c03405c
parent 28ff6d8659
10 changed files with 98 additions and 0 deletions
--- a/client-sdks/stainless/openapi.yml
+++ b/client-sdks/stainless/openapi.yml
@ -6796,6 +6796,12 @@ components:
          anyOf:
          - type: integer
          - type: 'null'
+        metadata:
+          anyOf:
+          - additionalProperties:
+              type: string
+            type: object
+          - type: 'null'
        input:
          items:
            anyOf:
@ -7199,6 +7205,12 @@ components:
          anyOf:
          - type: integer
          - type: 'null'
+        metadata:
+          anyOf:
+          - additionalProperties:
+              type: string
+            type: object
+          - type: 'null'
      type: object
      required:
      - input
@ -7330,6 +7342,12 @@ components:
          anyOf:
          - type: integer
          - type: 'null'
+        metadata:
+          anyOf:
+          - additionalProperties:
+              type: string
+            type: object
+          - type: 'null'
      type: object
      required:
      - created_at
--- a/docs/static/deprecated-llama-stack-spec.yaml
+++ b/docs/static/deprecated-llama-stack-spec.yaml
@ -3639,6 +3639,12 @@ components:
          anyOf:
          - type: integer
          - type: 'null'
+        metadata:
+          anyOf:
+          - additionalProperties:
+              type: string
+            type: object
+          - type: 'null'
        input:
          items:
            anyOf:
@ -4042,6 +4048,12 @@ components:
          anyOf:
          - type: integer
          - type: 'null'
+        metadata:
+          anyOf:
+          - additionalProperties:
+              type: string
+            type: object
+          - type: 'null'
      type: object
      required:
      - input
@ -4173,6 +4185,12 @@ components:
          anyOf:
          - type: integer
          - type: 'null'
+        metadata:
+          anyOf:
+          - additionalProperties:
+              type: string
+            type: object
+          - type: 'null'
      type: object
      required:
      - created_at
--- a/docs/static/experimental-llama-stack-spec.yaml
+++ b/docs/static/experimental-llama-stack-spec.yaml
@ -3336,6 +3336,12 @@ components:
          anyOf:
          - type: integer
          - type: 'null'
+        metadata:
+          anyOf:
+          - additionalProperties:
+              type: string
+            type: object
+          - type: 'null'
        input:
          items:
            anyOf:
@ -3736,6 +3742,12 @@ components:
          anyOf:
          - type: integer
          - type: 'null'
+        metadata:
+          anyOf:
+          - additionalProperties:
+              type: string
+            type: object
+          - type: 'null'
      type: object
      required:
      - created_at
--- a/docs/static/llama-stack-spec.yaml
+++ b/docs/static/llama-stack-spec.yaml
@ -5817,6 +5817,12 @@ components:
          anyOf:
          - type: integer
          - type: 'null'
+        metadata:
+          anyOf:
+          - additionalProperties:
+              type: string
+            type: object
+          - type: 'null'
        input:
          items:
            anyOf:
@ -6220,6 +6226,12 @@ components:
          anyOf:
          - type: integer
          - type: 'null'
+        metadata:
+          anyOf:
+          - additionalProperties:
+              type: string
+            type: object
+          - type: 'null'
      type: object
      required:
      - input
@ -6351,6 +6363,12 @@ components:
          anyOf:
          - type: integer
          - type: 'null'
+        metadata:
+          anyOf:
+          - additionalProperties:
+              type: string
+            type: object
+          - type: 'null'
      type: object
      required:
      - created_at
--- a/docs/static/stainless-llama-stack-spec.yaml
+++ b/docs/static/stainless-llama-stack-spec.yaml
@ -6796,6 +6796,12 @@ components:
          anyOf:
          - type: integer
          - type: 'null'
+        metadata:
+          anyOf:
+          - additionalProperties:
+              type: string
+            type: object
+          - type: 'null'
        input:
          items:
            anyOf:
@ -7199,6 +7205,12 @@ components:
          anyOf:
          - type: integer
          - type: 'null'
+        metadata:
+          anyOf:
+          - additionalProperties:
+              type: string
+            type: object
+          - type: 'null'
      type: object
      required:
      - input
@ -7330,6 +7342,12 @@ components:
          anyOf:
          - type: integer
          - type: 'null'
+        metadata:
+          anyOf:
+          - additionalProperties:
+              type: string
+            type: object
+          - type: 'null'
      type: object
      required:
      - created_at
--- a/src/llama_stack/providers/inline/agents/meta_reference/agents.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/agents.py
@ -109,6 +109,7 @@ class MetaReferenceAgentsImpl(Agents):
        max_infer_iters: int | None = 10,
        guardrails: list[ResponseGuardrail] | None = None,
        max_tool_calls: int | None = None,
+        metadata: dict[str, str] | None = None,
    ) -> OpenAIResponseObject:
        assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
        result = await self.openai_responses_impl.create_openai_response(
@ -128,6 +129,7 @@ class MetaReferenceAgentsImpl(Agents):
            guardrails,
            parallel_tool_calls,
            max_tool_calls,
+            metadata,
        )
        return result  # type: ignore[no-any-return]

--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
@ -336,6 +336,7 @@ class OpenAIResponsesImpl:
        guardrails: list[str | ResponseGuardrailSpec] | None = None,
        parallel_tool_calls: bool | None = None,
        max_tool_calls: int | None = None,
+        metadata: dict[str, str] | None = None,
    ):
        stream = bool(stream)
        text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) if text is None else text
@ -390,6 +391,7 @@ class OpenAIResponsesImpl:
            guardrail_ids=guardrail_ids,
            parallel_tool_calls=parallel_tool_calls,
            max_tool_calls=max_tool_calls,
+            metadata=metadata,
        )

        if stream:
@ -442,6 +444,7 @@ class OpenAIResponsesImpl:
        guardrail_ids: list[str] | None = None,
        parallel_tool_calls: bool | None = True,
        max_tool_calls: int | None = None,
+        metadata: dict[str, str] | None = None,
    ) -> AsyncIterator[OpenAIResponseObjectStream]:
        # These should never be None when called from create_openai_response (which sets defaults)
        # but we assert here to help mypy understand the types
@ -490,6 +493,7 @@ class OpenAIResponsesImpl:
            guardrail_ids=guardrail_ids,
            instructions=instructions,
            max_tool_calls=max_tool_calls,
+            metadata=metadata,
        )

        # Stream the response
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
@ -120,6 +120,7 @@ class StreamingResponseOrchestrator:
        prompt: OpenAIResponsePrompt | None = None,
        parallel_tool_calls: bool | None = None,
        max_tool_calls: int | None = None,
+        metadata: dict[str, str] | None = None,
    ):
        self.inference_api = inference_api
        self.ctx = ctx
@ -137,6 +138,7 @@ class StreamingResponseOrchestrator:
        self.parallel_tool_calls = parallel_tool_calls
        # Max number of total calls to built-in tools that can be processed in a response
        self.max_tool_calls = max_tool_calls
+        self.metadata = metadata
        self.sequence_number = 0
        # Store MCP tool mapping that gets built during tool processing
        self.mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] = (
@ -164,6 +166,7 @@ class StreamingResponseOrchestrator:
            model=self.ctx.model,
            status="completed",
            output=[OpenAIResponseMessage(role="assistant", content=[refusal_content], type="message")],
+            metadata=self.metadata,
        )

        return OpenAIResponseObjectStreamResponseCompleted(response=refusal_response)
@ -199,6 +202,7 @@ class StreamingResponseOrchestrator:
            prompt=self.prompt,
            parallel_tool_calls=self.parallel_tool_calls,
            max_tool_calls=self.max_tool_calls,
+            metadata=self.metadata,
        )

    async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:
--- a/src/llama_stack_api/agents.py
+++ b/src/llama_stack_api/agents.py
@ -89,6 +89,7 @@ class Agents(Protocol):
            ),
        ] = None,
        max_tool_calls: int | None = None,
+        metadata: dict[str, str] | None = None,
    ) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
        """Create a model response.

@ -100,6 +101,7 @@ class Agents(Protocol):
        :param include: (Optional) Additional fields to include in the response.
        :param guardrails: (Optional) List of guardrails to apply during response generation. Can be guardrail IDs (strings) or guardrail specifications.
        :param max_tool_calls: (Optional) Max number of total calls to built-in tools that can be processed in a response.
+        :param metadata: (Optional) Dictionary of metadata key-value pairs to attach to the response.
        :returns: An OpenAIResponseObject.
        """
        ...
--- a/src/llama_stack_api/openai_responses.py
+++ b/src/llama_stack_api/openai_responses.py
@ -597,6 +597,7 @@ class OpenAIResponseObject(BaseModel):
    :param usage: (Optional) Token usage information for the response
    :param instructions: (Optional) System message inserted into the model's context
    :param max_tool_calls: (Optional) Max number of total calls to built-in tools that can be processed in a response
+    :param metadata: (Optional) Dictionary of metadata key-value pairs
    """

    created_at: int
@ -619,6 +620,7 @@ class OpenAIResponseObject(BaseModel):
    usage: OpenAIResponseUsage | None = None
    instructions: str | None = None
    max_tool_calls: int | None = None
+    metadata: dict[str, str] | None = None


@json_schema_type