From 618c03405c91a03c91240092b35f02c9565dbb18 Mon Sep 17 00:00:00 2001 From: Abhishek Bongale Date: Mon, 1 Dec 2025 18:48:53 +0000 Subject: [PATCH] feat: Add metadata field to request and response (#4237) This change adds an optional metadata field to the OpenAI-compatible request and response objects. fixes: #3564 Signed-off-by: Abhishek Bongale Co-authored-by: Ashwin Bharambe --- client-sdks/stainless/openapi.yml | 18 ++++++++++++++++++ docs/static/deprecated-llama-stack-spec.yaml | 18 ++++++++++++++++++ docs/static/experimental-llama-stack-spec.yaml | 12 ++++++++++++ docs/static/llama-stack-spec.yaml | 18 ++++++++++++++++++ docs/static/stainless-llama-stack-spec.yaml | 18 ++++++++++++++++++ .../inline/agents/meta_reference/agents.py | 2 ++ .../responses/openai_responses.py | 4 ++++ .../meta_reference/responses/streaming.py | 4 ++++ src/llama_stack_api/agents.py | 2 ++ src/llama_stack_api/openai_responses.py | 2 ++ 10 files changed, 98 insertions(+) diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml index 9703f94b5..da61a6385 100644 --- a/client-sdks/stainless/openapi.yml +++ b/client-sdks/stainless/openapi.yml @@ -6796,6 +6796,12 @@ components: anyOf: - type: integer - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' input: items: anyOf: @@ -7199,6 +7205,12 @@ components: anyOf: - type: integer - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' type: object required: - input @@ -7330,6 +7342,12 @@ components: anyOf: - type: integer - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' type: object required: - created_at diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml index 70ecf9b03..d07b216b0 100644 --- a/docs/static/deprecated-llama-stack-spec.yaml +++ b/docs/static/deprecated-llama-stack-spec.yaml @@ -3639,6 
+3639,12 @@ components: anyOf: - type: integer - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' input: items: anyOf: @@ -4042,6 +4048,12 @@ components: anyOf: - type: integer - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' type: object required: - input @@ -4173,6 +4185,12 @@ components: anyOf: - type: integer - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' type: object required: - created_at diff --git a/docs/static/experimental-llama-stack-spec.yaml b/docs/static/experimental-llama-stack-spec.yaml index 18cf3415f..4d5a43693 100644 --- a/docs/static/experimental-llama-stack-spec.yaml +++ b/docs/static/experimental-llama-stack-spec.yaml @@ -3336,6 +3336,12 @@ components: anyOf: - type: integer - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' input: items: anyOf: @@ -3736,6 +3742,12 @@ components: anyOf: - type: integer - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' type: object required: - created_at diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index 19239e722..0d91aeaaa 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -5817,6 +5817,12 @@ components: anyOf: - type: integer - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' input: items: anyOf: @@ -6220,6 +6226,12 @@ components: anyOf: - type: integer - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' type: object required: - input @@ -6351,6 +6363,12 @@ components: anyOf: - type: integer - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' type: object required: - 
created_at diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index 9703f94b5..da61a6385 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -6796,6 +6796,12 @@ components: anyOf: - type: integer - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' input: items: anyOf: @@ -7199,6 +7205,12 @@ components: anyOf: - type: integer - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' type: object required: - input @@ -7330,6 +7342,12 @@ components: anyOf: - type: integer - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' type: object required: - created_at diff --git a/src/llama_stack/providers/inline/agents/meta_reference/agents.py b/src/llama_stack/providers/inline/agents/meta_reference/agents.py index 1ceb34f8f..39cc22be7 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/agents.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/agents.py @@ -109,6 +109,7 @@ class MetaReferenceAgentsImpl(Agents): max_infer_iters: int | None = 10, guardrails: list[ResponseGuardrail] | None = None, max_tool_calls: int | None = None, + metadata: dict[str, str] | None = None, ) -> OpenAIResponseObject: assert self.openai_responses_impl is not None, "OpenAI responses not initialized" result = await self.openai_responses_impl.create_openai_response( @@ -128,6 +129,7 @@ class MetaReferenceAgentsImpl(Agents): guardrails, parallel_tool_calls, max_tool_calls, + metadata, ) return result # type: ignore[no-any-return] diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py index c8282df69..9cf30908c 100644 --- 
a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py @@ -336,6 +336,7 @@ class OpenAIResponsesImpl: guardrails: list[str | ResponseGuardrailSpec] | None = None, parallel_tool_calls: bool | None = None, max_tool_calls: int | None = None, + metadata: dict[str, str] | None = None, ): stream = bool(stream) text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) if text is None else text @@ -390,6 +391,7 @@ class OpenAIResponsesImpl: guardrail_ids=guardrail_ids, parallel_tool_calls=parallel_tool_calls, max_tool_calls=max_tool_calls, + metadata=metadata, ) if stream: @@ -442,6 +444,7 @@ class OpenAIResponsesImpl: guardrail_ids: list[str] | None = None, parallel_tool_calls: bool | None = True, max_tool_calls: int | None = None, + metadata: dict[str, str] | None = None, ) -> AsyncIterator[OpenAIResponseObjectStream]: # These should never be None when called from create_openai_response (which sets defaults) # but we assert here to help mypy understand the types @@ -490,6 +493,7 @@ class OpenAIResponsesImpl: guardrail_ids=guardrail_ids, instructions=instructions, max_tool_calls=max_tool_calls, + metadata=metadata, ) # Stream the response diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py index 3515e0578..c778d65e7 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py @@ -120,6 +120,7 @@ class StreamingResponseOrchestrator: prompt: OpenAIResponsePrompt | None = None, parallel_tool_calls: bool | None = None, max_tool_calls: int | None = None, + metadata: dict[str, str] | None = None, ): self.inference_api = inference_api self.ctx = ctx @@ -137,6 +138,7 @@ class StreamingResponseOrchestrator: 
self.parallel_tool_calls = parallel_tool_calls # Max number of total calls to built-in tools that can be processed in a response self.max_tool_calls = max_tool_calls + self.metadata = metadata self.sequence_number = 0 # Store MCP tool mapping that gets built during tool processing self.mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] = ( @@ -164,6 +166,7 @@ class StreamingResponseOrchestrator: model=self.ctx.model, status="completed", output=[OpenAIResponseMessage(role="assistant", content=[refusal_content], type="message")], + metadata=self.metadata, ) return OpenAIResponseObjectStreamResponseCompleted(response=refusal_response) @@ -199,6 +202,7 @@ class StreamingResponseOrchestrator: prompt=self.prompt, parallel_tool_calls=self.parallel_tool_calls, max_tool_calls=self.max_tool_calls, + metadata=self.metadata, ) async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]: diff --git a/src/llama_stack_api/agents.py b/src/llama_stack_api/agents.py index 9b767608a..8d3b489e1 100644 --- a/src/llama_stack_api/agents.py +++ b/src/llama_stack_api/agents.py @@ -89,6 +89,7 @@ class Agents(Protocol): ), ] = None, max_tool_calls: int | None = None, + metadata: dict[str, str] | None = None, ) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]: """Create a model response. @@ -100,6 +101,7 @@ class Agents(Protocol): :param include: (Optional) Additional fields to include in the response. :param guardrails: (Optional) List of guardrails to apply during response generation. Can be guardrail IDs (strings) or guardrail specifications. :param max_tool_calls: (Optional) Max number of total calls to built-in tools that can be processed in a response. + :param metadata: (Optional) Dictionary of metadata key-value pairs to attach to the response. :returns: An OpenAIResponseObject. """ ... 
diff --git a/src/llama_stack_api/openai_responses.py b/src/llama_stack_api/openai_responses.py index e20004487..177d2314a 100644 --- a/src/llama_stack_api/openai_responses.py +++ b/src/llama_stack_api/openai_responses.py @@ -597,6 +597,7 @@ class OpenAIResponseObject(BaseModel): :param usage: (Optional) Token usage information for the response :param instructions: (Optional) System message inserted into the model's context :param max_tool_calls: (Optional) Max number of total calls to built-in tools that can be processed in a response + :param metadata: (Optional) Dictionary of metadata key-value pairs """ created_at: int @@ -619,6 +620,7 @@ class OpenAIResponseObject(BaseModel): usage: OpenAIResponseUsage | None = None instructions: str | None = None max_tool_calls: int | None = None + metadata: dict[str, str] | None = None @json_schema_type