diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml
index 9269b7e39..d5db884fa 100644
--- a/client-sdks/stainless/openapi.yml
+++ b/client-sdks/stainless/openapi.yml
@@ -6796,6 +6796,12 @@ components:
         anyOf:
           - type: integer
           - type: 'null'
+      metadata:
+        anyOf:
+          - additionalProperties:
+              type: string
+            type: object
+          - type: 'null'
       input:
         items:
           anyOf:
@@ -7199,6 +7205,12 @@ components:
         anyOf:
           - type: integer
           - type: 'null'
+      metadata:
+        anyOf:
+          - additionalProperties:
+              type: string
+            type: object
+          - type: 'null'
     type: object
     required:
       - input
@@ -7330,6 +7342,12 @@ components:
         anyOf:
           - type: integer
           - type: 'null'
+      metadata:
+        anyOf:
+          - additionalProperties:
+              type: string
+            type: object
+          - type: 'null'
     type: object
     required:
       - created_at
diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml
index cf9bd14c4..3f7f99be5 100644
--- a/docs/static/deprecated-llama-stack-spec.yaml
+++ b/docs/static/deprecated-llama-stack-spec.yaml
@@ -3639,6 +3639,12 @@ components:
         anyOf:
           - type: integer
           - type: 'null'
+      metadata:
+        anyOf:
+          - additionalProperties:
+              type: string
+            type: object
+          - type: 'null'
       input:
         items:
           anyOf:
@@ -4042,6 +4048,12 @@ components:
         anyOf:
           - type: integer
           - type: 'null'
+      metadata:
+        anyOf:
+          - additionalProperties:
+              type: string
+            type: object
+          - type: 'null'
     type: object
     required:
       - input
@@ -4173,6 +4185,12 @@ components:
         anyOf:
           - type: integer
           - type: 'null'
+      metadata:
+        anyOf:
+          - additionalProperties:
+              type: string
+            type: object
+          - type: 'null'
     type: object
     required:
       - created_at
diff --git a/docs/static/experimental-llama-stack-spec.yaml b/docs/static/experimental-llama-stack-spec.yaml
index 18ce75562..44e7d306c 100644
--- a/docs/static/experimental-llama-stack-spec.yaml
+++ b/docs/static/experimental-llama-stack-spec.yaml
@@ -3336,6 +3336,12 @@ components:
         anyOf:
           - type: integer
           - type: 'null'
+      metadata:
+        anyOf:
+          - additionalProperties:
+              type: string
+            type: object
+          - type: 'null'
       input:
         items:
           anyOf:
@@ -3736,6 +3742,12 @@ components:
         anyOf:
           - type: integer
           - type: 'null'
+      metadata:
+        anyOf:
+          - additionalProperties:
+              type: string
+            type: object
+          - type: 'null'
     type: object
     required:
       - created_at
diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml
index 9f7b2ed64..01c322f0e 100644
--- a/docs/static/llama-stack-spec.yaml
+++ b/docs/static/llama-stack-spec.yaml
@@ -5817,6 +5817,12 @@ components:
         anyOf:
           - type: integer
           - type: 'null'
+      metadata:
+        anyOf:
+          - additionalProperties:
+              type: string
+            type: object
+          - type: 'null'
       input:
         items:
           anyOf:
@@ -6220,6 +6226,12 @@ components:
         anyOf:
           - type: integer
           - type: 'null'
+      metadata:
+        anyOf:
+          - additionalProperties:
+              type: string
+            type: object
+          - type: 'null'
     type: object
     required:
       - input
@@ -6351,6 +6363,12 @@ components:
         anyOf:
           - type: integer
           - type: 'null'
+      metadata:
+        anyOf:
+          - additionalProperties:
+              type: string
+            type: object
+          - type: 'null'
     type: object
     required:
       - created_at
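The `metadata` property added across the four spec files above (and repeated in the stainless copy below) is the same JSON-Schema shape everywhere: a nullable object whose values must all be strings. A minimal sketch of what that schema accepts, using a hypothetical pydantic stand-in for the `dict[str, str] | None` annotation that appears in the Python diffs further down:

```python
from pydantic import BaseModel, ValidationError


# Hypothetical probe model, not part of this change; it mirrors the
# `metadata: dict[str, str] | None` annotation from the diffs below.
class MetadataProbe(BaseModel):
    metadata: dict[str, str] | None = None


# Accepted: a flat string-to-string map, or null/omitted.
MetadataProbe(metadata={"trace_id": "abc123", "team": "search"})
MetadataProbe(metadata=None)

# Rejected: non-string values fail `additionalProperties: {type: string}`.
try:
    MetadataProbe(metadata={"retries": 3})
except ValidationError as exc:
    print(exc)  # metadata.retries: Input should be a valid string
```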
diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml
index 9269b7e39..d5db884fa 100644
--- a/docs/static/stainless-llama-stack-spec.yaml
+++ b/docs/static/stainless-llama-stack-spec.yaml
@@ -6796,6 +6796,12 @@ components:
         anyOf:
           - type: integer
           - type: 'null'
+      metadata:
+        anyOf:
+          - additionalProperties:
+              type: string
+            type: object
+          - type: 'null'
       input:
         items:
           anyOf:
@@ -7199,6 +7205,12 @@ components:
         anyOf:
           - type: integer
           - type: 'null'
+      metadata:
+        anyOf:
+          - additionalProperties:
+              type: string
+            type: object
+          - type: 'null'
     type: object
     required:
       - input
@@ -7330,6 +7342,12 @@ components:
         anyOf:
           - type: integer
           - type: 'null'
+      metadata:
+        anyOf:
+          - additionalProperties:
+              type: string
+            type: object
+          - type: 'null'
     type: object
     required:
       - created_at
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/agents.py b/src/llama_stack/providers/inline/agents/meta_reference/agents.py
index ca419a51a..fb70311ac 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/agents.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/agents.py
@@ -111,6 +111,7 @@ class MetaReferenceAgentsImpl(Agents):
         max_infer_iters: int | None = 10,
         guardrails: list[ResponseGuardrail] | None = None,
         max_tool_calls: int | None = None,
+        metadata: dict[str, str] | None = None,
     ) -> OpenAIResponseObject:
         assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
         result = await self.openai_responses_impl.create_openai_response(
@@ -130,6 +131,7 @@ class MetaReferenceAgentsImpl(Agents):
             guardrails,
             parallel_tool_calls,
             max_tool_calls,
+            metadata,
         )
         return result  # type: ignore[no-any-return]
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
index c8282df69..9cf30908c 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
@@ -336,6 +336,7 @@ class OpenAIResponsesImpl:
         guardrails: list[str | ResponseGuardrailSpec] | None = None,
         parallel_tool_calls: bool | None = None,
         max_tool_calls: int | None = None,
+        metadata: dict[str, str] | None = None,
     ):
         stream = bool(stream)
         text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) if text is None else text
@@ -390,6 +391,7 @@ class OpenAIResponsesImpl:
             guardrail_ids=guardrail_ids,
             parallel_tool_calls=parallel_tool_calls,
             max_tool_calls=max_tool_calls,
+            metadata=metadata,
         )
 
         if stream:
@@ -442,6 +444,7 @@ class OpenAIResponsesImpl:
         guardrail_ids: list[str] | None = None,
         parallel_tool_calls: bool | None = True,
         max_tool_calls: int | None = None,
+        metadata: dict[str, str] | None = None,
     ) -> AsyncIterator[OpenAIResponseObjectStream]:
         # These should never be None when called from create_openai_response (which sets defaults)
         # but we assert here to help mypy understand the types
@@ -490,6 +493,7 @@ class OpenAIResponsesImpl:
             guardrail_ids=guardrail_ids,
             instructions=instructions,
             max_tool_calls=max_tool_calls,
+            metadata=metadata,
         )
 
         # Stream the response
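The two Python files above only thread the new keyword through: `create_openai_response` accepts it and forwards it, via `_create_streaming_response`, into the orchestrator shown next; no validation or normalization is added anywhere along the way. A condensed sketch of that pattern (class and method names simplified and hypothetical; the real signatures carry many more parameters):

```python
class Orchestrator:
    """Simplified stand-in for StreamingResponseOrchestrator."""

    def __init__(self, metadata: dict[str, str] | None = None) -> None:
        # Stored once at construction, then stamped onto every response
        # object the orchestrator builds (see streaming.py below).
        self.metadata = metadata


class ResponsesImpl:
    """Simplified stand-in for OpenAIResponsesImpl."""

    async def create_response(self, metadata: dict[str, str] | None = None) -> Orchestrator:
        # Public entry point: the kwarg is forwarded unchanged; this change
        # adds no size limits or key/value constraints on the dict.
        return Orchestrator(metadata=metadata)
```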
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
index 9e901d88b..4dbf83838 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
@@ -118,6 +118,7 @@ class StreamingResponseOrchestrator:
         prompt: OpenAIResponsePrompt | None = None,
         parallel_tool_calls: bool | None = None,
         max_tool_calls: int | None = None,
+        metadata: dict[str, str] | None = None,
     ):
         self.inference_api = inference_api
         self.ctx = ctx
@@ -135,6 +136,7 @@ class StreamingResponseOrchestrator:
         self.parallel_tool_calls = parallel_tool_calls
         # Max number of total calls to built-in tools that can be processed in a response
         self.max_tool_calls = max_tool_calls
+        self.metadata = metadata
         self.sequence_number = 0
         # Store MCP tool mapping that gets built during tool processing
         self.mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] = (
@@ -162,6 +164,7 @@ class StreamingResponseOrchestrator:
             model=self.ctx.model,
             status="completed",
             output=[OpenAIResponseMessage(role="assistant", content=[refusal_content], type="message")],
+            metadata=self.metadata,
         )
         return OpenAIResponseObjectStreamResponseCompleted(response=refusal_response)
 
@@ -197,6 +200,7 @@ class StreamingResponseOrchestrator:
             prompt=self.prompt,
             parallel_tool_calls=self.parallel_tool_calls,
             max_tool_calls=self.max_tool_calls,
+            metadata=self.metadata,
         )
 
     async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:
diff --git a/src/llama_stack_api/agents.py b/src/llama_stack_api/agents.py
index 9b767608a..8d3b489e1 100644
--- a/src/llama_stack_api/agents.py
+++ b/src/llama_stack_api/agents.py
@@ -89,6 +89,7 @@ class Agents(Protocol):
             ),
         ] = None,
         max_tool_calls: int | None = None,
+        metadata: dict[str, str] | None = None,
     ) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
         """Create a model response.
 
@@ -100,6 +101,7 @@ class Agents(Protocol):
         :param include: (Optional) Additional fields to include in the response.
         :param guardrails: (Optional) List of guardrails to apply during response generation. Can be guardrail IDs (strings) or guardrail specifications.
         :param max_tool_calls: (Optional) Max number of total calls to built-in tools that can be processed in a response.
+        :param metadata: (Optional) Dictionary of metadata key-value pairs to attach to the response.
         :returns: An OpenAIResponseObject.
         """
         ...
diff --git a/src/llama_stack_api/openai_responses.py b/src/llama_stack_api/openai_responses.py
index e20004487..177d2314a 100644
--- a/src/llama_stack_api/openai_responses.py
+++ b/src/llama_stack_api/openai_responses.py
@@ -597,6 +597,7 @@ class OpenAIResponseObject(BaseModel):
     :param usage: (Optional) Token usage information for the response
     :param instructions: (Optional) System message inserted into the model's context
     :param max_tool_calls: (Optional) Max number of total calls to built-in tools that can be processed in a response
+    :param metadata: (Optional) Dictionary of metadata key-value pairs
     """
 
     created_at: int
@@ -619,6 +620,7 @@ class OpenAIResponseObject(BaseModel):
     usage: OpenAIResponseUsage | None = None
     instructions: str | None = None
     max_tool_calls: int | None = None
+    metadata: dict[str, str] | None = None
 
 
 @json_schema_type
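End to end, the parameter is now accepted by the `Agents` protocol, threaded through the meta-reference implementation and the streaming orchestrator, and echoed back on `OpenAIResponseObject`. A hedged usage sketch against a Llama Stack server's OpenAI-compatible endpoint (base URL, API key, and model name are deployment-specific assumptions, not part of this diff):

```python
from openai import OpenAI

# Assumes a Llama Stack server running locally with the OpenAI-compatible
# /v1 routes; adjust base_url / api_key / model for your deployment.
client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

response = client.responses.create(
    model="meta-llama/Llama-3.3-70B-Instruct",
    input="Summarize the quarterly report in two sentences.",
    metadata={"ticket": "OPS-1234", "requested_by": "alice"},
)

# With this change, the metadata key-value pairs are attached to the
# response object and returned to the caller.
print(response.metadata)
```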