Merge branch 'main' into nvidia-e2e-notebook

2025-07-21 03:59:42 +00:00 · 2025-05-28 17:48:15 -04:00 · 2025-05-28 17:48:15 -04:00 · f5cb965f0f
commit f5cb965f0f
parent 51b68b4be6 0b695538af
226 changed files with 16519 additions and 8666 deletions
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@ -518,6 +518,74 @@
            }
        },
        "/v1/openai/v1/responses": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "A ListOpenAIResponseObject.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/ListOpenAIResponseObject"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Agents"
+                ],
+                "description": "List all OpenAI responses.",
+                "parameters": [
+                    {
+                        "name": "after",
+                        "in": "query",
+                        "description": "The ID of the last response to return.",
+                        "required": false,
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    {
+                        "name": "limit",
+                        "in": "query",
+                        "description": "The number of responses to return.",
+                        "required": false,
+                        "schema": {
+                            "type": "integer"
+                        }
+                    },
+                    {
+                        "name": "model",
+                        "in": "query",
+                        "description": "The model to filter responses by.",
+                        "required": false,
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    {
+                        "name": "order",
+                        "in": "query",
+                        "description": "The sort order for responses, sorted by created_at ('asc' or 'desc').",
+                        "required": false,
+                        "schema": {
+                            "$ref": "#/components/schemas/Order"
+                        }
+                    }
+                ]
+            },
            "post": {
                "responses": {
                    "200": {
@ -1395,7 +1463,7 @@
                ]
            }
        },
-        "/v1/openai/v1/responses/{id}": {
+        "/v1/openai/v1/responses/{response_id}": {
            "get": {
                "responses": {
                    "200": {
@ -1427,7 +1495,7 @@
                "description": "Retrieve an OpenAI response by its ID.",
                "parameters": [
                    {
-                        "name": "id",
+                        "name": "response_id",
                        "in": "path",
                        "description": "The ID of the OpenAI response to retrieve.",
                        "required": true,
@ -2926,6 +2994,97 @@
                }
            }
        },
+        "/v1/openai/v1/responses/{response_id}/input_items": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "A ListOpenAIResponseInputItem.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/ListOpenAIResponseInputItem"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Agents"
+                ],
+                "description": "List input items for a given OpenAI response.",
+                "parameters": [
+                    {
+                        "name": "response_id",
+                        "in": "path",
+                        "description": "The ID of the response to retrieve input items for.",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    {
+                        "name": "after",
+                        "in": "query",
+                        "description": "An item ID to list items after, used for pagination.",
+                        "required": false,
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    {
+                        "name": "before",
+                        "in": "query",
+                        "description": "An item ID to list items before, used for pagination.",
+                        "required": false,
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    {
+                        "name": "include",
+                        "in": "query",
+                        "description": "Additional fields to include in the response.",
+                        "required": false,
+                        "schema": {
+                            "type": "array",
+                            "items": {
+                                "type": "string"
+                            }
+                        }
+                    },
+                    {
+                        "name": "limit",
+                        "in": "query",
+                        "description": "A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.",
+                        "required": false,
+                        "schema": {
+                            "type": "integer"
+                        }
+                    },
+                    {
+                        "name": "order",
+                        "in": "query",
+                        "description": "The order to return the input items in. Default is desc.",
+                        "required": false,
+                        "schema": {
+                            "$ref": "#/components/schemas/Order"
+                        }
+                    }
+                ]
+            }
+        },
        "/v1/providers": {
            "get": {
                "responses": {
@ -6742,6 +6901,9 @@
                    },
                    {
                        "$ref": "#/components/schemas/OpenAIResponseInputToolFunction"
+                    },
+                    {
+                        "$ref": "#/components/schemas/OpenAIResponseInputToolMCP"
                    }
                ],
                "discriminator": {
@ -6749,7 +6911,8 @@
                    "mapping": {
                        "web_search": "#/components/schemas/OpenAIResponseInputToolWebSearch",
                        "file_search": "#/components/schemas/OpenAIResponseInputToolFileSearch",
-                        "function": "#/components/schemas/OpenAIResponseInputToolFunction"
+                        "function": "#/components/schemas/OpenAIResponseInputToolFunction",
+                        "mcp": "#/components/schemas/OpenAIResponseInputToolMCP"
                    }
                }
            },
@ -6839,6 +7002,110 @@
                ],
                "title": "OpenAIResponseInputToolFunction"
            },
+            "OpenAIResponseInputToolMCP": {
+                "type": "object",
+                "properties": {
+                    "type": {
+                        "type": "string",
+                        "const": "mcp",
+                        "default": "mcp"
+                    },
+                    "server_label": {
+                        "type": "string"
+                    },
+                    "server_url": {
+                        "type": "string"
+                    },
+                    "headers": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "oneOf": [
+                                {
+                                    "type": "null"
+                                },
+                                {
+                                    "type": "boolean"
+                                },
+                                {
+                                    "type": "number"
+                                },
+                                {
+                                    "type": "string"
+                                },
+                                {
+                                    "type": "array"
+                                },
+                                {
+                                    "type": "object"
+                                }
+                            ]
+                        }
+                    },
+                    "require_approval": {
+                        "oneOf": [
+                            {
+                                "type": "string",
+                                "const": "always"
+                            },
+                            {
+                                "type": "string",
+                                "const": "never"
+                            },
+                            {
+                                "type": "object",
+                                "properties": {
+                                    "always": {
+                                        "type": "array",
+                                        "items": {
+                                            "type": "string"
+                                        }
+                                    },
+                                    "never": {
+                                        "type": "array",
+                                        "items": {
+                                            "type": "string"
+                                        }
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "title": "ApprovalFilter"
+                            }
+                        ],
+                        "default": "never"
+                    },
+                    "allowed_tools": {
+                        "oneOf": [
+                            {
+                                "type": "array",
+                                "items": {
+                                    "type": "string"
+                                }
+                            },
+                            {
+                                "type": "object",
+                                "properties": {
+                                    "tool_names": {
+                                        "type": "array",
+                                        "items": {
+                                            "type": "string"
+                                        }
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "title": "AllowedToolsFilter"
+                            }
+                        ]
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "type",
+                    "server_label",
+                    "server_url",
+                    "require_approval"
+                ],
+                "title": "OpenAIResponseInputToolMCP"
+            },
            "OpenAIResponseInputToolWebSearch": {
                "type": "object",
                "properties": {
@ -6951,15 +7218,15 @@
            "OpenAIResponseOutputMessageFunctionToolCall": {
                "type": "object",
                "properties": {
-                    "arguments": {
-                        "type": "string"
-                    },
                    "call_id": {
                        "type": "string"
                    },
                    "name": {
                        "type": "string"
                    },
+                    "arguments": {
+                        "type": "string"
+                    },
                    "type": {
                        "type": "string",
                        "const": "function_call",
@ -6974,12 +7241,10 @@
                },
                "additionalProperties": false,
                "required": [
-                    "arguments",
                    "call_id",
                    "name",
-                    "type",
-                    "id",
-                    "status"
+                    "arguments",
+                    "type"
                ],
                "title": "OpenAIResponseOutputMessageFunctionToolCall"
            },
@ -7027,6 +7292,9 @@
                        "type": "string",
                        "description": "The underlying LLM used for completions."
                    },
+                    "instructions": {
+                        "type": "string"
+                    },
                    "previous_response_id": {
                        "type": "string",
                        "description": "(Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses."
@ -7142,6 +7410,12 @@
                    },
                    {
                        "$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall"
+                    },
+                    {
+                        "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPCall"
+                    },
+                    {
+                        "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools"
                    }
                ],
                "discriminator": {
@ -7149,15 +7423,126 @@
                    "mapping": {
                        "message": "#/components/schemas/OpenAIResponseMessage",
                        "web_search_call": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall",
-                        "function_call": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall"
+                        "function_call": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall",
+                        "mcp_call": "#/components/schemas/OpenAIResponseOutputMessageMCPCall",
+                        "mcp_list_tools": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools"
                    }
                }
            },
+            "OpenAIResponseOutputMessageMCPCall": {
+                "type": "object",
+                "properties": {
+                    "id": {
+                        "type": "string"
+                    },
+                    "type": {
+                        "type": "string",
+                        "const": "mcp_call",
+                        "default": "mcp_call"
+                    },
+                    "arguments": {
+                        "type": "string"
+                    },
+                    "name": {
+                        "type": "string"
+                    },
+                    "server_label": {
+                        "type": "string"
+                    },
+                    "error": {
+                        "type": "string"
+                    },
+                    "output": {
+                        "type": "string"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "id",
+                    "type",
+                    "arguments",
+                    "name",
+                    "server_label"
+                ],
+                "title": "OpenAIResponseOutputMessageMCPCall"
+            },
+            "OpenAIResponseOutputMessageMCPListTools": {
+                "type": "object",
+                "properties": {
+                    "id": {
+                        "type": "string"
+                    },
+                    "type": {
+                        "type": "string",
+                        "const": "mcp_list_tools",
+                        "default": "mcp_list_tools"
+                    },
+                    "server_label": {
+                        "type": "string"
+                    },
+                    "tools": {
+                        "type": "array",
+                        "items": {
+                            "type": "object",
+                            "properties": {
+                                "input_schema": {
+                                    "type": "object",
+                                    "additionalProperties": {
+                                        "oneOf": [
+                                            {
+                                                "type": "null"
+                                            },
+                                            {
+                                                "type": "boolean"
+                                            },
+                                            {
+                                                "type": "number"
+                                            },
+                                            {
+                                                "type": "string"
+                                            },
+                                            {
+                                                "type": "array"
+                                            },
+                                            {
+                                                "type": "object"
+                                            }
+                                        ]
+                                    }
+                                },
+                                "name": {
+                                    "type": "string"
+                                },
+                                "description": {
+                                    "type": "string"
+                                }
+                            },
+                            "additionalProperties": false,
+                            "required": [
+                                "input_schema",
+                                "name"
+                            ],
+                            "title": "MCPListToolsTool"
+                        }
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "id",
+                    "type",
+                    "server_label",
+                    "tools"
+                ],
+                "title": "OpenAIResponseOutputMessageMCPListTools"
+            },
            "OpenAIResponseObjectStream": {
                "oneOf": [
                    {
                        "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated"
                    },
+                    {
+                        "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta"
+                    },
                    {
                        "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted"
                    }
@ -7166,6 +7551,7 @@
                    "propertyName": "type",
                    "mapping": {
                        "response.created": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated",
+                        "response.output_text.delta": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta",
                        "response.completed": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted"
                    }
                }
@ -7208,6 +7594,41 @@
                ],
                "title": "OpenAIResponseObjectStreamResponseCreated"
            },
+            "OpenAIResponseObjectStreamResponseOutputTextDelta": {
+                "type": "object",
+                "properties": {
+                    "content_index": {
+                        "type": "integer"
+                    },
+                    "delta": {
+                        "type": "string"
+                    },
+                    "item_id": {
+                        "type": "string"
+                    },
+                    "output_index": {
+                        "type": "integer"
+                    },
+                    "sequence_number": {
+                        "type": "integer"
+                    },
+                    "type": {
+                        "type": "string",
+                        "const": "response.output_text.delta",
+                        "default": "response.output_text.delta"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "content_index",
+                    "delta",
+                    "item_id",
+                    "output_index",
+                    "sequence_number",
+                    "type"
+                ],
+                "title": "OpenAIResponseObjectStreamResponseOutputTextDelta"
+            },
            "CreateUploadSessionRequest": {
                "type": "object",
                "properties": {
@ -9173,9 +9594,6 @@
                    "toolgroup_id": {
                        "type": "string"
                    },
-                    "tool_host": {
-                        "$ref": "#/components/schemas/ToolHost"
-                    },
                    "description": {
                        "type": "string"
                    },
@ -9217,21 +9635,11 @@
                    "provider_id",
                    "type",
                    "toolgroup_id",
-                    "tool_host",
                    "description",
                    "parameters"
                ],
                "title": "Tool"
            },
-            "ToolHost": {
-                "type": "string",
-                "enum": [
-                    "distribution",
-                    "client",
-                    "model_context_protocol"
-                ],
-                "title": "ToolHost"
-            },
            "ToolGroup": {
                "type": "object",
                "properties": {
@ -10068,6 +10476,130 @@
                ],
                "title": "ListModelsResponse"
            },
+            "ListOpenAIResponseInputItem": {
+                "type": "object",
+                "properties": {
+                    "data": {
+                        "type": "array",
+                        "items": {
+                            "$ref": "#/components/schemas/OpenAIResponseInput"
+                        }
+                    },
+                    "object": {
+                        "type": "string",
+                        "const": "list",
+                        "default": "list"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "data",
+                    "object"
+                ],
+                "title": "ListOpenAIResponseInputItem"
+            },
+            "ListOpenAIResponseObject": {
+                "type": "object",
+                "properties": {
+                    "data": {
+                        "type": "array",
+                        "items": {
+                            "$ref": "#/components/schemas/OpenAIResponseObjectWithInput"
+                        }
+                    },
+                    "has_more": {
+                        "type": "boolean"
+                    },
+                    "first_id": {
+                        "type": "string"
+                    },
+                    "last_id": {
+                        "type": "string"
+                    },
+                    "object": {
+                        "type": "string",
+                        "const": "list",
+                        "default": "list"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "data",
+                    "has_more",
+                    "first_id",
+                    "last_id",
+                    "object"
+                ],
+                "title": "ListOpenAIResponseObject"
+            },
+            "OpenAIResponseObjectWithInput": {
+                "type": "object",
+                "properties": {
+                    "created_at": {
+                        "type": "integer"
+                    },
+                    "error": {
+                        "$ref": "#/components/schemas/OpenAIResponseError"
+                    },
+                    "id": {
+                        "type": "string"
+                    },
+                    "model": {
+                        "type": "string"
+                    },
+                    "object": {
+                        "type": "string",
+                        "const": "response",
+                        "default": "response"
+                    },
+                    "output": {
+                        "type": "array",
+                        "items": {
+                            "$ref": "#/components/schemas/OpenAIResponseOutput"
+                        }
+                    },
+                    "parallel_tool_calls": {
+                        "type": "boolean",
+                        "default": false
+                    },
+                    "previous_response_id": {
+                        "type": "string"
+                    },
+                    "status": {
+                        "type": "string"
+                    },
+                    "temperature": {
+                        "type": "number"
+                    },
+                    "top_p": {
+                        "type": "number"
+                    },
+                    "truncation": {
+                        "type": "string"
+                    },
+                    "user": {
+                        "type": "string"
+                    },
+                    "input": {
+                        "type": "array",
+                        "items": {
+                            "$ref": "#/components/schemas/OpenAIResponseInput"
+                        }
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "created_at",
+                    "id",
+                    "model",
+                    "object",
+                    "output",
+                    "parallel_tool_calls",
+                    "status",
+                    "input"
+                ],
+                "title": "OpenAIResponseObjectWithInput"
+            },
            "ListProvidersResponse": {
                "type": "object",
                "properties": {
@ -11605,6 +12137,10 @@
                        "type": "string",
                        "default": "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n",
                        "description": "Template for formatting each retrieved chunk in the context. Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict). Default: \"Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n\""
+                    },
+                    "mode": {
+                        "type": "string",
+                        "description": "Search mode for retrieval—either \"vector\" or \"keyword\". Default \"vector\"."
                    }
                },
                "additionalProperties": false,
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@ -349,6 +349,53 @@ paths:
              $ref: '#/components/schemas/CreateAgentTurnRequest'
        required: true
  /v1/openai/v1/responses:
+    get:
+      responses:
+        '200':
+          description: A ListOpenAIResponseObject.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListOpenAIResponseObject'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Agents
+      description: List all OpenAI responses.
+      parameters:
+        - name: after
+          in: query
+          description: The ID of the last response to return.
+          required: false
+          schema:
+            type: string
+        - name: limit
+          in: query
+          description: The number of responses to return.
+          required: false
+          schema:
+            type: integer
+        - name: model
+          in: query
+          description: The model to filter responses by.
+          required: false
+          schema:
+            type: string
+        - name: order
+          in: query
+          description: >-
+            The sort order for responses, sorted by created_at ('asc' or 'desc').
+          required: false
+          schema:
+            $ref: '#/components/schemas/Order'
    post:
      responses:
        '200':
@ -963,7 +1010,7 @@ paths:
          required: true
          schema:
            type: string
-  /v1/openai/v1/responses/{id}:
+  /v1/openai/v1/responses/{response_id}:
    get:
      responses:
        '200':
@ -986,7 +1033,7 @@ paths:
        - Agents
      description: Retrieve an OpenAI response by its ID.
      parameters:
-        - name: id
+        - name: response_id
          in: path
          description: >-
            The ID of the OpenAI response to retrieve.
@ -2038,6 +2085,75 @@ paths:
            schema:
              $ref: '#/components/schemas/RegisterModelRequest'
        required: true
+  /v1/openai/v1/responses/{response_id}/input_items:
+    get:
+      responses:
+        '200':
+          description: A ListOpenAIResponseInputItem.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListOpenAIResponseInputItem'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Agents
+      description: >-
+        List input items for a given OpenAI response.
+      parameters:
+        - name: response_id
+          in: path
+          description: >-
+            The ID of the response to retrieve input items for.
+          required: true
+          schema:
+            type: string
+        - name: after
+          in: query
+          description: >-
+            An item ID to list items after, used for pagination.
+          required: false
+          schema:
+            type: string
+        - name: before
+          in: query
+          description: >-
+            An item ID to list items before, used for pagination.
+          required: false
+          schema:
+            type: string
+        - name: include
+          in: query
+          description: >-
+            Additional fields to include in the response.
+          required: false
+          schema:
+            type: array
+            items:
+              type: string
+        - name: limit
+          in: query
+          description: >-
+            A limit on the number of objects to be returned. Limit can range between
+            1 and 100, and the default is 20.
+          required: false
+          schema:
+            type: integer
+        - name: order
+          in: query
+          description: >-
+            The order to return the input items in. Default is desc.
+          required: false
+          schema:
+            $ref: '#/components/schemas/Order'
  /v1/providers:
    get:
      responses:
@ -4762,12 +4878,14 @@ components:
        - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch'
        - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch'
        - $ref: '#/components/schemas/OpenAIResponseInputToolFunction'
+        - $ref: '#/components/schemas/OpenAIResponseInputToolMCP'
      discriminator:
        propertyName: type
        mapping:
          web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch'
          file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch'
          function: '#/components/schemas/OpenAIResponseInputToolFunction'
+          mcp: '#/components/schemas/OpenAIResponseInputToolMCP'
    OpenAIResponseInputToolFileSearch:
      type: object
      properties:
@ -4822,6 +4940,66 @@ components:
        - type
        - name
      title: OpenAIResponseInputToolFunction
+    OpenAIResponseInputToolMCP:
+      type: object
+      properties:
+        type:
+          type: string
+          const: mcp
+          default: mcp
+        server_label:
+          type: string
+        server_url:
+          type: string
+        headers:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+        require_approval:
+          oneOf:
+            - type: string
+              const: always
+            - type: string
+              const: never
+            - type: object
+              properties:
+                always:
+                  type: array
+                  items:
+                    type: string
+                never:
+                  type: array
+                  items:
+                    type: string
+              additionalProperties: false
+              title: ApprovalFilter
+          default: never
+        allowed_tools:
+          oneOf:
+            - type: array
+              items:
+                type: string
+            - type: object
+              properties:
+                tool_names:
+                  type: array
+                  items:
+                    type: string
+              additionalProperties: false
+              title: AllowedToolsFilter
+      additionalProperties: false
+      required:
+        - type
+        - server_label
+        - server_url
+        - require_approval
+      title: OpenAIResponseInputToolMCP
    OpenAIResponseInputToolWebSearch:
      type: object
      properties:
@ -4897,12 +5075,12 @@ components:
    "OpenAIResponseOutputMessageFunctionToolCall":
      type: object
      properties:
-        arguments:
-          type: string
        call_id:
          type: string
        name:
          type: string
+        arguments:
+          type: string
        type:
          type: string
          const: function_call
@ -4913,12 +5091,10 @@ components:
          type: string
      additionalProperties: false
      required:
-        - arguments
        - call_id
        - name
+        - arguments
        - type
-        - id
-        - status
      title: >-
        OpenAIResponseOutputMessageFunctionToolCall
    "OpenAIResponseOutputMessageWebSearchToolCall":
@ -4952,6 +5128,8 @@ components:
        model:
          type: string
          description: The underlying LLM used for completions.
+        instructions:
+          type: string
        previous_response_id:
          type: string
          description: >-
@ -5034,20 +5212,95 @@ components:
        - $ref: '#/components/schemas/OpenAIResponseMessage'
        - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
      discriminator:
        propertyName: type
        mapping:
          message: '#/components/schemas/OpenAIResponseMessage'
          web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
          function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+          mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+          mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+    OpenAIResponseOutputMessageMCPCall:
+      type: object
+      properties:
+        id:
+          type: string
+        type:
+          type: string
+          const: mcp_call
+          default: mcp_call
+        arguments:
+          type: string
+        name:
+          type: string
+        server_label:
+          type: string
+        error:
+          type: string
+        output:
+          type: string
+      additionalProperties: false
+      required:
+        - id
+        - type
+        - arguments
+        - name
+        - server_label
+      title: OpenAIResponseOutputMessageMCPCall
+    OpenAIResponseOutputMessageMCPListTools:
+      type: object
+      properties:
+        id:
+          type: string
+        type:
+          type: string
+          const: mcp_list_tools
+          default: mcp_list_tools
+        server_label:
+          type: string
+        tools:
+          type: array
+          items:
+            type: object
+            properties:
+              input_schema:
+                type: object
+                additionalProperties:
+                  oneOf:
+                    - type: 'null'
+                    - type: boolean
+                    - type: number
+                    - type: string
+                    - type: array
+                    - type: object
+              name:
+                type: string
+              description:
+                type: string
+            additionalProperties: false
+            required:
+              - input_schema
+              - name
+            title: MCPListToolsTool
+      additionalProperties: false
+      required:
+        - id
+        - type
+        - server_label
+        - tools
+      title: OpenAIResponseOutputMessageMCPListTools
    OpenAIResponseObjectStream:
      oneOf:
        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
+        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta'
        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
      discriminator:
        propertyName: type
        mapping:
          response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
+          response.output_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta'
          response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
    "OpenAIResponseObjectStreamResponseCompleted":
      type: object
@ -5079,6 +5332,33 @@ components:
        - type
      title: >-
        OpenAIResponseObjectStreamResponseCreated
+    "OpenAIResponseObjectStreamResponseOutputTextDelta":
+      type: object
+      properties:
+        content_index:
+          type: integer
+        delta:
+          type: string
+        item_id:
+          type: string
+        output_index:
+          type: integer
+        sequence_number:
+          type: integer
+        type:
+          type: string
+          const: response.output_text.delta
+          default: response.output_text.delta
+      additionalProperties: false
+      required:
+        - content_index
+        - delta
+        - item_id
+        - output_index
+        - sequence_number
+        - type
+      title: >-
+        OpenAIResponseObjectStreamResponseOutputTextDelta
    CreateUploadSessionRequest:
      type: object
      properties:
@ -6462,8 +6742,6 @@ components:
          default: tool
        toolgroup_id:
          type: string
-        tool_host:
-          $ref: '#/components/schemas/ToolHost'
        description:
          type: string
        parameters:
@ -6486,17 +6764,9 @@ components:
        - provider_id
        - type
        - toolgroup_id
-        - tool_host
        - description
        - parameters
      title: Tool
-    ToolHost:
-      type: string
-      enum:
-        - distribution
-        - client
-        - model_context_protocol
-      title: ToolHost
    ToolGroup:
      type: object
      properties:
@ -7042,6 +7312,96 @@ components:
      required:
        - data
      title: ListModelsResponse
+    ListOpenAIResponseInputItem:
+      type: object
+      properties:
+        data:
+          type: array
+          items:
+            $ref: '#/components/schemas/OpenAIResponseInput'
+        object:
+          type: string
+          const: list
+          default: list
+      additionalProperties: false
+      required:
+        - data
+        - object
+      title: ListOpenAIResponseInputItem
+    ListOpenAIResponseObject:
+      type: object
+      properties:
+        data:
+          type: array
+          items:
+            $ref: '#/components/schemas/OpenAIResponseObjectWithInput'
+        has_more:
+          type: boolean
+        first_id:
+          type: string
+        last_id:
+          type: string
+        object:
+          type: string
+          const: list
+          default: list
+      additionalProperties: false
+      required:
+        - data
+        - has_more
+        - first_id
+        - last_id
+        - object
+      title: ListOpenAIResponseObject
+    OpenAIResponseObjectWithInput:
+      type: object
+      properties:
+        created_at:
+          type: integer
+        error:
+          $ref: '#/components/schemas/OpenAIResponseError'
+        id:
+          type: string
+        model:
+          type: string
+        object:
+          type: string
+          const: response
+          default: response
+        output:
+          type: array
+          items:
+            $ref: '#/components/schemas/OpenAIResponseOutput'
+        parallel_tool_calls:
+          type: boolean
+          default: false
+        previous_response_id:
+          type: string
+        status:
+          type: string
+        temperature:
+          type: number
+        top_p:
+          type: number
+        truncation:
+          type: string
+        user:
+          type: string
+        input:
+          type: array
+          items:
+            $ref: '#/components/schemas/OpenAIResponseInput'
+      additionalProperties: false
+      required:
+        - created_at
+        - id
+        - model
+        - object
+        - output
+        - parallel_tool_calls
+        - status
+        - input
+      title: OpenAIResponseObjectWithInput
    ListProvidersResponse:
      type: object
      properties:
@ -8084,6 +8444,10 @@ components:
            placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk
            content string), {metadata} (chunk metadata dict). Default: "Result {index}\nContent:
            {chunk.content}\nMetadata: {metadata}\n"
+        mode:
+          type: string
+          description: >-
+            Search mode for retrieval—either "vector" or "keyword". Default "vector".
      additionalProperties: false
      required:
        - query_generator_config
--- a/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
+++ b/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
--- a/docs/readme.md
+++ b/docs/readme.md
@ -3,10 +3,10 @@
 Here's a collection of comprehensive guides, examples, and resources for building AI applications with Llama Stack. For the complete documentation, visit our [ReadTheDocs page](https://llama-stack.readthedocs.io/en/latest/index.html).

 ## Render locally
+
+From the llama-stack root directory, run the following command to render the docs locally:
 ```bash
-pip install -r requirements.txt
-cd docs
-python -m sphinx_autobuild source _build
+uv run --with ".[docs]" sphinx-autobuild docs/source docs/build/html --write-all
 ```
 You can open up the docs in your browser at http://localhost:8000

--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@ -1,16 +0,0 @@
-linkify
-myst-parser
-e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
-sphinx==8.1.3
-sphinx-copybutton
-sphinx-design
-sphinx-pdj-theme
-sphinx-rtd-theme>=1.0.0
-sphinx-tabs
-sphinx_autobuild
-sphinx_rtd_dark_mode
-sphinxcontrib-mermaid
-sphinxcontrib-openapi
-sphinxcontrib-redoc
-sphinxcontrib-video
-tomli
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@ -22,7 +22,11 @@ from docutils import nodes
 # Read version from pyproject.toml
 with Path(__file__).parent.parent.parent.joinpath("pyproject.toml").open("rb") as f:
    pypi_url = "https://pypi.org/pypi/llama-stack/json"
-    version_tag = json.loads(requests.get(pypi_url).text)["info"]["version"]
+    headers = {
+        'User-Agent': 'pip/23.0.1 (python 3.11)',  # Mimic pip's user agent
+        'Accept': 'application/json'
+    }
+    version_tag = json.loads(requests.get(pypi_url, headers=headers).text)["info"]["version"]
    print(f"{version_tag=}")

    # generate the full link including text and url here
@ -53,14 +57,6 @@ myst_enable_extensions = ["colon_fence"]

 html_theme = "sphinx_rtd_theme"
 html_use_relative_paths = True
-
-# html_theme = "sphinx_pdj_theme"
-# html_theme_path = [sphinx_pdj_theme.get_html_theme_path()]
-
-# html_theme = "pytorch_sphinx_theme"
-# html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()]
-
-
 templates_path = ["_templates"]
 exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]

--- a/docs/source/distributions/building_distro.md
+++ b/docs/source/distributions/building_distro.md
@ -338,6 +338,48 @@ INFO:     Application startup complete.
 INFO:     Uvicorn running on http://['::', '0.0.0.0']:8321 (Press CTRL+C to quit)
 INFO:     2401:db00:35c:2d2b:face:0:c9:0:54678 - "GET /models/list HTTP/1.1" 200 OK
 ```
+### Listing Distributions
+Using the list command, you can view all existing Llama Stack distributions, including stacks built from templates, from scratch, or using custom configuration files.
+
+```
+llama stack list -h
+usage: llama stack list [-h]
+
+list the build stacks
+
+options:
+  -h, --help  show this help message and exit
+```
+
+Example Usage
+
+```
+llama stack list
+```
+
+### Removing a Distribution
+Use the remove command to delete a distribution you've previously built.
+
+```
+llama stack rm -h
+usage: llama stack rm [-h] [--all] [name]
+
+Remove the build stack
+
+positional arguments:
+  name        Name of the stack to delete (default: None)
+
+options:
+  -h, --help  show this help message and exit
+  --all, -a   Delete all stacks (use with caution) (default: False)
+```
+
+Example
+```
+llama stack rm llamastack-test
+```
+
+To keep your environment organized and avoid clutter, consider using `llama stack list` to review old or unused distributions and `llama stack rm <name>` to delete them when they’re no longer needed.

 ### Troubleshooting

--- a/docs/source/distributions/configuration.md
+++ b/docs/source/distributions/configuration.md
@ -118,11 +118,6 @@ server:
  port: 8321  # Port to listen on (default: 8321)
  tls_certfile: "/path/to/cert.pem"  # Optional: Path to TLS certificate for HTTPS
  tls_keyfile: "/path/to/key.pem"    # Optional: Path to TLS key for HTTPS
-  auth:                              # Optional: Authentication configuration
-    provider_type: "kubernetes"      # Type of auth provider
-    config:                          # Provider-specific configuration
-      api_server_url: "https://kubernetes.default.svc"
-      ca_cert_path: "/path/to/ca.crt" # Optional: Path to CA certificate
 ```

 ### Authentication Configuration
@ -135,7 +130,7 @@ Authorization: Bearer <token>

 The server supports multiple authentication providers:

-#### Kubernetes Provider
+#### OAuth 2.0/OpenID Connect Provider with Kubernetes

 The Kubernetes cluster must be configured to use a service account for authentication.

@ -146,14 +141,67 @@ kubectl create rolebinding llama-stack-auth-rolebinding --clusterrole=admin --se
 kubectl create token llama-stack-auth -n llama-stack > llama-stack-auth-token
 ```

-Validates tokens against the Kubernetes API server:
+Make sure the `kube-apiserver` runs with `--anonymous-auth=true` to allow unauthenticated requests
+and that anonymous users are permitted to read the cluster's JWKS endpoint (`/openid/v1/jwks`).
+If that is not the case, you can create a ClusterRole and ClusterRoleBinding that grant the
+`system:anonymous` user access to that endpoint:
+
+```yaml
+# allow-anonymous-openid.yaml
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: allow-anonymous-openid
+rules:
+- nonResourceURLs: ["/openid/v1/jwks"]
+  verbs: ["get"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: allow-anonymous-openid
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: allow-anonymous-openid
+subjects:
+- kind: User
+  name: system:anonymous
+  apiGroup: rbac.authorization.k8s.io
+```
+
+And then apply the configuration:
+```bash
+kubectl apply -f allow-anonymous-openid.yaml
+```
+
+Then configure the server to validate tokens against the Kubernetes API server through its OIDC provider:
 ```yaml
 server:
  auth:
-    provider_type: "kubernetes"
+    provider_type: "oauth2_token"
    config:
-      api_server_url: "https://kubernetes.default.svc"  # URL of the Kubernetes API server
-      ca_cert_path: "/path/to/ca.crt"                   # Optional: Path to CA certificate
+      jwks:
+        uri: "https://kubernetes.default.svc/openid/v1/jwks"
+        key_recheck_period: 3600
+      tls_cafile: "/path/to/ca.crt"
+      issuer: "https://kubernetes.default.svc"
+      audience: "https://kubernetes.default.svc"
+```
+
+To find your cluster's audience, run:
+```bash
+kubectl create token default --duration=1h | cut -d. -f2 | base64 -d | jq .aud
+```
+
+For the issuer, you can use the OIDC provider's URL:
+```bash
+kubectl get --raw /.well-known/openid-configuration | jq .issuer
+```
+
+For the tls_cafile, you can use the CA certificate of the OIDC provider:
+```bash
+kubectl config view --minify -o jsonpath='{.clusters[0].cluster.certificate-authority}'
 ```

 The provider extracts user information from the JWT token:
@ -208,6 +256,80 @@ And must respond with:

 If no access attributes are returned, the token is used as a namespace.

+### Quota Configuration
+
+The `quota` section allows you to enable server-side request throttling for both
+authenticated and anonymous clients. This is useful for preventing abuse, enforcing
+fairness across tenants, and controlling infrastructure costs without requiring
+client-side rate limiting or external proxies.
+
+Quotas are disabled by default. When enabled, each client is tracked using either:
+
+* Their authenticated `client_id` (derived from the Bearer token), or
+* Their IP address (fallback for anonymous requests)
+
+Quota state is stored in a SQLite-backed key-value store, and rate limits are applied
+within a configurable time window (currently only `day` is supported).
+
+#### Example
+
+```yaml
+server:
+  quota:
+    kvstore:
+      type: sqlite
+      db_path: ./quotas.db
+    anonymous_max_requests: 100
+    authenticated_max_requests: 1000
+    period: day
+```
+
+#### Configuration Options
+
+| Field                        | Description                                                                |
+| ---------------------------- | -------------------------------------------------------------------------- |
+| `kvstore`                    | Required. Backend storage config for tracking request counts.              |
+| `kvstore.type`               | Must be `"sqlite"` for now. Other backends may be supported in the future. |
+| `kvstore.db_path`            | File path to the SQLite database.                                          |
+| `anonymous_max_requests`     | Max requests per period for unauthenticated clients.                       |
+| `authenticated_max_requests` | Max requests per period for authenticated clients.                         |
+| `period`                     | Time window for quota enforcement. Only `"day"` is supported.              |
+
+> Note: if `authenticated_max_requests` is set but no authentication provider is
+configured, the server will fall back to applying `anonymous_max_requests` to all
+clients.
+
+#### Example with Authentication Enabled
+
+```yaml
+server:
+  port: 8321
+  auth:
+    provider_type: custom
+    config:
+      endpoint: https://auth.example.com/validate
+  quota:
+    kvstore:
+      type: sqlite
+      db_path: ./quotas.db
+    anonymous_max_requests: 100
+    authenticated_max_requests: 1000
+    period: day
+```
+
+If a client exceeds their limit, the server responds with:
+
+```http
+HTTP/1.1 429 Too Many Requests
+Content-Type: application/json
+
+{
+  "error": {
+    "message": "Quota exceeded"
+  }
+}
+```
+
 ## Extending to handle Safety

 Configuring Safety can be a little involved so it is instructive to go through an example.
--- a/docs/source/distributions/self_hosted_distro/sambanova.md
+++ b/docs/source/distributions/self_hosted_distro/sambanova.md
@ -17,7 +17,7 @@ The `llamastack/distribution-sambanova` distribution consists of the following p
 |-----|-------------|
 | agents | `inline::meta-reference` |
 | inference | `remote::sambanova`, `inline::sentence-transformers` |
-| safety | `inline::llama-guard` |
+| safety | `remote::sambanova` |
 | telemetry | `inline::meta-reference` |
 | tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime`, `remote::model-context-protocol`, `remote::wolfram-alpha` |
 | vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
@ -48,33 +48,44 @@ The following models are available by default:

 ### Prerequisite: API Keys

-Make sure you have access to a SambaNova API Key. You can get one by visiting [SambaNova.ai](https://sambanova.ai/).
+Make sure you have access to a SambaNova API Key. You can get one by visiting [SambaNova.ai](http://cloud.sambanova.ai?utm_source=llamastack&utm_medium=external&utm_campaign=cloud_signup).


 ## Running Llama Stack with SambaNova

 You can do this via Conda (build code) or Docker which has a pre-built image.

-### Via Docker

-This method allows you to get started quickly without having to build the distribution code.
+### Via Docker

 ```bash
 LLAMA_STACK_PORT=8321
+llama stack build --template sambanova --image-type container
 docker run \
  -it \
-  --pull always \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  llamastack/distribution-sambanova \
+  -v ~/.llama:/root/.llama \
+  distribution-sambanova \
  --port $LLAMA_STACK_PORT \
  --env SAMBANOVA_API_KEY=$SAMBANOVA_API_KEY
 ```

+
+### Via Venv
+
+```bash
+llama stack build --template sambanova --image-type venv
+llama stack run --image-type venv ~/.llama/distributions/sambanova/sambanova-run.yaml \
+  --port $LLAMA_STACK_PORT \
+  --env SAMBANOVA_API_KEY=$SAMBANOVA_API_KEY
+```
+
+
 ### Via Conda

 ```bash
 llama stack build --template sambanova --image-type conda
-llama stack run ./run.yaml \
+llama stack run --image-type conda ~/.llama/distributions/sambanova/sambanova-run.yaml \
  --port $LLAMA_STACK_PORT \
  --env SAMBANOVA_API_KEY=$SAMBANOVA_API_KEY
 ```
--- a/docs/source/providers/vector_io/sqlite-vec.md
+++ b/docs/source/providers/vector_io/sqlite-vec.md
@ -66,6 +66,25 @@ To use sqlite-vec in your Llama Stack project, follow these steps:
 2. Configure your Llama Stack project to use SQLite-Vec.
 3. Start storing and querying vectors.

+## Supported Search Modes
+
+The sqlite-vec provider supports both vector-based and keyword-based (full-text) search modes.
+
+When using the RAGTool interface, you can specify the desired search behavior via the `mode` parameter in
+`RAGQueryConfig`. For example:
+
+```python
+from llama_stack.apis.tool_runtime.rag import RAGQueryConfig
+
+query_config = RAGQueryConfig(max_chunks=6, mode="vector")
+
+results = client.tool_runtime.rag_tool.query(
+    vector_db_ids=[vector_db_id],
+    content="what is torchtune",
+    query_config=query_config,
+)
+```
+
 ## Installation

 You can install SQLite-Vec using pip: