Merge branch 'main' into nvidia-e2e-notebook

commit 012dd6891f
Jash Gulabrai, 2025-04-30 12:05:11 -04:00

96 changed files with 4675 additions and 426 deletions


@@ -497,6 +497,54 @@
}
}
},
"/v1/openai/v1/responses": {
"post": {
"responses": {
"200": {
"description": "Runtime representation of an annotated type.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/OpenAIResponseObject"
}
},
"text/event-stream": {
"schema": {
"$ref": "#/components/schemas/OpenAIResponseObjectStream"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Agents"
],
"description": "Create a new OpenAI response.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CreateOpenaiResponseRequest"
}
}
},
"required": true
}
}
},
"/v1/files": {
"get": {
"responses": {
@@ -1278,6 +1326,49 @@
]
}
},
"/v1/openai/v1/responses/{id}": {
"get": {
"responses": {
"200": {
"description": "An OpenAIResponseObject.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/OpenAIResponseObject"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Agents"
],
"description": "Retrieve an OpenAI response by its ID.",
"parameters": [
{
"name": "id",
"in": "path",
"description": "The ID of the OpenAI response to retrieve.",
"required": true,
"schema": {
"type": "string"
}
}
]
}
},
"/v1/scoring-functions/{scoring_fn_id}": {
"get": {
"responses": {
@@ -6192,6 +6283,427 @@
],
"title": "AgentTurnResponseTurnStartPayload"
},
"OpenAIResponseInputMessage": {
"type": "object",
"properties": {
"content": {
"oneOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponseInputMessageContent"
}
}
]
},
"role": {
"oneOf": [
{
"type": "string",
"const": "system"
},
{
"type": "string",
"const": "developer"
},
{
"type": "string",
"const": "user"
},
{
"type": "string",
"const": "assistant"
}
]
},
"type": {
"type": "string",
"const": "message",
"default": "message"
}
},
"additionalProperties": false,
"required": [
"content",
"role"
],
"title": "OpenAIResponseInputMessage"
},
"OpenAIResponseInputMessageContent": {
"oneOf": [
{
"$ref": "#/components/schemas/OpenAIResponseInputMessageContentText"
},
{
"$ref": "#/components/schemas/OpenAIResponseInputMessageContentImage"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"input_text": "#/components/schemas/OpenAIResponseInputMessageContentText",
"input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage"
}
}
},
"OpenAIResponseInputMessageContentImage": {
"type": "object",
"properties": {
"detail": {
"oneOf": [
{
"type": "string",
"const": "low"
},
{
"type": "string",
"const": "high"
},
{
"type": "string",
"const": "auto"
}
],
"default": "auto"
},
"type": {
"type": "string",
"const": "input_image",
"default": "input_image"
},
"image_url": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"detail",
"type"
],
"title": "OpenAIResponseInputMessageContentImage"
},
"OpenAIResponseInputMessageContentText": {
"type": "object",
"properties": {
"text": {
"type": "string"
},
"type": {
"type": "string",
"const": "input_text",
"default": "input_text"
}
},
"additionalProperties": false,
"required": [
"text",
"type"
],
"title": "OpenAIResponseInputMessageContentText"
},
"OpenAIResponseInputTool": {
"type": "object",
"properties": {
"type": {
"oneOf": [
{
"type": "string",
"const": "web_search"
},
{
"type": "string",
"const": "web_search_preview_2025_03_11"
}
],
"default": "web_search"
},
"search_context_size": {
"type": "string",
"default": "medium"
}
},
"additionalProperties": false,
"required": [
"type"
],
"title": "OpenAIResponseInputToolWebSearch"
},
"CreateOpenaiResponseRequest": {
"type": "object",
"properties": {
"input": {
"oneOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponseInputMessage"
}
}
],
"description": "Input message(s) to create the response."
},
"model": {
"type": "string",
"description": "The underlying LLM used for completions."
},
"previous_response_id": {
"type": "string",
"description": "(Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses."
},
"store": {
"type": "boolean"
},
"stream": {
"type": "boolean"
},
"tools": {
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponseInputTool"
}
}
},
"additionalProperties": false,
"required": [
"input",
"model"
],
"title": "CreateOpenaiResponseRequest"
},
"OpenAIResponseError": {
"type": "object",
"properties": {
"code": {
"type": "string"
},
"message": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"code",
"message"
],
"title": "OpenAIResponseError"
},
"OpenAIResponseObject": {
"type": "object",
"properties": {
"created_at": {
"type": "integer"
},
"error": {
"$ref": "#/components/schemas/OpenAIResponseError"
},
"id": {
"type": "string"
},
"model": {
"type": "string"
},
"object": {
"type": "string",
"const": "response",
"default": "response"
},
"output": {
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponseOutput"
}
},
"parallel_tool_calls": {
"type": "boolean",
"default": false
},
"previous_response_id": {
"type": "string"
},
"status": {
"type": "string"
},
"temperature": {
"type": "number"
},
"top_p": {
"type": "number"
},
"truncation": {
"type": "string"
},
"user": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"created_at",
"id",
"model",
"object",
"output",
"parallel_tool_calls",
"status"
],
"title": "OpenAIResponseObject"
},
"OpenAIResponseOutput": {
"oneOf": [
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessage"
},
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"message": "#/components/schemas/OpenAIResponseOutputMessage",
"web_search_call": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
}
}
},
"OpenAIResponseOutputMessage": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"content": {
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponseOutputMessageContent"
}
},
"role": {
"type": "string",
"const": "assistant",
"default": "assistant"
},
"status": {
"type": "string"
},
"type": {
"type": "string",
"const": "message",
"default": "message"
}
},
"additionalProperties": false,
"required": [
"id",
"content",
"role",
"status",
"type"
],
"title": "OpenAIResponseOutputMessage"
},
"OpenAIResponseOutputMessageContent": {
"type": "object",
"properties": {
"text": {
"type": "string"
},
"type": {
"type": "string",
"const": "output_text",
"default": "output_text"
}
},
"additionalProperties": false,
"required": [
"text",
"type"
],
"title": "OpenAIResponseOutputMessageContentOutputText"
},
"OpenAIResponseOutputMessageWebSearchToolCall": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"status": {
"type": "string"
},
"type": {
"type": "string",
"const": "web_search_call",
"default": "web_search_call"
}
},
"additionalProperties": false,
"required": [
"id",
"status",
"type"
],
"title": "OpenAIResponseOutputMessageWebSearchToolCall"
},
"OpenAIResponseObjectStream": {
"oneOf": [
{
"$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated"
},
{
"$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"response.created": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated",
"response.completed": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted"
}
}
},
"OpenAIResponseObjectStreamResponseCompleted": {
"type": "object",
"properties": {
"response": {
"$ref": "#/components/schemas/OpenAIResponseObject"
},
"type": {
"type": "string",
"const": "response.completed",
"default": "response.completed"
}
},
"additionalProperties": false,
"required": [
"response",
"type"
],
"title": "OpenAIResponseObjectStreamResponseCompleted"
},
"OpenAIResponseObjectStreamResponseCreated": {
"type": "object",
"properties": {
"response": {
"$ref": "#/components/schemas/OpenAIResponseObject"
},
"type": {
"type": "string",
"const": "response.created",
"default": "response.created"
}
},
"additionalProperties": false,
"required": [
"response",
"type"
],
"title": "OpenAIResponseObjectStreamResponseCreated"
},
"CreateUploadSessionRequest": {
"type": "object",
"properties": {

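To make the new paths above concrete, here is a minimal client sketch (not part of this change). The base URL, port, and model id are assumptions for a local deployment, and any auth headers your server requires are omitted:

```python
# Hypothetical walk-through of the two new endpoints, using requests.
# Assumed: a Llama Stack server on localhost:8321 and a registered model id.
import requests

BASE_URL = "http://localhost:8321/v1/openai/v1"
MODEL_ID = "meta-llama/Llama-3.1-8B-Instruct"  # placeholder; use a model registered on your stack

# POST /v1/openai/v1/responses -- body mirrors CreateOpenaiResponseRequest
created = requests.post(
    f"{BASE_URL}/responses",
    json={"input": "What is the capital of France?", "model": MODEL_ID, "stream": False},
    timeout=60,
)
created.raise_for_status()
response_obj = created.json()  # an OpenAIResponseObject
print(response_obj["id"], response_obj["status"])

# GET /v1/openai/v1/responses/{id} -- retrieve the same response by its id
fetched = requests.get(f"{BASE_URL}/responses/{response_obj['id']}", timeout=60).json()
for item in fetched["output"]:
    if item["type"] == "message":
        for part in item["content"]:  # OpenAIResponseOutputMessageContent entries
            print(part["text"])
```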

@@ -330,6 +330,39 @@ paths:
schema:
$ref: '#/components/schemas/CreateAgentTurnRequest'
required: true
/v1/openai/v1/responses:
post:
responses:
'200':
description: An OpenAIResponseObject.
content:
application/json:
schema:
$ref: '#/components/schemas/OpenAIResponseObject'
text/event-stream:
schema:
$ref: '#/components/schemas/OpenAIResponseObjectStream'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
description: Create a new OpenAI response.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/CreateOpenaiResponseRequest'
required: true
/v1/files:
get:
responses:
@@ -875,6 +908,36 @@ paths:
required: true
schema:
type: string
/v1/openai/v1/responses/{id}:
get:
responses:
'200':
description: An OpenAIResponseObject.
content:
application/json:
schema:
$ref: '#/components/schemas/OpenAIResponseObject'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
description: Retrieve an OpenAI response by its ID.
parameters:
- name: id
in: path
description: >-
The ID of the OpenAI response to retrieve.
required: true
schema:
type: string
/v1/scoring-functions/{scoring_fn_id}:
get:
responses:
@@ -4329,6 +4392,293 @@ components:
- event_type
- turn_id
title: AgentTurnResponseTurnStartPayload
OpenAIResponseInputMessage:
type: object
properties:
content:
oneOf:
- type: string
- type: array
items:
$ref: '#/components/schemas/OpenAIResponseInputMessageContent'
role:
oneOf:
- type: string
const: system
- type: string
const: developer
- type: string
const: user
- type: string
const: assistant
type:
type: string
const: message
default: message
additionalProperties: false
required:
- content
- role
title: OpenAIResponseInputMessage
OpenAIResponseInputMessageContent:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseInputMessageContentText'
- $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage'
discriminator:
propertyName: type
mapping:
input_text: '#/components/schemas/OpenAIResponseInputMessageContentText'
input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage'
OpenAIResponseInputMessageContentImage:
type: object
properties:
detail:
oneOf:
- type: string
const: low
- type: string
const: high
- type: string
const: auto
default: auto
type:
type: string
const: input_image
default: input_image
image_url:
type: string
additionalProperties: false
required:
- detail
- type
title: OpenAIResponseInputMessageContentImage
OpenAIResponseInputMessageContentText:
type: object
properties:
text:
type: string
type:
type: string
const: input_text
default: input_text
additionalProperties: false
required:
- text
- type
title: OpenAIResponseInputMessageContentText
OpenAIResponseInputTool:
type: object
properties:
type:
oneOf:
- type: string
const: web_search
- type: string
const: web_search_preview_2025_03_11
default: web_search
search_context_size:
type: string
default: medium
additionalProperties: false
required:
- type
title: OpenAIResponseInputToolWebSearch
CreateOpenaiResponseRequest:
type: object
properties:
input:
oneOf:
- type: string
- type: array
items:
$ref: '#/components/schemas/OpenAIResponseInputMessage'
description: Input message(s) to create the response.
model:
type: string
description: The underlying LLM used for completions.
previous_response_id:
type: string
description: >-
(Optional) If specified, the new response will be a continuation of the
previous response. This can be used to easily fork off new responses from
existing responses.
store:
type: boolean
stream:
type: boolean
tools:
type: array
items:
$ref: '#/components/schemas/OpenAIResponseInputTool'
additionalProperties: false
required:
- input
- model
title: CreateOpenaiResponseRequest
OpenAIResponseError:
type: object
properties:
code:
type: string
message:
type: string
additionalProperties: false
required:
- code
- message
title: OpenAIResponseError
OpenAIResponseObject:
type: object
properties:
created_at:
type: integer
error:
$ref: '#/components/schemas/OpenAIResponseError'
id:
type: string
model:
type: string
object:
type: string
const: response
default: response
output:
type: array
items:
$ref: '#/components/schemas/OpenAIResponseOutput'
parallel_tool_calls:
type: boolean
default: false
previous_response_id:
type: string
status:
type: string
temperature:
type: number
top_p:
type: number
truncation:
type: string
user:
type: string
additionalProperties: false
required:
- created_at
- id
- model
- object
- output
- parallel_tool_calls
- status
title: OpenAIResponseObject
OpenAIResponseOutput:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseOutputMessage'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
discriminator:
propertyName: type
mapping:
message: '#/components/schemas/OpenAIResponseOutputMessage'
web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
OpenAIResponseOutputMessage:
type: object
properties:
id:
type: string
content:
type: array
items:
$ref: '#/components/schemas/OpenAIResponseOutputMessageContent'
role:
type: string
const: assistant
default: assistant
status:
type: string
type:
type: string
const: message
default: message
additionalProperties: false
required:
- id
- content
- role
- status
- type
title: OpenAIResponseOutputMessage
OpenAIResponseOutputMessageContent:
type: object
properties:
text:
type: string
type:
type: string
const: output_text
default: output_text
additionalProperties: false
required:
- text
- type
title: >-
OpenAIResponseOutputMessageContentOutputText
"OpenAIResponseOutputMessageWebSearchToolCall":
type: object
properties:
id:
type: string
status:
type: string
type:
type: string
const: web_search_call
default: web_search_call
additionalProperties: false
required:
- id
- status
- type
title: >-
OpenAIResponseOutputMessageWebSearchToolCall
OpenAIResponseObjectStream:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
- $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
discriminator:
propertyName: type
mapping:
response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
"OpenAIResponseObjectStreamResponseCompleted":
type: object
properties:
response:
$ref: '#/components/schemas/OpenAIResponseObject'
type:
type: string
const: response.completed
default: response.completed
additionalProperties: false
required:
- response
- type
title: >-
OpenAIResponseObjectStreamResponseCompleted
"OpenAIResponseObjectStreamResponseCreated":
type: object
properties:
response:
$ref: '#/components/schemas/OpenAIResponseObject'
type:
type: string
const: response.created
default: response.created
additionalProperties: false
required:
- response
- type
title: >-
OpenAIResponseObjectStreamResponseCreated
CreateUploadSessionRequest:
type: object
properties:

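The YAML mirrors the JSON spec; the part that differs in practice is the streaming variant. Below is a rough sketch of consuming the `text/event-stream` response, assuming the same local server as above and standard SSE `data:` framing for each OpenAIResponseObjectStream chunk:

```python
# Hypothetical streaming consumer for POST /v1/openai/v1/responses with stream=True.
# Assumed: server at localhost:8321, SSE "data: <json>" framing, a registered model id.
import json
import requests

with requests.post(
    "http://localhost:8321/v1/openai/v1/responses",
    json={
        "input": "Tell me a short joke",
        "model": "meta-llama/Llama-3.1-8B-Instruct",  # placeholder model id
        "stream": True,
    },
    stream=True,
    timeout=120,
) as resp:
    resp.raise_for_status()
    for raw in resp.iter_lines():
        if not raw.startswith(b"data: "):
            continue  # skip blank keep-alive lines
        chunk = json.loads(raw[len(b"data: "):])
        # OpenAIResponseObjectStream is a tagged union on "type":
        # "response.created" carries the initial object, "response.completed" the final one.
        if chunk["type"] == "response.completed":
            final = chunk["response"]
            print(final["id"], final["status"])
```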
File diff suppressed because one or more lines are too long


@@ -179,7 +179,7 @@ class ContentBuilder:
"Creates the content subtree for a request or response."
def is_iterator_type(t):
return "StreamChunk" in str(t)
return "StreamChunk" in str(t) or "OpenAIResponseObjectStream" in str(t)
def get_media_type(t):
if is_generic_list(t):


@@ -53,6 +53,13 @@ models:
provider_id: ollama
provider_model_id: null
shields: []
server:
port: 8321
auth:
provider_type: "kubernetes"
config:
api_server_url: "https://kubernetes.default.svc"
ca_cert_path: "/path/to/ca.crt"
```
Let's break this down into the different sections. The first section specifies the set of APIs that the stack server will serve:
@@ -102,6 +109,105 @@ A Model is an instance of a "Resource" (see [Concepts](../concepts/index)) and i
What's with the `provider_model_id` field? This is an identifier for the model inside the provider's model catalog. Contrast it with `model_id`, which is the identifier for the same model for Llama Stack's purposes. For example, you may want to name "llama3.2:vision-11b" as "image_captioning_model" when you use it in your Stack interactions. When omitted, the server will set `provider_model_id` to be the same as `model_id`.
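For example, a hypothetical entry (names borrowed from the sentence above) would look like this:

```yaml
models:
  - model_id: image_captioning_model       # the name you use in Stack interactions
    provider_id: ollama
    provider_model_id: llama3.2:vision-11b # the name in the provider's own catalog
```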
## Server Configuration
The `server` section configures the HTTP server that serves the Llama Stack APIs:
```yaml
server:
port: 8321 # Port to listen on (default: 8321)
tls_certfile: "/path/to/cert.pem" # Optional: Path to TLS certificate for HTTPS
tls_keyfile: "/path/to/key.pem" # Optional: Path to TLS key for HTTPS
auth: # Optional: Authentication configuration
provider_type: "kubernetes" # Type of auth provider
config: # Provider-specific configuration
api_server_url: "https://kubernetes.default.svc"
ca_cert_path: "/path/to/ca.crt" # Optional: Path to CA certificate
```
### Authentication Configuration
The `auth` section configures authentication for the server. When configured, all API requests must include a valid Bearer token in the Authorization header:
```
Authorization: Bearer <token>
```
The server supports multiple authentication providers:
#### Kubernetes Provider
The Kubernetes cluster must be configured to use a service account for authentication.
```bash
kubectl create namespace llama-stack
kubectl create serviceaccount llama-stack-auth -n llama-stack
kubectl create rolebinding llama-stack-auth-rolebinding --clusterrole=admin --serviceaccount=llama-stack:llama-stack-auth -n llama-stack
kubectl create token llama-stack-auth -n llama-stack > llama-stack-auth-token
```
The Kubernetes provider validates these tokens against the Kubernetes API server:
```yaml
server:
auth:
provider_type: "kubernetes"
config:
api_server_url: "https://kubernetes.default.svc" # URL of the Kubernetes API server
ca_cert_path: "/path/to/ca.crt" # Optional: Path to CA certificate
```
The provider extracts user information from the JWT token:
- Username from the `sub` claim becomes a role
- Kubernetes groups become teams
You can easily validate a request by running:
```bash
curl -s -L -H "Authorization: Bearer $(cat llama-stack-auth-token)" http://127.0.0.1:8321/v1/providers
```
#### Custom Provider
This provider validates tokens against a custom authentication endpoint:
```yaml
server:
auth:
provider_type: "custom"
config:
endpoint: "https://auth.example.com/validate" # URL of the auth endpoint
```
The custom endpoint receives a POST request with:
```json
{
"api_key": "<token>",
"request": {
"path": "/api/v1/endpoint",
"headers": {
"content-type": "application/json",
"user-agent": "curl/7.64.1"
},
"params": {
"key": ["value"]
}
}
}
```
And must respond with:
```json
{
"access_attributes": {
"roles": ["admin", "user"],
"teams": ["ml-team", "nlp-team"],
"projects": ["llama-3", "project-x"],
"namespaces": ["research"]
},
"message": "Authentication successful"
}
```
If no access attributes are returned, the token is used as a namespace.
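For illustration, here is a minimal sketch of a custom endpoint that satisfies this contract (Flask, with a hypothetical in-memory token table; swap the lookup for your real identity provider):

```python
# Hypothetical custom auth endpoint implementing the request/response contract above.
from flask import Flask, jsonify, request

app = Flask(__name__)

# Stand-in token store; in practice, resolve the token via your identity provider.
KNOWN_TOKENS = {
    "secret-token": {
        "roles": ["admin"],
        "teams": ["ml-team"],
        "projects": ["llama-3"],
        "namespaces": ["research"],
    }
}

@app.route("/validate", methods=["POST"])
def validate():
    payload = request.get_json(force=True)
    attrs = KNOWN_TOKENS.get(payload.get("api_key"))
    if attrs is None:
        # Assumption: a non-2xx status is treated as failed authentication.
        return jsonify({"message": "Invalid token"}), 401
    return jsonify({"access_attributes": attrs, "message": "Authentication successful"})

if __name__ == "__main__":
    app.run(port=8000)
```

With this sketch running locally, the `endpoint` in the custom provider config above would point at `http://localhost:8000/validate`.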
## Extending to handle Safety
Configuring Safety can be a little involved so it is instructive to go through an example.