Merge ed4e452de0 into sapling-pr-archive-ehhuang

ehhuang 2025-10-08 11:39:41 -07:00 committed by GitHub
commit 08d46d6363
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
37 changed files with 242 additions and 6940 deletions

@@ -24,7 +24,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Stale Action
-       uses: actions/stale@3a9db7e6a41a89f618792c92c0e97cc736e1b13f # v10.0.0
+       uses: actions/stale@5f858e3efba33a5ca4407a664cc011ad407f2008 # v10.1.0
        with:
          stale-issue-label: 'stale'
          stale-issue-message: >

@@ -17,8 +17,8 @@ IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
| `url` | `<class 'str'>` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing the watsonx.ai |
-| `api_key` | `pydantic.types.SecretStr \| None` | No | | The watsonx API key |
-| `project_id` | `str \| None` | No | | The Project ID key |
+| `api_key` | `pydantic.types.SecretStr \| None` | No | | The watsonx.ai API key |
+| `project_id` | `str \| None` | No | | The watsonx.ai project ID |
| `timeout` | `<class 'int'>` | No | 60 | Timeout for the HTTP requests |
## Sample Configuration
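
The hunk above only touches the field descriptions; for orientation, a minimal sketch of a configuration using these fields might look like the following. The field names come from the table above, while the placeholder values and the `${env...}` indirection are assumptions rather than the file's actual sample configuration (which is not shown in this diff).

```yaml
# Minimal sketch, not the file's actual sample configuration.
# Field names are from the table above; values and env-var names are assumptions.
url: https://us-south.ml.cloud.ibm.com
api_key: ${env.WATSONX_API_KEY}
project_id: ${env.WATSONX_PROJECT_ID}
timeout: 60
refresh_models: false
```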

@@ -3526,343 +3526,6 @@
},
"deprecated": true
}
},
"/v1/telemetry/metrics/{metric_name}": {
"post": {
"responses": {
"200": {
"description": "A QueryMetricsResponse.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QueryMetricsResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Query metrics.",
"description": "Query metrics.",
"parameters": [
{
"name": "metric_name",
"in": "path",
"description": "The name of the metric to query.",
"required": true,
"schema": {
"type": "string"
}
}
],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QueryMetricsRequest"
}
}
},
"required": true
},
"deprecated": true
}
},
"/v1/telemetry/spans": {
"post": {
"responses": {
"200": {
"description": "A QuerySpansResponse.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QuerySpansResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Query spans.",
"description": "Query spans.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QuerySpansRequest"
}
}
},
"required": true
},
"deprecated": true
}
},
"/v1/telemetry/spans/export": {
"post": {
"responses": {
"200": {
"description": "OK"
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Save spans to a dataset.",
"description": "Save spans to a dataset.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/SaveSpansToDatasetRequest"
}
}
},
"required": true
},
"deprecated": true
}
},
"/v1/telemetry/spans/{span_id}/tree": {
"post": {
"responses": {
"200": {
"description": "A QuerySpanTreeResponse.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QuerySpanTreeResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Get a span tree by its ID.",
"description": "Get a span tree by its ID.",
"parameters": [
{
"name": "span_id",
"in": "path",
"description": "The ID of the span to get the tree from.",
"required": true,
"schema": {
"type": "string"
}
}
],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/GetSpanTreeRequest"
}
}
},
"required": true
},
"deprecated": true
}
},
"/v1/telemetry/traces": {
"post": {
"responses": {
"200": {
"description": "A QueryTracesResponse.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QueryTracesResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Query traces.",
"description": "Query traces.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QueryTracesRequest"
}
}
},
"required": true
},
"deprecated": true
}
},
"/v1/telemetry/traces/{trace_id}": {
"get": {
"responses": {
"200": {
"description": "A Trace.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Trace"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Get a trace by its ID.",
"description": "Get a trace by its ID.",
"parameters": [
{
"name": "trace_id",
"in": "path",
"description": "The ID of the trace to get.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": true
}
},
"/v1/telemetry/traces/{trace_id}/spans/{span_id}": {
"get": {
"responses": {
"200": {
"description": "A Span.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Span"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Get a span by its ID.",
"description": "Get a span by its ID.",
"parameters": [
{
"name": "trace_id",
"in": "path",
"description": "The ID of the trace to get the span from.",
"required": true,
"schema": {
"type": "string"
}
},
{
"name": "span_id",
"in": "path",
"description": "The ID of the span to get.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": true
}
}
},
"jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema",
@@ -12716,561 +12379,6 @@
"logger_config"
],
"title": "SupervisedFineTuneRequest"
},
"QueryMetricsRequest": {
"type": "object",
"properties": {
"start_time": {
"type": "integer",
"description": "The start time of the metric to query."
},
"end_time": {
"type": "integer",
"description": "The end time of the metric to query."
},
"granularity": {
"type": "string",
"description": "The granularity of the metric to query."
},
"query_type": {
"type": "string",
"enum": [
"range",
"instant"
],
"description": "The type of query to perform."
},
"label_matchers": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "The name of the label to match"
},
"value": {
"type": "string",
"description": "The value to match against"
},
"operator": {
"type": "string",
"enum": [
"=",
"!=",
"=~",
"!~"
],
"description": "The comparison operator to use for matching",
"default": "="
}
},
"additionalProperties": false,
"required": [
"name",
"value",
"operator"
],
"title": "MetricLabelMatcher",
"description": "A matcher for filtering metrics by label values."
},
"description": "The label matchers to apply to the metric."
}
},
"additionalProperties": false,
"required": [
"start_time",
"query_type"
],
"title": "QueryMetricsRequest"
},
"MetricDataPoint": {
"type": "object",
"properties": {
"timestamp": {
"type": "integer",
"description": "Unix timestamp when the metric value was recorded"
},
"value": {
"type": "number",
"description": "The numeric value of the metric at this timestamp"
},
"unit": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"timestamp",
"value",
"unit"
],
"title": "MetricDataPoint",
"description": "A single data point in a metric time series."
},
"MetricLabel": {
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "The name of the label"
},
"value": {
"type": "string",
"description": "The value of the label"
}
},
"additionalProperties": false,
"required": [
"name",
"value"
],
"title": "MetricLabel",
"description": "A label associated with a metric."
},
"MetricSeries": {
"type": "object",
"properties": {
"metric": {
"type": "string",
"description": "The name of the metric"
},
"labels": {
"type": "array",
"items": {
"$ref": "#/components/schemas/MetricLabel"
},
"description": "List of labels associated with this metric series"
},
"values": {
"type": "array",
"items": {
"$ref": "#/components/schemas/MetricDataPoint"
},
"description": "List of data points in chronological order"
}
},
"additionalProperties": false,
"required": [
"metric",
"labels",
"values"
],
"title": "MetricSeries",
"description": "A time series of metric data points."
},
"QueryMetricsResponse": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"$ref": "#/components/schemas/MetricSeries"
},
"description": "List of metric series matching the query criteria"
}
},
"additionalProperties": false,
"required": [
"data"
],
"title": "QueryMetricsResponse",
"description": "Response containing metric time series data."
},
"QueryCondition": {
"type": "object",
"properties": {
"key": {
"type": "string",
"description": "The attribute key to filter on"
},
"op": {
"$ref": "#/components/schemas/QueryConditionOp",
"description": "The comparison operator to apply"
},
"value": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
],
"description": "The value to compare against"
}
},
"additionalProperties": false,
"required": [
"key",
"op",
"value"
],
"title": "QueryCondition",
"description": "A condition for filtering query results."
},
"QueryConditionOp": {
"type": "string",
"enum": [
"eq",
"ne",
"gt",
"lt"
],
"title": "QueryConditionOp",
"description": "Comparison operators for query conditions."
},
"QuerySpansRequest": {
"type": "object",
"properties": {
"attribute_filters": {
"type": "array",
"items": {
"$ref": "#/components/schemas/QueryCondition"
},
"description": "The attribute filters to apply to the spans."
},
"attributes_to_return": {
"type": "array",
"items": {
"type": "string"
},
"description": "The attributes to return in the spans."
},
"max_depth": {
"type": "integer",
"description": "The maximum depth of the tree."
}
},
"additionalProperties": false,
"required": [
"attribute_filters",
"attributes_to_return"
],
"title": "QuerySpansRequest"
},
"Span": {
"type": "object",
"properties": {
"span_id": {
"type": "string",
"description": "Unique identifier for the span"
},
"trace_id": {
"type": "string",
"description": "Unique identifier for the trace this span belongs to"
},
"parent_span_id": {
"type": "string",
"description": "(Optional) Unique identifier for the parent span, if this is a child span"
},
"name": {
"type": "string",
"description": "Human-readable name describing the operation this span represents"
},
"start_time": {
"type": "string",
"format": "date-time",
"description": "Timestamp when the operation began"
},
"end_time": {
"type": "string",
"format": "date-time",
"description": "(Optional) Timestamp when the operation finished, if completed"
},
"attributes": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
},
"description": "(Optional) Key-value pairs containing additional metadata about the span"
}
},
"additionalProperties": false,
"required": [
"span_id",
"trace_id",
"name",
"start_time"
],
"title": "Span",
"description": "A span representing a single operation within a trace."
},
"QuerySpansResponse": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Span"
},
"description": "List of spans matching the query criteria"
}
},
"additionalProperties": false,
"required": [
"data"
],
"title": "QuerySpansResponse",
"description": "Response containing a list of spans."
},
"SaveSpansToDatasetRequest": {
"type": "object",
"properties": {
"attribute_filters": {
"type": "array",
"items": {
"$ref": "#/components/schemas/QueryCondition"
},
"description": "The attribute filters to apply to the spans."
},
"attributes_to_save": {
"type": "array",
"items": {
"type": "string"
},
"description": "The attributes to save to the dataset."
},
"dataset_id": {
"type": "string",
"description": "The ID of the dataset to save the spans to."
},
"max_depth": {
"type": "integer",
"description": "The maximum depth of the tree."
}
},
"additionalProperties": false,
"required": [
"attribute_filters",
"attributes_to_save",
"dataset_id"
],
"title": "SaveSpansToDatasetRequest"
},
"GetSpanTreeRequest": {
"type": "object",
"properties": {
"attributes_to_return": {
"type": "array",
"items": {
"type": "string"
},
"description": "The attributes to return in the tree."
},
"max_depth": {
"type": "integer",
"description": "The maximum depth of the tree."
}
},
"additionalProperties": false,
"title": "GetSpanTreeRequest"
},
"SpanStatus": {
"type": "string",
"enum": [
"ok",
"error"
],
"title": "SpanStatus",
"description": "The status of a span indicating whether it completed successfully or with an error."
},
"SpanWithStatus": {
"type": "object",
"properties": {
"span_id": {
"type": "string",
"description": "Unique identifier for the span"
},
"trace_id": {
"type": "string",
"description": "Unique identifier for the trace this span belongs to"
},
"parent_span_id": {
"type": "string",
"description": "(Optional) Unique identifier for the parent span, if this is a child span"
},
"name": {
"type": "string",
"description": "Human-readable name describing the operation this span represents"
},
"start_time": {
"type": "string",
"format": "date-time",
"description": "Timestamp when the operation began"
},
"end_time": {
"type": "string",
"format": "date-time",
"description": "(Optional) Timestamp when the operation finished, if completed"
},
"attributes": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
},
"description": "(Optional) Key-value pairs containing additional metadata about the span"
},
"status": {
"$ref": "#/components/schemas/SpanStatus",
"description": "(Optional) The current status of the span"
}
},
"additionalProperties": false,
"required": [
"span_id",
"trace_id",
"name",
"start_time"
],
"title": "SpanWithStatus",
"description": "A span that includes status information."
},
"QuerySpanTreeResponse": {
"type": "object",
"properties": {
"data": {
"type": "object",
"additionalProperties": {
"$ref": "#/components/schemas/SpanWithStatus"
},
"description": "Dictionary mapping span IDs to spans with status information"
}
},
"additionalProperties": false,
"required": [
"data"
],
"title": "QuerySpanTreeResponse",
"description": "Response containing a tree structure of spans."
},
"QueryTracesRequest": {
"type": "object",
"properties": {
"attribute_filters": {
"type": "array",
"items": {
"$ref": "#/components/schemas/QueryCondition"
},
"description": "The attribute filters to apply to the traces."
},
"limit": {
"type": "integer",
"description": "The limit of traces to return."
},
"offset": {
"type": "integer",
"description": "The offset of the traces to return."
},
"order_by": {
"type": "array",
"items": {
"type": "string"
},
"description": "The order by of the traces to return."
}
},
"additionalProperties": false,
"title": "QueryTracesRequest"
},
"Trace": {
"type": "object",
"properties": {
"trace_id": {
"type": "string",
"description": "Unique identifier for the trace"
},
"root_span_id": {
"type": "string",
"description": "Unique identifier for the root span that started this trace"
},
"start_time": {
"type": "string",
"format": "date-time",
"description": "Timestamp when the trace began"
},
"end_time": {
"type": "string",
"format": "date-time",
"description": "(Optional) Timestamp when the trace finished, if completed"
}
},
"additionalProperties": false,
"required": [
"trace_id",
"root_span_id",
"start_time"
],
"title": "Trace",
"description": "A trace representing the complete execution path of a request across multiple operations."
},
"QueryTracesResponse": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Trace"
},
"description": "List of traces matching the query criteria"
}
},
"additionalProperties": false,
"required": [
"data"
],
"title": "QueryTracesResponse",
"description": "Response containing a list of traces."
}
},
"responses": {
@@ -13387,10 +12495,6 @@
"description": "OpenAI-compatible Moderations API.",
"x-displayName": "Safety"
},
{
"name": "Telemetry",
"description": ""
},
{
"name": "VectorIO",
"description": ""
@@ -13410,7 +12514,6 @@
"Models",
"PostTraining (Coming Soon)",
"Safety",
"Telemetry",
"VectorIO"
]
}
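
The removed paths above are the deprecated Telemetry query endpoints (metrics, spans, span trees, traces). As an illustration of what is going away, shaped after the QueryTracesRequest and QueryCondition schemas deleted above and using a made-up attribute key and value, a request body for the removed `POST /v1/telemetry/traces` endpoint looked roughly like this:

```yaml
# Illustrative only: a QueryTracesRequest-style body for the removed
# POST /v1/telemetry/traces endpoint. The attribute key and value are hypothetical.
attribute_filters:
  - key: session_id   # hypothetical attribute key
    op: eq            # QueryConditionOp: eq, ne, gt, lt
    value: "abc-123"
limit: 10
offset: 0
order_by:
  - start_time
```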

@@ -2593,238 +2593,6 @@ paths:
$ref: '#/components/schemas/SupervisedFineTuneRequest'
required: true
deprecated: true
/v1/telemetry/metrics/{metric_name}:
post:
responses:
'200':
description: A QueryMetricsResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/QueryMetricsResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Query metrics.
description: Query metrics.
parameters:
- name: metric_name
in: path
description: The name of the metric to query.
required: true
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/QueryMetricsRequest'
required: true
deprecated: true
/v1/telemetry/spans:
post:
responses:
'200':
description: A QuerySpansResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/QuerySpansResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Query spans.
description: Query spans.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/QuerySpansRequest'
required: true
deprecated: true
/v1/telemetry/spans/export:
post:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Save spans to a dataset.
description: Save spans to a dataset.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/SaveSpansToDatasetRequest'
required: true
deprecated: true
/v1/telemetry/spans/{span_id}/tree:
post:
responses:
'200':
description: A QuerySpanTreeResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/QuerySpanTreeResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Get a span tree by its ID.
description: Get a span tree by its ID.
parameters:
- name: span_id
in: path
description: The ID of the span to get the tree from.
required: true
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/GetSpanTreeRequest'
required: true
deprecated: true
/v1/telemetry/traces:
post:
responses:
'200':
description: A QueryTracesResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/QueryTracesResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Query traces.
description: Query traces.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/QueryTracesRequest'
required: true
deprecated: true
/v1/telemetry/traces/{trace_id}:
get:
responses:
'200':
description: A Trace.
content:
application/json:
schema:
$ref: '#/components/schemas/Trace'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Get a trace by its ID.
description: Get a trace by its ID.
parameters:
- name: trace_id
in: path
description: The ID of the trace to get.
required: true
schema:
type: string
deprecated: true
/v1/telemetry/traces/{trace_id}/spans/{span_id}:
get:
responses:
'200':
description: A Span.
content:
application/json:
schema:
$ref: '#/components/schemas/Span'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Get a span by its ID.
description: Get a span by its ID.
parameters:
- name: trace_id
in: path
description: >-
The ID of the trace to get the span from.
required: true
schema:
type: string
- name: span_id
in: path
description: The ID of the span to get.
required: true
schema:
type: string
deprecated: true
jsonSchemaDialect: >-
https://json-schema.org/draft/2020-12/schema
components:
@@ -9510,434 +9278,6 @@ components:
- hyperparam_search_config
- logger_config
title: SupervisedFineTuneRequest
QueryMetricsRequest:
type: object
properties:
start_time:
type: integer
description: The start time of the metric to query.
end_time:
type: integer
description: The end time of the metric to query.
granularity:
type: string
description: The granularity of the metric to query.
query_type:
type: string
enum:
- range
- instant
description: The type of query to perform.
label_matchers:
type: array
items:
type: object
properties:
name:
type: string
description: The name of the label to match
value:
type: string
description: The value to match against
operator:
type: string
enum:
- '='
- '!='
- =~
- '!~'
description: >-
The comparison operator to use for matching
default: '='
additionalProperties: false
required:
- name
- value
- operator
title: MetricLabelMatcher
description: >-
A matcher for filtering metrics by label values.
description: >-
The label matchers to apply to the metric.
additionalProperties: false
required:
- start_time
- query_type
title: QueryMetricsRequest
MetricDataPoint:
type: object
properties:
timestamp:
type: integer
description: >-
Unix timestamp when the metric value was recorded
value:
type: number
description: >-
The numeric value of the metric at this timestamp
unit:
type: string
additionalProperties: false
required:
- timestamp
- value
- unit
title: MetricDataPoint
description: >-
A single data point in a metric time series.
MetricLabel:
type: object
properties:
name:
type: string
description: The name of the label
value:
type: string
description: The value of the label
additionalProperties: false
required:
- name
- value
title: MetricLabel
description: A label associated with a metric.
MetricSeries:
type: object
properties:
metric:
type: string
description: The name of the metric
labels:
type: array
items:
$ref: '#/components/schemas/MetricLabel'
description: >-
List of labels associated with this metric series
values:
type: array
items:
$ref: '#/components/schemas/MetricDataPoint'
description: >-
List of data points in chronological order
additionalProperties: false
required:
- metric
- labels
- values
title: MetricSeries
description: A time series of metric data points.
QueryMetricsResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/MetricSeries'
description: >-
List of metric series matching the query criteria
additionalProperties: false
required:
- data
title: QueryMetricsResponse
description: >-
Response containing metric time series data.
QueryCondition:
type: object
properties:
key:
type: string
description: The attribute key to filter on
op:
$ref: '#/components/schemas/QueryConditionOp'
description: The comparison operator to apply
value:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: The value to compare against
additionalProperties: false
required:
- key
- op
- value
title: QueryCondition
description: A condition for filtering query results.
QueryConditionOp:
type: string
enum:
- eq
- ne
- gt
- lt
title: QueryConditionOp
description: >-
Comparison operators for query conditions.
QuerySpansRequest:
type: object
properties:
attribute_filters:
type: array
items:
$ref: '#/components/schemas/QueryCondition'
description: >-
The attribute filters to apply to the spans.
attributes_to_return:
type: array
items:
type: string
description: The attributes to return in the spans.
max_depth:
type: integer
description: The maximum depth of the tree.
additionalProperties: false
required:
- attribute_filters
- attributes_to_return
title: QuerySpansRequest
Span:
type: object
properties:
span_id:
type: string
description: Unique identifier for the span
trace_id:
type: string
description: >-
Unique identifier for the trace this span belongs to
parent_span_id:
type: string
description: >-
(Optional) Unique identifier for the parent span, if this is a child span
name:
type: string
description: >-
Human-readable name describing the operation this span represents
start_time:
type: string
format: date-time
description: Timestamp when the operation began
end_time:
type: string
format: date-time
description: >-
(Optional) Timestamp when the operation finished, if completed
attributes:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Key-value pairs containing additional metadata about the span
additionalProperties: false
required:
- span_id
- trace_id
- name
- start_time
title: Span
description: >-
A span representing a single operation within a trace.
QuerySpansResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/Span'
description: >-
List of spans matching the query criteria
additionalProperties: false
required:
- data
title: QuerySpansResponse
description: Response containing a list of spans.
SaveSpansToDatasetRequest:
type: object
properties:
attribute_filters:
type: array
items:
$ref: '#/components/schemas/QueryCondition'
description: >-
The attribute filters to apply to the spans.
attributes_to_save:
type: array
items:
type: string
description: The attributes to save to the dataset.
dataset_id:
type: string
description: >-
The ID of the dataset to save the spans to.
max_depth:
type: integer
description: The maximum depth of the tree.
additionalProperties: false
required:
- attribute_filters
- attributes_to_save
- dataset_id
title: SaveSpansToDatasetRequest
GetSpanTreeRequest:
type: object
properties:
attributes_to_return:
type: array
items:
type: string
description: The attributes to return in the tree.
max_depth:
type: integer
description: The maximum depth of the tree.
additionalProperties: false
title: GetSpanTreeRequest
SpanStatus:
type: string
enum:
- ok
- error
title: SpanStatus
description: >-
The status of a span indicating whether it completed successfully or with
an error.
SpanWithStatus:
type: object
properties:
span_id:
type: string
description: Unique identifier for the span
trace_id:
type: string
description: >-
Unique identifier for the trace this span belongs to
parent_span_id:
type: string
description: >-
(Optional) Unique identifier for the parent span, if this is a child span
name:
type: string
description: >-
Human-readable name describing the operation this span represents
start_time:
type: string
format: date-time
description: Timestamp when the operation began
end_time:
type: string
format: date-time
description: >-
(Optional) Timestamp when the operation finished, if completed
attributes:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Key-value pairs containing additional metadata about the span
status:
$ref: '#/components/schemas/SpanStatus'
description: >-
(Optional) The current status of the span
additionalProperties: false
required:
- span_id
- trace_id
- name
- start_time
title: SpanWithStatus
description: A span that includes status information.
QuerySpanTreeResponse:
type: object
properties:
data:
type: object
additionalProperties:
$ref: '#/components/schemas/SpanWithStatus'
description: >-
Dictionary mapping span IDs to spans with status information
additionalProperties: false
required:
- data
title: QuerySpanTreeResponse
description: >-
Response containing a tree structure of spans.
QueryTracesRequest:
type: object
properties:
attribute_filters:
type: array
items:
$ref: '#/components/schemas/QueryCondition'
description: >-
The attribute filters to apply to the traces.
limit:
type: integer
description: The limit of traces to return.
offset:
type: integer
description: The offset of the traces to return.
order_by:
type: array
items:
type: string
description: The order by of the traces to return.
additionalProperties: false
title: QueryTracesRequest
Trace:
type: object
properties:
trace_id:
type: string
description: Unique identifier for the trace
root_span_id:
type: string
description: >-
Unique identifier for the root span that started this trace
start_time:
type: string
format: date-time
description: Timestamp when the trace began
end_time:
type: string
format: date-time
description: >-
(Optional) Timestamp when the trace finished, if completed
additionalProperties: false
required:
- trace_id
- root_span_id
- start_time
title: Trace
description: >-
A trace representing the complete execution path of a request across multiple
operations.
QueryTracesResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/Trace'
description: >-
List of traces matching the query criteria
additionalProperties: false
required:
- data
title: QueryTracesResponse
description: Response containing a list of traces.
responses:
BadRequest400:
description: The request was invalid or malformed
@@ -10043,8 +9383,6 @@ tags:
- name: Safety
description: OpenAI-compatible Moderations API.
x-displayName: Safety
- name: Telemetry
description: ''
- name: VectorIO
description: ''
x-tagGroups:
@@ -10060,5 +9398,4 @@ x-tagGroups:
- Models
- PostTraining (Coming Soon)
- Safety
- Telemetry
- VectorIO

@@ -1711,343 +1711,6 @@
},
"deprecated": false
}
},
"/v1alpha/telemetry/metrics/{metric_name}": {
"post": {
"responses": {
"200": {
"description": "A QueryMetricsResponse.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QueryMetricsResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Query metrics.",
"description": "Query metrics.",
"parameters": [
{
"name": "metric_name",
"in": "path",
"description": "The name of the metric to query.",
"required": true,
"schema": {
"type": "string"
}
}
],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QueryMetricsRequest"
}
}
},
"required": true
},
"deprecated": false
}
},
"/v1alpha/telemetry/spans": {
"post": {
"responses": {
"200": {
"description": "A QuerySpansResponse.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QuerySpansResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Query spans.",
"description": "Query spans.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QuerySpansRequest"
}
}
},
"required": true
},
"deprecated": false
}
},
"/v1alpha/telemetry/spans/export": {
"post": {
"responses": {
"200": {
"description": "OK"
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Save spans to a dataset.",
"description": "Save spans to a dataset.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/SaveSpansToDatasetRequest"
}
}
},
"required": true
},
"deprecated": false
}
},
"/v1alpha/telemetry/spans/{span_id}/tree": {
"post": {
"responses": {
"200": {
"description": "A QuerySpanTreeResponse.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QuerySpanTreeResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Get a span tree by its ID.",
"description": "Get a span tree by its ID.",
"parameters": [
{
"name": "span_id",
"in": "path",
"description": "The ID of the span to get the tree from.",
"required": true,
"schema": {
"type": "string"
}
}
],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/GetSpanTreeRequest"
}
}
},
"required": true
},
"deprecated": false
}
},
"/v1alpha/telemetry/traces": {
"post": {
"responses": {
"200": {
"description": "A QueryTracesResponse.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QueryTracesResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Query traces.",
"description": "Query traces.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QueryTracesRequest"
}
}
},
"required": true
},
"deprecated": false
}
},
"/v1alpha/telemetry/traces/{trace_id}": {
"get": {
"responses": {
"200": {
"description": "A Trace.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Trace"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Get a trace by its ID.",
"description": "Get a trace by its ID.",
"parameters": [
{
"name": "trace_id",
"in": "path",
"description": "The ID of the trace to get.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": false
}
},
"/v1alpha/telemetry/traces/{trace_id}/spans/{span_id}": {
"get": {
"responses": {
"200": {
"description": "A Span.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Span"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Get a span by its ID.",
"description": "Get a span by its ID.",
"parameters": [
{
"name": "trace_id",
"in": "path",
"description": "The ID of the trace to get the span from.",
"required": true,
"schema": {
"type": "string"
}
},
{
"name": "span_id",
"in": "path",
"description": "The ID of the span to get.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": false
}
}
},
"jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema",
@@ -5765,561 +5428,6 @@
"logger_config"
],
"title": "SupervisedFineTuneRequest"
},
"QueryMetricsRequest": {
"type": "object",
"properties": {
"start_time": {
"type": "integer",
"description": "The start time of the metric to query."
},
"end_time": {
"type": "integer",
"description": "The end time of the metric to query."
},
"granularity": {
"type": "string",
"description": "The granularity of the metric to query."
},
"query_type": {
"type": "string",
"enum": [
"range",
"instant"
],
"description": "The type of query to perform."
},
"label_matchers": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "The name of the label to match"
},
"value": {
"type": "string",
"description": "The value to match against"
},
"operator": {
"type": "string",
"enum": [
"=",
"!=",
"=~",
"!~"
],
"description": "The comparison operator to use for matching",
"default": "="
}
},
"additionalProperties": false,
"required": [
"name",
"value",
"operator"
],
"title": "MetricLabelMatcher",
"description": "A matcher for filtering metrics by label values."
},
"description": "The label matchers to apply to the metric."
}
},
"additionalProperties": false,
"required": [
"start_time",
"query_type"
],
"title": "QueryMetricsRequest"
},
"MetricDataPoint": {
"type": "object",
"properties": {
"timestamp": {
"type": "integer",
"description": "Unix timestamp when the metric value was recorded"
},
"value": {
"type": "number",
"description": "The numeric value of the metric at this timestamp"
},
"unit": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"timestamp",
"value",
"unit"
],
"title": "MetricDataPoint",
"description": "A single data point in a metric time series."
},
"MetricLabel": {
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "The name of the label"
},
"value": {
"type": "string",
"description": "The value of the label"
}
},
"additionalProperties": false,
"required": [
"name",
"value"
],
"title": "MetricLabel",
"description": "A label associated with a metric."
},
"MetricSeries": {
"type": "object",
"properties": {
"metric": {
"type": "string",
"description": "The name of the metric"
},
"labels": {
"type": "array",
"items": {
"$ref": "#/components/schemas/MetricLabel"
},
"description": "List of labels associated with this metric series"
},
"values": {
"type": "array",
"items": {
"$ref": "#/components/schemas/MetricDataPoint"
},
"description": "List of data points in chronological order"
}
},
"additionalProperties": false,
"required": [
"metric",
"labels",
"values"
],
"title": "MetricSeries",
"description": "A time series of metric data points."
},
"QueryMetricsResponse": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"$ref": "#/components/schemas/MetricSeries"
},
"description": "List of metric series matching the query criteria"
}
},
"additionalProperties": false,
"required": [
"data"
],
"title": "QueryMetricsResponse",
"description": "Response containing metric time series data."
},
"QueryCondition": {
"type": "object",
"properties": {
"key": {
"type": "string",
"description": "The attribute key to filter on"
},
"op": {
"$ref": "#/components/schemas/QueryConditionOp",
"description": "The comparison operator to apply"
},
"value": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
],
"description": "The value to compare against"
}
},
"additionalProperties": false,
"required": [
"key",
"op",
"value"
],
"title": "QueryCondition",
"description": "A condition for filtering query results."
},
"QueryConditionOp": {
"type": "string",
"enum": [
"eq",
"ne",
"gt",
"lt"
],
"title": "QueryConditionOp",
"description": "Comparison operators for query conditions."
},
"QuerySpansRequest": {
"type": "object",
"properties": {
"attribute_filters": {
"type": "array",
"items": {
"$ref": "#/components/schemas/QueryCondition"
},
"description": "The attribute filters to apply to the spans."
},
"attributes_to_return": {
"type": "array",
"items": {
"type": "string"
},
"description": "The attributes to return in the spans."
},
"max_depth": {
"type": "integer",
"description": "The maximum depth of the tree."
}
},
"additionalProperties": false,
"required": [
"attribute_filters",
"attributes_to_return"
],
"title": "QuerySpansRequest"
},
"Span": {
"type": "object",
"properties": {
"span_id": {
"type": "string",
"description": "Unique identifier for the span"
},
"trace_id": {
"type": "string",
"description": "Unique identifier for the trace this span belongs to"
},
"parent_span_id": {
"type": "string",
"description": "(Optional) Unique identifier for the parent span, if this is a child span"
},
"name": {
"type": "string",
"description": "Human-readable name describing the operation this span represents"
},
"start_time": {
"type": "string",
"format": "date-time",
"description": "Timestamp when the operation began"
},
"end_time": {
"type": "string",
"format": "date-time",
"description": "(Optional) Timestamp when the operation finished, if completed"
},
"attributes": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
},
"description": "(Optional) Key-value pairs containing additional metadata about the span"
}
},
"additionalProperties": false,
"required": [
"span_id",
"trace_id",
"name",
"start_time"
],
"title": "Span",
"description": "A span representing a single operation within a trace."
},
"QuerySpansResponse": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Span"
},
"description": "List of spans matching the query criteria"
}
},
"additionalProperties": false,
"required": [
"data"
],
"title": "QuerySpansResponse",
"description": "Response containing a list of spans."
},
"SaveSpansToDatasetRequest": {
"type": "object",
"properties": {
"attribute_filters": {
"type": "array",
"items": {
"$ref": "#/components/schemas/QueryCondition"
},
"description": "The attribute filters to apply to the spans."
},
"attributes_to_save": {
"type": "array",
"items": {
"type": "string"
},
"description": "The attributes to save to the dataset."
},
"dataset_id": {
"type": "string",
"description": "The ID of the dataset to save the spans to."
},
"max_depth": {
"type": "integer",
"description": "The maximum depth of the tree."
}
},
"additionalProperties": false,
"required": [
"attribute_filters",
"attributes_to_save",
"dataset_id"
],
"title": "SaveSpansToDatasetRequest"
},
"GetSpanTreeRequest": {
"type": "object",
"properties": {
"attributes_to_return": {
"type": "array",
"items": {
"type": "string"
},
"description": "The attributes to return in the tree."
},
"max_depth": {
"type": "integer",
"description": "The maximum depth of the tree."
}
},
"additionalProperties": false,
"title": "GetSpanTreeRequest"
},
"SpanStatus": {
"type": "string",
"enum": [
"ok",
"error"
],
"title": "SpanStatus",
"description": "The status of a span indicating whether it completed successfully or with an error."
},
"SpanWithStatus": {
"type": "object",
"properties": {
"span_id": {
"type": "string",
"description": "Unique identifier for the span"
},
"trace_id": {
"type": "string",
"description": "Unique identifier for the trace this span belongs to"
},
"parent_span_id": {
"type": "string",
"description": "(Optional) Unique identifier for the parent span, if this is a child span"
},
"name": {
"type": "string",
"description": "Human-readable name describing the operation this span represents"
},
"start_time": {
"type": "string",
"format": "date-time",
"description": "Timestamp when the operation began"
},
"end_time": {
"type": "string",
"format": "date-time",
"description": "(Optional) Timestamp when the operation finished, if completed"
},
"attributes": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
},
"description": "(Optional) Key-value pairs containing additional metadata about the span"
},
"status": {
"$ref": "#/components/schemas/SpanStatus",
"description": "(Optional) The current status of the span"
}
},
"additionalProperties": false,
"required": [
"span_id",
"trace_id",
"name",
"start_time"
],
"title": "SpanWithStatus",
"description": "A span that includes status information."
},
"QuerySpanTreeResponse": {
"type": "object",
"properties": {
"data": {
"type": "object",
"additionalProperties": {
"$ref": "#/components/schemas/SpanWithStatus"
},
"description": "Dictionary mapping span IDs to spans with status information"
}
},
"additionalProperties": false,
"required": [
"data"
],
"title": "QuerySpanTreeResponse",
"description": "Response containing a tree structure of spans."
},
"QueryTracesRequest": {
"type": "object",
"properties": {
"attribute_filters": {
"type": "array",
"items": {
"$ref": "#/components/schemas/QueryCondition"
},
"description": "The attribute filters to apply to the traces."
},
"limit": {
"type": "integer",
"description": "The limit of traces to return."
},
"offset": {
"type": "integer",
"description": "The offset of the traces to return."
},
"order_by": {
"type": "array",
"items": {
"type": "string"
},
"description": "The order by of the traces to return."
}
},
"additionalProperties": false,
"title": "QueryTracesRequest"
},
"Trace": {
"type": "object",
"properties": {
"trace_id": {
"type": "string",
"description": "Unique identifier for the trace"
},
"root_span_id": {
"type": "string",
"description": "Unique identifier for the root span that started this trace"
},
"start_time": {
"type": "string",
"format": "date-time",
"description": "Timestamp when the trace began"
},
"end_time": {
"type": "string",
"format": "date-time",
"description": "(Optional) Timestamp when the trace finished, if completed"
}
},
"additionalProperties": false,
"required": [
"trace_id",
"root_span_id",
"start_time"
],
"title": "Trace",
"description": "A trace representing the complete execution path of a request across multiple operations."
},
"QueryTracesResponse": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Trace"
},
"description": "List of traces matching the query criteria"
}
},
"additionalProperties": false,
"required": [
"data"
],
"title": "QueryTracesResponse",
"description": "Response containing a list of traces."
}
},
"responses": {
@@ -6416,10 +5524,6 @@
{
"name": "PostTraining (Coming Soon)",
"description": ""
},
{
"name": "Telemetry",
"description": ""
}
],
"x-tagGroups": [
@@ -6431,8 +5535,7 @@
"DatasetIO",
"Datasets",
"Eval",
-"PostTraining (Coming Soon)",
+"PostTraining (Coming Soon)"
"Telemetry"
]
}
]

@@ -1224,238 +1224,6 @@ paths:
$ref: '#/components/schemas/SupervisedFineTuneRequest'
required: true
deprecated: false
/v1alpha/telemetry/metrics/{metric_name}:
post:
responses:
'200':
description: A QueryMetricsResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/QueryMetricsResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Query metrics.
description: Query metrics.
parameters:
- name: metric_name
in: path
description: The name of the metric to query.
required: true
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/QueryMetricsRequest'
required: true
deprecated: false
/v1alpha/telemetry/spans:
post:
responses:
'200':
description: A QuerySpansResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/QuerySpansResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Query spans.
description: Query spans.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/QuerySpansRequest'
required: true
deprecated: false
/v1alpha/telemetry/spans/export:
post:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Save spans to a dataset.
description: Save spans to a dataset.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/SaveSpansToDatasetRequest'
required: true
deprecated: false
/v1alpha/telemetry/spans/{span_id}/tree:
post:
responses:
'200':
description: A QuerySpanTreeResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/QuerySpanTreeResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Get a span tree by its ID.
description: Get a span tree by its ID.
parameters:
- name: span_id
in: path
description: The ID of the span to get the tree from.
required: true
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/GetSpanTreeRequest'
required: true
deprecated: false
/v1alpha/telemetry/traces:
post:
responses:
'200':
description: A QueryTracesResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/QueryTracesResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Query traces.
description: Query traces.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/QueryTracesRequest'
required: true
deprecated: false
/v1alpha/telemetry/traces/{trace_id}:
get:
responses:
'200':
description: A Trace.
content:
application/json:
schema:
$ref: '#/components/schemas/Trace'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Get a trace by its ID.
description: Get a trace by its ID.
parameters:
- name: trace_id
in: path
description: The ID of the trace to get.
required: true
schema:
type: string
deprecated: false
/v1alpha/telemetry/traces/{trace_id}/spans/{span_id}:
get:
responses:
'200':
description: A Span.
content:
application/json:
schema:
$ref: '#/components/schemas/Span'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Get a span by its ID.
description: Get a span by its ID.
parameters:
- name: trace_id
in: path
description: >-
The ID of the trace to get the span from.
required: true
schema:
type: string
- name: span_id
in: path
description: The ID of the span to get.
required: true
schema:
type: string
deprecated: false
jsonSchemaDialect: >-
https://json-schema.org/draft/2020-12/schema
components:
@@ -4249,434 +4017,6 @@ components:
- hyperparam_search_config
- logger_config
title: SupervisedFineTuneRequest
QueryMetricsRequest:
type: object
properties:
start_time:
type: integer
description: The start time of the metric to query.
end_time:
type: integer
description: The end time of the metric to query.
granularity:
type: string
description: The granularity of the metric to query.
query_type:
type: string
enum:
- range
- instant
description: The type of query to perform.
label_matchers:
type: array
items:
type: object
properties:
name:
type: string
description: The name of the label to match
value:
type: string
description: The value to match against
operator:
type: string
enum:
- '='
- '!='
- =~
- '!~'
description: >-
The comparison operator to use for matching
default: '='
additionalProperties: false
required:
- name
- value
- operator
title: MetricLabelMatcher
description: >-
A matcher for filtering metrics by label values.
description: >-
The label matchers to apply to the metric.
additionalProperties: false
required:
- start_time
- query_type
title: QueryMetricsRequest
MetricDataPoint:
type: object
properties:
timestamp:
type: integer
description: >-
Unix timestamp when the metric value was recorded
value:
type: number
description: >-
The numeric value of the metric at this timestamp
unit:
type: string
additionalProperties: false
required:
- timestamp
- value
- unit
title: MetricDataPoint
description: >-
A single data point in a metric time series.
MetricLabel:
type: object
properties:
name:
type: string
description: The name of the label
value:
type: string
description: The value of the label
additionalProperties: false
required:
- name
- value
title: MetricLabel
description: A label associated with a metric.
MetricSeries:
type: object
properties:
metric:
type: string
description: The name of the metric
labels:
type: array
items:
$ref: '#/components/schemas/MetricLabel'
description: >-
List of labels associated with this metric series
values:
type: array
items:
$ref: '#/components/schemas/MetricDataPoint'
description: >-
List of data points in chronological order
additionalProperties: false
required:
- metric
- labels
- values
title: MetricSeries
description: A time series of metric data points.
QueryMetricsResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/MetricSeries'
description: >-
List of metric series matching the query criteria
additionalProperties: false
required:
- data
title: QueryMetricsResponse
description: >-
Response containing metric time series data.
QueryCondition:
type: object
properties:
key:
type: string
description: The attribute key to filter on
op:
$ref: '#/components/schemas/QueryConditionOp'
description: The comparison operator to apply
value:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: The value to compare against
additionalProperties: false
required:
- key
- op
- value
title: QueryCondition
description: A condition for filtering query results.
QueryConditionOp:
type: string
enum:
- eq
- ne
- gt
- lt
title: QueryConditionOp
description: >-
Comparison operators for query conditions.
QuerySpansRequest:
type: object
properties:
attribute_filters:
type: array
items:
$ref: '#/components/schemas/QueryCondition'
description: >-
The attribute filters to apply to the spans.
attributes_to_return:
type: array
items:
type: string
description: The attributes to return in the spans.
max_depth:
type: integer
description: The maximum depth of the tree.
additionalProperties: false
required:
- attribute_filters
- attributes_to_return
title: QuerySpansRequest
Span:
type: object
properties:
span_id:
type: string
description: Unique identifier for the span
trace_id:
type: string
description: >-
Unique identifier for the trace this span belongs to
parent_span_id:
type: string
description: >-
(Optional) Unique identifier for the parent span, if this is a child span
name:
type: string
description: >-
Human-readable name describing the operation this span represents
start_time:
type: string
format: date-time
description: Timestamp when the operation began
end_time:
type: string
format: date-time
description: >-
(Optional) Timestamp when the operation finished, if completed
attributes:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Key-value pairs containing additional metadata about the span
additionalProperties: false
required:
- span_id
- trace_id
- name
- start_time
title: Span
description: >-
A span representing a single operation within a trace.
QuerySpansResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/Span'
description: >-
List of spans matching the query criteria
additionalProperties: false
required:
- data
title: QuerySpansResponse
description: Response containing a list of spans.
SaveSpansToDatasetRequest:
type: object
properties:
attribute_filters:
type: array
items:
$ref: '#/components/schemas/QueryCondition'
description: >-
The attribute filters to apply to the spans.
attributes_to_save:
type: array
items:
type: string
description: The attributes to save to the dataset.
dataset_id:
type: string
description: >-
The ID of the dataset to save the spans to.
max_depth:
type: integer
description: The maximum depth of the tree.
additionalProperties: false
required:
- attribute_filters
- attributes_to_save
- dataset_id
title: SaveSpansToDatasetRequest
GetSpanTreeRequest:
type: object
properties:
attributes_to_return:
type: array
items:
type: string
description: The attributes to return in the tree.
max_depth:
type: integer
description: The maximum depth of the tree.
additionalProperties: false
title: GetSpanTreeRequest
SpanStatus:
type: string
enum:
- ok
- error
title: SpanStatus
description: >-
The status of a span indicating whether it completed successfully or with
an error.
SpanWithStatus:
type: object
properties:
span_id:
type: string
description: Unique identifier for the span
trace_id:
type: string
description: >-
Unique identifier for the trace this span belongs to
parent_span_id:
type: string
description: >-
(Optional) Unique identifier for the parent span, if this is a child span
name:
type: string
description: >-
Human-readable name describing the operation this span represents
start_time:
type: string
format: date-time
description: Timestamp when the operation began
end_time:
type: string
format: date-time
description: >-
(Optional) Timestamp when the operation finished, if completed
attributes:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Key-value pairs containing additional metadata about the span
status:
$ref: '#/components/schemas/SpanStatus'
description: >-
(Optional) The current status of the span
additionalProperties: false
required:
- span_id
- trace_id
- name
- start_time
title: SpanWithStatus
description: A span that includes status information.
QuerySpanTreeResponse:
type: object
properties:
data:
type: object
additionalProperties:
$ref: '#/components/schemas/SpanWithStatus'
description: >-
Dictionary mapping span IDs to spans with status information
additionalProperties: false
required:
- data
title: QuerySpanTreeResponse
description: >-
Response containing a tree structure of spans.
QueryTracesRequest:
type: object
properties:
attribute_filters:
type: array
items:
$ref: '#/components/schemas/QueryCondition'
description: >-
The attribute filters to apply to the traces.
limit:
type: integer
description: The limit of traces to return.
offset:
type: integer
description: The offset of the traces to return.
order_by:
type: array
items:
type: string
description: The sort order for the returned traces.
additionalProperties: false
title: QueryTracesRequest
Trace:
type: object
properties:
trace_id:
type: string
description: Unique identifier for the trace
root_span_id:
type: string
description: >-
Unique identifier for the root span that started this trace
start_time:
type: string
format: date-time
description: Timestamp when the trace began
end_time:
type: string
format: date-time
description: >-
(Optional) Timestamp when the trace finished, if completed
additionalProperties: false
required:
- trace_id
- root_span_id
- start_time
title: Trace
description: >-
A trace representing the complete execution path of a request across multiple
operations.
QueryTracesResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/Trace'
description: >-
List of traces matching the query criteria
additionalProperties: false
required:
- data
title: QueryTracesResponse
description: Response containing a list of traces.
responses: responses:
BadRequest400: BadRequest400:
description: The request was invalid or malformed description: The request was invalid or malformed
@@ -4784,8 +4124,6 @@ tags:
Llama Stack Evaluation API for running evaluations on model and agent candidates. Llama Stack Evaluation API for running evaluations on model and agent candidates.
- name: PostTraining (Coming Soon) - name: PostTraining (Coming Soon)
description: '' description: ''
- name: Telemetry
description: ''
x-tagGroups: x-tagGroups:
- name: Operations - name: Operations
tags: tags:
@@ -4795,4 +4133,3 @@ x-tagGroups:
- Datasets - Datasets
- Eval - Eval
- PostTraining (Coming Soon) - PostTraining (Coming Soon)
- Telemetry
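
The QueryTracesRequest/QueryTracesResponse pair defined earlier in this file maps onto a single POST call. A sketch of that request, with placeholder server address, token, and filter values:

import requests

body = {
    "attribute_filters": [
        {"key": "session_id", "op": "eq", "value": "sess-123"},  # hypothetical attribute key/value
    ],
    "limit": 20,
    "offset": 0,
    "order_by": ["start_time"],  # hypothetical sort key
}
resp = requests.post(
    "http://localhost:8321/v1alpha/telemetry/traces",  # assumption: local server address
    json=body,
    headers={"Authorization": "Bearer <token>"},  # assumption: token with telemetry read access
    timeout=30,
)
resp.raise_for_status()
for trace in resp.json()["data"]:  # QueryTracesResponse.data is a list of Trace objects
    print(trace["trace_id"], trace["root_span_id"], trace["start_time"])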

View file

@@ -2525,44 +2525,6 @@
"deprecated": false "deprecated": false
} }
}, },
"/v1/telemetry/events": {
"post": {
"responses": {
"200": {
"description": "OK"
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Log an event.",
"description": "Log an event.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/LogEventRequest"
}
}
},
"required": true
},
"deprecated": false
}
},
"/v1/tool-runtime/invoke": { "/v1/tool-runtime/invoke": {
"post": { "post": {
"responses": { "responses": {
@@ -10364,354 +10326,6 @@
"title": "SyntheticDataGenerationResponse", "title": "SyntheticDataGenerationResponse",
"description": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold." "description": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold."
}, },
"Event": {
"oneOf": [
{
"$ref": "#/components/schemas/UnstructuredLogEvent"
},
{
"$ref": "#/components/schemas/MetricEvent"
},
{
"$ref": "#/components/schemas/StructuredLogEvent"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"unstructured_log": "#/components/schemas/UnstructuredLogEvent",
"metric": "#/components/schemas/MetricEvent",
"structured_log": "#/components/schemas/StructuredLogEvent"
}
}
},
"EventType": {
"type": "string",
"enum": [
"unstructured_log",
"structured_log",
"metric"
],
"title": "EventType",
"description": "The type of telemetry event being logged."
},
"LogSeverity": {
"type": "string",
"enum": [
"verbose",
"debug",
"info",
"warn",
"error",
"critical"
],
"title": "LogSeverity",
"description": "The severity level of a log message."
},
"MetricEvent": {
"type": "object",
"properties": {
"trace_id": {
"type": "string",
"description": "Unique identifier for the trace this event belongs to"
},
"span_id": {
"type": "string",
"description": "Unique identifier for the span this event belongs to"
},
"timestamp": {
"type": "string",
"format": "date-time",
"description": "Timestamp when the event occurred"
},
"attributes": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "string"
},
{
"type": "integer"
},
{
"type": "number"
},
{
"type": "boolean"
},
{
"type": "null"
}
]
},
"description": "(Optional) Key-value pairs containing additional metadata about the event"
},
"type": {
"$ref": "#/components/schemas/EventType",
"const": "metric",
"default": "metric",
"description": "Event type identifier set to METRIC"
},
"metric": {
"type": "string",
"description": "The name of the metric being measured"
},
"value": {
"oneOf": [
{
"type": "integer"
},
{
"type": "number"
}
],
"description": "The numeric value of the metric measurement"
},
"unit": {
"type": "string",
"description": "The unit of measurement for the metric value"
}
},
"additionalProperties": false,
"required": [
"trace_id",
"span_id",
"timestamp",
"type",
"metric",
"value",
"unit"
],
"title": "MetricEvent",
"description": "A metric event containing a measured value."
},
"SpanEndPayload": {
"type": "object",
"properties": {
"type": {
"$ref": "#/components/schemas/StructuredLogType",
"const": "span_end",
"default": "span_end",
"description": "Payload type identifier set to SPAN_END"
},
"status": {
"$ref": "#/components/schemas/SpanStatus",
"description": "The final status of the span indicating success or failure"
}
},
"additionalProperties": false,
"required": [
"type",
"status"
],
"title": "SpanEndPayload",
"description": "Payload for a span end event."
},
"SpanStartPayload": {
"type": "object",
"properties": {
"type": {
"$ref": "#/components/schemas/StructuredLogType",
"const": "span_start",
"default": "span_start",
"description": "Payload type identifier set to SPAN_START"
},
"name": {
"type": "string",
"description": "Human-readable name describing the operation this span represents"
},
"parent_span_id": {
"type": "string",
"description": "(Optional) Unique identifier for the parent span, if this is a child span"
}
},
"additionalProperties": false,
"required": [
"type",
"name"
],
"title": "SpanStartPayload",
"description": "Payload for a span start event."
},
"SpanStatus": {
"type": "string",
"enum": [
"ok",
"error"
],
"title": "SpanStatus",
"description": "The status of a span indicating whether it completed successfully or with an error."
},
"StructuredLogEvent": {
"type": "object",
"properties": {
"trace_id": {
"type": "string",
"description": "Unique identifier for the trace this event belongs to"
},
"span_id": {
"type": "string",
"description": "Unique identifier for the span this event belongs to"
},
"timestamp": {
"type": "string",
"format": "date-time",
"description": "Timestamp when the event occurred"
},
"attributes": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "string"
},
{
"type": "integer"
},
{
"type": "number"
},
{
"type": "boolean"
},
{
"type": "null"
}
]
},
"description": "(Optional) Key-value pairs containing additional metadata about the event"
},
"type": {
"$ref": "#/components/schemas/EventType",
"const": "structured_log",
"default": "structured_log",
"description": "Event type identifier set to STRUCTURED_LOG"
},
"payload": {
"oneOf": [
{
"$ref": "#/components/schemas/SpanStartPayload"
},
{
"$ref": "#/components/schemas/SpanEndPayload"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"span_start": "#/components/schemas/SpanStartPayload",
"span_end": "#/components/schemas/SpanEndPayload"
}
},
"description": "The structured payload data for the log event"
}
},
"additionalProperties": false,
"required": [
"trace_id",
"span_id",
"timestamp",
"type",
"payload"
],
"title": "StructuredLogEvent",
"description": "A structured log event containing typed payload data."
},
"StructuredLogType": {
"type": "string",
"enum": [
"span_start",
"span_end"
],
"title": "StructuredLogType",
"description": "The type of structured log event payload."
},
"UnstructuredLogEvent": {
"type": "object",
"properties": {
"trace_id": {
"type": "string",
"description": "Unique identifier for the trace this event belongs to"
},
"span_id": {
"type": "string",
"description": "Unique identifier for the span this event belongs to"
},
"timestamp": {
"type": "string",
"format": "date-time",
"description": "Timestamp when the event occurred"
},
"attributes": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "string"
},
{
"type": "integer"
},
{
"type": "number"
},
{
"type": "boolean"
},
{
"type": "null"
}
]
},
"description": "(Optional) Key-value pairs containing additional metadata about the event"
},
"type": {
"$ref": "#/components/schemas/EventType",
"const": "unstructured_log",
"default": "unstructured_log",
"description": "Event type identifier set to UNSTRUCTURED_LOG"
},
"message": {
"type": "string",
"description": "The log message text"
},
"severity": {
"$ref": "#/components/schemas/LogSeverity",
"description": "The severity level of the log message"
}
},
"additionalProperties": false,
"required": [
"trace_id",
"span_id",
"timestamp",
"type",
"message",
"severity"
],
"title": "UnstructuredLogEvent",
"description": "An unstructured log event containing a simple text message."
},
"LogEventRequest": {
"type": "object",
"properties": {
"event": {
"$ref": "#/components/schemas/Event",
"description": "The event to log."
},
"ttl_seconds": {
"type": "integer",
"description": "The time to live of the event."
}
},
"additionalProperties": false,
"required": [
"event",
"ttl_seconds"
],
"title": "LogEventRequest"
},
"InvokeToolRequest": { "InvokeToolRequest": {
"type": "object", "type": "object",
"properties": { "properties": {
@@ -12962,10 +12576,6 @@
"name": "SyntheticDataGeneration (Coming Soon)", "name": "SyntheticDataGeneration (Coming Soon)",
"description": "" "description": ""
}, },
{
"name": "Telemetry",
"description": ""
},
{ {
"name": "ToolGroups", "name": "ToolGroups",
"description": "" "description": ""
@@ -13000,7 +12610,6 @@
"ScoringFunctions", "ScoringFunctions",
"Shields", "Shields",
"SyntheticDataGeneration (Coming Soon)", "SyntheticDataGeneration (Coming Soon)",
"Telemetry",
"ToolGroups", "ToolGroups",
"ToolRuntime", "ToolRuntime",
"VectorDBs", "VectorDBs",

View file

@@ -1944,33 +1944,6 @@ paths:
$ref: '#/components/schemas/SyntheticDataGenerateRequest' $ref: '#/components/schemas/SyntheticDataGenerateRequest'
required: true required: true
deprecated: false deprecated: false
/v1/telemetry/events:
post:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Log an event.
description: Log an event.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/LogEventRequest'
required: true
deprecated: false
/v1/tool-runtime/invoke: /v1/tool-runtime/invoke:
post: post:
responses: responses:
@@ -7840,267 +7813,6 @@ components:
description: >- description: >-
Response from the synthetic data generation. Batch of (prompt, response, score) Response from the synthetic data generation. Batch of (prompt, response, score)
tuples that pass the threshold. tuples that pass the threshold.
Event:
oneOf:
- $ref: '#/components/schemas/UnstructuredLogEvent'
- $ref: '#/components/schemas/MetricEvent'
- $ref: '#/components/schemas/StructuredLogEvent'
discriminator:
propertyName: type
mapping:
unstructured_log: '#/components/schemas/UnstructuredLogEvent'
metric: '#/components/schemas/MetricEvent'
structured_log: '#/components/schemas/StructuredLogEvent'
EventType:
type: string
enum:
- unstructured_log
- structured_log
- metric
title: EventType
description: >-
The type of telemetry event being logged.
LogSeverity:
type: string
enum:
- verbose
- debug
- info
- warn
- error
- critical
title: LogSeverity
description: The severity level of a log message.
MetricEvent:
type: object
properties:
trace_id:
type: string
description: >-
Unique identifier for the trace this event belongs to
span_id:
type: string
description: >-
Unique identifier for the span this event belongs to
timestamp:
type: string
format: date-time
description: Timestamp when the event occurred
attributes:
type: object
additionalProperties:
oneOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
description: >-
(Optional) Key-value pairs containing additional metadata about the event
type:
$ref: '#/components/schemas/EventType'
const: metric
default: metric
description: Event type identifier set to METRIC
metric:
type: string
description: The name of the metric being measured
value:
oneOf:
- type: integer
- type: number
description: >-
The numeric value of the metric measurement
unit:
type: string
description: >-
The unit of measurement for the metric value
additionalProperties: false
required:
- trace_id
- span_id
- timestamp
- type
- metric
- value
- unit
title: MetricEvent
description: >-
A metric event containing a measured value.
SpanEndPayload:
type: object
properties:
type:
$ref: '#/components/schemas/StructuredLogType'
const: span_end
default: span_end
description: Payload type identifier set to SPAN_END
status:
$ref: '#/components/schemas/SpanStatus'
description: >-
The final status of the span indicating success or failure
additionalProperties: false
required:
- type
- status
title: SpanEndPayload
description: Payload for a span end event.
SpanStartPayload:
type: object
properties:
type:
$ref: '#/components/schemas/StructuredLogType'
const: span_start
default: span_start
description: >-
Payload type identifier set to SPAN_START
name:
type: string
description: >-
Human-readable name describing the operation this span represents
parent_span_id:
type: string
description: >-
(Optional) Unique identifier for the parent span, if this is a child span
additionalProperties: false
required:
- type
- name
title: SpanStartPayload
description: Payload for a span start event.
SpanStatus:
type: string
enum:
- ok
- error
title: SpanStatus
description: >-
The status of a span indicating whether it completed successfully or with
an error.
StructuredLogEvent:
type: object
properties:
trace_id:
type: string
description: >-
Unique identifier for the trace this event belongs to
span_id:
type: string
description: >-
Unique identifier for the span this event belongs to
timestamp:
type: string
format: date-time
description: Timestamp when the event occurred
attributes:
type: object
additionalProperties:
oneOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
description: >-
(Optional) Key-value pairs containing additional metadata about the event
type:
$ref: '#/components/schemas/EventType'
const: structured_log
default: structured_log
description: >-
Event type identifier set to STRUCTURED_LOG
payload:
oneOf:
- $ref: '#/components/schemas/SpanStartPayload'
- $ref: '#/components/schemas/SpanEndPayload'
discriminator:
propertyName: type
mapping:
span_start: '#/components/schemas/SpanStartPayload'
span_end: '#/components/schemas/SpanEndPayload'
description: >-
The structured payload data for the log event
additionalProperties: false
required:
- trace_id
- span_id
- timestamp
- type
- payload
title: StructuredLogEvent
description: >-
A structured log event containing typed payload data.
StructuredLogType:
type: string
enum:
- span_start
- span_end
title: StructuredLogType
description: >-
The type of structured log event payload.
UnstructuredLogEvent:
type: object
properties:
trace_id:
type: string
description: >-
Unique identifier for the trace this event belongs to
span_id:
type: string
description: >-
Unique identifier for the span this event belongs to
timestamp:
type: string
format: date-time
description: Timestamp when the event occurred
attributes:
type: object
additionalProperties:
oneOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
description: >-
(Optional) Key-value pairs containing additional metadata about the event
type:
$ref: '#/components/schemas/EventType'
const: unstructured_log
default: unstructured_log
description: >-
Event type identifier set to UNSTRUCTURED_LOG
message:
type: string
description: The log message text
severity:
$ref: '#/components/schemas/LogSeverity'
description: The severity level of the log message
additionalProperties: false
required:
- trace_id
- span_id
- timestamp
- type
- message
- severity
title: UnstructuredLogEvent
description: >-
An unstructured log event containing a simple text message.
LogEventRequest:
type: object
properties:
event:
$ref: '#/components/schemas/Event'
description: The event to log.
ttl_seconds:
type: integer
description: The time to live of the event.
additionalProperties: false
required:
- event
- ttl_seconds
title: LogEventRequest
InvokeToolRequest: InvokeToolRequest:
type: object type: object
properties: properties:
@@ -9833,8 +9545,6 @@ tags:
description: '' description: ''
- name: SyntheticDataGeneration (Coming Soon) - name: SyntheticDataGeneration (Coming Soon)
description: '' description: ''
- name: Telemetry
description: ''
- name: ToolGroups - name: ToolGroups
description: '' description: ''
- name: ToolRuntime - name: ToolRuntime
@@ -9859,7 +9569,6 @@ x-tagGroups:
- ScoringFunctions - ScoringFunctions
- Shields - Shields
- SyntheticDataGeneration (Coming Soon) - SyntheticDataGeneration (Coming Soon)
- Telemetry
- ToolGroups - ToolGroups
- ToolRuntime - ToolRuntime
- VectorDBs - VectorDBs
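
For comparison, the metric variant of the same Event union carries a named measurement rather than a message. A minimal sketch of that payload shape, with illustrative values only:

# Required keys mirror the MetricEvent schema above; the metric name, unit, and IDs are placeholders.
metric_event = {
    "type": "metric",
    "trace_id": "trace-123",
    "span_id": "span-456",
    "timestamp": "2025-10-08T18:39:41Z",
    "metric": "prompt_tokens",
    "value": 128,
    "unit": "tokens",
}
required = {"trace_id", "span_id", "timestamp", "type", "metric", "value", "unit"}
assert required <= metric_event.keys()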

File diff suppressed because it is too large

View file

@@ -1947,33 +1947,6 @@ paths:
$ref: '#/components/schemas/SyntheticDataGenerateRequest' $ref: '#/components/schemas/SyntheticDataGenerateRequest'
required: true required: true
deprecated: false deprecated: false
/v1/telemetry/events:
post:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Log an event.
description: Log an event.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/LogEventRequest'
required: true
deprecated: false
/v1/tool-runtime/invoke: /v1/tool-runtime/invoke:
post: post:
responses: responses:
@@ -4392,238 +4365,6 @@ paths:
$ref: '#/components/schemas/SupervisedFineTuneRequest' $ref: '#/components/schemas/SupervisedFineTuneRequest'
required: true required: true
deprecated: false deprecated: false
/v1alpha/telemetry/metrics/{metric_name}:
post:
responses:
'200':
description: A QueryMetricsResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/QueryMetricsResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Query metrics.
description: Query metrics.
parameters:
- name: metric_name
in: path
description: The name of the metric to query.
required: true
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/QueryMetricsRequest'
required: true
deprecated: false
/v1alpha/telemetry/spans:
post:
responses:
'200':
description: A QuerySpansResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/QuerySpansResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Query spans.
description: Query spans.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/QuerySpansRequest'
required: true
deprecated: false
/v1alpha/telemetry/spans/export:
post:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Save spans to a dataset.
description: Save spans to a dataset.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/SaveSpansToDatasetRequest'
required: true
deprecated: false
/v1alpha/telemetry/spans/{span_id}/tree:
post:
responses:
'200':
description: A QuerySpanTreeResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/QuerySpanTreeResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Get a span tree by its ID.
description: Get a span tree by its ID.
parameters:
- name: span_id
in: path
description: The ID of the span to get the tree from.
required: true
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/GetSpanTreeRequest'
required: true
deprecated: false
/v1alpha/telemetry/traces:
post:
responses:
'200':
description: A QueryTracesResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/QueryTracesResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Query traces.
description: Query traces.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/QueryTracesRequest'
required: true
deprecated: false
/v1alpha/telemetry/traces/{trace_id}:
get:
responses:
'200':
description: A Trace.
content:
application/json:
schema:
$ref: '#/components/schemas/Trace'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Get a trace by its ID.
description: Get a trace by its ID.
parameters:
- name: trace_id
in: path
description: The ID of the trace to get.
required: true
schema:
type: string
deprecated: false
/v1alpha/telemetry/traces/{trace_id}/spans/{span_id}:
get:
responses:
'200':
description: A Span.
content:
application/json:
schema:
$ref: '#/components/schemas/Span'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Get a span by its ID.
description: Get a span by its ID.
parameters:
- name: trace_id
in: path
description: >-
The ID of the trace to get the span from.
required: true
schema:
type: string
- name: span_id
in: path
description: The ID of the span to get.
required: true
schema:
type: string
deprecated: false
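
The span-tree route above takes the span ID in the path and a GetSpanTreeRequest body; the response maps span IDs to SpanWithStatus objects. A sketch, with placeholder server address, token, span ID, and attribute names:

import requests

resp = requests.post(
    "http://localhost:8321/v1alpha/telemetry/spans/span-456/tree",  # assumption: local server, placeholder span ID
    json={"attributes_to_return": ["model_id"], "max_depth": 3},    # hypothetical attribute name
    headers={"Authorization": "Bearer <token>"},
    timeout=30,
)
resp.raise_for_status()
tree = resp.json()["data"]  # dict: span_id -> SpanWithStatus
for span_id, span in tree.items():
    print(span_id, span["name"], span.get("status"))
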
jsonSchemaDialect: >- jsonSchemaDialect: >-
https://json-schema.org/draft/2020-12/schema https://json-schema.org/draft/2020-12/schema
components: components:
@@ -9285,267 +9026,6 @@ components:
description: >- description: >-
Response from the synthetic data generation. Batch of (prompt, response, score) Response from the synthetic data generation. Batch of (prompt, response, score)
tuples that pass the threshold. tuples that pass the threshold.
Event:
oneOf:
- $ref: '#/components/schemas/UnstructuredLogEvent'
- $ref: '#/components/schemas/MetricEvent'
- $ref: '#/components/schemas/StructuredLogEvent'
discriminator:
propertyName: type
mapping:
unstructured_log: '#/components/schemas/UnstructuredLogEvent'
metric: '#/components/schemas/MetricEvent'
structured_log: '#/components/schemas/StructuredLogEvent'
EventType:
type: string
enum:
- unstructured_log
- structured_log
- metric
title: EventType
description: >-
The type of telemetry event being logged.
LogSeverity:
type: string
enum:
- verbose
- debug
- info
- warn
- error
- critical
title: LogSeverity
description: The severity level of a log message.
MetricEvent:
type: object
properties:
trace_id:
type: string
description: >-
Unique identifier for the trace this event belongs to
span_id:
type: string
description: >-
Unique identifier for the span this event belongs to
timestamp:
type: string
format: date-time
description: Timestamp when the event occurred
attributes:
type: object
additionalProperties:
oneOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
description: >-
(Optional) Key-value pairs containing additional metadata about the event
type:
$ref: '#/components/schemas/EventType'
const: metric
default: metric
description: Event type identifier set to METRIC
metric:
type: string
description: The name of the metric being measured
value:
oneOf:
- type: integer
- type: number
description: >-
The numeric value of the metric measurement
unit:
type: string
description: >-
The unit of measurement for the metric value
additionalProperties: false
required:
- trace_id
- span_id
- timestamp
- type
- metric
- value
- unit
title: MetricEvent
description: >-
A metric event containing a measured value.
SpanEndPayload:
type: object
properties:
type:
$ref: '#/components/schemas/StructuredLogType'
const: span_end
default: span_end
description: Payload type identifier set to SPAN_END
status:
$ref: '#/components/schemas/SpanStatus'
description: >-
The final status of the span indicating success or failure
additionalProperties: false
required:
- type
- status
title: SpanEndPayload
description: Payload for a span end event.
SpanStartPayload:
type: object
properties:
type:
$ref: '#/components/schemas/StructuredLogType'
const: span_start
default: span_start
description: >-
Payload type identifier set to SPAN_START
name:
type: string
description: >-
Human-readable name describing the operation this span represents
parent_span_id:
type: string
description: >-
(Optional) Unique identifier for the parent span, if this is a child span
additionalProperties: false
required:
- type
- name
title: SpanStartPayload
description: Payload for a span start event.
SpanStatus:
type: string
enum:
- ok
- error
title: SpanStatus
description: >-
The status of a span indicating whether it completed successfully or with
an error.
StructuredLogEvent:
type: object
properties:
trace_id:
type: string
description: >-
Unique identifier for the trace this event belongs to
span_id:
type: string
description: >-
Unique identifier for the span this event belongs to
timestamp:
type: string
format: date-time
description: Timestamp when the event occurred
attributes:
type: object
additionalProperties:
oneOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
description: >-
(Optional) Key-value pairs containing additional metadata about the event
type:
$ref: '#/components/schemas/EventType'
const: structured_log
default: structured_log
description: >-
Event type identifier set to STRUCTURED_LOG
payload:
oneOf:
- $ref: '#/components/schemas/SpanStartPayload'
- $ref: '#/components/schemas/SpanEndPayload'
discriminator:
propertyName: type
mapping:
span_start: '#/components/schemas/SpanStartPayload'
span_end: '#/components/schemas/SpanEndPayload'
description: >-
The structured payload data for the log event
additionalProperties: false
required:
- trace_id
- span_id
- timestamp
- type
- payload
title: StructuredLogEvent
description: >-
A structured log event containing typed payload data.
StructuredLogType:
type: string
enum:
- span_start
- span_end
title: StructuredLogType
description: >-
The type of structured log event payload.
UnstructuredLogEvent:
type: object
properties:
trace_id:
type: string
description: >-
Unique identifier for the trace this event belongs to
span_id:
type: string
description: >-
Unique identifier for the span this event belongs to
timestamp:
type: string
format: date-time
description: Timestamp when the event occurred
attributes:
type: object
additionalProperties:
oneOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
description: >-
(Optional) Key-value pairs containing additional metadata about the event
type:
$ref: '#/components/schemas/EventType'
const: unstructured_log
default: unstructured_log
description: >-
Event type identifier set to UNSTRUCTURED_LOG
message:
type: string
description: The log message text
severity:
$ref: '#/components/schemas/LogSeverity'
description: The severity level of the log message
additionalProperties: false
required:
- trace_id
- span_id
- timestamp
- type
- message
- severity
title: UnstructuredLogEvent
description: >-
An unstructured log event containing a simple text message.
LogEventRequest:
type: object
properties:
event:
$ref: '#/components/schemas/Event'
description: The event to log.
ttl_seconds:
type: integer
description: The time to live of the event.
additionalProperties: false
required:
- event
- ttl_seconds
title: LogEventRequest
InvokeToolRequest: InvokeToolRequest:
type: object type: object
properties: properties:
@@ -13349,425 +12829,6 @@ components:
- hyperparam_search_config - hyperparam_search_config
- logger_config - logger_config
title: SupervisedFineTuneRequest title: SupervisedFineTuneRequest
QueryMetricsRequest:
type: object
properties:
start_time:
type: integer
description: The start time of the metric to query.
end_time:
type: integer
description: The end time of the metric to query.
granularity:
type: string
description: The granularity of the metric to query.
query_type:
type: string
enum:
- range
- instant
description: The type of query to perform.
label_matchers:
type: array
items:
type: object
properties:
name:
type: string
description: The name of the label to match
value:
type: string
description: The value to match against
operator:
type: string
enum:
- '='
- '!='
- =~
- '!~'
description: >-
The comparison operator to use for matching
default: '='
additionalProperties: false
required:
- name
- value
- operator
title: MetricLabelMatcher
description: >-
A matcher for filtering metrics by label values.
description: >-
The label matchers to apply to the metric.
additionalProperties: false
required:
- start_time
- query_type
title: QueryMetricsRequest
MetricDataPoint:
type: object
properties:
timestamp:
type: integer
description: >-
Unix timestamp when the metric value was recorded
value:
type: number
description: >-
The numeric value of the metric at this timestamp
unit:
type: string
additionalProperties: false
required:
- timestamp
- value
- unit
title: MetricDataPoint
description: >-
A single data point in a metric time series.
MetricLabel:
type: object
properties:
name:
type: string
description: The name of the label
value:
type: string
description: The value of the label
additionalProperties: false
required:
- name
- value
title: MetricLabel
description: A label associated with a metric.
MetricSeries:
type: object
properties:
metric:
type: string
description: The name of the metric
labels:
type: array
items:
$ref: '#/components/schemas/MetricLabel'
description: >-
List of labels associated with this metric series
values:
type: array
items:
$ref: '#/components/schemas/MetricDataPoint'
description: >-
List of data points in chronological order
additionalProperties: false
required:
- metric
- labels
- values
title: MetricSeries
description: A time series of metric data points.
QueryMetricsResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/MetricSeries'
description: >-
List of metric series matching the query criteria
additionalProperties: false
required:
- data
title: QueryMetricsResponse
description: >-
Response containing metric time series data.
QueryCondition:
type: object
properties:
key:
type: string
description: The attribute key to filter on
op:
$ref: '#/components/schemas/QueryConditionOp'
description: The comparison operator to apply
value:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: The value to compare against
additionalProperties: false
required:
- key
- op
- value
title: QueryCondition
description: A condition for filtering query results.
QueryConditionOp:
type: string
enum:
- eq
- ne
- gt
- lt
title: QueryConditionOp
description: >-
Comparison operators for query conditions.
QuerySpansRequest:
type: object
properties:
attribute_filters:
type: array
items:
$ref: '#/components/schemas/QueryCondition'
description: >-
The attribute filters to apply to the spans.
attributes_to_return:
type: array
items:
type: string
description: The attributes to return in the spans.
max_depth:
type: integer
description: The maximum depth of the tree.
additionalProperties: false
required:
- attribute_filters
- attributes_to_return
title: QuerySpansRequest
Span:
type: object
properties:
span_id:
type: string
description: Unique identifier for the span
trace_id:
type: string
description: >-
Unique identifier for the trace this span belongs to
parent_span_id:
type: string
description: >-
(Optional) Unique identifier for the parent span, if this is a child span
name:
type: string
description: >-
Human-readable name describing the operation this span represents
start_time:
type: string
format: date-time
description: Timestamp when the operation began
end_time:
type: string
format: date-time
description: >-
(Optional) Timestamp when the operation finished, if completed
attributes:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Key-value pairs containing additional metadata about the span
additionalProperties: false
required:
- span_id
- trace_id
- name
- start_time
title: Span
description: >-
A span representing a single operation within a trace.
QuerySpansResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/Span'
description: >-
List of spans matching the query criteria
additionalProperties: false
required:
- data
title: QuerySpansResponse
description: Response containing a list of spans.
SaveSpansToDatasetRequest:
type: object
properties:
attribute_filters:
type: array
items:
$ref: '#/components/schemas/QueryCondition'
description: >-
The attribute filters to apply to the spans.
attributes_to_save:
type: array
items:
type: string
description: The attributes to save to the dataset.
dataset_id:
type: string
description: >-
The ID of the dataset to save the spans to.
max_depth:
type: integer
description: The maximum depth of the tree.
additionalProperties: false
required:
- attribute_filters
- attributes_to_save
- dataset_id
title: SaveSpansToDatasetRequest
GetSpanTreeRequest:
type: object
properties:
attributes_to_return:
type: array
items:
type: string
description: The attributes to return in the tree.
max_depth:
type: integer
description: The maximum depth of the tree.
additionalProperties: false
title: GetSpanTreeRequest
SpanWithStatus:
type: object
properties:
span_id:
type: string
description: Unique identifier for the span
trace_id:
type: string
description: >-
Unique identifier for the trace this span belongs to
parent_span_id:
type: string
description: >-
(Optional) Unique identifier for the parent span, if this is a child span
name:
type: string
description: >-
Human-readable name describing the operation this span represents
start_time:
type: string
format: date-time
description: Timestamp when the operation began
end_time:
type: string
format: date-time
description: >-
(Optional) Timestamp when the operation finished, if completed
attributes:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Key-value pairs containing additional metadata about the span
status:
$ref: '#/components/schemas/SpanStatus'
description: >-
(Optional) The current status of the span
additionalProperties: false
required:
- span_id
- trace_id
- name
- start_time
title: SpanWithStatus
description: A span that includes status information.
QuerySpanTreeResponse:
type: object
properties:
data:
type: object
additionalProperties:
$ref: '#/components/schemas/SpanWithStatus'
description: >-
Dictionary mapping span IDs to spans with status information
additionalProperties: false
required:
- data
title: QuerySpanTreeResponse
description: >-
Response containing a tree structure of spans.
QueryTracesRequest:
type: object
properties:
attribute_filters:
type: array
items:
$ref: '#/components/schemas/QueryCondition'
description: >-
The attribute filters to apply to the traces.
limit:
type: integer
description: The limit of traces to return.
offset:
type: integer
description: The offset of the traces to return.
order_by:
type: array
items:
type: string
description: The sort order for the returned traces.
additionalProperties: false
title: QueryTracesRequest
Trace:
type: object
properties:
trace_id:
type: string
description: Unique identifier for the trace
root_span_id:
type: string
description: >-
Unique identifier for the root span that started this trace
start_time:
type: string
format: date-time
description: Timestamp when the trace began
end_time:
type: string
format: date-time
description: >-
(Optional) Timestamp when the trace finished, if completed
additionalProperties: false
required:
- trace_id
- root_span_id
- start_time
title: Trace
description: >-
A trace representing the complete execution path of a request across multiple
operations.
QueryTracesResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/Trace'
description: >-
List of traces matching the query criteria
additionalProperties: false
required:
- data
title: QueryTracesResponse
description: Response containing a list of traces.
responses: responses:
BadRequest400: BadRequest400:
description: The request was invalid or malformed description: The request was invalid or malformed
@@ -13881,8 +12942,6 @@ tags:
description: '' description: ''
- name: SyntheticDataGeneration (Coming Soon) - name: SyntheticDataGeneration (Coming Soon)
description: '' description: ''
- name: Telemetry
description: ''
- name: ToolGroups - name: ToolGroups
description: '' description: ''
- name: ToolRuntime - name: ToolRuntime
@@ -13912,7 +12971,6 @@ x-tagGroups:
- ScoringFunctions - ScoringFunctions
- Shields - Shields
- SyntheticDataGeneration (Coming Soon) - SyntheticDataGeneration (Coming Soon)
- Telemetry
- ToolGroups - ToolGroups
- ToolRuntime - ToolRuntime
- VectorDBs - VectorDBs
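
The QueryMetricsRequest schema above pairs a time window and query_type with optional label matchers, and the metric name travels in the path. A sketch of a range query, with placeholder server address, token, label, and metric name:

import time
import requests

body = {
    "start_time": int(time.time()) - 3600,
    "end_time": int(time.time()),
    "granularity": "1m",  # free-form string in the schema above
    "query_type": "range",
    "label_matchers": [
        {"name": "model_id", "value": "meta-llama/llama-3-3-70b-instruct", "operator": "="},  # hypothetical label
    ],
}
resp = requests.post(
    "http://localhost:8321/v1alpha/telemetry/metrics/prompt_tokens",  # assumption: local server, example metric name
    json=body,
    headers={"Authorization": "Bearer <token>"},
    timeout=30,
)
resp.raise_for_status()
for series in resp.json()["data"]:  # QueryMetricsResponse.data -> list of MetricSeries
    print(series["metric"], series["labels"], len(series["values"]))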

View file

@@ -16,15 +16,12 @@ from typing import (
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
from llama_stack.models.llama.datatypes import Primitive from llama_stack.models.llama.datatypes import Primitive
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod from llama_stack.schema_utils import json_schema_type, register_schema
# Add this constant near the top of the file, after the imports # Add this constant near the top of the file, after the imports
DEFAULT_TTL_DAYS = 7 DEFAULT_TTL_DAYS = 7
REQUIRED_SCOPE = "telemetry.read"
@json_schema_type @json_schema_type
class SpanStatus(Enum): class SpanStatus(Enum):
@@ -413,7 +410,6 @@ class QueryMetricsResponse(BaseModel):
@runtime_checkable @runtime_checkable
class Telemetry(Protocol): class Telemetry(Protocol):
@webmethod(route="/telemetry/events", method="POST", level=LLAMA_STACK_API_V1)
async def log_event( async def log_event(
self, self,
event: Event, event: Event,
@@ -426,14 +422,6 @@ class Telemetry(Protocol):
""" """
... ...
@webmethod(
route="/telemetry/traces",
method="POST",
required_scope=REQUIRED_SCOPE,
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod(route="/telemetry/traces", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1ALPHA)
async def query_traces( async def query_traces(
self, self,
attribute_filters: list[QueryCondition] | None = None, attribute_filters: list[QueryCondition] | None = None,
@@ -451,19 +439,6 @@ class Telemetry(Protocol):
""" """
... ...
@webmethod(
route="/telemetry/traces/{trace_id:path}",
method="GET",
required_scope=REQUIRED_SCOPE,
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod(
route="/telemetry/traces/{trace_id:path}",
method="GET",
required_scope=REQUIRED_SCOPE,
level=LLAMA_STACK_API_V1ALPHA,
)
async def get_trace(self, trace_id: str) -> Trace: async def get_trace(self, trace_id: str) -> Trace:
"""Get a trace by its ID. """Get a trace by its ID.
@@ -472,19 +447,6 @@ class Telemetry(Protocol):
""" """
... ...
@webmethod(
route="/telemetry/traces/{trace_id:path}/spans/{span_id:path}",
method="GET",
required_scope=REQUIRED_SCOPE,
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod(
route="/telemetry/traces/{trace_id:path}/spans/{span_id:path}",
method="GET",
required_scope=REQUIRED_SCOPE,
level=LLAMA_STACK_API_V1ALPHA,
)
async def get_span(self, trace_id: str, span_id: str) -> Span: async def get_span(self, trace_id: str, span_id: str) -> Span:
"""Get a span by its ID. """Get a span by its ID.
@@ -494,19 +456,6 @@ class Telemetry(Protocol):
""" """
... ...
@webmethod(
route="/telemetry/spans/{span_id:path}/tree",
method="POST",
deprecated=True,
required_scope=REQUIRED_SCOPE,
level=LLAMA_STACK_API_V1,
)
@webmethod(
route="/telemetry/spans/{span_id:path}/tree",
method="POST",
required_scope=REQUIRED_SCOPE,
level=LLAMA_STACK_API_V1ALPHA,
)
async def get_span_tree( async def get_span_tree(
self, self,
span_id: str, span_id: str,
@@ -522,14 +471,6 @@ class Telemetry(Protocol):
""" """
... ...
@webmethod(
route="/telemetry/spans",
method="POST",
required_scope=REQUIRED_SCOPE,
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod(route="/telemetry/spans", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1ALPHA)
async def query_spans( async def query_spans(
self, self,
attribute_filters: list[QueryCondition], attribute_filters: list[QueryCondition],
@@ -545,8 +486,6 @@ class Telemetry(Protocol):
""" """
... ...
@webmethod(route="/telemetry/spans/export", method="POST", deprecated=True, level=LLAMA_STACK_API_V1)
@webmethod(route="/telemetry/spans/export", method="POST", level=LLAMA_STACK_API_V1ALPHA)
async def save_spans_to_dataset( async def save_spans_to_dataset(
self, self,
attribute_filters: list[QueryCondition], attribute_filters: list[QueryCondition],
@@ -563,19 +502,6 @@ class Telemetry(Protocol):
""" """
... ...
@webmethod(
route="/telemetry/metrics/{metric_name}",
method="POST",
required_scope=REQUIRED_SCOPE,
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod(
route="/telemetry/metrics/{metric_name}",
method="POST",
required_scope=REQUIRED_SCOPE,
level=LLAMA_STACK_API_V1ALPHA,
)
async def query_metrics( async def query_metrics(
self, self,
metric_name: str, metric_name: str,
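
The decorators stripped in this file followed a consistent pattern: each telemetry route was registered twice, once under /v1 marked deprecated and once under /v1alpha, both gated by the telemetry.read scope. A sketch of that pattern on a reduced protocol, reusing only names that appear in the removed code; it is an illustration, not the actual Telemetry class:

from typing import Protocol, runtime_checkable

from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
from llama_stack.schema_utils import webmethod

REQUIRED_SCOPE = "telemetry.read"


@runtime_checkable
class TelemetrySketch(Protocol):
    # The v1 registration survives only as a deprecated alias; v1alpha is the canonical route.
    @webmethod(
        route="/telemetry/traces",
        method="POST",
        required_scope=REQUIRED_SCOPE,
        deprecated=True,
        level=LLAMA_STACK_API_V1,
    )
    @webmethod(route="/telemetry/traces", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1ALPHA)
    async def query_traces(self, limit: int | None = None, offset: int | None = None): ...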

View file

@@ -32,7 +32,7 @@ from llama_stack.providers.utils.sqlstore.sqlstore import (
sqlstore_impl, sqlstore_impl,
) )
logger = get_logger(name=__name__, category="openai::conversations") logger = get_logger(name=__name__, category="openai_conversations")
class ConversationServiceConfig(BaseModel): class ConversationServiceConfig(BaseModel):

View file

@@ -611,7 +611,7 @@ class InferenceRouter(Inference):
completion_text += "".join(choice_data["content_parts"]) completion_text += "".join(choice_data["content_parts"])
# Add metrics to the chunk # Add metrics to the chunk
if self.telemetry and chunk.usage: if self.telemetry and hasattr(chunk, "usage") and chunk.usage:
metrics = self._construct_metrics( metrics = self._construct_metrics(
prompt_tokens=chunk.usage.prompt_tokens, prompt_tokens=chunk.usage.prompt_tokens,
completion_tokens=chunk.usage.completion_tokens, completion_tokens=chunk.usage.completion_tokens,
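
The added hasattr guard matters because not every chunk type flowing through the router is guaranteed to expose a usage attribute; checking truthiness alone would raise AttributeError on those. A standalone illustration of the guard, using stand-in classes rather than the router's real chunk types:

class ChunkWithUsage:
    def __init__(self, usage):
        self.usage = usage


class ChunkWithoutUsage:
    pass


def has_reportable_usage(chunk) -> bool:
    # Mirrors the guard above: the attribute must exist and be truthy before metrics are built.
    return hasattr(chunk, "usage") and bool(chunk.usage)


assert has_reportable_usage(ChunkWithUsage(usage=object()))
assert not has_reportable_usage(ChunkWithUsage(usage=None))
assert not has_reportable_usage(ChunkWithoutUsage())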

View file

@@ -98,7 +98,10 @@ class DiskDistributionRegistry(DistributionRegistry):
existing_obj = await self.get(obj.type, obj.identifier) existing_obj = await self.get(obj.type, obj.identifier)
# don't register if the object's provider_id already exists # don't register if the object's provider_id already exists
if existing_obj and existing_obj.provider_id == obj.provider_id: if existing_obj and existing_obj.provider_id == obj.provider_id:
return False raise ValueError(
f"Provider '{obj.provider_id}' is already registered."
f"Unregister the existing provider first before registering it again."
)
await self.kvstore.set( await self.kvstore.set(
KEY_FORMAT.format(type=obj.type, identifier=obj.identifier), KEY_FORMAT.format(type=obj.type, identifier=obj.identifier),
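
From a caller's perspective the duplicate-registration case is now an explicit failure rather than a silent False. A sketch of handling it, with a hypothetical registry object exposing an async register method (the method name is assumed, not shown in this hunk):

async def register_or_skip(registry, obj) -> bool:
    # Returns True when the object was newly registered, False when the same
    # provider_id was already present and the registry raised.
    try:
        await registry.register(obj)  # assumed method name
        return True
    except ValueError as exc:
        print(f"skipping {obj.identifier}: {exc}")
        return False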

View file

@@ -3,3 +3,5 @@
# #
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from .watsonx import get_distribution_template # noqa: F401
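
With this re-export in place, callers can pull the template straight from the distribution package. A sketch, assuming the file above is llama_stack/distributions/watsonx/__init__.py and the surrounding package is importable:

from llama_stack.distributions.watsonx import get_distribution_template  # assumed package path

template = get_distribution_template()
print(template.name, template.description)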

View file

@@ -3,44 +3,33 @@ distribution_spec:
description: Use watsonx for running LLM inference description: Use watsonx for running LLM inference
providers: providers:
inference: inference:
- provider_id: watsonx - provider_type: remote::watsonx
provider_type: remote::watsonx - provider_type: inline::sentence-transformers
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
vector_io: vector_io:
- provider_id: faiss - provider_type: inline::faiss
provider_type: inline::faiss
safety: safety:
- provider_id: llama-guard - provider_type: inline::llama-guard
provider_type: inline::llama-guard
agents: agents:
- provider_id: meta-reference - provider_type: inline::meta-reference
provider_type: inline::meta-reference
telemetry: telemetry:
- provider_id: meta-reference - provider_type: inline::meta-reference
provider_type: inline::meta-reference
eval: eval:
- provider_id: meta-reference - provider_type: inline::meta-reference
provider_type: inline::meta-reference
datasetio: datasetio:
- provider_id: huggingface - provider_type: remote::huggingface
provider_type: remote::huggingface - provider_type: inline::localfs
- provider_id: localfs
provider_type: inline::localfs
scoring: scoring:
- provider_id: basic - provider_type: inline::basic
provider_type: inline::basic - provider_type: inline::llm-as-judge
- provider_id: llm-as-judge - provider_type: inline::braintrust
provider_type: inline::llm-as-judge
- provider_id: braintrust
provider_type: inline::braintrust
tool_runtime: tool_runtime:
- provider_type: remote::brave-search - provider_type: remote::brave-search
- provider_type: remote::tavily-search - provider_type: remote::tavily-search
- provider_type: inline::rag-runtime - provider_type: inline::rag-runtime
- provider_type: remote::model-context-protocol - provider_type: remote::model-context-protocol
files:
- provider_type: inline::localfs
image_type: venv image_type: venv
additional_pip_packages: additional_pip_packages:
- aiosqlite
- sqlalchemy[asyncio] - sqlalchemy[asyncio]
- aiosqlite
- aiosqlite

View file

@@ -4,13 +4,13 @@ apis:
- agents - agents
- datasetio - datasetio
- eval - eval
- files
- inference - inference
- safety - safety
- scoring - scoring
- telemetry - telemetry
- tool_runtime - tool_runtime
- vector_io - vector_io
- files
providers: providers:
inference: inference:
- provider_id: watsonx - provider_id: watsonx
@@ -19,8 +19,6 @@ providers:
url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com} url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
api_key: ${env.WATSONX_API_KEY:=} api_key: ${env.WATSONX_API_KEY:=}
project_id: ${env.WATSONX_PROJECT_ID:=} project_id: ${env.WATSONX_PROJECT_ID:=}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
vector_io: vector_io:
- provider_id: faiss - provider_id: faiss
provider_type: inline::faiss provider_type: inline::faiss
@@ -48,7 +46,7 @@ providers:
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
sinks: ${env.TELEMETRY_SINKS:=console,sqlite} sinks: ${env.TELEMETRY_SINKS:=sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/trace_store.db sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/trace_store.db
otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=} otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
eval: eval:
@@ -109,102 +107,7 @@ metadata_store:
inference_store: inference_store:
type: sqlite type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/inference_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/inference_store.db
models: models: []
- metadata: {}
model_id: meta-llama/llama-3-3-70b-instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-3-70b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-3-70b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/llama-2-13b-chat
provider_id: watsonx
provider_model_id: meta-llama/llama-2-13b-chat
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-2-13b
provider_id: watsonx
provider_model_id: meta-llama/llama-2-13b-chat
model_type: llm
- metadata: {}
model_id: meta-llama/llama-3-1-70b-instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-1-70b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-70B-Instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-1-70b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/llama-3-1-8b-instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-1-8b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-1-8b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/llama-3-2-11b-vision-instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-2-11b-vision-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-2-11b-vision-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/llama-3-2-1b-instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-2-1b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-1B-Instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-2-1b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/llama-3-2-3b-instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-2-3b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-2-3b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/llama-3-2-90b-vision-instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-2-90b-vision-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-2-90b-vision-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/llama-guard-3-11b-vision
provider_id: watsonx
provider_model_id: meta-llama/llama-guard-3-11b-vision
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-Guard-3-11B-Vision
provider_id: watsonx
provider_model_id: meta-llama/llama-guard-3-11b-vision
model_type: llm
- metadata:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
provider_id: sentence-transformers
model_type: embedding
shields: [] shields: []
vector_dbs: [] vector_dbs: []
datasets: [] datasets: []

View file

@ -4,17 +4,11 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from pathlib import Path
from llama_stack.apis.models import ModelType from llama_stack.core.datatypes import BuildProvider, Provider, ToolGroupInput
from llama_stack.core.datatypes import BuildProvider, ModelInput, Provider, ToolGroupInput from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings
from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings, get_model_registry
from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig
from llama_stack.providers.inline.inference.sentence_transformers import (
SentenceTransformersInferenceConfig,
)
from llama_stack.providers.remote.inference.watsonx import WatsonXConfig from llama_stack.providers.remote.inference.watsonx import WatsonXConfig
from llama_stack.providers.remote.inference.watsonx.models import MODEL_ENTRIES
def get_distribution_template(name: str = "watsonx") -> DistributionTemplate: def get_distribution_template(name: str = "watsonx") -> DistributionTemplate:
@ -52,15 +46,6 @@ def get_distribution_template(name: str = "watsonx") -> DistributionTemplate:
config=WatsonXConfig.sample_run_config(), config=WatsonXConfig.sample_run_config(),
) )
embedding_provider = Provider(
provider_id="sentence-transformers",
provider_type="inline::sentence-transformers",
config=SentenceTransformersInferenceConfig.sample_run_config(),
)
available_models = {
"watsonx": MODEL_ENTRIES,
}
default_tool_groups = [ default_tool_groups = [
ToolGroupInput( ToolGroupInput(
toolgroup_id="builtin::websearch", toolgroup_id="builtin::websearch",
@ -72,36 +57,25 @@ def get_distribution_template(name: str = "watsonx") -> DistributionTemplate:
), ),
] ]
embedding_model = ModelInput(
model_id="all-MiniLM-L6-v2",
provider_id="sentence-transformers",
model_type=ModelType.embedding,
metadata={
"embedding_dimension": 384,
},
)
files_provider = Provider( files_provider = Provider(
provider_id="meta-reference-files", provider_id="meta-reference-files",
provider_type="inline::localfs", provider_type="inline::localfs",
config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"), config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"),
) )
default_models, _ = get_model_registry(available_models)
return DistributionTemplate( return DistributionTemplate(
name=name, name=name,
distro_type="remote_hosted", distro_type="remote_hosted",
description="Use watsonx for running LLM inference", description="Use watsonx for running LLM inference",
container_image=None, container_image=None,
template_path=Path(__file__).parent / "doc_template.md", template_path=None,
providers=providers, providers=providers,
available_models_by_provider=available_models,
run_configs={ run_configs={
"run.yaml": RunConfigSettings( "run.yaml": RunConfigSettings(
provider_overrides={ provider_overrides={
"inference": [inference_provider, embedding_provider], "inference": [inference_provider],
"files": [files_provider], "files": [files_provider],
}, },
default_models=default_models + [embedding_model], default_models=[],
default_tool_groups=default_tool_groups, default_tool_groups=default_tool_groups,
), ),
}, },

View file

@ -31,12 +31,17 @@ CATEGORIES = [
"client", "client",
"telemetry", "telemetry",
"openai_responses", "openai_responses",
"openai_conversations",
"testing", "testing",
"providers", "providers",
"models", "models",
"files", "files",
"vector_io", "vector_io",
"tool_runtime", "tool_runtime",
"cli",
"post_training",
"scoring",
"tests",
] ]
UNCATEGORIZED = "uncategorized" UNCATEGORIZED = "uncategorized"
@ -261,11 +266,12 @@ def get_logger(
if root_category in _category_levels: if root_category in _category_levels:
log_level = _category_levels[root_category] log_level = _category_levels[root_category]
else: else:
log_level = _category_levels.get("root", DEFAULT_LOG_LEVEL)
if category != UNCATEGORIZED: if category != UNCATEGORIZED:
logging.warning( raise ValueError(
f"Unknown logging category: {category}. Falling back to default 'root' level: {log_level}" f"Unknown logging category: {category}. To resolve, choose a valid category from the CATEGORIES list "
f"or add it to the CATEGORIES list. Available categories: {CATEGORIES}"
) )
log_level = _category_levels.get("root", DEFAULT_LOG_LEVEL)
logger.setLevel(log_level) logger.setLevel(log_level)
return logging.LoggerAdapter(logger, {"category": category}) return logging.LoggerAdapter(logger, {"category": category})
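For illustration, a minimal self-contained sketch of the stricter category lookup above (names abbreviated; the real implementation derives `_category_levels` from `CATEGORIES` plus environment configuration, so treat the dictionaries here as assumptions):

```python
import logging

DEFAULT_LOG_LEVEL = logging.INFO
UNCATEGORIZED = "uncategorized"
CATEGORIES = ["core", "inference", "tool_runtime"]  # abbreviated for the sketch
_category_levels = {"root": logging.INFO, "inference": logging.DEBUG}


def resolve_level(category: str) -> int:
    # Same branch structure as the diff: known root categories resolve directly,
    # unknown categories now raise instead of silently falling back.
    root_category = category.split("::")[0]
    if root_category in _category_levels:
        return _category_levels[root_category]
    if category != UNCATEGORIZED:
        raise ValueError(
            f"Unknown logging category: {category}. Choose a valid category from {CATEGORIES} "
            "or add it to the CATEGORIES list."
        )
    return _category_levels.get("root", DEFAULT_LOG_LEVEL)


assert resolve_level("inference::watsonx") == logging.DEBUG
# resolve_level("not-a-category") now raises ValueError instead of logging a warning
```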

View file

@ -11,19 +11,13 @@
# top-level folder for each specific model found within the models/ directory at # top-level folder for each specific model found within the models/ directory at
# the top-level of this source tree. # the top-level of this source tree.
import json
import textwrap import textwrap
from pathlib import Path
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from llama_stack.models.llama.datatypes import ( from llama_stack.models.llama.datatypes import (
RawContent, RawContent,
RawMediaItem,
RawMessage, RawMessage,
RawTextItem,
StopReason,
ToolCall,
ToolPromptFormat, ToolPromptFormat,
) )
from llama_stack.models.llama.llama4.tokenizer import Tokenizer from llama_stack.models.llama.llama4.tokenizer import Tokenizer
@ -175,25 +169,6 @@ def llama3_1_builtin_code_interpreter_dialog(tool_prompt_format=ToolPromptFormat
return messages return messages
def llama3_1_builtin_tool_call_with_image_dialog(
tool_prompt_format=ToolPromptFormat.json,
):
this_dir = Path(__file__).parent
with open(this_dir / "llama3/dog.jpg", "rb") as f:
img = f.read()
interface = LLama31Interface(tool_prompt_format)
messages = interface.system_messages(**system_message_builtin_tools_only())
messages += interface.user_message(content=[RawMediaItem(data=img), RawTextItem(text="What is this dog breed?")])
messages += interface.assistant_response_messages(
"Based on the description of the dog in the image, it appears to be a small breed dog, possibly a terrier mix",
StopReason.end_of_turn,
)
messages += interface.user_message("Search the web for some food recommendations for the indentified breed")
return messages
def llama3_1_custom_tool_call_dialog(tool_prompt_format=ToolPromptFormat.json): def llama3_1_custom_tool_call_dialog(tool_prompt_format=ToolPromptFormat.json):
interface = LLama31Interface(tool_prompt_format) interface = LLama31Interface(tool_prompt_format)
@ -202,35 +177,6 @@ def llama3_1_custom_tool_call_dialog(tool_prompt_format=ToolPromptFormat.json):
return messages return messages
def llama3_1_e2e_tool_call_dialog(tool_prompt_format=ToolPromptFormat.json):
tool_response = json.dumps(["great song1", "awesome song2", "cool song3"])
interface = LLama31Interface(tool_prompt_format)
messages = interface.system_messages(**system_message_custom_tools_only())
messages += interface.user_message(content="Use tools to get latest trending songs")
messages.append(
RawMessage(
role="assistant",
content="",
stop_reason=StopReason.end_of_message,
tool_calls=[
ToolCall(
call_id="call_id",
tool_name="trending_songs",
arguments={"n": "10", "genre": "latest"},
)
],
),
)
messages.append(
RawMessage(
role="assistant",
content=tool_response,
)
)
return messages
def llama3_2_user_assistant_conversation(): def llama3_2_user_assistant_conversation():
return UseCase( return UseCase(
title="User and assistant conversation", title="User and assistant conversation",

View file

@ -7,8 +7,6 @@
import copy import copy
import json import json
import re import re
import secrets
import string
import uuid import uuid
import warnings import warnings
from collections.abc import AsyncGenerator from collections.abc import AsyncGenerator
@ -84,11 +82,6 @@ from llama_stack.providers.utils.telemetry import tracing
from .persistence import AgentPersistence from .persistence import AgentPersistence
from .safety import SafetyException, ShieldRunnerMixin from .safety import SafetyException, ShieldRunnerMixin
def make_random_string(length: int = 8):
return "".join(secrets.choice(string.ascii_letters + string.digits) for _ in range(length))
TOOLS_ATTACHMENT_KEY_REGEX = re.compile(r"__tools_attachment__=(\{.*?\})") TOOLS_ATTACHMENT_KEY_REGEX = re.compile(r"__tools_attachment__=(\{.*?\})")
MEMORY_QUERY_TOOL = "knowledge_search" MEMORY_QUERY_TOOL = "knowledge_search"
WEB_SEARCH_TOOL = "web_search" WEB_SEARCH_TOOL = "web_search"

View file

@ -269,7 +269,7 @@ class OpenAIResponsesImpl:
response_tools=tools, response_tools=tools,
temperature=temperature, temperature=temperature,
response_format=response_format, response_format=response_format,
inputs=input, inputs=all_input,
) )
# Create orchestrator and delegate streaming logic # Create orchestrator and delegate streaming logic

View file

@ -175,6 +175,8 @@ class StreamingResponseOrchestrator:
): ):
yield stream_event yield stream_event
messages = next_turn_messages
if not function_tool_calls and not non_function_tool_calls: if not function_tool_calls and not non_function_tool_calls:
break break
@ -187,9 +189,7 @@ class StreamingResponseOrchestrator:
logger.info(f"Exiting inference loop since iteration count({n_iter}) exceeds {self.max_infer_iters=}") logger.info(f"Exiting inference loop since iteration count({n_iter}) exceeds {self.max_infer_iters=}")
break break
messages = next_turn_messages self.final_messages = messages.copy()
self.final_messages = messages.copy() + [current_response.choices[0].message]
# Create final response # Create final response
final_response = OpenAIResponseObject( final_response = OpenAIResponseObject(
@ -232,9 +232,11 @@ class StreamingResponseOrchestrator:
non_function_tool_calls.append(tool_call) non_function_tool_calls.append(tool_call)
else: else:
logger.info(f"Approval denied for {tool_call.id} on {tool_call.function.name}") logger.info(f"Approval denied for {tool_call.id} on {tool_call.function.name}")
next_turn_messages.pop()
else: else:
logger.info(f"Requesting approval for {tool_call.id} on {tool_call.function.name}") logger.info(f"Requesting approval for {tool_call.id} on {tool_call.function.name}")
approvals.append(tool_call) approvals.append(tool_call)
next_turn_messages.pop()
else: else:
non_function_tool_calls.append(tool_call) non_function_tool_calls.append(tool_call)

View file

@ -8,8 +8,6 @@ import asyncio
import base64 import base64
import io import io
import mimetypes import mimetypes
import secrets
import string
from typing import Any from typing import Any
import httpx import httpx
@ -52,10 +50,6 @@ from .context_retriever import generate_rag_query
log = get_logger(name=__name__, category="tool_runtime") log = get_logger(name=__name__, category="tool_runtime")
def make_random_string(length: int = 8):
return "".join(secrets.choice(string.ascii_letters + string.digits) for _ in range(length))
async def raw_data_from_doc(doc: RAGDocument) -> tuple[bytes, str]: async def raw_data_from_doc(doc: RAGDocument) -> tuple[bytes, str]:
"""Get raw binary data and mime type from a RAGDocument for file upload.""" """Get raw binary data and mime type from a RAGDocument for file upload."""
if isinstance(doc.content, URL): if isinstance(doc.content, URL):

View file

@ -268,7 +268,7 @@ Available Models:
api=Api.inference, api=Api.inference,
adapter_type="watsonx", adapter_type="watsonx",
provider_type="remote::watsonx", provider_type="remote::watsonx",
pip_packages=["ibm_watsonx_ai"], pip_packages=["litellm"],
module="llama_stack.providers.remote.inference.watsonx", module="llama_stack.providers.remote.inference.watsonx",
config_class="llama_stack.providers.remote.inference.watsonx.WatsonXConfig", config_class="llama_stack.providers.remote.inference.watsonx.WatsonXConfig",
provider_data_validator="llama_stack.providers.remote.inference.watsonx.WatsonXProviderDataValidator", provider_data_validator="llama_stack.providers.remote.inference.watsonx.WatsonXProviderDataValidator",

View file

@ -1,217 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import warnings
from collections.abc import AsyncGenerator
from typing import Any
from openai import AsyncStream
from openai.types.chat.chat_completion import (
Choice as OpenAIChoice,
)
from openai.types.completion import Completion as OpenAICompletion
from openai.types.completion_choice import Logprobs as OpenAICompletionLogprobs
from llama_stack.apis.inference import (
ChatCompletionRequest,
CompletionRequest,
CompletionResponse,
CompletionResponseStreamChunk,
GreedySamplingStrategy,
JsonSchemaResponseFormat,
TokenLogProbs,
TopKSamplingStrategy,
TopPSamplingStrategy,
)
from llama_stack.providers.utils.inference.openai_compat import (
_convert_openai_finish_reason,
convert_message_to_openai_dict_new,
convert_tooldef_to_openai_tool,
)
async def convert_chat_completion_request(
request: ChatCompletionRequest,
n: int = 1,
) -> dict:
"""
Convert a ChatCompletionRequest to an OpenAI API-compatible dictionary.
"""
# model -> model
# messages -> messages
# sampling_params TODO(mattf): review strategy
# strategy=greedy -> nvext.top_k = -1, temperature = temperature
# strategy=top_p -> nvext.top_k = -1, top_p = top_p
# strategy=top_k -> nvext.top_k = top_k
# temperature -> temperature
# top_p -> top_p
# top_k -> nvext.top_k
# max_tokens -> max_tokens
# repetition_penalty -> nvext.repetition_penalty
# response_format -> GrammarResponseFormat TODO(mf)
# response_format -> JsonSchemaResponseFormat: response_format = "json_object" & nvext["guided_json"] = json_schema
# tools -> tools
# tool_choice ("auto", "required") -> tool_choice
# tool_prompt_format -> TBD
# stream -> stream
# logprobs -> logprobs
if request.response_format and not isinstance(request.response_format, JsonSchemaResponseFormat):
raise ValueError(
f"Unsupported response format: {request.response_format}. Only JsonSchemaResponseFormat is supported."
)
nvext = {}
payload: dict[str, Any] = dict(
model=request.model,
messages=[await convert_message_to_openai_dict_new(message) for message in request.messages],
stream=request.stream,
n=n,
extra_body=dict(nvext=nvext),
extra_headers={
b"User-Agent": b"llama-stack: nvidia-inference-adapter",
},
)
if request.response_format:
# server bug - setting guided_json changes the behavior of response_format resulting in an error
# payload.update(response_format="json_object")
nvext.update(guided_json=request.response_format.json_schema)
if request.tools:
payload.update(tools=[convert_tooldef_to_openai_tool(tool) for tool in request.tools])
if request.tool_config.tool_choice:
payload.update(
tool_choice=request.tool_config.tool_choice.value
) # we cannot include tool_choice w/o tools, server will complain
if request.logprobs:
payload.update(logprobs=True)
payload.update(top_logprobs=request.logprobs.top_k)
if request.sampling_params:
nvext.update(repetition_penalty=request.sampling_params.repetition_penalty)
if request.sampling_params.max_tokens:
payload.update(max_tokens=request.sampling_params.max_tokens)
strategy = request.sampling_params.strategy
if isinstance(strategy, TopPSamplingStrategy):
nvext.update(top_k=-1)
payload.update(top_p=strategy.top_p)
payload.update(temperature=strategy.temperature)
elif isinstance(strategy, TopKSamplingStrategy):
if strategy.top_k != -1 and strategy.top_k < 1:
warnings.warn("top_k must be -1 or >= 1", stacklevel=2)
nvext.update(top_k=strategy.top_k)
elif isinstance(strategy, GreedySamplingStrategy):
nvext.update(top_k=-1)
else:
raise ValueError(f"Unsupported sampling strategy: {strategy}")
return payload
def convert_completion_request(
request: CompletionRequest,
n: int = 1,
) -> dict:
"""
Convert a ChatCompletionRequest to an OpenAI API-compatible dictionary.
"""
# model -> model
# prompt -> prompt
# sampling_params TODO(mattf): review strategy
# strategy=greedy -> nvext.top_k = -1, temperature = temperature
# strategy=top_p -> nvext.top_k = -1, top_p = top_p
# strategy=top_k -> nvext.top_k = top_k
# temperature -> temperature
# top_p -> top_p
# top_k -> nvext.top_k
# max_tokens -> max_tokens
# repetition_penalty -> nvext.repetition_penalty
# response_format -> nvext.guided_json
# stream -> stream
# logprobs.top_k -> logprobs
nvext = {}
payload: dict[str, Any] = dict(
model=request.model,
prompt=request.content,
stream=request.stream,
extra_body=dict(nvext=nvext),
extra_headers={
b"User-Agent": b"llama-stack: nvidia-inference-adapter",
},
n=n,
)
if request.response_format:
# this is not openai compliant, it is a nim extension
nvext.update(guided_json=request.response_format.json_schema)
if request.logprobs:
payload.update(logprobs=request.logprobs.top_k)
if request.sampling_params:
nvext.update(repetition_penalty=request.sampling_params.repetition_penalty)
if request.sampling_params.max_tokens:
payload.update(max_tokens=request.sampling_params.max_tokens)
if request.sampling_params.strategy == "top_p":
nvext.update(top_k=-1)
payload.update(top_p=request.sampling_params.top_p)
elif request.sampling_params.strategy == "top_k":
if request.sampling_params.top_k != -1 and request.sampling_params.top_k < 1:
warnings.warn("top_k must be -1 or >= 1", stacklevel=2)
nvext.update(top_k=request.sampling_params.top_k)
elif request.sampling_params.strategy == "greedy":
nvext.update(top_k=-1)
payload.update(temperature=request.sampling_params.temperature)
return payload
def _convert_openai_completion_logprobs(
logprobs: OpenAICompletionLogprobs | None,
) -> list[TokenLogProbs] | None:
"""
Convert an OpenAI CompletionLogprobs into a list of TokenLogProbs.
"""
if not logprobs:
return None
return [TokenLogProbs(logprobs_by_token=logprobs) for logprobs in logprobs.top_logprobs]
def convert_openai_completion_choice(
choice: OpenAIChoice,
) -> CompletionResponse:
"""
Convert an OpenAI Completion Choice into a CompletionResponse.
"""
return CompletionResponse(
content=choice.text,
stop_reason=_convert_openai_finish_reason(choice.finish_reason),
logprobs=_convert_openai_completion_logprobs(choice.logprobs),
)
async def convert_openai_completion_stream(
stream: AsyncStream[OpenAICompletion],
) -> AsyncGenerator[CompletionResponse, None]:
"""
Convert a stream of OpenAI Completions into a stream
of ChatCompletionResponseStreamChunks.
"""
async for chunk in stream:
choice = chunk.choices[0]
yield CompletionResponseStreamChunk(
delta=choice.text,
stop_reason=_convert_openai_finish_reason(choice.finish_reason),
logprobs=_convert_openai_completion_logprobs(choice.logprobs),
)

View file

@ -4,53 +4,8 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import httpx
from llama_stack.log import get_logger
from . import NVIDIAConfig from . import NVIDIAConfig
logger = get_logger(name=__name__, category="inference::nvidia")
def _is_nvidia_hosted(config: NVIDIAConfig) -> bool: def _is_nvidia_hosted(config: NVIDIAConfig) -> bool:
return "integrate.api.nvidia.com" in config.url return "integrate.api.nvidia.com" in config.url
async def _get_health(url: str) -> tuple[bool, bool]:
"""
Query {url}/v1/health/{live,ready} to check if the server is running and ready
Args:
url (str): URL of the server
Returns:
Tuple[bool, bool]: (is_live, is_ready)
"""
async with httpx.AsyncClient() as client:
live = await client.get(f"{url}/v1/health/live")
ready = await client.get(f"{url}/v1/health/ready")
return live.status_code == 200, ready.status_code == 200
async def check_health(config: NVIDIAConfig) -> None:
"""
Check if the server is running and ready
Args:
url (str): URL of the server
Raises:
RuntimeError: If the server is not running or ready
"""
if not _is_nvidia_hosted(config):
logger.info("Checking NVIDIA NIM health...")
try:
is_live, is_ready = await _get_health(config.url)
if not is_live:
raise ConnectionError("NVIDIA NIM is not running")
if not is_ready:
raise ConnectionError("NVIDIA NIM is not ready")
# TODO(mf): should we wait for the server to be ready?
except httpx.ConnectError as e:
raise ConnectionError(f"Failed to connect to NVIDIA NIM: {e}") from e

View file

@ -4,19 +4,12 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from llama_stack.apis.inference import Inference
from .config import WatsonXConfig from .config import WatsonXConfig
async def get_adapter_impl(config: WatsonXConfig, _deps) -> Inference: async def get_adapter_impl(config: WatsonXConfig, _deps):
# import dynamically so `llama stack build` does not fail due to missing dependencies # import dynamically so the import is used only when it is needed
from .watsonx import WatsonXInferenceAdapter from .watsonx import WatsonXInferenceAdapter
if not isinstance(config, WatsonXConfig):
raise RuntimeError(f"Unexpected config type: {type(config)}")
adapter = WatsonXInferenceAdapter(config) adapter = WatsonXInferenceAdapter(config)
return adapter return adapter
__all__ = ["get_adapter_impl", "WatsonXConfig"]

View file

@ -7,16 +7,18 @@
import os import os
from typing import Any from typing import Any
from pydantic import BaseModel, Field, SecretStr from pydantic import BaseModel, ConfigDict, Field, SecretStr
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack.schema_utils import json_schema_type from llama_stack.schema_utils import json_schema_type
class WatsonXProviderDataValidator(BaseModel): class WatsonXProviderDataValidator(BaseModel):
url: str model_config = ConfigDict(
api_key: str from_attributes=True,
project_id: str extra="forbid",
)
watsonx_api_key: str | None
@json_schema_type @json_schema_type
@ -25,13 +27,17 @@ class WatsonXConfig(RemoteInferenceProviderConfig):
default_factory=lambda: os.getenv("WATSONX_BASE_URL", "https://us-south.ml.cloud.ibm.com"), default_factory=lambda: os.getenv("WATSONX_BASE_URL", "https://us-south.ml.cloud.ibm.com"),
description="A base url for accessing the watsonx.ai", description="A base url for accessing the watsonx.ai",
) )
# This seems like it should be required, but none of the other remote inference
# providers require it, so this is optional here too for consistency.
# The OpenAIConfig uses default=None instead, so this is following that precedent.
api_key: SecretStr | None = Field( api_key: SecretStr | None = Field(
default_factory=lambda: os.getenv("WATSONX_API_KEY"), default=None,
description="The watsonx API key", description="The watsonx.ai API key",
) )
# As above, this is optional here too for consistency.
project_id: str | None = Field( project_id: str | None = Field(
default_factory=lambda: os.getenv("WATSONX_PROJECT_ID"), default=None,
description="The Project ID key", description="The watsonx.ai project ID",
) )
timeout: int = Field( timeout: int = Field(
default=60, default=60,
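For reference, a hedged sketch of constructing the updated config directly. The field names follow the diff above; reading the WATSONX_* variables here is just one way for a caller to supply values now that the config class no longer pulls `api_key`/`project_id` from the environment itself.

```python
import os

from pydantic import SecretStr

from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig

# api_key and project_id no longer default from os.environ inside the config class,
# so the caller (or the run.yaml ${env...} substitution) supplies them explicitly.
config = WatsonXConfig(
    url=os.getenv("WATSONX_BASE_URL", "https://us-south.ml.cloud.ibm.com"),
    api_key=SecretStr(os.environ["WATSONX_API_KEY"]) if os.getenv("WATSONX_API_KEY") else None,
    project_id=os.getenv("WATSONX_PROJECT_ID"),
)
```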

View file

@ -1,47 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.models.llama.sku_types import CoreModelId
from llama_stack.providers.utils.inference.model_registry import build_hf_repo_model_entry
MODEL_ENTRIES = [
build_hf_repo_model_entry(
"meta-llama/llama-3-3-70b-instruct",
CoreModelId.llama3_3_70b_instruct.value,
),
build_hf_repo_model_entry(
"meta-llama/llama-2-13b-chat",
CoreModelId.llama2_13b.value,
),
build_hf_repo_model_entry(
"meta-llama/llama-3-1-70b-instruct",
CoreModelId.llama3_1_70b_instruct.value,
),
build_hf_repo_model_entry(
"meta-llama/llama-3-1-8b-instruct",
CoreModelId.llama3_1_8b_instruct.value,
),
build_hf_repo_model_entry(
"meta-llama/llama-3-2-11b-vision-instruct",
CoreModelId.llama3_2_11b_vision_instruct.value,
),
build_hf_repo_model_entry(
"meta-llama/llama-3-2-1b-instruct",
CoreModelId.llama3_2_1b_instruct.value,
),
build_hf_repo_model_entry(
"meta-llama/llama-3-2-3b-instruct",
CoreModelId.llama3_2_3b_instruct.value,
),
build_hf_repo_model_entry(
"meta-llama/llama-3-2-90b-vision-instruct",
CoreModelId.llama3_2_90b_vision_instruct.value,
),
build_hf_repo_model_entry(
"meta-llama/llama-guard-3-11b-vision",
CoreModelId.llama_guard_3_11b_vision.value,
),
]

View file

@ -4,240 +4,120 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from collections.abc import AsyncGenerator, AsyncIterator
from typing import Any from typing import Any
from ibm_watsonx_ai.foundation_models import Model import requests
from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams
from openai import AsyncOpenAI
from llama_stack.apis.inference import ( from llama_stack.apis.inference import ChatCompletionRequest
ChatCompletionRequest, from llama_stack.apis.models import Model
CompletionRequest, from llama_stack.apis.models.models import ModelType
GreedySamplingStrategy, from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig
Inference, from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
OpenAIChatCompletion,
OpenAIChatCompletionChunk,
OpenAICompletion,
OpenAIEmbeddingsResponse,
OpenAIMessageParam,
OpenAIResponseFormatParam,
TopKSamplingStrategy,
TopPSamplingStrategy,
)
from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
from llama_stack.providers.utils.inference.openai_compat import (
prepare_openai_completion_params,
)
from llama_stack.providers.utils.inference.prompt_adapter import (
chat_completion_request_to_prompt,
completion_request_to_prompt,
request_has_media,
)
from . import WatsonXConfig
from .models import MODEL_ENTRIES
logger = get_logger(name=__name__, category="inference::watsonx")
# Note on structured output class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
# WatsonX returns responses with a json embedded into a string. _model_cache: dict[str, Model] = {}
# Examples:
# ChatCompletionResponse(completion_message=CompletionMessage(content='```json\n{\n def __init__(self, config: WatsonXConfig):
# "first_name": "Michael",\n "last_name": "Jordan",\n'...) LiteLLMOpenAIMixin.__init__(
# Not even a valid JSON, but we can still extract the JSON from the content self,
litellm_provider_name="watsonx",
api_key_from_config=config.api_key.get_secret_value() if config.api_key else None,
provider_data_api_key_field="watsonx_api_key",
)
self.available_models = None
self.config = config
# CompletionResponse(content=' \nThe best answer is $\\boxed{\\{"name": "Michael Jordan", def get_base_url(self) -> str:
# "year_born": "1963", "year_retired": "2003"\\}}$') return self.config.url
# Find the start of the boxed content
async def _get_params(self, request: ChatCompletionRequest) -> dict[str, Any]:
# Get base parameters from parent
params = await super()._get_params(request)
class WatsonXInferenceAdapter(Inference, ModelRegistryHelper): # Add watsonx.ai specific parameters
def __init__(self, config: WatsonXConfig) -> None: params["project_id"] = self.config.project_id
ModelRegistryHelper.__init__(self, model_entries=MODEL_ENTRIES) params["time_limit"] = self.config.timeout
logger.info(f"Initializing watsonx InferenceAdapter({config.url})...")
self._config = config
self._openai_client: AsyncOpenAI | None = None
self._project_id = self._config.project_id
def _get_client(self, model_id) -> Model:
config_api_key = self._config.api_key.get_secret_value() if self._config.api_key else None
config_url = self._config.url
project_id = self._config.project_id
credentials = {"url": config_url, "apikey": config_api_key}
return Model(model_id=model_id, credentials=credentials, project_id=project_id)
def _get_openai_client(self) -> AsyncOpenAI:
if not self._openai_client:
self._openai_client = AsyncOpenAI(
base_url=f"{self._config.url}/openai/v1",
api_key=self._config.api_key,
)
return self._openai_client
async def _get_params(self, request: ChatCompletionRequest | CompletionRequest) -> dict:
input_dict = {"params": {}}
media_present = request_has_media(request)
llama_model = self.get_llama_model(request.model)
if isinstance(request, ChatCompletionRequest):
input_dict["prompt"] = await chat_completion_request_to_prompt(request, llama_model)
else:
assert not media_present, "Together does not support media for Completion requests"
input_dict["prompt"] = await completion_request_to_prompt(request)
if request.sampling_params:
if request.sampling_params.strategy:
input_dict["params"][GenParams.DECODING_METHOD] = request.sampling_params.strategy.type
if request.sampling_params.max_tokens:
input_dict["params"][GenParams.MAX_NEW_TOKENS] = request.sampling_params.max_tokens
if request.sampling_params.repetition_penalty:
input_dict["params"][GenParams.REPETITION_PENALTY] = request.sampling_params.repetition_penalty
if isinstance(request.sampling_params.strategy, TopPSamplingStrategy):
input_dict["params"][GenParams.TOP_P] = request.sampling_params.strategy.top_p
input_dict["params"][GenParams.TEMPERATURE] = request.sampling_params.strategy.temperature
if isinstance(request.sampling_params.strategy, TopKSamplingStrategy):
input_dict["params"][GenParams.TOP_K] = request.sampling_params.strategy.top_k
if isinstance(request.sampling_params.strategy, GreedySamplingStrategy):
input_dict["params"][GenParams.TEMPERATURE] = 0.0
input_dict["params"][GenParams.STOP_SEQUENCES] = ["<|endoftext|>"]
params = {
**input_dict,
}
return params return params
async def openai_embeddings( # Copied from OpenAIMixin
self, async def check_model_availability(self, model: str) -> bool:
model: str, """
input: str | list[str], Check if a specific model is available from the provider's /v1/models.
encoding_format: str | None = "float",
dimensions: int | None = None,
user: str | None = None,
) -> OpenAIEmbeddingsResponse:
raise NotImplementedError()
async def openai_completion( :param model: The model identifier to check.
self, :return: True if the model is available dynamically, False otherwise.
model: str, """
prompt: str | list[str] | list[int] | list[list[int]], if not self._model_cache:
best_of: int | None = None, await self.list_models()
echo: bool | None = None, return model in self._model_cache
frequency_penalty: float | None = None,
logit_bias: dict[str, float] | None = None,
logprobs: bool | None = None,
max_tokens: int | None = None,
n: int | None = None,
presence_penalty: float | None = None,
seed: int | None = None,
stop: str | list[str] | None = None,
stream: bool | None = None,
stream_options: dict[str, Any] | None = None,
temperature: float | None = None,
top_p: float | None = None,
user: str | None = None,
guided_choice: list[str] | None = None,
prompt_logprobs: int | None = None,
suffix: str | None = None,
) -> OpenAICompletion:
model_obj = await self.model_store.get_model(model)
params = await prepare_openai_completion_params(
model=model_obj.provider_resource_id,
prompt=prompt,
best_of=best_of,
echo=echo,
frequency_penalty=frequency_penalty,
logit_bias=logit_bias,
logprobs=logprobs,
max_tokens=max_tokens,
n=n,
presence_penalty=presence_penalty,
seed=seed,
stop=stop,
stream=stream,
stream_options=stream_options,
temperature=temperature,
top_p=top_p,
user=user,
)
return await self._get_openai_client().completions.create(**params) # type: ignore
async def openai_chat_completion( async def list_models(self) -> list[Model] | None:
self, self._model_cache = {}
model: str, models = []
messages: list[OpenAIMessageParam], for model_spec in self._get_model_specs():
frequency_penalty: float | None = None, functions = [f["id"] for f in model_spec.get("functions", [])]
function_call: str | dict[str, Any] | None = None, # Format: {"embedding_dimension": 1536, "context_length": 8192}
functions: list[dict[str, Any]] | None = None,
logit_bias: dict[str, float] | None = None,
logprobs: bool | None = None,
max_completion_tokens: int | None = None,
max_tokens: int | None = None,
n: int | None = None,
parallel_tool_calls: bool | None = None,
presence_penalty: float | None = None,
response_format: OpenAIResponseFormatParam | None = None,
seed: int | None = None,
stop: str | list[str] | None = None,
stream: bool | None = None,
stream_options: dict[str, Any] | None = None,
temperature: float | None = None,
tool_choice: str | dict[str, Any] | None = None,
tools: list[dict[str, Any]] | None = None,
top_logprobs: int | None = None,
top_p: float | None = None,
user: str | None = None,
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
model_obj = await self.model_store.get_model(model)
params = await prepare_openai_completion_params(
model=model_obj.provider_resource_id,
messages=messages,
frequency_penalty=frequency_penalty,
function_call=function_call,
functions=functions,
logit_bias=logit_bias,
logprobs=logprobs,
max_completion_tokens=max_completion_tokens,
max_tokens=max_tokens,
n=n,
parallel_tool_calls=parallel_tool_calls,
presence_penalty=presence_penalty,
response_format=response_format,
seed=seed,
stop=stop,
stream=stream,
stream_options=stream_options,
temperature=temperature,
tool_choice=tool_choice,
tools=tools,
top_logprobs=top_logprobs,
top_p=top_p,
user=user,
)
if params.get("stream", False):
return self._stream_openai_chat_completion(params)
return await self._get_openai_client().chat.completions.create(**params) # type: ignore
async def _stream_openai_chat_completion(self, params: dict) -> AsyncGenerator: # Example of an embedding model:
# watsonx.ai sometimes adds usage data to the stream # {'model_id': 'ibm/granite-embedding-278m-multilingual',
include_usage = False # 'label': 'granite-embedding-278m-multilingual',
if params.get("stream_options", None): # 'model_limits': {'max_sequence_length': 512, 'embedding_dimension': 768},
include_usage = params["stream_options"].get("include_usage", False) # ...
stream = await self._get_openai_client().chat.completions.create(**params) provider_resource_id = f"{self.__provider_id__}/{model_spec['model_id']}"
if "embedding" in functions:
embedding_dimension = model_spec["model_limits"]["embedding_dimension"]
context_length = model_spec["model_limits"]["max_sequence_length"]
embedding_metadata = {
"embedding_dimension": embedding_dimension,
"context_length": context_length,
}
model = Model(
identifier=model_spec["model_id"],
provider_resource_id=provider_resource_id,
provider_id=self.__provider_id__,
metadata=embedding_metadata,
model_type=ModelType.embedding,
)
self._model_cache[provider_resource_id] = model
models.append(model)
if "text_chat" in functions:
model = Model(
identifier=model_spec["model_id"],
provider_resource_id=provider_resource_id,
provider_id=self.__provider_id__,
metadata={},
model_type=ModelType.llm,
)
# In theory, a model could be both an embedding model and a text chat model.
# In that case, the cache ends up holding whichever Model object was registered last
# (the text chat one), while the returned list contains both the embedding Model
# object and the text chat Model object. That's fine because the cache is
# only used for check_model_availability() anyway.
self._model_cache[provider_resource_id] = model
models.append(model)
return models
seen_finish_reason = False # LiteLLM provides methods to list models for many providers, but not for watsonx.ai.
async for chunk in stream: # So we need to implement our own method to list models by calling the watsonx.ai API.
# Final usage chunk with no choices that the user didn't request, so discard def _get_model_specs(self) -> list[dict[str, Any]]:
if not include_usage and seen_finish_reason and len(chunk.choices) == 0: """
break Retrieves foundation model specifications from the watsonx.ai API.
yield chunk """
for choice in chunk.choices: url = f"{self.config.url}/ml/v1/foundation_model_specs?version=2023-10-25"
if choice.finish_reason: headers = {
seen_finish_reason = True # Note that there is no authorization header. Listing models does not require authentication.
break "Content-Type": "application/json",
}
response = requests.get(url, headers=headers)
# --- Process the Response ---
# Raise an exception for bad status codes (4xx or 5xx)
response.raise_for_status()
# If the request is successful, parse and return the JSON response.
# The response should contain a list of model specifications
response_data = response.json()
if "resources" not in response_data:
raise ValueError("Resources not found in response")
return response_data["resources"]

View file

@ -4,6 +4,8 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import base64
import struct
from collections.abc import AsyncIterator from collections.abc import AsyncIterator
from typing import Any from typing import Any
@ -16,6 +18,7 @@ from llama_stack.apis.inference import (
OpenAIChatCompletion, OpenAIChatCompletion,
OpenAIChatCompletionChunk, OpenAIChatCompletionChunk,
OpenAICompletion, OpenAICompletion,
OpenAIEmbeddingData,
OpenAIEmbeddingsResponse, OpenAIEmbeddingsResponse,
OpenAIEmbeddingUsage, OpenAIEmbeddingUsage,
OpenAIMessageParam, OpenAIMessageParam,
@ -26,7 +29,6 @@ from llama_stack.core.request_headers import NeedsRequestProviderData
from llama_stack.log import get_logger from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry
from llama_stack.providers.utils.inference.openai_compat import ( from llama_stack.providers.utils.inference.openai_compat import (
b64_encode_openai_embeddings_response,
convert_message_to_openai_dict_new, convert_message_to_openai_dict_new,
convert_tooldef_to_openai_tool, convert_tooldef_to_openai_tool,
get_sampling_options, get_sampling_options,
@ -349,3 +351,28 @@ class LiteLLMOpenAIMixin(
return False return False
return model in litellm.models_by_provider[self.litellm_provider_name] return model in litellm.models_by_provider[self.litellm_provider_name]
def b64_encode_openai_embeddings_response(
response_data: list[dict], encoding_format: str | None = "float"
) -> list[OpenAIEmbeddingData]:
"""
Process the OpenAI embeddings response to encode the embeddings in base64 format if specified.
"""
data = []
for i, embedding_data in enumerate(response_data):
if encoding_format == "base64":
byte_array = bytearray()
for embedding_value in embedding_data["embedding"]:
byte_array.extend(struct.pack("f", float(embedding_value)))
response_embedding = base64.b64encode(byte_array).decode("utf-8")
else:
response_embedding = embedding_data["embedding"]
data.append(
OpenAIEmbeddingData(
embedding=response_embedding,
index=i,
)
)
return data
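A quick round-trip sketch of the base64 packing used above, assuming native-endian float32 packing as in `struct.pack("f", ...)`:

```python
import base64
import struct

# Each float is packed as a 4-byte float32, the bytes are concatenated,
# then the whole buffer is base64-encoded.
floats = [0.1, -0.25, 0.5]
byte_array = bytearray()
for value in floats:
    byte_array.extend(struct.pack("f", float(value)))
encoded = base64.b64encode(byte_array).decode("utf-8")

# Decoding reverses the packing; values match up to float32 precision.
decoded = struct.unpack(f"{len(floats)}f", base64.b64decode(encoded))
```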

View file

@ -3,9 +3,7 @@
# #
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import base64
import json import json
import struct
import time import time
import uuid import uuid
import warnings import warnings
@ -103,7 +101,6 @@ from llama_stack.apis.inference import (
JsonSchemaResponseFormat, JsonSchemaResponseFormat,
Message, Message,
OpenAIChatCompletion, OpenAIChatCompletion,
OpenAIEmbeddingData,
OpenAIMessageParam, OpenAIMessageParam,
OpenAIResponseFormatParam, OpenAIResponseFormatParam,
SamplingParams, SamplingParams,
@ -1402,28 +1399,3 @@ def prepare_openai_embeddings_params(
params["user"] = user params["user"] = user
return params return params
def b64_encode_openai_embeddings_response(
response_data: dict, encoding_format: str | None = "float"
) -> list[OpenAIEmbeddingData]:
"""
Process the OpenAI embeddings response to encode the embeddings in base64 format if specified.
"""
data = []
for i, embedding_data in enumerate(response_data):
if encoding_format == "base64":
byte_array = bytearray()
for embedding_value in embedding_data.embedding:
byte_array.extend(struct.pack("f", float(embedding_value)))
response_embedding = base64.b64encode(byte_array).decode("utf-8")
else:
response_embedding = embedding_data.embedding
data.append(
OpenAIEmbeddingData(
embedding=response_embedding,
index=i,
)
)
return data

View file

@ -296,15 +296,14 @@ class OpenAIVectorStoreMixin(ABC):
async def shutdown(self) -> None: async def shutdown(self) -> None:
"""Clean up mixin resources including background tasks.""" """Clean up mixin resources including background tasks."""
# Cancel any running file batch tasks gracefully # Cancel any running file batch tasks gracefully
if hasattr(self, "_file_batch_tasks"): tasks_to_cancel = list(self._file_batch_tasks.items())
tasks_to_cancel = list(self._file_batch_tasks.items()) for _, task in tasks_to_cancel:
for _, task in tasks_to_cancel: if not task.done():
if not task.done(): task.cancel()
task.cancel() try:
try: await task
await task except asyncio.CancelledError:
except asyncio.CancelledError: pass
pass
@abstractmethod @abstractmethod
async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None: async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:

View file

@ -20,7 +20,6 @@ from pydantic import BaseModel
from llama_stack.apis.common.content_types import ( from llama_stack.apis.common.content_types import (
URL, URL,
InterleavedContent, InterleavedContent,
TextContentItem,
) )
from llama_stack.apis.tools import RAGDocument from llama_stack.apis.tools import RAGDocument
from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_dbs import VectorDB
@ -129,26 +128,6 @@ def content_from_data_and_mime_type(data: bytes | str, mime_type: str | None, en
return "" return ""
def concat_interleaved_content(content: list[InterleavedContent]) -> InterleavedContent:
"""concatenate interleaved content into a single list. ensure that 'str's are converted to TextContentItem when in a list"""
ret = []
def _process(c):
if isinstance(c, str):
ret.append(TextContentItem(text=c))
elif isinstance(c, list):
for item in c:
_process(item)
else:
ret.append(c)
for c in content:
_process(c)
return ret
async def content_from_doc(doc: RAGDocument) -> str: async def content_from_doc(doc: RAGDocument) -> str:
if isinstance(doc.content, URL): if isinstance(doc.content, URL):
if doc.content.uri.startswith("data:"): if doc.content.uri.startswith("data:"):

View file

@ -18,6 +18,8 @@ from llama_stack.providers.remote.inference.openai.config import OpenAIConfig
from llama_stack.providers.remote.inference.openai.openai import OpenAIInferenceAdapter from llama_stack.providers.remote.inference.openai.openai import OpenAIInferenceAdapter
from llama_stack.providers.remote.inference.together.config import TogetherImplConfig from llama_stack.providers.remote.inference.together.config import TogetherImplConfig
from llama_stack.providers.remote.inference.together.together import TogetherInferenceAdapter from llama_stack.providers.remote.inference.together.together import TogetherInferenceAdapter
from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig
from llama_stack.providers.remote.inference.watsonx.watsonx import WatsonXInferenceAdapter
@pytest.mark.parametrize( @pytest.mark.parametrize(
@ -58,3 +60,29 @@ def test_openai_provider_data_used(config_cls, adapter_cls, provider_data_valida
{"x-llamastack-provider-data": json.dumps({inference_adapter.provider_data_api_key_field: api_key})} {"x-llamastack-provider-data": json.dumps({inference_adapter.provider_data_api_key_field: api_key})}
): ):
assert inference_adapter.client.api_key == api_key assert inference_adapter.client.api_key == api_key
@pytest.mark.parametrize(
"config_cls,adapter_cls,provider_data_validator",
[
(
WatsonXConfig,
WatsonXInferenceAdapter,
"llama_stack.providers.remote.inference.watsonx.config.WatsonXProviderDataValidator",
),
],
)
def test_litellm_provider_data_used(config_cls, adapter_cls, provider_data_validator: str):
"""Validate data for LiteLLM-based providers. Similar to test_openai_provider_data_used, but without the
assumption that there is an OpenAI-compatible client object."""
inference_adapter = adapter_cls(config=config_cls())
inference_adapter.__provider_spec__ = MagicMock()
inference_adapter.__provider_spec__.provider_data_validator = provider_data_validator
for api_key in ["test1", "test2"]:
with request_provider_data_context(
{"x-llamastack-provider-data": json.dumps({inference_adapter.provider_data_api_key_field: api_key})}
):
assert inference_adapter.get_api_key() == api_key
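For reference, the shape of the per-request provider-data header exercised here; the field name matches `provider_data_api_key_field="watsonx_api_key"` set in the adapter, and the key value is a placeholder.

```python
import json

headers = {
    # Per-request override picked up by get_api_key() via the provider data validator.
    "x-llamastack-provider-data": json.dumps({"watsonx_api_key": "my-watsonx-key"})
}
```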

View file

@ -125,8 +125,15 @@ async def test_duplicate_provider_registration(cached_disk_dist_registry):
provider_resource_id="test_vector_db_2", provider_resource_id="test_vector_db_2",
provider_id="baz", # Same provider_id provider_id="baz", # Same provider_id
) )
await cached_disk_dist_registry.register(duplicate_vector_db)
# Now we expect a ValueError to be raised for duplicate registration
with pytest.raises(
ValueError,
match=r"Provider 'baz' is already registered.*Unregister the existing provider first before registering it again.",
):
await cached_disk_dist_registry.register(duplicate_vector_db)
# Verify the original registration is still intact
result = await cached_disk_dist_registry.get("vector_db", "test_vector_db_2") result = await cached_disk_dist_registry.get("vector_db", "test_vector_db_2")
assert result is not None assert result is not None
assert result.embedding_model == original_vector_db.embedding_model # Original values preserved assert result.embedding_model == original_vector_db.embedding_model # Original values preserved