Merge ed4e452de0 into sapling-pr-archive-ehhuang

This commit is contained in:
ehhuang 2025-10-08 11:39:41 -07:00 committed by GitHub
commit 08d46d6363
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
37 changed files with 242 additions and 6940 deletions

View file

@ -24,7 +24,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Stale Action
uses: actions/stale@3a9db7e6a41a89f618792c92c0e97cc736e1b13f # v10.0.0
uses: actions/stale@5f858e3efba33a5ca4407a664cc011ad407f2008 # v10.1.0
with:
stale-issue-label: 'stale'
stale-issue-message: >

View file

@ -17,8 +17,8 @@ IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
| `url` | `<class 'str'>` | No | https://us-south.ml.cloud.ibm.com | Base URL for accessing watsonx.ai |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | The watsonx API key |
| `project_id` | `str \| None` | No | | The Project ID key |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | The watsonx.ai API key |
| `project_id` | `str \| None` | No | | The watsonx.ai project ID |
| `timeout` | `<class 'int'>` | No | 60 | Timeout for the HTTP requests |
## Sample Configuration

View file

@ -3526,343 +3526,6 @@
},
"deprecated": true
}
},
"/v1/telemetry/metrics/{metric_name}": {
"post": {
"responses": {
"200": {
"description": "A QueryMetricsResponse.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QueryMetricsResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Query metrics.",
"description": "Query metrics.",
"parameters": [
{
"name": "metric_name",
"in": "path",
"description": "The name of the metric to query.",
"required": true,
"schema": {
"type": "string"
}
}
],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QueryMetricsRequest"
}
}
},
"required": true
},
"deprecated": true
}
},
"/v1/telemetry/spans": {
"post": {
"responses": {
"200": {
"description": "A QuerySpansResponse.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QuerySpansResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Query spans.",
"description": "Query spans.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QuerySpansRequest"
}
}
},
"required": true
},
"deprecated": true
}
},
"/v1/telemetry/spans/export": {
"post": {
"responses": {
"200": {
"description": "OK"
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Save spans to a dataset.",
"description": "Save spans to a dataset.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/SaveSpansToDatasetRequest"
}
}
},
"required": true
},
"deprecated": true
}
},
"/v1/telemetry/spans/{span_id}/tree": {
"post": {
"responses": {
"200": {
"description": "A QuerySpanTreeResponse.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QuerySpanTreeResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Get a span tree by its ID.",
"description": "Get a span tree by its ID.",
"parameters": [
{
"name": "span_id",
"in": "path",
"description": "The ID of the span to get the tree from.",
"required": true,
"schema": {
"type": "string"
}
}
],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/GetSpanTreeRequest"
}
}
},
"required": true
},
"deprecated": true
}
},
"/v1/telemetry/traces": {
"post": {
"responses": {
"200": {
"description": "A QueryTracesResponse.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QueryTracesResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Query traces.",
"description": "Query traces.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QueryTracesRequest"
}
}
},
"required": true
},
"deprecated": true
}
},
"/v1/telemetry/traces/{trace_id}": {
"get": {
"responses": {
"200": {
"description": "A Trace.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Trace"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Get a trace by its ID.",
"description": "Get a trace by its ID.",
"parameters": [
{
"name": "trace_id",
"in": "path",
"description": "The ID of the trace to get.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": true
}
},
"/v1/telemetry/traces/{trace_id}/spans/{span_id}": {
"get": {
"responses": {
"200": {
"description": "A Span.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Span"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Get a span by its ID.",
"description": "Get a span by its ID.",
"parameters": [
{
"name": "trace_id",
"in": "path",
"description": "The ID of the trace to get the span from.",
"required": true,
"schema": {
"type": "string"
}
},
{
"name": "span_id",
"in": "path",
"description": "The ID of the span to get.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": true
}
}
},
"jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema",
@ -12716,561 +12379,6 @@
"logger_config"
],
"title": "SupervisedFineTuneRequest"
},
"QueryMetricsRequest": {
"type": "object",
"properties": {
"start_time": {
"type": "integer",
"description": "The start time of the metric to query."
},
"end_time": {
"type": "integer",
"description": "The end time of the metric to query."
},
"granularity": {
"type": "string",
"description": "The granularity of the metric to query."
},
"query_type": {
"type": "string",
"enum": [
"range",
"instant"
],
"description": "The type of query to perform."
},
"label_matchers": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "The name of the label to match"
},
"value": {
"type": "string",
"description": "The value to match against"
},
"operator": {
"type": "string",
"enum": [
"=",
"!=",
"=~",
"!~"
],
"description": "The comparison operator to use for matching",
"default": "="
}
},
"additionalProperties": false,
"required": [
"name",
"value",
"operator"
],
"title": "MetricLabelMatcher",
"description": "A matcher for filtering metrics by label values."
},
"description": "The label matchers to apply to the metric."
}
},
"additionalProperties": false,
"required": [
"start_time",
"query_type"
],
"title": "QueryMetricsRequest"
},
"MetricDataPoint": {
"type": "object",
"properties": {
"timestamp": {
"type": "integer",
"description": "Unix timestamp when the metric value was recorded"
},
"value": {
"type": "number",
"description": "The numeric value of the metric at this timestamp"
},
"unit": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"timestamp",
"value",
"unit"
],
"title": "MetricDataPoint",
"description": "A single data point in a metric time series."
},
"MetricLabel": {
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "The name of the label"
},
"value": {
"type": "string",
"description": "The value of the label"
}
},
"additionalProperties": false,
"required": [
"name",
"value"
],
"title": "MetricLabel",
"description": "A label associated with a metric."
},
"MetricSeries": {
"type": "object",
"properties": {
"metric": {
"type": "string",
"description": "The name of the metric"
},
"labels": {
"type": "array",
"items": {
"$ref": "#/components/schemas/MetricLabel"
},
"description": "List of labels associated with this metric series"
},
"values": {
"type": "array",
"items": {
"$ref": "#/components/schemas/MetricDataPoint"
},
"description": "List of data points in chronological order"
}
},
"additionalProperties": false,
"required": [
"metric",
"labels",
"values"
],
"title": "MetricSeries",
"description": "A time series of metric data points."
},
"QueryMetricsResponse": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"$ref": "#/components/schemas/MetricSeries"
},
"description": "List of metric series matching the query criteria"
}
},
"additionalProperties": false,
"required": [
"data"
],
"title": "QueryMetricsResponse",
"description": "Response containing metric time series data."
},
"QueryCondition": {
"type": "object",
"properties": {
"key": {
"type": "string",
"description": "The attribute key to filter on"
},
"op": {
"$ref": "#/components/schemas/QueryConditionOp",
"description": "The comparison operator to apply"
},
"value": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
],
"description": "The value to compare against"
}
},
"additionalProperties": false,
"required": [
"key",
"op",
"value"
],
"title": "QueryCondition",
"description": "A condition for filtering query results."
},
"QueryConditionOp": {
"type": "string",
"enum": [
"eq",
"ne",
"gt",
"lt"
],
"title": "QueryConditionOp",
"description": "Comparison operators for query conditions."
},
"QuerySpansRequest": {
"type": "object",
"properties": {
"attribute_filters": {
"type": "array",
"items": {
"$ref": "#/components/schemas/QueryCondition"
},
"description": "The attribute filters to apply to the spans."
},
"attributes_to_return": {
"type": "array",
"items": {
"type": "string"
},
"description": "The attributes to return in the spans."
},
"max_depth": {
"type": "integer",
"description": "The maximum depth of the tree."
}
},
"additionalProperties": false,
"required": [
"attribute_filters",
"attributes_to_return"
],
"title": "QuerySpansRequest"
},
"Span": {
"type": "object",
"properties": {
"span_id": {
"type": "string",
"description": "Unique identifier for the span"
},
"trace_id": {
"type": "string",
"description": "Unique identifier for the trace this span belongs to"
},
"parent_span_id": {
"type": "string",
"description": "(Optional) Unique identifier for the parent span, if this is a child span"
},
"name": {
"type": "string",
"description": "Human-readable name describing the operation this span represents"
},
"start_time": {
"type": "string",
"format": "date-time",
"description": "Timestamp when the operation began"
},
"end_time": {
"type": "string",
"format": "date-time",
"description": "(Optional) Timestamp when the operation finished, if completed"
},
"attributes": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
},
"description": "(Optional) Key-value pairs containing additional metadata about the span"
}
},
"additionalProperties": false,
"required": [
"span_id",
"trace_id",
"name",
"start_time"
],
"title": "Span",
"description": "A span representing a single operation within a trace."
},
"QuerySpansResponse": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Span"
},
"description": "List of spans matching the query criteria"
}
},
"additionalProperties": false,
"required": [
"data"
],
"title": "QuerySpansResponse",
"description": "Response containing a list of spans."
},
"SaveSpansToDatasetRequest": {
"type": "object",
"properties": {
"attribute_filters": {
"type": "array",
"items": {
"$ref": "#/components/schemas/QueryCondition"
},
"description": "The attribute filters to apply to the spans."
},
"attributes_to_save": {
"type": "array",
"items": {
"type": "string"
},
"description": "The attributes to save to the dataset."
},
"dataset_id": {
"type": "string",
"description": "The ID of the dataset to save the spans to."
},
"max_depth": {
"type": "integer",
"description": "The maximum depth of the tree."
}
},
"additionalProperties": false,
"required": [
"attribute_filters",
"attributes_to_save",
"dataset_id"
],
"title": "SaveSpansToDatasetRequest"
},
"GetSpanTreeRequest": {
"type": "object",
"properties": {
"attributes_to_return": {
"type": "array",
"items": {
"type": "string"
},
"description": "The attributes to return in the tree."
},
"max_depth": {
"type": "integer",
"description": "The maximum depth of the tree."
}
},
"additionalProperties": false,
"title": "GetSpanTreeRequest"
},
"SpanStatus": {
"type": "string",
"enum": [
"ok",
"error"
],
"title": "SpanStatus",
"description": "The status of a span indicating whether it completed successfully or with an error."
},
"SpanWithStatus": {
"type": "object",
"properties": {
"span_id": {
"type": "string",
"description": "Unique identifier for the span"
},
"trace_id": {
"type": "string",
"description": "Unique identifier for the trace this span belongs to"
},
"parent_span_id": {
"type": "string",
"description": "(Optional) Unique identifier for the parent span, if this is a child span"
},
"name": {
"type": "string",
"description": "Human-readable name describing the operation this span represents"
},
"start_time": {
"type": "string",
"format": "date-time",
"description": "Timestamp when the operation began"
},
"end_time": {
"type": "string",
"format": "date-time",
"description": "(Optional) Timestamp when the operation finished, if completed"
},
"attributes": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
},
"description": "(Optional) Key-value pairs containing additional metadata about the span"
},
"status": {
"$ref": "#/components/schemas/SpanStatus",
"description": "(Optional) The current status of the span"
}
},
"additionalProperties": false,
"required": [
"span_id",
"trace_id",
"name",
"start_time"
],
"title": "SpanWithStatus",
"description": "A span that includes status information."
},
"QuerySpanTreeResponse": {
"type": "object",
"properties": {
"data": {
"type": "object",
"additionalProperties": {
"$ref": "#/components/schemas/SpanWithStatus"
},
"description": "Dictionary mapping span IDs to spans with status information"
}
},
"additionalProperties": false,
"required": [
"data"
],
"title": "QuerySpanTreeResponse",
"description": "Response containing a tree structure of spans."
},
"QueryTracesRequest": {
"type": "object",
"properties": {
"attribute_filters": {
"type": "array",
"items": {
"$ref": "#/components/schemas/QueryCondition"
},
"description": "The attribute filters to apply to the traces."
},
"limit": {
"type": "integer",
"description": "The limit of traces to return."
},
"offset": {
"type": "integer",
"description": "The offset of the traces to return."
},
"order_by": {
"type": "array",
"items": {
"type": "string"
},
"description": "The order by of the traces to return."
}
},
"additionalProperties": false,
"title": "QueryTracesRequest"
},
"Trace": {
"type": "object",
"properties": {
"trace_id": {
"type": "string",
"description": "Unique identifier for the trace"
},
"root_span_id": {
"type": "string",
"description": "Unique identifier for the root span that started this trace"
},
"start_time": {
"type": "string",
"format": "date-time",
"description": "Timestamp when the trace began"
},
"end_time": {
"type": "string",
"format": "date-time",
"description": "(Optional) Timestamp when the trace finished, if completed"
}
},
"additionalProperties": false,
"required": [
"trace_id",
"root_span_id",
"start_time"
],
"title": "Trace",
"description": "A trace representing the complete execution path of a request across multiple operations."
},
"QueryTracesResponse": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Trace"
},
"description": "List of traces matching the query criteria"
}
},
"additionalProperties": false,
"required": [
"data"
],
"title": "QueryTracesResponse",
"description": "Response containing a list of traces."
}
},
"responses": {
@ -13387,10 +12495,6 @@
"description": "OpenAI-compatible Moderations API.",
"x-displayName": "Safety"
},
{
"name": "Telemetry",
"description": ""
},
{
"name": "VectorIO",
"description": ""
@ -13410,7 +12514,6 @@
"Models",
"PostTraining (Coming Soon)",
"Safety",
"Telemetry",
"VectorIO"
]
}

View file

@ -2593,238 +2593,6 @@ paths:
$ref: '#/components/schemas/SupervisedFineTuneRequest'
required: true
deprecated: true
/v1/telemetry/metrics/{metric_name}:
post:
responses:
'200':
description: A QueryMetricsResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/QueryMetricsResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Query metrics.
description: Query metrics.
parameters:
- name: metric_name
in: path
description: The name of the metric to query.
required: true
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/QueryMetricsRequest'
required: true
deprecated: true
/v1/telemetry/spans:
post:
responses:
'200':
description: A QuerySpansResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/QuerySpansResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Query spans.
description: Query spans.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/QuerySpansRequest'
required: true
deprecated: true
/v1/telemetry/spans/export:
post:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Save spans to a dataset.
description: Save spans to a dataset.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/SaveSpansToDatasetRequest'
required: true
deprecated: true
/v1/telemetry/spans/{span_id}/tree:
post:
responses:
'200':
description: A QuerySpanTreeResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/QuerySpanTreeResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Get a span tree by its ID.
description: Get a span tree by its ID.
parameters:
- name: span_id
in: path
description: The ID of the span to get the tree from.
required: true
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/GetSpanTreeRequest'
required: true
deprecated: true
/v1/telemetry/traces:
post:
responses:
'200':
description: A QueryTracesResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/QueryTracesResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Query traces.
description: Query traces.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/QueryTracesRequest'
required: true
deprecated: true
/v1/telemetry/traces/{trace_id}:
get:
responses:
'200':
description: A Trace.
content:
application/json:
schema:
$ref: '#/components/schemas/Trace'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Get a trace by its ID.
description: Get a trace by its ID.
parameters:
- name: trace_id
in: path
description: The ID of the trace to get.
required: true
schema:
type: string
deprecated: true
/v1/telemetry/traces/{trace_id}/spans/{span_id}:
get:
responses:
'200':
description: A Span.
content:
application/json:
schema:
$ref: '#/components/schemas/Span'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Get a span by its ID.
description: Get a span by its ID.
parameters:
- name: trace_id
in: path
description: >-
The ID of the trace to get the span from.
required: true
schema:
type: string
- name: span_id
in: path
description: The ID of the span to get.
required: true
schema:
type: string
deprecated: true
jsonSchemaDialect: >-
https://json-schema.org/draft/2020-12/schema
components:
@ -9510,434 +9278,6 @@ components:
- hyperparam_search_config
- logger_config
title: SupervisedFineTuneRequest
QueryMetricsRequest:
type: object
properties:
start_time:
type: integer
description: The start time of the metric to query.
end_time:
type: integer
description: The end time of the metric to query.
granularity:
type: string
description: The granularity of the metric to query.
query_type:
type: string
enum:
- range
- instant
description: The type of query to perform.
label_matchers:
type: array
items:
type: object
properties:
name:
type: string
description: The name of the label to match
value:
type: string
description: The value to match against
operator:
type: string
enum:
- '='
- '!='
- =~
- '!~'
description: >-
The comparison operator to use for matching
default: '='
additionalProperties: false
required:
- name
- value
- operator
title: MetricLabelMatcher
description: >-
A matcher for filtering metrics by label values.
description: >-
The label matchers to apply to the metric.
additionalProperties: false
required:
- start_time
- query_type
title: QueryMetricsRequest
MetricDataPoint:
type: object
properties:
timestamp:
type: integer
description: >-
Unix timestamp when the metric value was recorded
value:
type: number
description: >-
The numeric value of the metric at this timestamp
unit:
type: string
additionalProperties: false
required:
- timestamp
- value
- unit
title: MetricDataPoint
description: >-
A single data point in a metric time series.
MetricLabel:
type: object
properties:
name:
type: string
description: The name of the label
value:
type: string
description: The value of the label
additionalProperties: false
required:
- name
- value
title: MetricLabel
description: A label associated with a metric.
MetricSeries:
type: object
properties:
metric:
type: string
description: The name of the metric
labels:
type: array
items:
$ref: '#/components/schemas/MetricLabel'
description: >-
List of labels associated with this metric series
values:
type: array
items:
$ref: '#/components/schemas/MetricDataPoint'
description: >-
List of data points in chronological order
additionalProperties: false
required:
- metric
- labels
- values
title: MetricSeries
description: A time series of metric data points.
QueryMetricsResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/MetricSeries'
description: >-
List of metric series matching the query criteria
additionalProperties: false
required:
- data
title: QueryMetricsResponse
description: >-
Response containing metric time series data.
QueryCondition:
type: object
properties:
key:
type: string
description: The attribute key to filter on
op:
$ref: '#/components/schemas/QueryConditionOp'
description: The comparison operator to apply
value:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: The value to compare against
additionalProperties: false
required:
- key
- op
- value
title: QueryCondition
description: A condition for filtering query results.
QueryConditionOp:
type: string
enum:
- eq
- ne
- gt
- lt
title: QueryConditionOp
description: >-
Comparison operators for query conditions.
QuerySpansRequest:
type: object
properties:
attribute_filters:
type: array
items:
$ref: '#/components/schemas/QueryCondition'
description: >-
The attribute filters to apply to the spans.
attributes_to_return:
type: array
items:
type: string
description: The attributes to return in the spans.
max_depth:
type: integer
description: The maximum depth of the tree.
additionalProperties: false
required:
- attribute_filters
- attributes_to_return
title: QuerySpansRequest
Span:
type: object
properties:
span_id:
type: string
description: Unique identifier for the span
trace_id:
type: string
description: >-
Unique identifier for the trace this span belongs to
parent_span_id:
type: string
description: >-
(Optional) Unique identifier for the parent span, if this is a child span
name:
type: string
description: >-
Human-readable name describing the operation this span represents
start_time:
type: string
format: date-time
description: Timestamp when the operation began
end_time:
type: string
format: date-time
description: >-
(Optional) Timestamp when the operation finished, if completed
attributes:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Key-value pairs containing additional metadata about the span
additionalProperties: false
required:
- span_id
- trace_id
- name
- start_time
title: Span
description: >-
A span representing a single operation within a trace.
QuerySpansResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/Span'
description: >-
List of spans matching the query criteria
additionalProperties: false
required:
- data
title: QuerySpansResponse
description: Response containing a list of spans.
SaveSpansToDatasetRequest:
type: object
properties:
attribute_filters:
type: array
items:
$ref: '#/components/schemas/QueryCondition'
description: >-
The attribute filters to apply to the spans.
attributes_to_save:
type: array
items:
type: string
description: The attributes to save to the dataset.
dataset_id:
type: string
description: >-
The ID of the dataset to save the spans to.
max_depth:
type: integer
description: The maximum depth of the tree.
additionalProperties: false
required:
- attribute_filters
- attributes_to_save
- dataset_id
title: SaveSpansToDatasetRequest
GetSpanTreeRequest:
type: object
properties:
attributes_to_return:
type: array
items:
type: string
description: The attributes to return in the tree.
max_depth:
type: integer
description: The maximum depth of the tree.
additionalProperties: false
title: GetSpanTreeRequest
SpanStatus:
type: string
enum:
- ok
- error
title: SpanStatus
description: >-
The status of a span indicating whether it completed successfully or with
an error.
SpanWithStatus:
type: object
properties:
span_id:
type: string
description: Unique identifier for the span
trace_id:
type: string
description: >-
Unique identifier for the trace this span belongs to
parent_span_id:
type: string
description: >-
(Optional) Unique identifier for the parent span, if this is a child span
name:
type: string
description: >-
Human-readable name describing the operation this span represents
start_time:
type: string
format: date-time
description: Timestamp when the operation began
end_time:
type: string
format: date-time
description: >-
(Optional) Timestamp when the operation finished, if completed
attributes:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Key-value pairs containing additional metadata about the span
status:
$ref: '#/components/schemas/SpanStatus'
description: >-
(Optional) The current status of the span
additionalProperties: false
required:
- span_id
- trace_id
- name
- start_time
title: SpanWithStatus
description: A span that includes status information.
QuerySpanTreeResponse:
type: object
properties:
data:
type: object
additionalProperties:
$ref: '#/components/schemas/SpanWithStatus'
description: >-
Dictionary mapping span IDs to spans with status information
additionalProperties: false
required:
- data
title: QuerySpanTreeResponse
description: >-
Response containing a tree structure of spans.
QueryTracesRequest:
type: object
properties:
attribute_filters:
type: array
items:
$ref: '#/components/schemas/QueryCondition'
description: >-
The attribute filters to apply to the traces.
limit:
type: integer
description: The limit of traces to return.
offset:
type: integer
description: The offset of the traces to return.
order_by:
type: array
items:
type: string
description: The order by of the traces to return.
additionalProperties: false
title: QueryTracesRequest
Trace:
type: object
properties:
trace_id:
type: string
description: Unique identifier for the trace
root_span_id:
type: string
description: >-
Unique identifier for the root span that started this trace
start_time:
type: string
format: date-time
description: Timestamp when the trace began
end_time:
type: string
format: date-time
description: >-
(Optional) Timestamp when the trace finished, if completed
additionalProperties: false
required:
- trace_id
- root_span_id
- start_time
title: Trace
description: >-
A trace representing the complete execution path of a request across multiple
operations.
QueryTracesResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/Trace'
description: >-
List of traces matching the query criteria
additionalProperties: false
required:
- data
title: QueryTracesResponse
description: Response containing a list of traces.
responses:
BadRequest400:
description: The request was invalid or malformed
@ -10043,8 +9383,6 @@ tags:
- name: Safety
description: OpenAI-compatible Moderations API.
x-displayName: Safety
- name: Telemetry
description: ''
- name: VectorIO
description: ''
x-tagGroups:
@ -10060,5 +9398,4 @@ x-tagGroups:
- Models
- PostTraining (Coming Soon)
- Safety
- Telemetry
- VectorIO

View file

@ -1711,343 +1711,6 @@
},
"deprecated": false
}
},
"/v1alpha/telemetry/metrics/{metric_name}": {
"post": {
"responses": {
"200": {
"description": "A QueryMetricsResponse.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QueryMetricsResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Query metrics.",
"description": "Query metrics.",
"parameters": [
{
"name": "metric_name",
"in": "path",
"description": "The name of the metric to query.",
"required": true,
"schema": {
"type": "string"
}
}
],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QueryMetricsRequest"
}
}
},
"required": true
},
"deprecated": false
}
},
"/v1alpha/telemetry/spans": {
"post": {
"responses": {
"200": {
"description": "A QuerySpansResponse.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QuerySpansResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Query spans.",
"description": "Query spans.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QuerySpansRequest"
}
}
},
"required": true
},
"deprecated": false
}
},
"/v1alpha/telemetry/spans/export": {
"post": {
"responses": {
"200": {
"description": "OK"
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Save spans to a dataset.",
"description": "Save spans to a dataset.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/SaveSpansToDatasetRequest"
}
}
},
"required": true
},
"deprecated": false
}
},
"/v1alpha/telemetry/spans/{span_id}/tree": {
"post": {
"responses": {
"200": {
"description": "A QuerySpanTreeResponse.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QuerySpanTreeResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Get a span tree by its ID.",
"description": "Get a span tree by its ID.",
"parameters": [
{
"name": "span_id",
"in": "path",
"description": "The ID of the span to get the tree from.",
"required": true,
"schema": {
"type": "string"
}
}
],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/GetSpanTreeRequest"
}
}
},
"required": true
},
"deprecated": false
}
},
"/v1alpha/telemetry/traces": {
"post": {
"responses": {
"200": {
"description": "A QueryTracesResponse.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QueryTracesResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Query traces.",
"description": "Query traces.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QueryTracesRequest"
}
}
},
"required": true
},
"deprecated": false
}
},
"/v1alpha/telemetry/traces/{trace_id}": {
"get": {
"responses": {
"200": {
"description": "A Trace.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Trace"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Get a trace by its ID.",
"description": "Get a trace by its ID.",
"parameters": [
{
"name": "trace_id",
"in": "path",
"description": "The ID of the trace to get.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": false
}
},
"/v1alpha/telemetry/traces/{trace_id}/spans/{span_id}": {
"get": {
"responses": {
"200": {
"description": "A Span.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Span"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Get a span by its ID.",
"description": "Get a span by its ID.",
"parameters": [
{
"name": "trace_id",
"in": "path",
"description": "The ID of the trace to get the span from.",
"required": true,
"schema": {
"type": "string"
}
},
{
"name": "span_id",
"in": "path",
"description": "The ID of the span to get.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": false
}
}
},
"jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema",
@ -5765,561 +5428,6 @@
"logger_config"
],
"title": "SupervisedFineTuneRequest"
},
"QueryMetricsRequest": {
"type": "object",
"properties": {
"start_time": {
"type": "integer",
"description": "The start time of the metric to query."
},
"end_time": {
"type": "integer",
"description": "The end time of the metric to query."
},
"granularity": {
"type": "string",
"description": "The granularity of the metric to query."
},
"query_type": {
"type": "string",
"enum": [
"range",
"instant"
],
"description": "The type of query to perform."
},
"label_matchers": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "The name of the label to match"
},
"value": {
"type": "string",
"description": "The value to match against"
},
"operator": {
"type": "string",
"enum": [
"=",
"!=",
"=~",
"!~"
],
"description": "The comparison operator to use for matching",
"default": "="
}
},
"additionalProperties": false,
"required": [
"name",
"value",
"operator"
],
"title": "MetricLabelMatcher",
"description": "A matcher for filtering metrics by label values."
},
"description": "The label matchers to apply to the metric."
}
},
"additionalProperties": false,
"required": [
"start_time",
"query_type"
],
"title": "QueryMetricsRequest"
},
"MetricDataPoint": {
"type": "object",
"properties": {
"timestamp": {
"type": "integer",
"description": "Unix timestamp when the metric value was recorded"
},
"value": {
"type": "number",
"description": "The numeric value of the metric at this timestamp"
},
"unit": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"timestamp",
"value",
"unit"
],
"title": "MetricDataPoint",
"description": "A single data point in a metric time series."
},
"MetricLabel": {
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "The name of the label"
},
"value": {
"type": "string",
"description": "The value of the label"
}
},
"additionalProperties": false,
"required": [
"name",
"value"
],
"title": "MetricLabel",
"description": "A label associated with a metric."
},
"MetricSeries": {
"type": "object",
"properties": {
"metric": {
"type": "string",
"description": "The name of the metric"
},
"labels": {
"type": "array",
"items": {
"$ref": "#/components/schemas/MetricLabel"
},
"description": "List of labels associated with this metric series"
},
"values": {
"type": "array",
"items": {
"$ref": "#/components/schemas/MetricDataPoint"
},
"description": "List of data points in chronological order"
}
},
"additionalProperties": false,
"required": [
"metric",
"labels",
"values"
],
"title": "MetricSeries",
"description": "A time series of metric data points."
},
"QueryMetricsResponse": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"$ref": "#/components/schemas/MetricSeries"
},
"description": "List of metric series matching the query criteria"
}
},
"additionalProperties": false,
"required": [
"data"
],
"title": "QueryMetricsResponse",
"description": "Response containing metric time series data."
},
"QueryCondition": {
"type": "object",
"properties": {
"key": {
"type": "string",
"description": "The attribute key to filter on"
},
"op": {
"$ref": "#/components/schemas/QueryConditionOp",
"description": "The comparison operator to apply"
},
"value": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
],
"description": "The value to compare against"
}
},
"additionalProperties": false,
"required": [
"key",
"op",
"value"
],
"title": "QueryCondition",
"description": "A condition for filtering query results."
},
"QueryConditionOp": {
"type": "string",
"enum": [
"eq",
"ne",
"gt",
"lt"
],
"title": "QueryConditionOp",
"description": "Comparison operators for query conditions."
},
"QuerySpansRequest": {
"type": "object",
"properties": {
"attribute_filters": {
"type": "array",
"items": {
"$ref": "#/components/schemas/QueryCondition"
},
"description": "The attribute filters to apply to the spans."
},
"attributes_to_return": {
"type": "array",
"items": {
"type": "string"
},
"description": "The attributes to return in the spans."
},
"max_depth": {
"type": "integer",
"description": "The maximum depth of the tree."
}
},
"additionalProperties": false,
"required": [
"attribute_filters",
"attributes_to_return"
],
"title": "QuerySpansRequest"
},
"Span": {
"type": "object",
"properties": {
"span_id": {
"type": "string",
"description": "Unique identifier for the span"
},
"trace_id": {
"type": "string",
"description": "Unique identifier for the trace this span belongs to"
},
"parent_span_id": {
"type": "string",
"description": "(Optional) Unique identifier for the parent span, if this is a child span"
},
"name": {
"type": "string",
"description": "Human-readable name describing the operation this span represents"
},
"start_time": {
"type": "string",
"format": "date-time",
"description": "Timestamp when the operation began"
},
"end_time": {
"type": "string",
"format": "date-time",
"description": "(Optional) Timestamp when the operation finished, if completed"
},
"attributes": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
},
"description": "(Optional) Key-value pairs containing additional metadata about the span"
}
},
"additionalProperties": false,
"required": [
"span_id",
"trace_id",
"name",
"start_time"
],
"title": "Span",
"description": "A span representing a single operation within a trace."
},
"QuerySpansResponse": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Span"
},
"description": "List of spans matching the query criteria"
}
},
"additionalProperties": false,
"required": [
"data"
],
"title": "QuerySpansResponse",
"description": "Response containing a list of spans."
},
"SaveSpansToDatasetRequest": {
"type": "object",
"properties": {
"attribute_filters": {
"type": "array",
"items": {
"$ref": "#/components/schemas/QueryCondition"
},
"description": "The attribute filters to apply to the spans."
},
"attributes_to_save": {
"type": "array",
"items": {
"type": "string"
},
"description": "The attributes to save to the dataset."
},
"dataset_id": {
"type": "string",
"description": "The ID of the dataset to save the spans to."
},
"max_depth": {
"type": "integer",
"description": "The maximum depth of the tree."
}
},
"additionalProperties": false,
"required": [
"attribute_filters",
"attributes_to_save",
"dataset_id"
],
"title": "SaveSpansToDatasetRequest"
},
"GetSpanTreeRequest": {
"type": "object",
"properties": {
"attributes_to_return": {
"type": "array",
"items": {
"type": "string"
},
"description": "The attributes to return in the tree."
},
"max_depth": {
"type": "integer",
"description": "The maximum depth of the tree."
}
},
"additionalProperties": false,
"title": "GetSpanTreeRequest"
},
"SpanStatus": {
"type": "string",
"enum": [
"ok",
"error"
],
"title": "SpanStatus",
"description": "The status of a span indicating whether it completed successfully or with an error."
},
"SpanWithStatus": {
"type": "object",
"properties": {
"span_id": {
"type": "string",
"description": "Unique identifier for the span"
},
"trace_id": {
"type": "string",
"description": "Unique identifier for the trace this span belongs to"
},
"parent_span_id": {
"type": "string",
"description": "(Optional) Unique identifier for the parent span, if this is a child span"
},
"name": {
"type": "string",
"description": "Human-readable name describing the operation this span represents"
},
"start_time": {
"type": "string",
"format": "date-time",
"description": "Timestamp when the operation began"
},
"end_time": {
"type": "string",
"format": "date-time",
"description": "(Optional) Timestamp when the operation finished, if completed"
},
"attributes": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
},
"description": "(Optional) Key-value pairs containing additional metadata about the span"
},
"status": {
"$ref": "#/components/schemas/SpanStatus",
"description": "(Optional) The current status of the span"
}
},
"additionalProperties": false,
"required": [
"span_id",
"trace_id",
"name",
"start_time"
],
"title": "SpanWithStatus",
"description": "A span that includes status information."
},
"QuerySpanTreeResponse": {
"type": "object",
"properties": {
"data": {
"type": "object",
"additionalProperties": {
"$ref": "#/components/schemas/SpanWithStatus"
},
"description": "Dictionary mapping span IDs to spans with status information"
}
},
"additionalProperties": false,
"required": [
"data"
],
"title": "QuerySpanTreeResponse",
"description": "Response containing a tree structure of spans."
},
"QueryTracesRequest": {
"type": "object",
"properties": {
"attribute_filters": {
"type": "array",
"items": {
"$ref": "#/components/schemas/QueryCondition"
},
"description": "The attribute filters to apply to the traces."
},
"limit": {
"type": "integer",
"description": "The limit of traces to return."
},
"offset": {
"type": "integer",
"description": "The offset of the traces to return."
},
"order_by": {
"type": "array",
"items": {
"type": "string"
},
"description": "The order by of the traces to return."
}
},
"additionalProperties": false,
"title": "QueryTracesRequest"
},
"Trace": {
"type": "object",
"properties": {
"trace_id": {
"type": "string",
"description": "Unique identifier for the trace"
},
"root_span_id": {
"type": "string",
"description": "Unique identifier for the root span that started this trace"
},
"start_time": {
"type": "string",
"format": "date-time",
"description": "Timestamp when the trace began"
},
"end_time": {
"type": "string",
"format": "date-time",
"description": "(Optional) Timestamp when the trace finished, if completed"
}
},
"additionalProperties": false,
"required": [
"trace_id",
"root_span_id",
"start_time"
],
"title": "Trace",
"description": "A trace representing the complete execution path of a request across multiple operations."
},
"QueryTracesResponse": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Trace"
},
"description": "List of traces matching the query criteria"
}
},
"additionalProperties": false,
"required": [
"data"
],
"title": "QueryTracesResponse",
"description": "Response containing a list of traces."
}
},
"responses": {
@ -6416,10 +5524,6 @@
{
"name": "PostTraining (Coming Soon)",
"description": ""
},
{
"name": "Telemetry",
"description": ""
}
],
"x-tagGroups": [
@ -6431,8 +5535,7 @@
"DatasetIO",
"Datasets",
"Eval",
"PostTraining (Coming Soon)",
"Telemetry"
"PostTraining (Coming Soon)"
]
}
]

View file

@ -1224,238 +1224,6 @@ paths:
$ref: '#/components/schemas/SupervisedFineTuneRequest'
required: true
deprecated: false
/v1alpha/telemetry/metrics/{metric_name}:
post:
responses:
'200':
description: A QueryMetricsResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/QueryMetricsResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Query metrics.
description: Query metrics.
parameters:
- name: metric_name
in: path
description: The name of the metric to query.
required: true
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/QueryMetricsRequest'
required: true
deprecated: false
/v1alpha/telemetry/spans:
post:
responses:
'200':
description: A QuerySpansResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/QuerySpansResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Query spans.
description: Query spans.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/QuerySpansRequest'
required: true
deprecated: false
/v1alpha/telemetry/spans/export:
post:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Save spans to a dataset.
description: Save spans to a dataset.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/SaveSpansToDatasetRequest'
required: true
deprecated: false
/v1alpha/telemetry/spans/{span_id}/tree:
post:
responses:
'200':
description: A QuerySpanTreeResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/QuerySpanTreeResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Get a span tree by its ID.
description: Get a span tree by its ID.
parameters:
- name: span_id
in: path
description: The ID of the span to get the tree from.
required: true
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/GetSpanTreeRequest'
required: true
deprecated: false
/v1alpha/telemetry/traces:
post:
responses:
'200':
description: A QueryTracesResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/QueryTracesResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Query traces.
description: Query traces.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/QueryTracesRequest'
required: true
deprecated: false
/v1alpha/telemetry/traces/{trace_id}:
get:
responses:
'200':
description: A Trace.
content:
application/json:
schema:
$ref: '#/components/schemas/Trace'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Get a trace by its ID.
description: Get a trace by its ID.
parameters:
- name: trace_id
in: path
description: The ID of the trace to get.
required: true
schema:
type: string
deprecated: false
/v1alpha/telemetry/traces/{trace_id}/spans/{span_id}:
get:
responses:
'200':
description: A Span.
content:
application/json:
schema:
$ref: '#/components/schemas/Span'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Get a span by its ID.
description: Get a span by its ID.
parameters:
- name: trace_id
in: path
description: >-
The ID of the trace to get the span from.
required: true
schema:
type: string
- name: span_id
in: path
description: The ID of the span to get.
required: true
schema:
type: string
deprecated: false
jsonSchemaDialect: >-
https://json-schema.org/draft/2020-12/schema
components:
@ -4249,434 +4017,6 @@ components:
- hyperparam_search_config
- logger_config
title: SupervisedFineTuneRequest
QueryMetricsRequest:
type: object
properties:
start_time:
type: integer
description: The start time of the metric to query.
end_time:
type: integer
description: The end time of the metric to query.
granularity:
type: string
description: The granularity of the metric to query.
query_type:
type: string
enum:
- range
- instant
description: The type of query to perform.
label_matchers:
type: array
items:
type: object
properties:
name:
type: string
description: The name of the label to match
value:
type: string
description: The value to match against
operator:
type: string
enum:
- '='
- '!='
- =~
- '!~'
description: >-
The comparison operator to use for matching
default: '='
additionalProperties: false
required:
- name
- value
- operator
title: MetricLabelMatcher
description: >-
A matcher for filtering metrics by label values.
description: >-
The label matchers to apply to the metric.
additionalProperties: false
required:
- start_time
- query_type
title: QueryMetricsRequest
MetricDataPoint:
type: object
properties:
timestamp:
type: integer
description: >-
Unix timestamp when the metric value was recorded
value:
type: number
description: >-
The numeric value of the metric at this timestamp
unit:
type: string
additionalProperties: false
required:
- timestamp
- value
- unit
title: MetricDataPoint
description: >-
A single data point in a metric time series.
MetricLabel:
type: object
properties:
name:
type: string
description: The name of the label
value:
type: string
description: The value of the label
additionalProperties: false
required:
- name
- value
title: MetricLabel
description: A label associated with a metric.
MetricSeries:
type: object
properties:
metric:
type: string
description: The name of the metric
labels:
type: array
items:
$ref: '#/components/schemas/MetricLabel'
description: >-
List of labels associated with this metric series
values:
type: array
items:
$ref: '#/components/schemas/MetricDataPoint'
description: >-
List of data points in chronological order
additionalProperties: false
required:
- metric
- labels
- values
title: MetricSeries
description: A time series of metric data points.
QueryMetricsResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/MetricSeries'
description: >-
List of metric series matching the query criteria
additionalProperties: false
required:
- data
title: QueryMetricsResponse
description: >-
Response containing metric time series data.
QueryCondition:
type: object
properties:
key:
type: string
description: The attribute key to filter on
op:
$ref: '#/components/schemas/QueryConditionOp'
description: The comparison operator to apply
value:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: The value to compare against
additionalProperties: false
required:
- key
- op
- value
title: QueryCondition
description: A condition for filtering query results.
QueryConditionOp:
type: string
enum:
- eq
- ne
- gt
- lt
title: QueryConditionOp
description: >-
Comparison operators for query conditions.
QuerySpansRequest:
type: object
properties:
attribute_filters:
type: array
items:
$ref: '#/components/schemas/QueryCondition'
description: >-
The attribute filters to apply to the spans.
attributes_to_return:
type: array
items:
type: string
description: The attributes to return in the spans.
max_depth:
type: integer
description: The maximum depth of the tree.
additionalProperties: false
required:
- attribute_filters
- attributes_to_return
title: QuerySpansRequest
Span:
type: object
properties:
span_id:
type: string
description: Unique identifier for the span
trace_id:
type: string
description: >-
Unique identifier for the trace this span belongs to
parent_span_id:
type: string
description: >-
(Optional) Unique identifier for the parent span, if this is a child span
name:
type: string
description: >-
Human-readable name describing the operation this span represents
start_time:
type: string
format: date-time
description: Timestamp when the operation began
end_time:
type: string
format: date-time
description: >-
(Optional) Timestamp when the operation finished, if completed
attributes:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Key-value pairs containing additional metadata about the span
additionalProperties: false
required:
- span_id
- trace_id
- name
- start_time
title: Span
description: >-
A span representing a single operation within a trace.
QuerySpansResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/Span'
description: >-
List of spans matching the query criteria
additionalProperties: false
required:
- data
title: QuerySpansResponse
description: Response containing a list of spans.
SaveSpansToDatasetRequest:
type: object
properties:
attribute_filters:
type: array
items:
$ref: '#/components/schemas/QueryCondition'
description: >-
The attribute filters to apply to the spans.
attributes_to_save:
type: array
items:
type: string
description: The attributes to save to the dataset.
dataset_id:
type: string
description: >-
The ID of the dataset to save the spans to.
max_depth:
type: integer
description: The maximum depth of the tree.
additionalProperties: false
required:
- attribute_filters
- attributes_to_save
- dataset_id
title: SaveSpansToDatasetRequest
GetSpanTreeRequest:
type: object
properties:
attributes_to_return:
type: array
items:
type: string
description: The attributes to return in the tree.
max_depth:
type: integer
description: The maximum depth of the tree.
additionalProperties: false
title: GetSpanTreeRequest
SpanStatus:
type: string
enum:
- ok
- error
title: SpanStatus
description: >-
The status of a span indicating whether it completed successfully or with
an error.
SpanWithStatus:
type: object
properties:
span_id:
type: string
description: Unique identifier for the span
trace_id:
type: string
description: >-
Unique identifier for the trace this span belongs to
parent_span_id:
type: string
description: >-
(Optional) Unique identifier for the parent span, if this is a child span
name:
type: string
description: >-
Human-readable name describing the operation this span represents
start_time:
type: string
format: date-time
description: Timestamp when the operation began
end_time:
type: string
format: date-time
description: >-
(Optional) Timestamp when the operation finished, if completed
attributes:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Key-value pairs containing additional metadata about the span
status:
$ref: '#/components/schemas/SpanStatus'
description: >-
(Optional) The current status of the span
additionalProperties: false
required:
- span_id
- trace_id
- name
- start_time
title: SpanWithStatus
description: A span that includes status information.
QuerySpanTreeResponse:
type: object
properties:
data:
type: object
additionalProperties:
$ref: '#/components/schemas/SpanWithStatus'
description: >-
Dictionary mapping span IDs to spans with status information
additionalProperties: false
required:
- data
title: QuerySpanTreeResponse
description: >-
Response containing a tree structure of spans.
QueryTracesRequest:
type: object
properties:
attribute_filters:
type: array
items:
$ref: '#/components/schemas/QueryCondition'
description: >-
The attribute filters to apply to the traces.
limit:
type: integer
description: The limit of traces to return.
offset:
type: integer
description: The offset of the traces to return.
order_by:
type: array
items:
type: string
description: The order by of the traces to return.
additionalProperties: false
title: QueryTracesRequest
Trace:
type: object
properties:
trace_id:
type: string
description: Unique identifier for the trace
root_span_id:
type: string
description: >-
Unique identifier for the root span that started this trace
start_time:
type: string
format: date-time
description: Timestamp when the trace began
end_time:
type: string
format: date-time
description: >-
(Optional) Timestamp when the trace finished, if completed
additionalProperties: false
required:
- trace_id
- root_span_id
- start_time
title: Trace
description: >-
A trace representing the complete execution path of a request across multiple
operations.
QueryTracesResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/Trace'
description: >-
List of traces matching the query criteria
additionalProperties: false
required:
- data
title: QueryTracesResponse
description: Response containing a list of traces.
responses:
BadRequest400:
description: The request was invalid or malformed
@ -4784,8 +4124,6 @@ tags:
Llama Stack Evaluation API for running evaluations on model and agent candidates.
- name: PostTraining (Coming Soon)
description: ''
- name: Telemetry
description: ''
x-tagGroups:
- name: Operations
tags:
@ -4795,4 +4133,3 @@ x-tagGroups:
- Datasets
- Eval
- PostTraining (Coming Soon)
- Telemetry

View file

@ -2525,44 +2525,6 @@
"deprecated": false
}
},
"/v1/telemetry/events": {
"post": {
"responses": {
"200": {
"description": "OK"
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Log an event.",
"description": "Log an event.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/LogEventRequest"
}
}
},
"required": true
},
"deprecated": false
}
},
"/v1/tool-runtime/invoke": {
"post": {
"responses": {
@ -10364,354 +10326,6 @@
"title": "SyntheticDataGenerationResponse",
"description": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold."
},
"Event": {
"oneOf": [
{
"$ref": "#/components/schemas/UnstructuredLogEvent"
},
{
"$ref": "#/components/schemas/MetricEvent"
},
{
"$ref": "#/components/schemas/StructuredLogEvent"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"unstructured_log": "#/components/schemas/UnstructuredLogEvent",
"metric": "#/components/schemas/MetricEvent",
"structured_log": "#/components/schemas/StructuredLogEvent"
}
}
},
"EventType": {
"type": "string",
"enum": [
"unstructured_log",
"structured_log",
"metric"
],
"title": "EventType",
"description": "The type of telemetry event being logged."
},
"LogSeverity": {
"type": "string",
"enum": [
"verbose",
"debug",
"info",
"warn",
"error",
"critical"
],
"title": "LogSeverity",
"description": "The severity level of a log message."
},
"MetricEvent": {
"type": "object",
"properties": {
"trace_id": {
"type": "string",
"description": "Unique identifier for the trace this event belongs to"
},
"span_id": {
"type": "string",
"description": "Unique identifier for the span this event belongs to"
},
"timestamp": {
"type": "string",
"format": "date-time",
"description": "Timestamp when the event occurred"
},
"attributes": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "string"
},
{
"type": "integer"
},
{
"type": "number"
},
{
"type": "boolean"
},
{
"type": "null"
}
]
},
"description": "(Optional) Key-value pairs containing additional metadata about the event"
},
"type": {
"$ref": "#/components/schemas/EventType",
"const": "metric",
"default": "metric",
"description": "Event type identifier set to METRIC"
},
"metric": {
"type": "string",
"description": "The name of the metric being measured"
},
"value": {
"oneOf": [
{
"type": "integer"
},
{
"type": "number"
}
],
"description": "The numeric value of the metric measurement"
},
"unit": {
"type": "string",
"description": "The unit of measurement for the metric value"
}
},
"additionalProperties": false,
"required": [
"trace_id",
"span_id",
"timestamp",
"type",
"metric",
"value",
"unit"
],
"title": "MetricEvent",
"description": "A metric event containing a measured value."
},
"SpanEndPayload": {
"type": "object",
"properties": {
"type": {
"$ref": "#/components/schemas/StructuredLogType",
"const": "span_end",
"default": "span_end",
"description": "Payload type identifier set to SPAN_END"
},
"status": {
"$ref": "#/components/schemas/SpanStatus",
"description": "The final status of the span indicating success or failure"
}
},
"additionalProperties": false,
"required": [
"type",
"status"
],
"title": "SpanEndPayload",
"description": "Payload for a span end event."
},
"SpanStartPayload": {
"type": "object",
"properties": {
"type": {
"$ref": "#/components/schemas/StructuredLogType",
"const": "span_start",
"default": "span_start",
"description": "Payload type identifier set to SPAN_START"
},
"name": {
"type": "string",
"description": "Human-readable name describing the operation this span represents"
},
"parent_span_id": {
"type": "string",
"description": "(Optional) Unique identifier for the parent span, if this is a child span"
}
},
"additionalProperties": false,
"required": [
"type",
"name"
],
"title": "SpanStartPayload",
"description": "Payload for a span start event."
},
"SpanStatus": {
"type": "string",
"enum": [
"ok",
"error"
],
"title": "SpanStatus",
"description": "The status of a span indicating whether it completed successfully or with an error."
},
"StructuredLogEvent": {
"type": "object",
"properties": {
"trace_id": {
"type": "string",
"description": "Unique identifier for the trace this event belongs to"
},
"span_id": {
"type": "string",
"description": "Unique identifier for the span this event belongs to"
},
"timestamp": {
"type": "string",
"format": "date-time",
"description": "Timestamp when the event occurred"
},
"attributes": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "string"
},
{
"type": "integer"
},
{
"type": "number"
},
{
"type": "boolean"
},
{
"type": "null"
}
]
},
"description": "(Optional) Key-value pairs containing additional metadata about the event"
},
"type": {
"$ref": "#/components/schemas/EventType",
"const": "structured_log",
"default": "structured_log",
"description": "Event type identifier set to STRUCTURED_LOG"
},
"payload": {
"oneOf": [
{
"$ref": "#/components/schemas/SpanStartPayload"
},
{
"$ref": "#/components/schemas/SpanEndPayload"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"span_start": "#/components/schemas/SpanStartPayload",
"span_end": "#/components/schemas/SpanEndPayload"
}
},
"description": "The structured payload data for the log event"
}
},
"additionalProperties": false,
"required": [
"trace_id",
"span_id",
"timestamp",
"type",
"payload"
],
"title": "StructuredLogEvent",
"description": "A structured log event containing typed payload data."
},
"StructuredLogType": {
"type": "string",
"enum": [
"span_start",
"span_end"
],
"title": "StructuredLogType",
"description": "The type of structured log event payload."
},
"UnstructuredLogEvent": {
"type": "object",
"properties": {
"trace_id": {
"type": "string",
"description": "Unique identifier for the trace this event belongs to"
},
"span_id": {
"type": "string",
"description": "Unique identifier for the span this event belongs to"
},
"timestamp": {
"type": "string",
"format": "date-time",
"description": "Timestamp when the event occurred"
},
"attributes": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "string"
},
{
"type": "integer"
},
{
"type": "number"
},
{
"type": "boolean"
},
{
"type": "null"
}
]
},
"description": "(Optional) Key-value pairs containing additional metadata about the event"
},
"type": {
"$ref": "#/components/schemas/EventType",
"const": "unstructured_log",
"default": "unstructured_log",
"description": "Event type identifier set to UNSTRUCTURED_LOG"
},
"message": {
"type": "string",
"description": "The log message text"
},
"severity": {
"$ref": "#/components/schemas/LogSeverity",
"description": "The severity level of the log message"
}
},
"additionalProperties": false,
"required": [
"trace_id",
"span_id",
"timestamp",
"type",
"message",
"severity"
],
"title": "UnstructuredLogEvent",
"description": "An unstructured log event containing a simple text message."
},
"LogEventRequest": {
"type": "object",
"properties": {
"event": {
"$ref": "#/components/schemas/Event",
"description": "The event to log."
},
"ttl_seconds": {
"type": "integer",
"description": "The time to live of the event."
}
},
"additionalProperties": false,
"required": [
"event",
"ttl_seconds"
],
"title": "LogEventRequest"
},
"InvokeToolRequest": {
"type": "object",
"properties": {
@ -12962,10 +12576,6 @@
"name": "SyntheticDataGeneration (Coming Soon)",
"description": ""
},
{
"name": "Telemetry",
"description": ""
},
{
"name": "ToolGroups",
"description": ""
@ -13000,7 +12610,6 @@
"ScoringFunctions",
"Shields",
"SyntheticDataGeneration (Coming Soon)",
"Telemetry",
"ToolGroups",
"ToolRuntime",
"VectorDBs",

View file

@ -1944,33 +1944,6 @@ paths:
$ref: '#/components/schemas/SyntheticDataGenerateRequest'
required: true
deprecated: false
/v1/telemetry/events:
post:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Log an event.
description: Log an event.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/LogEventRequest'
required: true
deprecated: false
/v1/tool-runtime/invoke:
post:
responses:
@ -7840,267 +7813,6 @@ components:
description: >-
Response from the synthetic data generation. Batch of (prompt, response, score)
tuples that pass the threshold.
Event:
oneOf:
- $ref: '#/components/schemas/UnstructuredLogEvent'
- $ref: '#/components/schemas/MetricEvent'
- $ref: '#/components/schemas/StructuredLogEvent'
discriminator:
propertyName: type
mapping:
unstructured_log: '#/components/schemas/UnstructuredLogEvent'
metric: '#/components/schemas/MetricEvent'
structured_log: '#/components/schemas/StructuredLogEvent'
EventType:
type: string
enum:
- unstructured_log
- structured_log
- metric
title: EventType
description: >-
The type of telemetry event being logged.
LogSeverity:
type: string
enum:
- verbose
- debug
- info
- warn
- error
- critical
title: LogSeverity
description: The severity level of a log message.
MetricEvent:
type: object
properties:
trace_id:
type: string
description: >-
Unique identifier for the trace this event belongs to
span_id:
type: string
description: >-
Unique identifier for the span this event belongs to
timestamp:
type: string
format: date-time
description: Timestamp when the event occurred
attributes:
type: object
additionalProperties:
oneOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
description: >-
(Optional) Key-value pairs containing additional metadata about the event
type:
$ref: '#/components/schemas/EventType'
const: metric
default: metric
description: Event type identifier set to METRIC
metric:
type: string
description: The name of the metric being measured
value:
oneOf:
- type: integer
- type: number
description: >-
The numeric value of the metric measurement
unit:
type: string
description: >-
The unit of measurement for the metric value
additionalProperties: false
required:
- trace_id
- span_id
- timestamp
- type
- metric
- value
- unit
title: MetricEvent
description: >-
A metric event containing a measured value.
SpanEndPayload:
type: object
properties:
type:
$ref: '#/components/schemas/StructuredLogType'
const: span_end
default: span_end
description: Payload type identifier set to SPAN_END
status:
$ref: '#/components/schemas/SpanStatus'
description: >-
The final status of the span indicating success or failure
additionalProperties: false
required:
- type
- status
title: SpanEndPayload
description: Payload for a span end event.
SpanStartPayload:
type: object
properties:
type:
$ref: '#/components/schemas/StructuredLogType'
const: span_start
default: span_start
description: >-
Payload type identifier set to SPAN_START
name:
type: string
description: >-
Human-readable name describing the operation this span represents
parent_span_id:
type: string
description: >-
(Optional) Unique identifier for the parent span, if this is a child span
additionalProperties: false
required:
- type
- name
title: SpanStartPayload
description: Payload for a span start event.
SpanStatus:
type: string
enum:
- ok
- error
title: SpanStatus
description: >-
The status of a span indicating whether it completed successfully or with
an error.
StructuredLogEvent:
type: object
properties:
trace_id:
type: string
description: >-
Unique identifier for the trace this event belongs to
span_id:
type: string
description: >-
Unique identifier for the span this event belongs to
timestamp:
type: string
format: date-time
description: Timestamp when the event occurred
attributes:
type: object
additionalProperties:
oneOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
description: >-
(Optional) Key-value pairs containing additional metadata about the event
type:
$ref: '#/components/schemas/EventType'
const: structured_log
default: structured_log
description: >-
Event type identifier set to STRUCTURED_LOG
payload:
oneOf:
- $ref: '#/components/schemas/SpanStartPayload'
- $ref: '#/components/schemas/SpanEndPayload'
discriminator:
propertyName: type
mapping:
span_start: '#/components/schemas/SpanStartPayload'
span_end: '#/components/schemas/SpanEndPayload'
description: >-
The structured payload data for the log event
additionalProperties: false
required:
- trace_id
- span_id
- timestamp
- type
- payload
title: StructuredLogEvent
description: >-
A structured log event containing typed payload data.
StructuredLogType:
type: string
enum:
- span_start
- span_end
title: StructuredLogType
description: >-
The type of structured log event payload.
UnstructuredLogEvent:
type: object
properties:
trace_id:
type: string
description: >-
Unique identifier for the trace this event belongs to
span_id:
type: string
description: >-
Unique identifier for the span this event belongs to
timestamp:
type: string
format: date-time
description: Timestamp when the event occurred
attributes:
type: object
additionalProperties:
oneOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
description: >-
(Optional) Key-value pairs containing additional metadata about the event
type:
$ref: '#/components/schemas/EventType'
const: unstructured_log
default: unstructured_log
description: >-
Event type identifier set to UNSTRUCTURED_LOG
message:
type: string
description: The log message text
severity:
$ref: '#/components/schemas/LogSeverity'
description: The severity level of the log message
additionalProperties: false
required:
- trace_id
- span_id
- timestamp
- type
- message
- severity
title: UnstructuredLogEvent
description: >-
An unstructured log event containing a simple text message.
LogEventRequest:
type: object
properties:
event:
$ref: '#/components/schemas/Event'
description: The event to log.
ttl_seconds:
type: integer
description: The time to live of the event.
additionalProperties: false
required:
- event
- ttl_seconds
title: LogEventRequest
InvokeToolRequest:
type: object
properties:
@ -9833,8 +9545,6 @@ tags:
description: ''
- name: SyntheticDataGeneration (Coming Soon)
description: ''
- name: Telemetry
description: ''
- name: ToolGroups
description: ''
- name: ToolRuntime
@ -9859,7 +9569,6 @@ x-tagGroups:
- ScoringFunctions
- Shields
- SyntheticDataGeneration (Coming Soon)
- Telemetry
- ToolGroups
- ToolRuntime
- VectorDBs

File diff suppressed because it is too large Load diff

View file

@ -1947,33 +1947,6 @@ paths:
$ref: '#/components/schemas/SyntheticDataGenerateRequest'
required: true
deprecated: false
/v1/telemetry/events:
post:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Log an event.
description: Log an event.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/LogEventRequest'
required: true
deprecated: false
/v1/tool-runtime/invoke:
post:
responses:
@ -4392,238 +4365,6 @@ paths:
$ref: '#/components/schemas/SupervisedFineTuneRequest'
required: true
deprecated: false
/v1alpha/telemetry/metrics/{metric_name}:
post:
responses:
'200':
description: A QueryMetricsResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/QueryMetricsResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Query metrics.
description: Query metrics.
parameters:
- name: metric_name
in: path
description: The name of the metric to query.
required: true
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/QueryMetricsRequest'
required: true
deprecated: false
/v1alpha/telemetry/spans:
post:
responses:
'200':
description: A QuerySpansResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/QuerySpansResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Query spans.
description: Query spans.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/QuerySpansRequest'
required: true
deprecated: false
/v1alpha/telemetry/spans/export:
post:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Save spans to a dataset.
description: Save spans to a dataset.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/SaveSpansToDatasetRequest'
required: true
deprecated: false
/v1alpha/telemetry/spans/{span_id}/tree:
post:
responses:
'200':
description: A QuerySpanTreeResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/QuerySpanTreeResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Get a span tree by its ID.
description: Get a span tree by its ID.
parameters:
- name: span_id
in: path
description: The ID of the span to get the tree from.
required: true
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/GetSpanTreeRequest'
required: true
deprecated: false
/v1alpha/telemetry/traces:
post:
responses:
'200':
description: A QueryTracesResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/QueryTracesResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Query traces.
description: Query traces.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/QueryTracesRequest'
required: true
deprecated: false
/v1alpha/telemetry/traces/{trace_id}:
get:
responses:
'200':
description: A Trace.
content:
application/json:
schema:
$ref: '#/components/schemas/Trace'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Get a trace by its ID.
description: Get a trace by its ID.
parameters:
- name: trace_id
in: path
description: The ID of the trace to get.
required: true
schema:
type: string
deprecated: false
/v1alpha/telemetry/traces/{trace_id}/spans/{span_id}:
get:
responses:
'200':
description: A Span.
content:
application/json:
schema:
$ref: '#/components/schemas/Span'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Get a span by its ID.
description: Get a span by its ID.
parameters:
- name: trace_id
in: path
description: >-
The ID of the trace to get the span from.
required: true
schema:
type: string
- name: span_id
in: path
description: The ID of the span to get.
required: true
schema:
type: string
deprecated: false
jsonSchemaDialect: >-
https://json-schema.org/draft/2020-12/schema
components:
@ -9285,267 +9026,6 @@ components:
description: >-
Response from the synthetic data generation. Batch of (prompt, response, score)
tuples that pass the threshold.
Event:
oneOf:
- $ref: '#/components/schemas/UnstructuredLogEvent'
- $ref: '#/components/schemas/MetricEvent'
- $ref: '#/components/schemas/StructuredLogEvent'
discriminator:
propertyName: type
mapping:
unstructured_log: '#/components/schemas/UnstructuredLogEvent'
metric: '#/components/schemas/MetricEvent'
structured_log: '#/components/schemas/StructuredLogEvent'
EventType:
type: string
enum:
- unstructured_log
- structured_log
- metric
title: EventType
description: >-
The type of telemetry event being logged.
LogSeverity:
type: string
enum:
- verbose
- debug
- info
- warn
- error
- critical
title: LogSeverity
description: The severity level of a log message.
MetricEvent:
type: object
properties:
trace_id:
type: string
description: >-
Unique identifier for the trace this event belongs to
span_id:
type: string
description: >-
Unique identifier for the span this event belongs to
timestamp:
type: string
format: date-time
description: Timestamp when the event occurred
attributes:
type: object
additionalProperties:
oneOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
description: >-
(Optional) Key-value pairs containing additional metadata about the event
type:
$ref: '#/components/schemas/EventType'
const: metric
default: metric
description: Event type identifier set to METRIC
metric:
type: string
description: The name of the metric being measured
value:
oneOf:
- type: integer
- type: number
description: >-
The numeric value of the metric measurement
unit:
type: string
description: >-
The unit of measurement for the metric value
additionalProperties: false
required:
- trace_id
- span_id
- timestamp
- type
- metric
- value
- unit
title: MetricEvent
description: >-
A metric event containing a measured value.
SpanEndPayload:
type: object
properties:
type:
$ref: '#/components/schemas/StructuredLogType'
const: span_end
default: span_end
description: Payload type identifier set to SPAN_END
status:
$ref: '#/components/schemas/SpanStatus'
description: >-
The final status of the span indicating success or failure
additionalProperties: false
required:
- type
- status
title: SpanEndPayload
description: Payload for a span end event.
SpanStartPayload:
type: object
properties:
type:
$ref: '#/components/schemas/StructuredLogType'
const: span_start
default: span_start
description: >-
Payload type identifier set to SPAN_START
name:
type: string
description: >-
Human-readable name describing the operation this span represents
parent_span_id:
type: string
description: >-
(Optional) Unique identifier for the parent span, if this is a child span
additionalProperties: false
required:
- type
- name
title: SpanStartPayload
description: Payload for a span start event.
SpanStatus:
type: string
enum:
- ok
- error
title: SpanStatus
description: >-
The status of a span indicating whether it completed successfully or with
an error.
StructuredLogEvent:
type: object
properties:
trace_id:
type: string
description: >-
Unique identifier for the trace this event belongs to
span_id:
type: string
description: >-
Unique identifier for the span this event belongs to
timestamp:
type: string
format: date-time
description: Timestamp when the event occurred
attributes:
type: object
additionalProperties:
oneOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
description: >-
(Optional) Key-value pairs containing additional metadata about the event
type:
$ref: '#/components/schemas/EventType'
const: structured_log
default: structured_log
description: >-
Event type identifier set to STRUCTURED_LOG
payload:
oneOf:
- $ref: '#/components/schemas/SpanStartPayload'
- $ref: '#/components/schemas/SpanEndPayload'
discriminator:
propertyName: type
mapping:
span_start: '#/components/schemas/SpanStartPayload'
span_end: '#/components/schemas/SpanEndPayload'
description: >-
The structured payload data for the log event
additionalProperties: false
required:
- trace_id
- span_id
- timestamp
- type
- payload
title: StructuredLogEvent
description: >-
A structured log event containing typed payload data.
StructuredLogType:
type: string
enum:
- span_start
- span_end
title: StructuredLogType
description: >-
The type of structured log event payload.
UnstructuredLogEvent:
type: object
properties:
trace_id:
type: string
description: >-
Unique identifier for the trace this event belongs to
span_id:
type: string
description: >-
Unique identifier for the span this event belongs to
timestamp:
type: string
format: date-time
description: Timestamp when the event occurred
attributes:
type: object
additionalProperties:
oneOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
description: >-
(Optional) Key-value pairs containing additional metadata about the event
type:
$ref: '#/components/schemas/EventType'
const: unstructured_log
default: unstructured_log
description: >-
Event type identifier set to UNSTRUCTURED_LOG
message:
type: string
description: The log message text
severity:
$ref: '#/components/schemas/LogSeverity'
description: The severity level of the log message
additionalProperties: false
required:
- trace_id
- span_id
- timestamp
- type
- message
- severity
title: UnstructuredLogEvent
description: >-
An unstructured log event containing a simple text message.
LogEventRequest:
type: object
properties:
event:
$ref: '#/components/schemas/Event'
description: The event to log.
ttl_seconds:
type: integer
description: The time to live of the event.
additionalProperties: false
required:
- event
- ttl_seconds
title: LogEventRequest
InvokeToolRequest:
type: object
properties:
@ -13349,425 +12829,6 @@ components:
- hyperparam_search_config
- logger_config
title: SupervisedFineTuneRequest
QueryMetricsRequest:
type: object
properties:
start_time:
type: integer
description: The start time of the metric to query.
end_time:
type: integer
description: The end time of the metric to query.
granularity:
type: string
description: The granularity of the metric to query.
query_type:
type: string
enum:
- range
- instant
description: The type of query to perform.
label_matchers:
type: array
items:
type: object
properties:
name:
type: string
description: The name of the label to match
value:
type: string
description: The value to match against
operator:
type: string
enum:
- '='
- '!='
- =~
- '!~'
description: >-
The comparison operator to use for matching
default: '='
additionalProperties: false
required:
- name
- value
- operator
title: MetricLabelMatcher
description: >-
A matcher for filtering metrics by label values.
description: >-
The label matchers to apply to the metric.
additionalProperties: false
required:
- start_time
- query_type
title: QueryMetricsRequest
MetricDataPoint:
type: object
properties:
timestamp:
type: integer
description: >-
Unix timestamp when the metric value was recorded
value:
type: number
description: >-
The numeric value of the metric at this timestamp
unit:
type: string
additionalProperties: false
required:
- timestamp
- value
- unit
title: MetricDataPoint
description: >-
A single data point in a metric time series.
MetricLabel:
type: object
properties:
name:
type: string
description: The name of the label
value:
type: string
description: The value of the label
additionalProperties: false
required:
- name
- value
title: MetricLabel
description: A label associated with a metric.
MetricSeries:
type: object
properties:
metric:
type: string
description: The name of the metric
labels:
type: array
items:
$ref: '#/components/schemas/MetricLabel'
description: >-
List of labels associated with this metric series
values:
type: array
items:
$ref: '#/components/schemas/MetricDataPoint'
description: >-
List of data points in chronological order
additionalProperties: false
required:
- metric
- labels
- values
title: MetricSeries
description: A time series of metric data points.
QueryMetricsResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/MetricSeries'
description: >-
List of metric series matching the query criteria
additionalProperties: false
required:
- data
title: QueryMetricsResponse
description: >-
Response containing metric time series data.
QueryCondition:
type: object
properties:
key:
type: string
description: The attribute key to filter on
op:
$ref: '#/components/schemas/QueryConditionOp'
description: The comparison operator to apply
value:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: The value to compare against
additionalProperties: false
required:
- key
- op
- value
title: QueryCondition
description: A condition for filtering query results.
QueryConditionOp:
type: string
enum:
- eq
- ne
- gt
- lt
title: QueryConditionOp
description: >-
Comparison operators for query conditions.
QuerySpansRequest:
type: object
properties:
attribute_filters:
type: array
items:
$ref: '#/components/schemas/QueryCondition'
description: >-
The attribute filters to apply to the spans.
attributes_to_return:
type: array
items:
type: string
description: The attributes to return in the spans.
max_depth:
type: integer
description: The maximum depth of the tree.
additionalProperties: false
required:
- attribute_filters
- attributes_to_return
title: QuerySpansRequest
Span:
type: object
properties:
span_id:
type: string
description: Unique identifier for the span
trace_id:
type: string
description: >-
Unique identifier for the trace this span belongs to
parent_span_id:
type: string
description: >-
(Optional) Unique identifier for the parent span, if this is a child span
name:
type: string
description: >-
Human-readable name describing the operation this span represents
start_time:
type: string
format: date-time
description: Timestamp when the operation began
end_time:
type: string
format: date-time
description: >-
(Optional) Timestamp when the operation finished, if completed
attributes:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Key-value pairs containing additional metadata about the span
additionalProperties: false
required:
- span_id
- trace_id
- name
- start_time
title: Span
description: >-
A span representing a single operation within a trace.
QuerySpansResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/Span'
description: >-
List of spans matching the query criteria
additionalProperties: false
required:
- data
title: QuerySpansResponse
description: Response containing a list of spans.
SaveSpansToDatasetRequest:
type: object
properties:
attribute_filters:
type: array
items:
$ref: '#/components/schemas/QueryCondition'
description: >-
The attribute filters to apply to the spans.
attributes_to_save:
type: array
items:
type: string
description: The attributes to save to the dataset.
dataset_id:
type: string
description: >-
The ID of the dataset to save the spans to.
max_depth:
type: integer
description: The maximum depth of the tree.
additionalProperties: false
required:
- attribute_filters
- attributes_to_save
- dataset_id
title: SaveSpansToDatasetRequest
GetSpanTreeRequest:
type: object
properties:
attributes_to_return:
type: array
items:
type: string
description: The attributes to return in the tree.
max_depth:
type: integer
description: The maximum depth of the tree.
additionalProperties: false
title: GetSpanTreeRequest
SpanWithStatus:
type: object
properties:
span_id:
type: string
description: Unique identifier for the span
trace_id:
type: string
description: >-
Unique identifier for the trace this span belongs to
parent_span_id:
type: string
description: >-
(Optional) Unique identifier for the parent span, if this is a child span
name:
type: string
description: >-
Human-readable name describing the operation this span represents
start_time:
type: string
format: date-time
description: Timestamp when the operation began
end_time:
type: string
format: date-time
description: >-
(Optional) Timestamp when the operation finished, if completed
attributes:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Key-value pairs containing additional metadata about the span
status:
$ref: '#/components/schemas/SpanStatus'
description: >-
(Optional) The current status of the span
additionalProperties: false
required:
- span_id
- trace_id
- name
- start_time
title: SpanWithStatus
description: A span that includes status information.
QuerySpanTreeResponse:
type: object
properties:
data:
type: object
additionalProperties:
$ref: '#/components/schemas/SpanWithStatus'
description: >-
Dictionary mapping span IDs to spans with status information
additionalProperties: false
required:
- data
title: QuerySpanTreeResponse
description: >-
Response containing a tree structure of spans.
QueryTracesRequest:
type: object
properties:
attribute_filters:
type: array
items:
$ref: '#/components/schemas/QueryCondition'
description: >-
The attribute filters to apply to the traces.
limit:
type: integer
description: The limit of traces to return.
offset:
type: integer
description: The offset of the traces to return.
order_by:
type: array
items:
type: string
description: The order by of the traces to return.
additionalProperties: false
title: QueryTracesRequest
Trace:
type: object
properties:
trace_id:
type: string
description: Unique identifier for the trace
root_span_id:
type: string
description: >-
Unique identifier for the root span that started this trace
start_time:
type: string
format: date-time
description: Timestamp when the trace began
end_time:
type: string
format: date-time
description: >-
(Optional) Timestamp when the trace finished, if completed
additionalProperties: false
required:
- trace_id
- root_span_id
- start_time
title: Trace
description: >-
A trace representing the complete execution path of a request across multiple
operations.
QueryTracesResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/Trace'
description: >-
List of traces matching the query criteria
additionalProperties: false
required:
- data
title: QueryTracesResponse
description: Response containing a list of traces.
responses:
BadRequest400:
description: The request was invalid or malformed
@ -13881,8 +12942,6 @@ tags:
description: ''
- name: SyntheticDataGeneration (Coming Soon)
description: ''
- name: Telemetry
description: ''
- name: ToolGroups
description: ''
- name: ToolRuntime
@ -13912,7 +12971,6 @@ x-tagGroups:
- ScoringFunctions
- Shields
- SyntheticDataGeneration (Coming Soon)
- Telemetry
- ToolGroups
- ToolRuntime
- VectorDBs

View file

@ -16,15 +16,12 @@ from typing import (
from pydantic import BaseModel, Field
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
from llama_stack.models.llama.datatypes import Primitive
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
from llama_stack.schema_utils import json_schema_type, register_schema
# Add this constant near the top of the file, after the imports
DEFAULT_TTL_DAYS = 7
REQUIRED_SCOPE = "telemetry.read"
@json_schema_type
class SpanStatus(Enum):
@ -413,7 +410,6 @@ class QueryMetricsResponse(BaseModel):
@runtime_checkable
class Telemetry(Protocol):
@webmethod(route="/telemetry/events", method="POST", level=LLAMA_STACK_API_V1)
async def log_event(
self,
event: Event,
@ -426,14 +422,6 @@ class Telemetry(Protocol):
"""
...
@webmethod(
route="/telemetry/traces",
method="POST",
required_scope=REQUIRED_SCOPE,
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod(route="/telemetry/traces", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1ALPHA)
async def query_traces(
self,
attribute_filters: list[QueryCondition] | None = None,
@ -451,19 +439,6 @@ class Telemetry(Protocol):
"""
...
@webmethod(
route="/telemetry/traces/{trace_id:path}",
method="GET",
required_scope=REQUIRED_SCOPE,
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod(
route="/telemetry/traces/{trace_id:path}",
method="GET",
required_scope=REQUIRED_SCOPE,
level=LLAMA_STACK_API_V1ALPHA,
)
async def get_trace(self, trace_id: str) -> Trace:
"""Get a trace by its ID.
@ -472,19 +447,6 @@ class Telemetry(Protocol):
"""
...
@webmethod(
route="/telemetry/traces/{trace_id:path}/spans/{span_id:path}",
method="GET",
required_scope=REQUIRED_SCOPE,
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod(
route="/telemetry/traces/{trace_id:path}/spans/{span_id:path}",
method="GET",
required_scope=REQUIRED_SCOPE,
level=LLAMA_STACK_API_V1ALPHA,
)
async def get_span(self, trace_id: str, span_id: str) -> Span:
"""Get a span by its ID.
@ -494,19 +456,6 @@ class Telemetry(Protocol):
"""
...
@webmethod(
route="/telemetry/spans/{span_id:path}/tree",
method="POST",
deprecated=True,
required_scope=REQUIRED_SCOPE,
level=LLAMA_STACK_API_V1,
)
@webmethod(
route="/telemetry/spans/{span_id:path}/tree",
method="POST",
required_scope=REQUIRED_SCOPE,
level=LLAMA_STACK_API_V1ALPHA,
)
async def get_span_tree(
self,
span_id: str,
@ -522,14 +471,6 @@ class Telemetry(Protocol):
"""
...
@webmethod(
route="/telemetry/spans",
method="POST",
required_scope=REQUIRED_SCOPE,
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod(route="/telemetry/spans", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1ALPHA)
async def query_spans(
self,
attribute_filters: list[QueryCondition],
@ -545,8 +486,6 @@ class Telemetry(Protocol):
"""
...
@webmethod(route="/telemetry/spans/export", method="POST", deprecated=True, level=LLAMA_STACK_API_V1)
@webmethod(route="/telemetry/spans/export", method="POST", level=LLAMA_STACK_API_V1ALPHA)
async def save_spans_to_dataset(
self,
attribute_filters: list[QueryCondition],
@ -563,19 +502,6 @@ class Telemetry(Protocol):
"""
...
@webmethod(
route="/telemetry/metrics/{metric_name}",
method="POST",
required_scope=REQUIRED_SCOPE,
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod(
route="/telemetry/metrics/{metric_name}",
method="POST",
required_scope=REQUIRED_SCOPE,
level=LLAMA_STACK_API_V1ALPHA,
)
async def query_metrics(
self,
metric_name: str,

View file

@ -32,7 +32,7 @@ from llama_stack.providers.utils.sqlstore.sqlstore import (
sqlstore_impl,
)
logger = get_logger(name=__name__, category="openai::conversations")
logger = get_logger(name=__name__, category="openai_conversations")
class ConversationServiceConfig(BaseModel):

View file

@ -611,7 +611,7 @@ class InferenceRouter(Inference):
completion_text += "".join(choice_data["content_parts"])
# Add metrics to the chunk
if self.telemetry and chunk.usage:
if self.telemetry and hasattr(chunk, "usage") and chunk.usage:
metrics = self._construct_metrics(
prompt_tokens=chunk.usage.prompt_tokens,
completion_tokens=chunk.usage.completion_tokens,

View file

@ -98,7 +98,10 @@ class DiskDistributionRegistry(DistributionRegistry):
existing_obj = await self.get(obj.type, obj.identifier)
# dont register if the object's providerid already exists
if existing_obj and existing_obj.provider_id == obj.provider_id:
return False
raise ValueError(
f"Provider '{obj.provider_id}' is already registered."
f"Unregister the existing provider first before registering it again."
)
await self.kvstore.set(
KEY_FORMAT.format(type=obj.type, identifier=obj.identifier),

View file

@ -3,3 +3,5 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from .watsonx import get_distribution_template # noqa: F401

View file

@ -3,44 +3,33 @@ distribution_spec:
description: Use watsonx for running LLM inference
providers:
inference:
- provider_id: watsonx
provider_type: remote::watsonx
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
- provider_type: remote::watsonx
- provider_type: inline::sentence-transformers
vector_io:
- provider_id: faiss
provider_type: inline::faiss
- provider_type: inline::faiss
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
- provider_type: inline::llama-guard
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
- provider_type: inline::meta-reference
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
- provider_type: inline::meta-reference
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
- provider_type: inline::meta-reference
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
- provider_id: localfs
provider_type: inline::localfs
- provider_type: remote::huggingface
- provider_type: inline::localfs
scoring:
- provider_id: basic
provider_type: inline::basic
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
- provider_id: braintrust
provider_type: inline::braintrust
- provider_type: inline::basic
- provider_type: inline::llm-as-judge
- provider_type: inline::braintrust
tool_runtime:
- provider_type: remote::brave-search
- provider_type: remote::tavily-search
- provider_type: inline::rag-runtime
- provider_type: remote::model-context-protocol
files:
- provider_type: inline::localfs
image_type: venv
additional_pip_packages:
- aiosqlite
- sqlalchemy[asyncio]
- aiosqlite
- aiosqlite

View file

@ -4,13 +4,13 @@ apis:
- agents
- datasetio
- eval
- files
- inference
- safety
- scoring
- telemetry
- tool_runtime
- vector_io
- files
providers:
inference:
- provider_id: watsonx
@ -19,8 +19,6 @@ providers:
url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
api_key: ${env.WATSONX_API_KEY:=}
project_id: ${env.WATSONX_PROJECT_ID:=}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
vector_io:
- provider_id: faiss
provider_type: inline::faiss
@ -48,7 +46,7 @@ providers:
provider_type: inline::meta-reference
config:
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
sinks: ${env.TELEMETRY_SINKS:=sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/trace_store.db
otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
eval:
@ -109,102 +107,7 @@ metadata_store:
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/inference_store.db
models:
- metadata: {}
model_id: meta-llama/llama-3-3-70b-instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-3-70b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-3-70b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/llama-2-13b-chat
provider_id: watsonx
provider_model_id: meta-llama/llama-2-13b-chat
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-2-13b
provider_id: watsonx
provider_model_id: meta-llama/llama-2-13b-chat
model_type: llm
- metadata: {}
model_id: meta-llama/llama-3-1-70b-instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-1-70b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-70B-Instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-1-70b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/llama-3-1-8b-instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-1-8b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-1-8b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/llama-3-2-11b-vision-instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-2-11b-vision-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-2-11b-vision-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/llama-3-2-1b-instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-2-1b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-1B-Instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-2-1b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/llama-3-2-3b-instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-2-3b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-2-3b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/llama-3-2-90b-vision-instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-2-90b-vision-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-2-90b-vision-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/llama-guard-3-11b-vision
provider_id: watsonx
provider_model_id: meta-llama/llama-guard-3-11b-vision
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-Guard-3-11B-Vision
provider_id: watsonx
provider_model_id: meta-llama/llama-guard-3-11b-vision
model_type: llm
- metadata:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
provider_id: sentence-transformers
model_type: embedding
models: []
shields: []
vector_dbs: []
datasets: []

View file

@ -4,17 +4,11 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from pathlib import Path
from llama_stack.apis.models import ModelType
from llama_stack.core.datatypes import BuildProvider, ModelInput, Provider, ToolGroupInput
from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings, get_model_registry
from llama_stack.core.datatypes import BuildProvider, Provider, ToolGroupInput
from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings
from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig
from llama_stack.providers.inline.inference.sentence_transformers import (
SentenceTransformersInferenceConfig,
)
from llama_stack.providers.remote.inference.watsonx import WatsonXConfig
from llama_stack.providers.remote.inference.watsonx.models import MODEL_ENTRIES
def get_distribution_template(name: str = "watsonx") -> DistributionTemplate:
@ -52,15 +46,6 @@ def get_distribution_template(name: str = "watsonx") -> DistributionTemplate:
config=WatsonXConfig.sample_run_config(),
)
embedding_provider = Provider(
provider_id="sentence-transformers",
provider_type="inline::sentence-transformers",
config=SentenceTransformersInferenceConfig.sample_run_config(),
)
available_models = {
"watsonx": MODEL_ENTRIES,
}
default_tool_groups = [
ToolGroupInput(
toolgroup_id="builtin::websearch",
@ -72,36 +57,25 @@ def get_distribution_template(name: str = "watsonx") -> DistributionTemplate:
),
]
embedding_model = ModelInput(
model_id="all-MiniLM-L6-v2",
provider_id="sentence-transformers",
model_type=ModelType.embedding,
metadata={
"embedding_dimension": 384,
},
)
files_provider = Provider(
provider_id="meta-reference-files",
provider_type="inline::localfs",
config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"),
)
default_models, _ = get_model_registry(available_models)
return DistributionTemplate(
name=name,
distro_type="remote_hosted",
description="Use watsonx for running LLM inference",
container_image=None,
template_path=Path(__file__).parent / "doc_template.md",
template_path=None,
providers=providers,
available_models_by_provider=available_models,
run_configs={
"run.yaml": RunConfigSettings(
provider_overrides={
"inference": [inference_provider, embedding_provider],
"inference": [inference_provider],
"files": [files_provider],
},
default_models=default_models + [embedding_model],
default_models=[],
default_tool_groups=default_tool_groups,
),
},

View file

@ -31,12 +31,17 @@ CATEGORIES = [
"client",
"telemetry",
"openai_responses",
"openai_conversations",
"testing",
"providers",
"models",
"files",
"vector_io",
"tool_runtime",
"cli",
"post_training",
"scoring",
"tests",
]
UNCATEGORIZED = "uncategorized"
@ -261,11 +266,12 @@ def get_logger(
if root_category in _category_levels:
log_level = _category_levels[root_category]
else:
log_level = _category_levels.get("root", DEFAULT_LOG_LEVEL)
if category != UNCATEGORIZED:
logging.warning(
f"Unknown logging category: {category}. Falling back to default 'root' level: {log_level}"
raise ValueError(
f"Unknown logging category: {category}. To resolve, choose a valid category from the CATEGORIES list "
f"or add it to the CATEGORIES list. Available categories: {CATEGORIES}"
)
log_level = _category_levels.get("root", DEFAULT_LOG_LEVEL)
logger.setLevel(log_level)
return logging.LoggerAdapter(logger, {"category": category})

View file

@ -11,19 +11,13 @@
# top-level folder for each specific model found within the models/ directory at
# the top-level of this source tree.
import json
import textwrap
from pathlib import Path
from pydantic import BaseModel, Field
from llama_stack.models.llama.datatypes import (
RawContent,
RawMediaItem,
RawMessage,
RawTextItem,
StopReason,
ToolCall,
ToolPromptFormat,
)
from llama_stack.models.llama.llama4.tokenizer import Tokenizer
@ -175,25 +169,6 @@ def llama3_1_builtin_code_interpreter_dialog(tool_prompt_format=ToolPromptFormat
return messages
def llama3_1_builtin_tool_call_with_image_dialog(
    tool_prompt_format=ToolPromptFormat.json,
):
    """Build a builtin-tools dialog whose first user turn includes an image.

    The image bytes are read from ``llama3/dog.jpg``, resolved relative to the
    directory that contains this module. The dialog consists of the
    builtin-tools system messages, a user turn with the image and a question,
    an assistant answer, and a follow-up user request for a web search.

    Args:
        tool_prompt_format: Prompt format handed to ``LLama31Interface``.

    Returns:
        The accumulated messages produced by the interface.
    """
    image_bytes = (Path(__file__).parent / "llama3/dog.jpg").read_bytes()

    interface = LLama31Interface(tool_prompt_format)

    dialog = interface.system_messages(**system_message_builtin_tools_only())
    dialog += interface.user_message(
        content=[
            RawMediaItem(data=image_bytes),
            RawTextItem(text="What is this dog breed?"),
        ]
    )
    dialog += interface.assistant_response_messages(
        "Based on the description of the dog in the image, it appears to be a small breed dog, possibly a terrier mix",
        StopReason.end_of_turn,
    )
    dialog += interface.user_message("Search the web for some food recommendations for the indentified breed")
    return dialog
def llama3_1_custom_tool_call_dialog(tool_prompt_format=ToolPromptFormat.json):
interface = LLama31Interface(tool_prompt_format)
@ -202,35 +177,6 @@ def llama3_1_custom_tool_call_dialog(tool_prompt_format=ToolPromptFormat.json):
return messages
def llama3_1_e2e_tool_call_dialog(tool_prompt_format=ToolPromptFormat.json):
    """Build an end-to-end custom tool-call dialog.

    The dialog holds the custom-tools system messages, a user request, an
    assistant message carrying a ``trending_songs`` tool call, and a final
    message (role ``"assistant"``) whose content is the JSON-encoded tool
    output.

    Args:
        tool_prompt_format: Prompt format handed to ``LLama31Interface``.

    Returns:
        The accumulated messages.
    """
    serialized_songs = json.dumps(["great song1", "awesome song2", "cool song3"])
    interface = LLama31Interface(tool_prompt_format)

    dialog = interface.system_messages(**system_message_custom_tools_only())
    dialog += interface.user_message(content="Use tools to get latest trending songs")

    # Assistant turn that issues the tool call and stops at end-of-message.
    tool_invocation = RawMessage(
        role="assistant",
        content="",
        stop_reason=StopReason.end_of_message,
        tool_calls=[
            ToolCall(
                call_id="call_id",
                tool_name="trending_songs",
                arguments={"n": "10", "genre": "latest"},
            )
        ],
    )
    # Message carrying the serialized tool output back into the dialog.
    tool_output = RawMessage(
        role="assistant",
        content=serialized_songs,
    )

    dialog.append(tool_invocation)
    dialog.append(tool_output)
    return dialog
def llama3_2_user_assistant_conversation():
return UseCase(
title="User and assistant conversation",

View file

@ -7,8 +7,6 @@
import copy
import json
import re
import secrets
import string
import uuid
import warnings
from collections.abc import AsyncGenerator
@ -84,11 +82,6 @@ from llama_stack.providers.utils.telemetry import tracing
from .persistence import AgentPersistence
from .safety import SafetyException, ShieldRunnerMixin
def make_random_string(length: int = 8):
    """Return a random string of ``length`` ASCII letters and digits.

    Each character is drawn independently with ``secrets.choice``.
    """
    alphabet = string.ascii_letters + string.digits
    picked = [secrets.choice(alphabet) for _ in range(length)]
    return "".join(picked)
TOOLS_ATTACHMENT_KEY_REGEX = re.compile(r"__tools_attachment__=(\{.*?\})")
MEMORY_QUERY_TOOL = "knowledge_search"
WEB_SEARCH_TOOL = "web_search"

View file

@ -269,7 +269,7 @@ class OpenAIResponsesImpl:
response_tools=tools,
temperature=temperature,
response_format=response_format,
inputs=input,
inputs=all_input,
)
# Create orchestrator and delegate streaming logic

View file

@ -175,6 +175,8 @@ class StreamingResponseOrchestrator:
):
yield stream_event
messages = next_turn_messages
if not function_tool_calls and not non_function_tool_calls:
break
@ -187,9 +189,7 @@ class StreamingResponseOrchestrator:
logger.info(f"Exiting inference loop since iteration count({n_iter}) exceeds {self.max_infer_iters=}")
break
messages = next_turn_messages
self.final_messages = messages.copy() + [current_response.choices[0].message]
self.final_messages = messages.copy()
# Create final response
final_response = OpenAIResponseObject(
@ -232,9 +232,11 @@ class StreamingResponseOrchestrator:
non_function_tool_calls.append(tool_call)
else:
logger.info(f"Approval denied for {tool_call.id} on {tool_call.function.name}")
next_turn_messages.pop()
else:
logger.info(f"Requesting approval for {tool_call.id} on {tool_call.function.name}")
approvals.append(tool_call)
next_turn_messages.pop()
else:
non_function_tool_calls.append(tool_call)

View file

@ -8,8 +8,6 @@ import asyncio
import base64
import io
import mimetypes
import secrets
import string
from typing import Any
import httpx
@ -52,10 +50,6 @@ from .context_retriever import generate_rag_query
log = get_logger(name=__name__, category="tool_runtime")
def make_random_string(length: int = 8):
    """Return ``length`` random characters taken from ASCII letters and digits.

    Characters are chosen one at a time with ``secrets.choice``.
    """
    charset = string.ascii_letters + string.digits
    chars = []
    for _ in range(length):
        chars.append(secrets.choice(charset))
    return "".join(chars)
async def raw_data_from_doc(doc: RAGDocument) -> tuple[bytes, str]:
"""Get raw binary data and mime type from a RAGDocument for file upload."""
if isinstance(doc.content, URL):

View file

@ -268,7 +268,7 @@ Available Models:
api=Api.inference,
adapter_type="watsonx",
provider_type="remote::watsonx",
pip_packages=["ibm_watsonx_ai"],
pip_packages=["litellm"],
module="llama_stack.providers.remote.inference.watsonx",
config_class="llama_stack.providers.remote.inference.watsonx.WatsonXConfig",
provider_data_validator="llama_stack.providers.remote.inference.watsonx.WatsonXProviderDataValidator",

View file

@ -1,217 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import warnings
from collections.abc import AsyncGenerator
from typing import Any
from openai import AsyncStream
from openai.types.chat.chat_completion import (
Choice as OpenAIChoice,
)
from openai.types.completion import Completion as OpenAICompletion
from openai.types.completion_choice import Logprobs as OpenAICompletionLogprobs
from llama_stack.apis.inference import (
ChatCompletionRequest,
CompletionRequest,
CompletionResponse,
CompletionResponseStreamChunk,
GreedySamplingStrategy,
JsonSchemaResponseFormat,
TokenLogProbs,
TopKSamplingStrategy,
TopPSamplingStrategy,
)
from llama_stack.providers.utils.inference.openai_compat import (
_convert_openai_finish_reason,
convert_message_to_openai_dict_new,
convert_tooldef_to_openai_tool,
)
async def convert_chat_completion_request(
    request: ChatCompletionRequest,
    n: int = 1,
) -> dict:
    """
    Translate a ChatCompletionRequest into an OpenAI API-compatible dictionary.

    Field mapping:
      model               -> model
      messages            -> messages
      sampling_params     TODO(mattf): review strategy
        strategy=greedy   -> nvext.top_k = -1
        strategy=top_p    -> nvext.top_k = -1, top_p = top_p, temperature = temperature
        strategy=top_k    -> nvext.top_k = top_k
        max_tokens        -> max_tokens
        repetition_penalty -> nvext.repetition_penalty
      response_format     -> JsonSchemaResponseFormat only: nvext["guided_json"] = json_schema
                             (GrammarResponseFormat: TODO(mf))
      tools               -> tools
      tool_choice         -> tool_choice (only sent together with tools)
      tool_prompt_format  -> TBD
      stream              -> stream
      logprobs            -> logprobs = True, top_logprobs = logprobs.top_k

    Args:
        request: The chat completion request to convert.
        n: Value stored under the ``n`` key of the payload.

    Returns:
        The payload dictionary; provider extensions live under
        ``extra_body["nvext"]``.

    Raises:
        ValueError: If a response format other than JsonSchemaResponseFormat is
            given, or the sampling strategy is not one of the handled types.
    """
    response_format = request.response_format
    if response_format and not isinstance(response_format, JsonSchemaResponseFormat):
        raise ValueError(
            f"Unsupported response format: {request.response_format}. Only JsonSchemaResponseFormat is supported."
        )

    # nvext is stored by reference inside the payload, so later additions to it
    # show up under payload["extra_body"]["nvext"].
    nvext = {}
    payload: dict[str, Any] = {
        "model": request.model,
        "messages": [await convert_message_to_openai_dict_new(message) for message in request.messages],
        "stream": request.stream,
        "n": n,
        "extra_body": {"nvext": nvext},
        "extra_headers": {
            b"User-Agent": b"llama-stack: nvidia-inference-adapter",
        },
    }

    if response_format:
        # server bug - setting guided_json changes the behavior of response_format resulting in an error
        # payload.update(response_format="json_object")
        nvext["guided_json"] = response_format.json_schema

    if request.tools:
        payload["tools"] = [convert_tooldef_to_openai_tool(tool) for tool in request.tools]
        # we cannot include tool_choice w/o tools, server will complain
        tool_choice = request.tool_config.tool_choice
        if tool_choice:
            payload["tool_choice"] = tool_choice.value

    if request.logprobs:
        payload["logprobs"] = True
        payload["top_logprobs"] = request.logprobs.top_k

    sampling = request.sampling_params
    if not sampling:
        return payload

    nvext["repetition_penalty"] = sampling.repetition_penalty

    if sampling.max_tokens:
        payload["max_tokens"] = sampling.max_tokens

    strategy = sampling.strategy
    if isinstance(strategy, TopPSamplingStrategy):
        nvext["top_k"] = -1
        payload["top_p"] = strategy.top_p
        payload["temperature"] = strategy.temperature
    elif isinstance(strategy, TopKSamplingStrategy):
        if strategy.top_k != -1 and strategy.top_k < 1:
            warnings.warn("top_k must be -1 or >= 1", stacklevel=2)
        nvext["top_k"] = strategy.top_k
    elif isinstance(strategy, GreedySamplingStrategy):
        nvext["top_k"] = -1
    else:
        raise ValueError(f"Unsupported sampling strategy: {strategy}")

    return payload
def convert_completion_request(
    request: CompletionRequest,
    n: int = 1,
) -> dict:
    """
    Convert a CompletionRequest to an OpenAI API-compatible dictionary.

    Field mapping:
      model               -> model
      content             -> prompt
      sampling_params     TODO(mattf): review strategy
        strategy=greedy   -> nvext.top_k = -1
        strategy=top_p    -> nvext.top_k = -1, top_p = top_p, temperature = temperature
        strategy=top_k    -> nvext.top_k = top_k
        max_tokens        -> max_tokens
        repetition_penalty -> nvext.repetition_penalty
      response_format     -> nvext.guided_json
      stream              -> stream
      logprobs.top_k      -> logprobs

    Args:
        request: The completion request to convert.
        n: Value stored under the ``n`` key of the payload.

    Returns:
        The payload dictionary; provider extensions live under
        ``extra_body["nvext"]``.
    """
    # nvext is stored by reference inside the payload, so later additions to it
    # show up under payload["extra_body"]["nvext"].
    nvext = {}
    payload: dict[str, Any] = dict(
        model=request.model,
        prompt=request.content,
        stream=request.stream,
        extra_body=dict(nvext=nvext),
        extra_headers={
            b"User-Agent": b"llama-stack: nvidia-inference-adapter",
        },
        n=n,
    )

    if request.response_format:
        # this is not openai compliant, it is a nim extension
        nvext.update(guided_json=request.response_format.json_schema)

    if request.logprobs:
        payload.update(logprobs=request.logprobs.top_k)

    if request.sampling_params:
        nvext.update(repetition_penalty=request.sampling_params.repetition_penalty)

        if request.sampling_params.max_tokens:
            payload.update(max_tokens=request.sampling_params.max_tokens)

        # Dispatch on the strategy object's type, as convert_chat_completion_request
        # does; the strategy-specific settings are read from the strategy itself.
        # Comparing the strategy object against plain strings never selected a branch.
        strategy = request.sampling_params.strategy
        if isinstance(strategy, TopPSamplingStrategy):
            nvext.update(top_k=-1)
            payload.update(top_p=strategy.top_p)
            payload.update(temperature=strategy.temperature)
        elif isinstance(strategy, TopKSamplingStrategy):
            if strategy.top_k != -1 and strategy.top_k < 1:
                warnings.warn("top_k must be -1 or >= 1", stacklevel=2)
            nvext.update(top_k=strategy.top_k)
        elif isinstance(strategy, GreedySamplingStrategy):
            nvext.update(top_k=-1)
        # Any other strategy is left unmapped, as before, rather than raising.

    return payload
def _convert_openai_completion_logprobs(
logprobs: OpenAICompletionLogprobs | None,
) -> list[TokenLogProbs] | None:
"""
Convert an OpenAI CompletionLogprobs into a list of TokenLogProbs.
"""
if not logprobs:
return None
return [TokenLogProbs(logprobs_by_token=logprobs) for logprobs in logprobs.top_logprobs]
def convert_openai_completion_choice(
    choice: OpenAIChoice,
) -> CompletionResponse:
    """
    Build a CompletionResponse from an OpenAI completion choice.

    The choice's ``text`` becomes the content; ``finish_reason`` and
    ``logprobs`` are passed through their respective converters.
    """
    stop_reason = _convert_openai_finish_reason(choice.finish_reason)
    token_logprobs = _convert_openai_completion_logprobs(choice.logprobs)
    return CompletionResponse(
        content=choice.text,
        stop_reason=stop_reason,
        logprobs=token_logprobs,
    )
async def convert_openai_completion_stream(
    stream: AsyncStream[OpenAICompletion],
) -> AsyncGenerator[CompletionResponseStreamChunk, None]:
    """
    Convert a stream of OpenAI Completions into a stream
    of CompletionResponseStreamChunks.

    Only the first choice of each incoming chunk is used: its ``text`` becomes
    the delta, and its ``finish_reason`` and ``logprobs`` are passed through
    their respective converters.
    """
    async for chunk in stream:
        choice = chunk.choices[0]
        yield CompletionResponseStreamChunk(
            delta=choice.text,
            stop_reason=_convert_openai_finish_reason(choice.finish_reason),
            logprobs=_convert_openai_completion_logprobs(choice.logprobs),
        )

View file

@ -4,53 +4,8 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import httpx
from llama_stack.log import get_logger
from . import NVIDIAConfig
logger = get_logger(name=__name__, category="inference::nvidia")
def _is_nvidia_hosted(config: NVIDIAConfig) -> bool:
return "integrate.api.nvidia.com" in config.url
async def _get_health(url: str) -> tuple[bool, bool]:
    """
    Query {url}/v1/health/live and {url}/v1/health/ready, in that order.

    Args:
        url (str): URL of the server

    Returns:
        Tuple[bool, bool]: (is_live, is_ready), each True when the matching
        endpoint answered with HTTP status 200
    """
    async with httpx.AsyncClient() as client:
        live_response = await client.get(f"{url}/v1/health/live")
        ready_response = await client.get(f"{url}/v1/health/ready")
        is_live = live_response.status_code == 200
        is_ready = ready_response.status_code == 200
        return is_live, is_ready
async def check_health(config: NVIDIAConfig) -> None:
    """
    Check that the server at ``config.url`` is running and ready.

    The check is skipped when the config points at the NVIDIA-hosted endpoint.

    Args:
        config (NVIDIAConfig): provider configuration; its ``url`` is probed

    Raises:
        ConnectionError: If the server is not running, is not ready, or the
            connection attempt fails
    """
    if _is_nvidia_hosted(config):
        return

    logger.info("Checking NVIDIA NIM health...")
    try:
        is_live, is_ready = await _get_health(config.url)
        if not is_live:
            raise ConnectionError("NVIDIA NIM is not running")
        if not is_ready:
            raise ConnectionError("NVIDIA NIM is not ready")
        # TODO(mf): should we wait for the server to be ready?
    except httpx.ConnectError as e:
        raise ConnectionError(f"Failed to connect to NVIDIA NIM: {e}") from e

View file

@ -4,19 +4,12 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.apis.inference import Inference
from .config import WatsonXConfig
async def get_adapter_impl(config: WatsonXConfig, _deps) -> Inference:
# import dynamically so `llama stack build` does not fail due to missing dependencies
async def get_adapter_impl(config: WatsonXConfig, _deps):
# import dynamically so the import is used only when it is needed
from .watsonx import WatsonXInferenceAdapter
if not isinstance(config, WatsonXConfig):
raise RuntimeError(f"Unexpected config type: {type(config)}")
adapter = WatsonXInferenceAdapter(config)
return adapter
__all__ = ["get_adapter_impl", "WatsonXConfig"]

View file

@ -7,16 +7,18 @@
import os
from typing import Any
from pydantic import BaseModel, Field, SecretStr
from pydantic import BaseModel, ConfigDict, Field, SecretStr
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack.schema_utils import json_schema_type
class WatsonXProviderDataValidator(BaseModel):
url: str
api_key: str
project_id: str
model_config = ConfigDict(
from_attributes=True,
extra="forbid",
)
watsonx_api_key: str | None
@json_schema_type
@ -25,13 +27,17 @@ class WatsonXConfig(RemoteInferenceProviderConfig):
default_factory=lambda: os.getenv("WATSONX_BASE_URL", "https://us-south.ml.cloud.ibm.com"),
description="A base url for accessing the watsonx.ai",
)
# This seems like it should be required, but none of the other remote inference
# providers require it, so this is optional here too for consistency.
# The OpenAIConfig uses default=None instead, so this is following that precedent.
api_key: SecretStr | None = Field(
default_factory=lambda: os.getenv("WATSONX_API_KEY"),
description="The watsonx API key",
default=None,
description="The watsonx.ai API key",
)
# As above, this is optional here too for consistency.
project_id: str | None = Field(
default_factory=lambda: os.getenv("WATSONX_PROJECT_ID"),
description="The Project ID key",
default=None,
description="The watsonx.ai project ID",
)
timeout: int = Field(
default=60,

View file

@ -1,47 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.models.llama.sku_types import CoreModelId
from llama_stack.providers.utils.inference.model_registry import build_hf_repo_model_entry
# (provider model id, core model id) pairs, in the order the entries are built.
_WATSONX_MODEL_PAIRS = [
    ("meta-llama/llama-3-3-70b-instruct", CoreModelId.llama3_3_70b_instruct),
    ("meta-llama/llama-2-13b-chat", CoreModelId.llama2_13b),
    ("meta-llama/llama-3-1-70b-instruct", CoreModelId.llama3_1_70b_instruct),
    ("meta-llama/llama-3-1-8b-instruct", CoreModelId.llama3_1_8b_instruct),
    ("meta-llama/llama-3-2-11b-vision-instruct", CoreModelId.llama3_2_11b_vision_instruct),
    ("meta-llama/llama-3-2-1b-instruct", CoreModelId.llama3_2_1b_instruct),
    ("meta-llama/llama-3-2-3b-instruct", CoreModelId.llama3_2_3b_instruct),
    ("meta-llama/llama-3-2-90b-vision-instruct", CoreModelId.llama3_2_90b_vision_instruct),
    ("meta-llama/llama-guard-3-11b-vision", CoreModelId.llama_guard_3_11b_vision),
]

# One entry per pair, built from the provider model id and the core model id's value.
MODEL_ENTRIES = [
    build_hf_repo_model_entry(provider_model_id, core_model_id.value)
    for provider_model_id, core_model_id in _WATSONX_MODEL_PAIRS
]

View file

@ -4,240 +4,120 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from collections.abc import AsyncGenerator, AsyncIterator
from typing import Any
from ibm_watsonx_ai.foundation_models import Model
from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams
from openai import AsyncOpenAI
import requests
from llama_stack.apis.inference import (
ChatCompletionRequest,
CompletionRequest,
GreedySamplingStrategy,
Inference,
OpenAIChatCompletion,
OpenAIChatCompletionChunk,
OpenAICompletion,
OpenAIEmbeddingsResponse,
OpenAIMessageParam,
OpenAIResponseFormatParam,
TopKSamplingStrategy,
TopPSamplingStrategy,
)
from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
from llama_stack.providers.utils.inference.openai_compat import (
prepare_openai_completion_params,
)
from llama_stack.providers.utils.inference.prompt_adapter import (
chat_completion_request_to_prompt,
completion_request_to_prompt,
request_has_media,
)
from . import WatsonXConfig
from .models import MODEL_ENTRIES
logger = get_logger(name=__name__, category="inference::watsonx")
from llama_stack.apis.inference import ChatCompletionRequest
from llama_stack.apis.models import Model
from llama_stack.apis.models.models import ModelType
from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig
from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
# Note on structured output
# WatsonX returns responses with a json embedded into a string.
# Examples:
class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
_model_cache: dict[str, Model] = {}
# ChatCompletionResponse(completion_message=CompletionMessage(content='```json\n{\n
# "first_name": "Michael",\n "last_name": "Jordan",\n'...)
# Not even a valid JSON, but we can still extract the JSON from the content
def __init__(self, config: WatsonXConfig):
    """Initialise the LiteLLM mixin for the "watsonx" provider and keep the config.

    :param config: adapter configuration. When ``config.api_key`` is set, its
        secret value is unwrapped and handed to the mixin; otherwise ``None``
        is passed.
    """
    configured_key = None
    if config.api_key:
        configured_key = config.api_key.get_secret_value()
    LiteLLMOpenAIMixin.__init__(
        self,
        litellm_provider_name="watsonx",
        api_key_from_config=configured_key,
        provider_data_api_key_field="watsonx_api_key",
    )
    # Assigned after the mixin initialiser has run, as in the original ordering
    self.available_models = None
    self.config = config
# CompletionResponse(content=' \nThe best answer is $\\boxed{\\{"name": "Michael Jordan",
# "year_born": "1963", "year_retired": "2003"\\}}$')
# Find the start of the boxed content
def get_base_url(self) -> str:
    """Return the URL stored on this adapter's config (``self.config.url``)."""
    base_url = self.config.url
    return base_url
async def _get_params(self, request: ChatCompletionRequest) -> dict[str, Any]:
# Get base parameters from parent
params = await super()._get_params(request)
class WatsonXInferenceAdapter(Inference, ModelRegistryHelper):
def __init__(self, config: WatsonXConfig) -> None:
    """Register the static model entries and remember the adapter config.

    :param config: adapter configuration; its ``url`` is logged and its
        ``project_id`` is cached on the instance.
    """
    ModelRegistryHelper.__init__(self, model_entries=MODEL_ENTRIES)
    logger.info(f"Initializing watsonx InferenceAdapter({config.url})...")
    # The OpenAI client slot starts empty
    self._openai_client: AsyncOpenAI | None = None
    self._config = config
    self._project_id = config.project_id
def _get_client(self, model_id) -> Model:
    """Create a ``Model`` client for ``model_id`` from the stored config.

    :param model_id: model identifier passed through to ``Model``.
    :return: a new ``Model`` built with the configured URL, API key
        (``None`` when no key is configured) and project id.
    """
    cfg = self._config
    secret = cfg.api_key
    credentials = {
        "url": cfg.url,
        "apikey": secret.get_secret_value() if secret else None,
    }
    return Model(model_id=model_id, credentials=credentials, project_id=cfg.project_id)
def _get_openai_client(self) -> AsyncOpenAI:
    """Return the adapter's ``AsyncOpenAI`` client, creating it on first use.

    The client targets ``<config url>/openai/v1``. The configured API key is
    unwrapped with ``get_secret_value()`` before being handed to the client,
    so the client receives the raw key string rather than the secret wrapper
    object (``None`` is passed when no key is configured).

    :return: the cached ``AsyncOpenAI`` instance.
    """
    if self._openai_client is None:
        secret = self._config.api_key
        self._openai_client = AsyncOpenAI(
            base_url=f"{self._config.url}/openai/v1",
            # Same unwrapping as _get_client: pass the key text, not the wrapper
            api_key=secret.get_secret_value() if secret else None,
        )
    return self._openai_client
# Assemble the generation inputs for a chat or completion request: a rendered
# "prompt" plus a "params" dict keyed by GenParams meta-names.
async def _get_params(self, request: ChatCompletionRequest | CompletionRequest) -> dict:
input_dict = {"params": {}}
media_present = request_has_media(request)
llama_model = self.get_llama_model(request.model)
# Chat requests are rendered with the resolved llama model; completion requests are rendered directly
if isinstance(request, ChatCompletionRequest):
input_dict["prompt"] = await chat_completion_request_to_prompt(request, llama_model)
else:
# NOTE(review): the message names "Together" although the surrounding code is the watsonx adapter;
# this looks copy-pasted, confirm and correct the wording.
assert not media_present, "Together does not support media for Completion requests"
input_dict["prompt"] = await completion_request_to_prompt(request)
# Copy over only the sampling settings that are present on the request
if request.sampling_params:
if request.sampling_params.strategy:
input_dict["params"][GenParams.DECODING_METHOD] = request.sampling_params.strategy.type
if request.sampling_params.max_tokens:
input_dict["params"][GenParams.MAX_NEW_TOKENS] = request.sampling_params.max_tokens
if request.sampling_params.repetition_penalty:
input_dict["params"][GenParams.REPETITION_PENALTY] = request.sampling_params.repetition_penalty
# Strategy-specific knobs: top-p carries top_p and temperature, top-k carries top_k
if isinstance(request.sampling_params.strategy, TopPSamplingStrategy):
input_dict["params"][GenParams.TOP_P] = request.sampling_params.strategy.top_p
input_dict["params"][GenParams.TEMPERATURE] = request.sampling_params.strategy.temperature
if isinstance(request.sampling_params.strategy, TopKSamplingStrategy):
input_dict["params"][GenParams.TOP_K] = request.sampling_params.strategy.top_k
# Greedy decoding is expressed as a temperature of 0.0
if isinstance(request.sampling_params.strategy, GreedySamplingStrategy):
input_dict["params"][GenParams.TEMPERATURE] = 0.0
# Stop generation at the end-of-text marker
input_dict["params"][GenParams.STOP_SEQUENCES] = ["<|endoftext|>"]
# Shallow copy of the assembled inputs
params = {
**input_dict,
}
# Add watsonx.ai specific parameters
params["project_id"] = self.config.project_id
params["time_limit"] = self.config.timeout
return params
async def openai_embeddings(
    self,
    model: str,
    input: str | list[str],
    encoding_format: str | None = "float",
    dimensions: int | None = None,
    user: str | None = None,
) -> OpenAIEmbeddingsResponse:
    """Embeddings entry point; this implementation is a stub.

    All arguments are accepted and ignored.

    :raises NotImplementedError: always.
    """
    raise NotImplementedError
# Copied from OpenAIMixin
async def check_model_availability(self, model: str) -> bool:
"""
Check if a specific model is available from the provider's /v1/models.
async def openai_completion(
    self,
    model: str,
    prompt: str | list[str] | list[int] | list[list[int]],
    best_of: int | None = None,
    echo: bool | None = None,
    frequency_penalty: float | None = None,
    logit_bias: dict[str, float] | None = None,
    logprobs: bool | None = None,
    max_tokens: int | None = None,
    n: int | None = None,
    presence_penalty: float | None = None,
    seed: int | None = None,
    stop: str | list[str] | None = None,
    stream: bool | None = None,
    stream_options: dict[str, Any] | None = None,
    temperature: float | None = None,
    top_p: float | None = None,
    user: str | None = None,
    guided_choice: list[str] | None = None,
    prompt_logprobs: int | None = None,
    suffix: str | None = None,
) -> OpenAICompletion:
    """Run an OpenAI-style text completion through the adapter's OpenAI client.

    The model name is resolved via ``self.model_store`` and the request is
    sent under the resolved ``provider_resource_id``. The remaining arguments
    are forwarded through ``prepare_openai_completion_params``, except
    ``guided_choice``, ``prompt_logprobs`` and ``suffix``, which are accepted
    but not forwarded.

    :return: whatever the client's ``completions.create`` call returns.
    """
    resolved = await self.model_store.get_model(model)
    forwarded = dict(
        model=resolved.provider_resource_id,
        prompt=prompt,
        best_of=best_of,
        echo=echo,
        frequency_penalty=frequency_penalty,
        logit_bias=logit_bias,
        logprobs=logprobs,
        max_tokens=max_tokens,
        n=n,
        presence_penalty=presence_penalty,
        seed=seed,
        stop=stop,
        stream=stream,
        stream_options=stream_options,
        temperature=temperature,
        top_p=top_p,
        user=user,
    )
    request_params = await prepare_openai_completion_params(**forwarded)
    client = self._get_openai_client()
    return await client.completions.create(**request_params)  # type: ignore
:param model: The model identifier to check.
:return: True if the model is available dynamically, False otherwise.
"""
if not self._model_cache:
await self.list_models()
return model in self._model_cache
async def openai_chat_completion(
    self,
    model: str,
    messages: list[OpenAIMessageParam],
    frequency_penalty: float | None = None,
    function_call: str | dict[str, Any] | None = None,
    functions: list[dict[str, Any]] | None = None,
    logit_bias: dict[str, float] | None = None,
    logprobs: bool | None = None,
    max_completion_tokens: int | None = None,
    max_tokens: int | None = None,
    n: int | None = None,
    parallel_tool_calls: bool | None = None,
    presence_penalty: float | None = None,
    response_format: OpenAIResponseFormatParam | None = None,
    seed: int | None = None,
    stop: str | list[str] | None = None,
    stream: bool | None = None,
    stream_options: dict[str, Any] | None = None,
    temperature: float | None = None,
    tool_choice: str | dict[str, Any] | None = None,
    tools: list[dict[str, Any]] | None = None,
    top_logprobs: int | None = None,
    top_p: float | None = None,
    user: str | None = None,
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
    """Run an OpenAI-style chat completion through the adapter's OpenAI client.

    The model name is resolved via ``self.model_store`` and every argument is
    forwarded through ``prepare_openai_completion_params``.

    :return: the client's ``chat.completions.create`` result for non-streaming
        requests; for requests whose prepared params carry a truthy ``stream``,
        the async generator produced by ``_stream_openai_chat_completion``.
    """
    resolved = await self.model_store.get_model(model)
    request_params = await prepare_openai_completion_params(
        model=resolved.provider_resource_id,
        messages=messages,
        frequency_penalty=frequency_penalty,
        function_call=function_call,
        functions=functions,
        logit_bias=logit_bias,
        logprobs=logprobs,
        max_completion_tokens=max_completion_tokens,
        max_tokens=max_tokens,
        n=n,
        parallel_tool_calls=parallel_tool_calls,
        presence_penalty=presence_penalty,
        response_format=response_format,
        seed=seed,
        stop=stop,
        stream=stream,
        stream_options=stream_options,
        temperature=temperature,
        tool_choice=tool_choice,
        tools=tools,
        top_logprobs=top_logprobs,
        top_p=top_p,
        user=user,
    )
    wants_stream = request_params.get("stream", False)
    if not wants_stream:
        return await self._get_openai_client().chat.completions.create(**request_params)  # type: ignore
    # Streaming is delegated to the dedicated helper, which is returned un-awaited
    return self._stream_openai_chat_completion(request_params)
async def list_models(self) -> list[Model] | None:
self._model_cache = {}
models = []
for model_spec in self._get_model_specs():
functions = [f["id"] for f in model_spec.get("functions", [])]
# Format: {"embedding_dimension": 1536, "context_length": 8192}
async def _stream_openai_chat_completion(self, params: dict) -> AsyncGenerator:
# watsonx.ai sometimes adds usage data to the stream
include_usage = False
if params.get("stream_options", None):
include_usage = params["stream_options"].get("include_usage", False)
stream = await self._get_openai_client().chat.completions.create(**params)
# Example of an embedding model:
# {'model_id': 'ibm/granite-embedding-278m-multilingual',
# 'label': 'granite-embedding-278m-multilingual',
# 'model_limits': {'max_sequence_length': 512, 'embedding_dimension': 768},
# ...
provider_resource_id = f"{self.__provider_id__}/{model_spec['model_id']}"
if "embedding" in functions:
embedding_dimension = model_spec["model_limits"]["embedding_dimension"]
context_length = model_spec["model_limits"]["max_sequence_length"]
embedding_metadata = {
"embedding_dimension": embedding_dimension,
"context_length": context_length,
}
model = Model(
identifier=model_spec["model_id"],
provider_resource_id=provider_resource_id,
provider_id=self.__provider_id__,
metadata=embedding_metadata,
model_type=ModelType.embedding,
)
self._model_cache[provider_resource_id] = model
models.append(model)
if "text_chat" in functions:
model = Model(
identifier=model_spec["model_id"],
provider_resource_id=provider_resource_id,
provider_id=self.__provider_id__,
metadata={},
model_type=ModelType.llm,
)
# In theory, a model could be both an embedding model and a text chat model.
# In that case, the cache will record the text chat Model object (it is stored last and overwrites the
# embedding entry), and the list which we return will have both the embedding Model object and the
# text chat Model object. That's fine because the cache is only used for check_model_availability() anyway.
self._model_cache[provider_resource_id] = model
models.append(model)
return models
seen_finish_reason = False
async for chunk in stream:
# Final usage chunk with no choices that the user didn't request, so discard
if not include_usage and seen_finish_reason and len(chunk.choices) == 0:
break
yield chunk
for choice in chunk.choices:
if choice.finish_reason:
seen_finish_reason = True
break
# LiteLLM provides methods to list models for many providers, but not for watsonx.ai.
# So we need to implement our own method to list models by calling the watsonx.ai API.
def _get_model_specs(self) -> list[dict[str, Any]]:
    """
    Retrieves foundation model specifications from the watsonx.ai API.

    Issues a GET to ``<config url>/ml/v1/foundation_model_specs`` and returns
    the list found under the ``"resources"`` key of the JSON response. The
    request is bounded by the configured timeout.

    :return: the model specification dicts from the response.
    :raises requests.HTTPError: if the server answers with a 4xx/5xx status.
    :raises ValueError: if the response JSON has no ``"resources"`` key.
    """
    url = f"{self.config.url}/ml/v1/foundation_model_specs?version=2023-10-25"
    headers = {
        # Note that there is no authorization header. Listing models does not require authentication.
        "Content-Type": "application/json",
    }

    # Without an explicit timeout, requests would wait on the server indefinitely
    response = requests.get(url, headers=headers, timeout=self.config.timeout)

    # Raise an exception for bad status codes (4xx or 5xx)
    response.raise_for_status()

    # The response should contain a list of model specifications
    response_data = response.json()
    if "resources" not in response_data:
        raise ValueError("Resources not found in response")
    return response_data["resources"]

View file

@ -4,6 +4,8 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import base64
import struct
from collections.abc import AsyncIterator
from typing import Any
@ -16,6 +18,7 @@ from llama_stack.apis.inference import (
OpenAIChatCompletion,
OpenAIChatCompletionChunk,
OpenAICompletion,
OpenAIEmbeddingData,
OpenAIEmbeddingsResponse,
OpenAIEmbeddingUsage,
OpenAIMessageParam,
@ -26,7 +29,6 @@ from llama_stack.core.request_headers import NeedsRequestProviderData
from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry
from llama_stack.providers.utils.inference.openai_compat import (
b64_encode_openai_embeddings_response,
convert_message_to_openai_dict_new,
convert_tooldef_to_openai_tool,
get_sampling_options,
@ -349,3 +351,28 @@ class LiteLLMOpenAIMixin(
return False
return model in litellm.models_by_provider[self.litellm_provider_name]
def b64_encode_openai_embeddings_response(
    response_data: list[dict], encoding_format: str | None = "float"
) -> list[OpenAIEmbeddingData]:
    """
    Convert raw embedding records into ``OpenAIEmbeddingData`` items.

    Each record's ``"embedding"`` entry is passed through unchanged unless
    ``encoding_format`` is ``"base64"``, in which case every value is packed
    with struct format ``"f"`` and the packed bytes are base64-encoded into a
    UTF-8 string. Items are indexed by their position in ``response_data``.

    :param response_data: records holding an ``"embedding"`` sequence each.
    :param encoding_format: ``"base64"`` to encode; any other value keeps the raw embedding.
    :return: one ``OpenAIEmbeddingData`` per input record, in input order.
    """
    as_base64 = encoding_format == "base64"

    def _render(vector):
        # Raw passthrough unless base64 output was requested
        if not as_base64:
            return vector
        # NOTE(review): "f" packs in the machine's native byte order; confirm consumers expect that
        packed = b"".join(struct.pack("f", float(component)) for component in vector)
        return base64.b64encode(packed).decode("utf-8")

    return [
        OpenAIEmbeddingData(embedding=_render(record["embedding"]), index=position)
        for position, record in enumerate(response_data)
    ]

View file

@ -3,9 +3,7 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import base64
import json
import struct
import time
import uuid
import warnings
@ -103,7 +101,6 @@ from llama_stack.apis.inference import (
JsonSchemaResponseFormat,
Message,
OpenAIChatCompletion,
OpenAIEmbeddingData,
OpenAIMessageParam,
OpenAIResponseFormatParam,
SamplingParams,
@ -1402,28 +1399,3 @@ def prepare_openai_embeddings_params(
params["user"] = user
return params
def b64_encode_openai_embeddings_response(
    response_data: dict, encoding_format: str | None = "float"
) -> list[OpenAIEmbeddingData]:
    """
    Convert embedding results into ``OpenAIEmbeddingData`` items.

    Each item's ``.embedding`` attribute is passed through unchanged unless
    ``encoding_format`` is ``"base64"``, in which case the values are packed
    as struct ``"f"`` floats and base64-encoded into a UTF-8 string. Items are
    indexed by their position in ``response_data``.

    :param response_data: iterable of objects exposing ``.embedding``.
    :param encoding_format: ``"base64"`` to encode; any other value keeps the raw embedding.
    :return: one ``OpenAIEmbeddingData`` per input item, in input order.
    """
    # NOTE(review): the parameter is annotated ``dict``, yet it is enumerated and each
    # element is read via ``.embedding``; the annotation looks inaccurate, confirm.
    encoded: list[OpenAIEmbeddingData] = []
    for position, item in enumerate(response_data):
        payload = item.embedding
        if encoding_format == "base64":
            floats = [float(component) for component in payload]
            # One pack call with a repeat count yields the same bytes as packing value by value
            raw = struct.pack(f"{len(floats)}f", *floats)
            payload = base64.b64encode(raw).decode("utf-8")
        encoded.append(OpenAIEmbeddingData(embedding=payload, index=position))
    return encoded

View file

@ -296,15 +296,14 @@ class OpenAIVectorStoreMixin(ABC):
async def shutdown(self) -> None:
"""Clean up mixin resources including background tasks."""
# Cancel any running file batch tasks gracefully
if hasattr(self, "_file_batch_tasks"):
tasks_to_cancel = list(self._file_batch_tasks.items())
for _, task in tasks_to_cancel:
if not task.done():
task.cancel()
try:
await task
except asyncio.CancelledError:
pass
tasks_to_cancel = list(self._file_batch_tasks.items())
for _, task in tasks_to_cancel:
if not task.done():
task.cancel()
try:
await task
except asyncio.CancelledError:
pass
@abstractmethod
async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:

View file

@ -20,7 +20,6 @@ from pydantic import BaseModel
from llama_stack.apis.common.content_types import (
URL,
InterleavedContent,
TextContentItem,
)
from llama_stack.apis.tools import RAGDocument
from llama_stack.apis.vector_dbs import VectorDB
@ -129,26 +128,6 @@ def content_from_data_and_mime_type(data: bytes | str, mime_type: str | None, en
return ""
def concat_interleaved_content(content: list[InterleavedContent]) -> InterleavedContent:
    """Flatten ``content`` into a single list, preserving order.

    Nested lists are expanded recursively, every ``str`` encountered is
    wrapped as ``TextContentItem(text=...)``, and any other item is kept as-is.
    """

    def _leaves(node):
        # Depth-first walk that yields the flattened items of one entry
        if isinstance(node, list):
            for child in node:
                yield from _leaves(child)
        elif isinstance(node, str):
            yield TextContentItem(text=node)
        else:
            yield node

    return [leaf for entry in content for leaf in _leaves(entry)]
async def content_from_doc(doc: RAGDocument) -> str:
if isinstance(doc.content, URL):
if doc.content.uri.startswith("data:"):

View file

@ -18,6 +18,8 @@ from llama_stack.providers.remote.inference.openai.config import OpenAIConfig
from llama_stack.providers.remote.inference.openai.openai import OpenAIInferenceAdapter
from llama_stack.providers.remote.inference.together.config import TogetherImplConfig
from llama_stack.providers.remote.inference.together.together import TogetherInferenceAdapter
from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig
from llama_stack.providers.remote.inference.watsonx.watsonx import WatsonXInferenceAdapter
@pytest.mark.parametrize(
@ -58,3 +60,29 @@ def test_openai_provider_data_used(config_cls, adapter_cls, provider_data_valida
{"x-llamastack-provider-data": json.dumps({inference_adapter.provider_data_api_key_field: api_key})}
):
assert inference_adapter.client.api_key == api_key
@pytest.mark.parametrize(
    "config_cls,adapter_cls,provider_data_validator",
    [
        (
            WatsonXConfig,
            WatsonXInferenceAdapter,
            "llama_stack.providers.remote.inference.watsonx.config.WatsonXProviderDataValidator",
        ),
    ],
)
def test_litellm_provider_data_used(config_cls, adapter_cls, provider_data_validator: str):
    """Check that LiteLLM-based adapters pick up the API key from request provider data.

    Mirrors test_openai_provider_data_used, but asks the adapter for its key via
    ``get_api_key()`` instead of assuming an OpenAI-compatible client object exists.
    """
    adapter = adapter_cls(config=config_cls())

    # Stand-in provider spec that only carries the validator path
    spec = MagicMock()
    spec.provider_data_validator = provider_data_validator
    adapter.__provider_spec__ = spec

    key_field = adapter.provider_data_api_key_field
    for api_key in ("test1", "test2"):
        headers = {"x-llamastack-provider-data": json.dumps({key_field: api_key})}
        with request_provider_data_context(headers):
            assert adapter.get_api_key() == api_key

View file

@ -125,8 +125,15 @@ async def test_duplicate_provider_registration(cached_disk_dist_registry):
provider_resource_id="test_vector_db_2",
provider_id="baz", # Same provider_id
)
await cached_disk_dist_registry.register(duplicate_vector_db)
# Now we expect a ValueError to be raised for duplicate registration
with pytest.raises(
ValueError,
match=r"Provider 'baz' is already registered.*Unregister the existing provider first before registering it again.",
):
await cached_disk_dist_registry.register(duplicate_vector_db)
# Verify the original registration is still intact
result = await cached_disk_dist_registry.get("vector_db", "test_vector_db_2")
assert result is not None
assert result.embedding_model == original_vector_db.embedding_model # Original values preserved