Merge ed4e452de0 into sapling-pr-archive-ehhuang

ehhuang 2025-10-08 11:39:41 -07:00 committed by GitHub
commit 08d46d6363
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
37 changed files with 242 additions and 6940 deletions

@@ -24,7 +24,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Stale Action
-       uses: actions/stale@3a9db7e6a41a89f618792c92c0e97cc736e1b13f # v10.0.0
+       uses: actions/stale@5f858e3efba33a5ca4407a664cc011ad407f2008 # v10.1.0
        with:
          stale-issue-label: 'stale'
          stale-issue-message: >

@@ -17,8 +17,8 @@ IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
| `url` | `<class 'str'>` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing the watsonx.ai |
-| `api_key` | `pydantic.types.SecretStr \| None` | No | | The watsonx API key |
-| `project_id` | `str \| None` | No | | The Project ID key |
+| `api_key` | `pydantic.types.SecretStr \| None` | No | | The watsonx.ai API key |
+| `project_id` | `str \| None` | No | | The watsonx.ai project ID |
| `timeout` | `<class 'int'>` | No | 60 | Timeout for the HTTP requests |
## Sample Configuration
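
The hunk above only touches the field descriptions; for orientation, a minimal sketch of a configuration using these fields might look like the following. The field names come from the table above, while the placeholder values and the `${env...}` indirection are assumptions rather than the file's actual sample configuration (which is not shown in this diff).

```yaml
# Minimal sketch, not the file's actual sample configuration.
# Field names are from the table above; values and env-var names are assumptions.
url: https://us-south.ml.cloud.ibm.com
api_key: ${env.WATSONX_API_KEY}
project_id: ${env.WATSONX_PROJECT_ID}
timeout: 60
refresh_models: false
```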

@@ -3526,343 +3526,6 @@
},
"deprecated": true
}
},
"/v1/telemetry/metrics/{metric_name}": {
"post": {
"responses": {
"200": {
"description": "A QueryMetricsResponse.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QueryMetricsResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Query metrics.",
"description": "Query metrics.",
"parameters": [
{
"name": "metric_name",
"in": "path",
"description": "The name of the metric to query.",
"required": true,
"schema": {
"type": "string"
}
}
],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QueryMetricsRequest"
}
}
},
"required": true
},
"deprecated": true
}
},
"/v1/telemetry/spans": {
"post": {
"responses": {
"200": {
"description": "A QuerySpansResponse.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QuerySpansResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Query spans.",
"description": "Query spans.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QuerySpansRequest"
}
}
},
"required": true
},
"deprecated": true
}
},
"/v1/telemetry/spans/export": {
"post": {
"responses": {
"200": {
"description": "OK"
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Save spans to a dataset.",
"description": "Save spans to a dataset.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/SaveSpansToDatasetRequest"
}
}
},
"required": true
},
"deprecated": true
}
},
"/v1/telemetry/spans/{span_id}/tree": {
"post": {
"responses": {
"200": {
"description": "A QuerySpanTreeResponse.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QuerySpanTreeResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Get a span tree by its ID.",
"description": "Get a span tree by its ID.",
"parameters": [
{
"name": "span_id",
"in": "path",
"description": "The ID of the span to get the tree from.",
"required": true,
"schema": {
"type": "string"
}
}
],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/GetSpanTreeRequest"
}
}
},
"required": true
},
"deprecated": true
}
},
"/v1/telemetry/traces": {
"post": {
"responses": {
"200": {
"description": "A QueryTracesResponse.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QueryTracesResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Query traces.",
"description": "Query traces.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QueryTracesRequest"
}
}
},
"required": true
},
"deprecated": true
}
},
"/v1/telemetry/traces/{trace_id}": {
"get": {
"responses": {
"200": {
"description": "A Trace.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Trace"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Get a trace by its ID.",
"description": "Get a trace by its ID.",
"parameters": [
{
"name": "trace_id",
"in": "path",
"description": "The ID of the trace to get.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": true
}
},
"/v1/telemetry/traces/{trace_id}/spans/{span_id}": {
"get": {
"responses": {
"200": {
"description": "A Span.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Span"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Get a span by its ID.",
"description": "Get a span by its ID.",
"parameters": [
{
"name": "trace_id",
"in": "path",
"description": "The ID of the trace to get the span from.",
"required": true,
"schema": {
"type": "string"
}
},
{
"name": "span_id",
"in": "path",
"description": "The ID of the span to get.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": true
}
}
},
"jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema",
@@ -12716,561 +12379,6 @@
"logger_config"
],
"title": "SupervisedFineTuneRequest"
},
"QueryMetricsRequest": {
"type": "object",
"properties": {
"start_time": {
"type": "integer",
"description": "The start time of the metric to query."
},
"end_time": {
"type": "integer",
"description": "The end time of the metric to query."
},
"granularity": {
"type": "string",
"description": "The granularity of the metric to query."
},
"query_type": {
"type": "string",
"enum": [
"range",
"instant"
],
"description": "The type of query to perform."
},
"label_matchers": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "The name of the label to match"
},
"value": {
"type": "string",
"description": "The value to match against"
},
"operator": {
"type": "string",
"enum": [
"=",
"!=",
"=~",
"!~"
],
"description": "The comparison operator to use for matching",
"default": "="
}
},
"additionalProperties": false,
"required": [
"name",
"value",
"operator"
],
"title": "MetricLabelMatcher",
"description": "A matcher for filtering metrics by label values."
},
"description": "The label matchers to apply to the metric."
}
},
"additionalProperties": false,
"required": [
"start_time",
"query_type"
],
"title": "QueryMetricsRequest"
},
"MetricDataPoint": {
"type": "object",
"properties": {
"timestamp": {
"type": "integer",
"description": "Unix timestamp when the metric value was recorded"
},
"value": {
"type": "number",
"description": "The numeric value of the metric at this timestamp"
},
"unit": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"timestamp",
"value",
"unit"
],
"title": "MetricDataPoint",
"description": "A single data point in a metric time series."
},
"MetricLabel": {
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "The name of the label"
},
"value": {
"type": "string",
"description": "The value of the label"
}
},
"additionalProperties": false,
"required": [
"name",
"value"
],
"title": "MetricLabel",
"description": "A label associated with a metric."
},
"MetricSeries": {
"type": "object",
"properties": {
"metric": {
"type": "string",
"description": "The name of the metric"
},
"labels": {
"type": "array",
"items": {
"$ref": "#/components/schemas/MetricLabel"
},
"description": "List of labels associated with this metric series"
},
"values": {
"type": "array",
"items": {
"$ref": "#/components/schemas/MetricDataPoint"
},
"description": "List of data points in chronological order"
}
},
"additionalProperties": false,
"required": [
"metric",
"labels",
"values"
],
"title": "MetricSeries",
"description": "A time series of metric data points."
},
"QueryMetricsResponse": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"$ref": "#/components/schemas/MetricSeries"
},
"description": "List of metric series matching the query criteria"
}
},
"additionalProperties": false,
"required": [
"data"
],
"title": "QueryMetricsResponse",
"description": "Response containing metric time series data."
},
"QueryCondition": {
"type": "object",
"properties": {
"key": {
"type": "string",
"description": "The attribute key to filter on"
},
"op": {
"$ref": "#/components/schemas/QueryConditionOp",
"description": "The comparison operator to apply"
},
"value": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
],
"description": "The value to compare against"
}
},
"additionalProperties": false,
"required": [
"key",
"op",
"value"
],
"title": "QueryCondition",
"description": "A condition for filtering query results."
},
"QueryConditionOp": {
"type": "string",
"enum": [
"eq",
"ne",
"gt",
"lt"
],
"title": "QueryConditionOp",
"description": "Comparison operators for query conditions."
},
"QuerySpansRequest": {
"type": "object",
"properties": {
"attribute_filters": {
"type": "array",
"items": {
"$ref": "#/components/schemas/QueryCondition"
},
"description": "The attribute filters to apply to the spans."
},
"attributes_to_return": {
"type": "array",
"items": {
"type": "string"
},
"description": "The attributes to return in the spans."
},
"max_depth": {
"type": "integer",
"description": "The maximum depth of the tree."
}
},
"additionalProperties": false,
"required": [
"attribute_filters",
"attributes_to_return"
],
"title": "QuerySpansRequest"
},
"Span": {
"type": "object",
"properties": {
"span_id": {
"type": "string",
"description": "Unique identifier for the span"
},
"trace_id": {
"type": "string",
"description": "Unique identifier for the trace this span belongs to"
},
"parent_span_id": {
"type": "string",
"description": "(Optional) Unique identifier for the parent span, if this is a child span"
},
"name": {
"type": "string",
"description": "Human-readable name describing the operation this span represents"
},
"start_time": {
"type": "string",
"format": "date-time",
"description": "Timestamp when the operation began"
},
"end_time": {
"type": "string",
"format": "date-time",
"description": "(Optional) Timestamp when the operation finished, if completed"
},
"attributes": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
},
"description": "(Optional) Key-value pairs containing additional metadata about the span"
}
},
"additionalProperties": false,
"required": [
"span_id",
"trace_id",
"name",
"start_time"
],
"title": "Span",
"description": "A span representing a single operation within a trace."
},
"QuerySpansResponse": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Span"
},
"description": "List of spans matching the query criteria"
}
},
"additionalProperties": false,
"required": [
"data"
],
"title": "QuerySpansResponse",
"description": "Response containing a list of spans."
},
"SaveSpansToDatasetRequest": {
"type": "object",
"properties": {
"attribute_filters": {
"type": "array",
"items": {
"$ref": "#/components/schemas/QueryCondition"
},
"description": "The attribute filters to apply to the spans."
},
"attributes_to_save": {
"type": "array",
"items": {
"type": "string"
},
"description": "The attributes to save to the dataset."
},
"dataset_id": {
"type": "string",
"description": "The ID of the dataset to save the spans to."
},
"max_depth": {
"type": "integer",
"description": "The maximum depth of the tree."
}
},
"additionalProperties": false,
"required": [
"attribute_filters",
"attributes_to_save",
"dataset_id"
],
"title": "SaveSpansToDatasetRequest"
},
"GetSpanTreeRequest": {
"type": "object",
"properties": {
"attributes_to_return": {
"type": "array",
"items": {
"type": "string"
},
"description": "The attributes to return in the tree."
},
"max_depth": {
"type": "integer",
"description": "The maximum depth of the tree."
}
},
"additionalProperties": false,
"title": "GetSpanTreeRequest"
},
"SpanStatus": {
"type": "string",
"enum": [
"ok",
"error"
],
"title": "SpanStatus",
"description": "The status of a span indicating whether it completed successfully or with an error."
},
"SpanWithStatus": {
"type": "object",
"properties": {
"span_id": {
"type": "string",
"description": "Unique identifier for the span"
},
"trace_id": {
"type": "string",
"description": "Unique identifier for the trace this span belongs to"
},
"parent_span_id": {
"type": "string",
"description": "(Optional) Unique identifier for the parent span, if this is a child span"
},
"name": {
"type": "string",
"description": "Human-readable name describing the operation this span represents"
},
"start_time": {
"type": "string",
"format": "date-time",
"description": "Timestamp when the operation began"
},
"end_time": {
"type": "string",
"format": "date-time",
"description": "(Optional) Timestamp when the operation finished, if completed"
},
"attributes": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
},
"description": "(Optional) Key-value pairs containing additional metadata about the span"
},
"status": {
"$ref": "#/components/schemas/SpanStatus",
"description": "(Optional) The current status of the span"
}
},
"additionalProperties": false,
"required": [
"span_id",
"trace_id",
"name",
"start_time"
],
"title": "SpanWithStatus",
"description": "A span that includes status information."
},
"QuerySpanTreeResponse": {
"type": "object",
"properties": {
"data": {
"type": "object",
"additionalProperties": {
"$ref": "#/components/schemas/SpanWithStatus"
},
"description": "Dictionary mapping span IDs to spans with status information"
}
},
"additionalProperties": false,
"required": [
"data"
],
"title": "QuerySpanTreeResponse",
"description": "Response containing a tree structure of spans."
},
"QueryTracesRequest": {
"type": "object",
"properties": {
"attribute_filters": {
"type": "array",
"items": {
"$ref": "#/components/schemas/QueryCondition"
},
"description": "The attribute filters to apply to the traces."
},
"limit": {
"type": "integer",
"description": "The limit of traces to return."
},
"offset": {
"type": "integer",
"description": "The offset of the traces to return."
},
"order_by": {
"type": "array",
"items": {
"type": "string"
},
"description": "The order by of the traces to return."
}
},
"additionalProperties": false,
"title": "QueryTracesRequest"
},
"Trace": {
"type": "object",
"properties": {
"trace_id": {
"type": "string",
"description": "Unique identifier for the trace"
},
"root_span_id": {
"type": "string",
"description": "Unique identifier for the root span that started this trace"
},
"start_time": {
"type": "string",
"format": "date-time",
"description": "Timestamp when the trace began"
},
"end_time": {
"type": "string",
"format": "date-time",
"description": "(Optional) Timestamp when the trace finished, if completed"
}
},
"additionalProperties": false,
"required": [
"trace_id",
"root_span_id",
"start_time"
],
"title": "Trace",
"description": "A trace representing the complete execution path of a request across multiple operations."
},
"QueryTracesResponse": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Trace"
},
"description": "List of traces matching the query criteria"
}
},
"additionalProperties": false,
"required": [
"data"
],
"title": "QueryTracesResponse",
"description": "Response containing a list of traces."
}
},
"responses": {
@@ -13387,10 +12495,6 @@
"description": "OpenAI-compatible Moderations API.",
"x-displayName": "Safety"
},
{
"name": "Telemetry",
"description": ""
},
{
"name": "VectorIO",
"description": ""
@@ -13410,7 +12514,6 @@
"Models",
"PostTraining (Coming Soon)",
"Safety",
"Telemetry",
"VectorIO"
]
}
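
The removed paths above are the deprecated Telemetry query endpoints (metrics, spans, span trees, traces). As an illustration of what is going away, shaped after the QueryTracesRequest and QueryCondition schemas deleted above and using a made-up attribute key and value, a request body for the removed `POST /v1/telemetry/traces` endpoint looked roughly like this:

```yaml
# Illustrative only: a QueryTracesRequest-style body for the removed
# POST /v1/telemetry/traces endpoint. The attribute key and value are hypothetical.
attribute_filters:
  - key: session_id   # hypothetical attribute key
    op: eq            # QueryConditionOp: eq, ne, gt, lt
    value: "abc-123"
limit: 10
offset: 0
order_by:
  - start_time
```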

@@ -2593,238 +2593,6 @@ paths:
$ref: '#/components/schemas/SupervisedFineTuneRequest'
required: true
deprecated: true
/v1/telemetry/metrics/{metric_name}:
post:
responses:
'200':
description: A QueryMetricsResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/QueryMetricsResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Query metrics.
description: Query metrics.
parameters:
- name: metric_name
in: path
description: The name of the metric to query.
required: true
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/QueryMetricsRequest'
required: true
deprecated: true
/v1/telemetry/spans:
post:
responses:
'200':
description: A QuerySpansResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/QuerySpansResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Query spans.
description: Query spans.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/QuerySpansRequest'
required: true
deprecated: true
/v1/telemetry/spans/export:
post:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Save spans to a dataset.
description: Save spans to a dataset.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/SaveSpansToDatasetRequest'
required: true
deprecated: true
/v1/telemetry/spans/{span_id}/tree:
post:
responses:
'200':
description: A QuerySpanTreeResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/QuerySpanTreeResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Get a span tree by its ID.
description: Get a span tree by its ID.
parameters:
- name: span_id
in: path
description: The ID of the span to get the tree from.
required: true
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/GetSpanTreeRequest'
required: true
deprecated: true
/v1/telemetry/traces:
post:
responses:
'200':
description: A QueryTracesResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/QueryTracesResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Query traces.
description: Query traces.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/QueryTracesRequest'
required: true
deprecated: true
/v1/telemetry/traces/{trace_id}:
get:
responses:
'200':
description: A Trace.
content:
application/json:
schema:
$ref: '#/components/schemas/Trace'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Get a trace by its ID.
description: Get a trace by its ID.
parameters:
- name: trace_id
in: path
description: The ID of the trace to get.
required: true
schema:
type: string
deprecated: true
/v1/telemetry/traces/{trace_id}/spans/{span_id}:
get:
responses:
'200':
description: A Span.
content:
application/json:
schema:
$ref: '#/components/schemas/Span'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Get a span by its ID.
description: Get a span by its ID.
parameters:
- name: trace_id
in: path
description: >-
The ID of the trace to get the span from.
required: true
schema:
type: string
- name: span_id
in: path
description: The ID of the span to get.
required: true
schema:
type: string
deprecated: true
jsonSchemaDialect: >-
https://json-schema.org/draft/2020-12/schema
components:
@@ -9510,434 +9278,6 @@ components:
- hyperparam_search_config
- logger_config
title: SupervisedFineTuneRequest
QueryMetricsRequest:
type: object
properties:
start_time:
type: integer
description: The start time of the metric to query.
end_time:
type: integer
description: The end time of the metric to query.
granularity:
type: string
description: The granularity of the metric to query.
query_type:
type: string
enum:
- range
- instant
description: The type of query to perform.
label_matchers:
type: array
items:
type: object
properties:
name:
type: string
description: The name of the label to match
value:
type: string
description: The value to match against
operator:
type: string
enum:
- '='
- '!='
- =~
- '!~'
description: >-
The comparison operator to use for matching
default: '='
additionalProperties: false
required:
- name
- value
- operator
title: MetricLabelMatcher
description: >-
A matcher for filtering metrics by label values.
description: >-
The label matchers to apply to the metric.
additionalProperties: false
required:
- start_time
- query_type
title: QueryMetricsRequest
MetricDataPoint:
type: object
properties:
timestamp:
type: integer
description: >-
Unix timestamp when the metric value was recorded
value:
type: number
description: >-
The numeric value of the metric at this timestamp
unit:
type: string
additionalProperties: false
required:
- timestamp
- value
- unit
title: MetricDataPoint
description: >-
A single data point in a metric time series.
MetricLabel:
type: object
properties:
name:
type: string
description: The name of the label
value:
type: string
description: The value of the label
additionalProperties: false
required:
- name
- value
title: MetricLabel
description: A label associated with a metric.
MetricSeries:
type: object
properties:
metric:
type: string
description: The name of the metric
labels:
type: array
items:
$ref: '#/components/schemas/MetricLabel'
description: >-
List of labels associated with this metric series
values:
type: array
items:
$ref: '#/components/schemas/MetricDataPoint'
description: >-
List of data points in chronological order
additionalProperties: false
required:
- metric
- labels
- values
title: MetricSeries
description: A time series of metric data points.
QueryMetricsResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/MetricSeries'
description: >-
List of metric series matching the query criteria
additionalProperties: false
required:
- data
title: QueryMetricsResponse
description: >-
Response containing metric time series data.
QueryCondition:
type: object
properties:
key:
type: string
description: The attribute key to filter on
op:
$ref: '#/components/schemas/QueryConditionOp'
description: The comparison operator to apply
value:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: The value to compare against
additionalProperties: false
required:
- key
- op
- value
title: QueryCondition
description: A condition for filtering query results.
QueryConditionOp:
type: string
enum:
- eq
- ne
- gt
- lt
title: QueryConditionOp
description: >-
Comparison operators for query conditions.
QuerySpansRequest:
type: object
properties:
attribute_filters:
type: array
items:
$ref: '#/components/schemas/QueryCondition'
description: >-
The attribute filters to apply to the spans.
attributes_to_return:
type: array
items:
type: string
description: The attributes to return in the spans.
max_depth:
type: integer
description: The maximum depth of the tree.
additionalProperties: false
required:
- attribute_filters
- attributes_to_return
title: QuerySpansRequest
Span:
type: object
properties:
span_id:
type: string
description: Unique identifier for the span
trace_id:
type: string
description: >-
Unique identifier for the trace this span belongs to
parent_span_id:
type: string
description: >-
(Optional) Unique identifier for the parent span, if this is a child span
name:
type: string
description: >-
Human-readable name describing the operation this span represents
start_time:
type: string
format: date-time
description: Timestamp when the operation began
end_time:
type: string
format: date-time
description: >-
(Optional) Timestamp when the operation finished, if completed
attributes:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Key-value pairs containing additional metadata about the span
additionalProperties: false
required:
- span_id
- trace_id
- name
- start_time
title: Span
description: >-
A span representing a single operation within a trace.
QuerySpansResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/Span'
description: >-
List of spans matching the query criteria
additionalProperties: false
required:
- data
title: QuerySpansResponse
description: Response containing a list of spans.
SaveSpansToDatasetRequest:
type: object
properties:
attribute_filters:
type: array
items:
$ref: '#/components/schemas/QueryCondition'
description: >-
The attribute filters to apply to the spans.
attributes_to_save:
type: array
items:
type: string
description: The attributes to save to the dataset.
dataset_id:
type: string
description: >-
The ID of the dataset to save the spans to.
max_depth:
type: integer
description: The maximum depth of the tree.
additionalProperties: false
required:
- attribute_filters
- attributes_to_save
- dataset_id
title: SaveSpansToDatasetRequest
GetSpanTreeRequest:
type: object
properties:
attributes_to_return:
type: array
items:
type: string
description: The attributes to return in the tree.
max_depth:
type: integer
description: The maximum depth of the tree.
additionalProperties: false
title: GetSpanTreeRequest
SpanStatus:
type: string
enum:
- ok
- error
title: SpanStatus
description: >-
The status of a span indicating whether it completed successfully or with
an error.
SpanWithStatus:
type: object
properties:
span_id:
type: string
description: Unique identifier for the span
trace_id:
type: string
description: >-
Unique identifier for the trace this span belongs to
parent_span_id:
type: string
description: >-
(Optional) Unique identifier for the parent span, if this is a child span
name:
type: string
description: >-
Human-readable name describing the operation this span represents
start_time:
type: string
format: date-time
description: Timestamp when the operation began
end_time:
type: string
format: date-time
description: >-
(Optional) Timestamp when the operation finished, if completed
attributes:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Key-value pairs containing additional metadata about the span
status:
$ref: '#/components/schemas/SpanStatus'
description: >-
(Optional) The current status of the span
additionalProperties: false
required:
- span_id
- trace_id
- name
- start_time
title: SpanWithStatus
description: A span that includes status information.
QuerySpanTreeResponse:
type: object
properties:
data:
type: object
additionalProperties:
$ref: '#/components/schemas/SpanWithStatus'
description: >-
Dictionary mapping span IDs to spans with status information
additionalProperties: false
required:
- data
title: QuerySpanTreeResponse
description: >-
Response containing a tree structure of spans.
QueryTracesRequest:
type: object
properties:
attribute_filters:
type: array
items:
$ref: '#/components/schemas/QueryCondition'
description: >-
The attribute filters to apply to the traces.
limit:
type: integer
description: The limit of traces to return.
offset:
type: integer
description: The offset of the traces to return.
order_by:
type: array
items:
type: string
description: The order by of the traces to return.
additionalProperties: false
title: QueryTracesRequest
Trace:
type: object
properties:
trace_id:
type: string
description: Unique identifier for the trace
root_span_id:
type: string
description: >-
Unique identifier for the root span that started this trace
start_time:
type: string
format: date-time
description: Timestamp when the trace began
end_time:
type: string
format: date-time
description: >-
(Optional) Timestamp when the trace finished, if completed
additionalProperties: false
required:
- trace_id
- root_span_id
- start_time
title: Trace
description: >-
A trace representing the complete execution path of a request across multiple
operations.
QueryTracesResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/Trace'
description: >-
List of traces matching the query criteria
additionalProperties: false
required:
- data
title: QueryTracesResponse
description: Response containing a list of traces.
responses:
BadRequest400:
description: The request was invalid or malformed
@@ -10043,8 +9383,6 @@ tags:
- name: Safety
description: OpenAI-compatible Moderations API.
x-displayName: Safety
- name: Telemetry
description: ''
- name: VectorIO
description: ''
x-tagGroups:
@@ -10060,5 +9398,4 @@ x-tagGroups:
- Models
- PostTraining (Coming Soon)
- Safety
- Telemetry
- VectorIO

@@ -1711,343 +1711,6 @@
},
"deprecated": false
}
},
"/v1alpha/telemetry/metrics/{metric_name}": {
"post": {
"responses": {
"200": {
"description": "A QueryMetricsResponse.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QueryMetricsResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Query metrics.",
"description": "Query metrics.",
"parameters": [
{
"name": "metric_name",
"in": "path",
"description": "The name of the metric to query.",
"required": true,
"schema": {
"type": "string"
}
}
],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QueryMetricsRequest"
}
}
},
"required": true
},
"deprecated": false
}
},
"/v1alpha/telemetry/spans": {
"post": {
"responses": {
"200": {
"description": "A QuerySpansResponse.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QuerySpansResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Query spans.",
"description": "Query spans.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QuerySpansRequest"
}
}
},
"required": true
},
"deprecated": false
}
},
"/v1alpha/telemetry/spans/export": {
"post": {
"responses": {
"200": {
"description": "OK"
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Save spans to a dataset.",
"description": "Save spans to a dataset.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/SaveSpansToDatasetRequest"
}
}
},
"required": true
},
"deprecated": false
}
},
"/v1alpha/telemetry/spans/{span_id}/tree": {
"post": {
"responses": {
"200": {
"description": "A QuerySpanTreeResponse.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QuerySpanTreeResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Get a span tree by its ID.",
"description": "Get a span tree by its ID.",
"parameters": [
{
"name": "span_id",
"in": "path",
"description": "The ID of the span to get the tree from.",
"required": true,
"schema": {
"type": "string"
}
}
],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/GetSpanTreeRequest"
}
}
},
"required": true
},
"deprecated": false
}
},
"/v1alpha/telemetry/traces": {
"post": {
"responses": {
"200": {
"description": "A QueryTracesResponse.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QueryTracesResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Query traces.",
"description": "Query traces.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QueryTracesRequest"
}
}
},
"required": true
},
"deprecated": false
}
},
"/v1alpha/telemetry/traces/{trace_id}": {
"get": {
"responses": {
"200": {
"description": "A Trace.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Trace"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Get a trace by its ID.",
"description": "Get a trace by its ID.",
"parameters": [
{
"name": "trace_id",
"in": "path",
"description": "The ID of the trace to get.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": false
}
},
"/v1alpha/telemetry/traces/{trace_id}/spans/{span_id}": {
"get": {
"responses": {
"200": {
"description": "A Span.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Span"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Get a span by its ID.",
"description": "Get a span by its ID.",
"parameters": [
{
"name": "trace_id",
"in": "path",
"description": "The ID of the trace to get the span from.",
"required": true,
"schema": {
"type": "string"
}
},
{
"name": "span_id",
"in": "path",
"description": "The ID of the span to get.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": false
}
}
},
"jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema",
@@ -5765,561 +5428,6 @@
"logger_config"
],
"title": "SupervisedFineTuneRequest"
},
"QueryMetricsRequest": {
"type": "object",
"properties": {
"start_time": {
"type": "integer",
"description": "The start time of the metric to query."
},
"end_time": {
"type": "integer",
"description": "The end time of the metric to query."
},
"granularity": {
"type": "string",
"description": "The granularity of the metric to query."
},
"query_type": {
"type": "string",
"enum": [
"range",
"instant"
],
"description": "The type of query to perform."
},
"label_matchers": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "The name of the label to match"
},
"value": {
"type": "string",
"description": "The value to match against"
},
"operator": {
"type": "string",
"enum": [
"=",
"!=",
"=~",
"!~"
],
"description": "The comparison operator to use for matching",
"default": "="
}
},
"additionalProperties": false,
"required": [
"name",
"value",
"operator"
],
"title": "MetricLabelMatcher",
"description": "A matcher for filtering metrics by label values."
},
"description": "The label matchers to apply to the metric."
}
},
"additionalProperties": false,
"required": [
"start_time",
"query_type"
],
"title": "QueryMetricsRequest"
},
"MetricDataPoint": {
"type": "object",
"properties": {
"timestamp": {
"type": "integer",
"description": "Unix timestamp when the metric value was recorded"
},
"value": {
"type": "number",
"description": "The numeric value of the metric at this timestamp"
},
"unit": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"timestamp",
"value",
"unit"
],
"title": "MetricDataPoint",
"description": "A single data point in a metric time series."
},
"MetricLabel": {
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "The name of the label"
},
"value": {
"type": "string",
"description": "The value of the label"
}
},
"additionalProperties": false,
"required": [
"name",
"value"
],
"title": "MetricLabel",
"description": "A label associated with a metric."
},
"MetricSeries": {
"type": "object",
"properties": {
"metric": {
"type": "string",
"description": "The name of the metric"
},
"labels": {
"type": "array",
"items": {
"$ref": "#/components/schemas/MetricLabel"
},
"description": "List of labels associated with this metric series"
},
"values": {
"type": "array",
"items": {
"$ref": "#/components/schemas/MetricDataPoint"
},
"description": "List of data points in chronological order"
}
},
"additionalProperties": false,
"required": [
"metric",
"labels",
"values"
],
"title": "MetricSeries",
"description": "A time series of metric data points."
},
"QueryMetricsResponse": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"$ref": "#/components/schemas/MetricSeries"
},
"description": "List of metric series matching the query criteria"
}
},
"additionalProperties": false,
"required": [
"data"
],
"title": "QueryMetricsResponse",
"description": "Response containing metric time series data."
},
"QueryCondition": {
"type": "object",
"properties": {
"key": {
"type": "string",
"description": "The attribute key to filter on"
},
"op": {
"$ref": "#/components/schemas/QueryConditionOp",
"description": "The comparison operator to apply"
},
"value": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
],
"description": "The value to compare against"
}
},
"additionalProperties": false,
"required": [
"key",
"op",
"value"
],
"title": "QueryCondition",
"description": "A condition for filtering query results."
},
"QueryConditionOp": {
"type": "string",
"enum": [
"eq",
"ne",
"gt",
"lt"
],
"title": "QueryConditionOp",
"description": "Comparison operators for query conditions."
},
"QuerySpansRequest": {
"type": "object",
"properties": {
"attribute_filters": {
"type": "array",
"items": {
"$ref": "#/components/schemas/QueryCondition"
},
"description": "The attribute filters to apply to the spans."
},
"attributes_to_return": {
"type": "array",
"items": {
"type": "string"
},
"description": "The attributes to return in the spans."
},
"max_depth": {
"type": "integer",
"description": "The maximum depth of the tree."
}
},
"additionalProperties": false,
"required": [
"attribute_filters",
"attributes_to_return"
],
"title": "QuerySpansRequest"
},
"Span": {
"type": "object",
"properties": {
"span_id": {
"type": "string",
"description": "Unique identifier for the span"
},
"trace_id": {
"type": "string",
"description": "Unique identifier for the trace this span belongs to"
},
"parent_span_id": {
"type": "string",
"description": "(Optional) Unique identifier for the parent span, if this is a child span"
},
"name": {
"type": "string",
"description": "Human-readable name describing the operation this span represents"
},
"start_time": {
"type": "string",
"format": "date-time",
"description": "Timestamp when the operation began"
},
"end_time": {
"type": "string",
"format": "date-time",
"description": "(Optional) Timestamp when the operation finished, if completed"
},
"attributes": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
},
"description": "(Optional) Key-value pairs containing additional metadata about the span"
}
},
"additionalProperties": false,
"required": [
"span_id",
"trace_id",
"name",
"start_time"
],
"title": "Span",
"description": "A span representing a single operation within a trace."
},
"QuerySpansResponse": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Span"
},
"description": "List of spans matching the query criteria"
}
},
"additionalProperties": false,
"required": [
"data"
],
"title": "QuerySpansResponse",
"description": "Response containing a list of spans."
},
"SaveSpansToDatasetRequest": {
"type": "object",
"properties": {
"attribute_filters": {
"type": "array",
"items": {
"$ref": "#/components/schemas/QueryCondition"
},
"description": "The attribute filters to apply to the spans."
},
"attributes_to_save": {
"type": "array",
"items": {
"type": "string"
},
"description": "The attributes to save to the dataset."
},
"dataset_id": {
"type": "string",
"description": "The ID of the dataset to save the spans to."
},
"max_depth": {
"type": "integer",
"description": "The maximum depth of the tree."
}
},
"additionalProperties": false,
"required": [
"attribute_filters",
"attributes_to_save",
"dataset_id"
],
"title": "SaveSpansToDatasetRequest"
},
"GetSpanTreeRequest": {
"type": "object",
"properties": {
"attributes_to_return": {
"type": "array",
"items": {
"type": "string"
},
"description": "The attributes to return in the tree."
},
"max_depth": {
"type": "integer",
"description": "The maximum depth of the tree."
}
},
"additionalProperties": false,
"title": "GetSpanTreeRequest"
},
"SpanStatus": {
"type": "string",
"enum": [
"ok",
"error"
],
"title": "SpanStatus",
"description": "The status of a span indicating whether it completed successfully or with an error."
},
"SpanWithStatus": {
"type": "object",
"properties": {
"span_id": {
"type": "string",
"description": "Unique identifier for the span"
},
"trace_id": {
"type": "string",
"description": "Unique identifier for the trace this span belongs to"
},
"parent_span_id": {
"type": "string",
"description": "(Optional) Unique identifier for the parent span, if this is a child span"
},
"name": {
"type": "string",
"description": "Human-readable name describing the operation this span represents"
},
"start_time": {
"type": "string",
"format": "date-time",
"description": "Timestamp when the operation began"
},
"end_time": {
"type": "string",
"format": "date-time",
"description": "(Optional) Timestamp when the operation finished, if completed"
},
"attributes": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
},
"description": "(Optional) Key-value pairs containing additional metadata about the span"
},
"status": {
"$ref": "#/components/schemas/SpanStatus",
"description": "(Optional) The current status of the span"
}
},
"additionalProperties": false,
"required": [
"span_id",
"trace_id",
"name",
"start_time"
],
"title": "SpanWithStatus",
"description": "A span that includes status information."
},
"QuerySpanTreeResponse": {
"type": "object",
"properties": {
"data": {
"type": "object",
"additionalProperties": {
"$ref": "#/components/schemas/SpanWithStatus"
},
"description": "Dictionary mapping span IDs to spans with status information"
}
},
"additionalProperties": false,
"required": [
"data"
],
"title": "QuerySpanTreeResponse",
"description": "Response containing a tree structure of spans."
},
"QueryTracesRequest": {
"type": "object",
"properties": {
"attribute_filters": {
"type": "array",
"items": {
"$ref": "#/components/schemas/QueryCondition"
},
"description": "The attribute filters to apply to the traces."
},
"limit": {
"type": "integer",
"description": "The limit of traces to return."
},
"offset": {
"type": "integer",
"description": "The offset of the traces to return."
},
"order_by": {
"type": "array",
"items": {
"type": "string"
},
"description": "The order by of the traces to return."
}
},
"additionalProperties": false,
"title": "QueryTracesRequest"
},
"Trace": {
"type": "object",
"properties": {
"trace_id": {
"type": "string",
"description": "Unique identifier for the trace"
},
"root_span_id": {
"type": "string",
"description": "Unique identifier for the root span that started this trace"
},
"start_time": {
"type": "string",
"format": "date-time",
"description": "Timestamp when the trace began"
},
"end_time": {
"type": "string",
"format": "date-time",
"description": "(Optional) Timestamp when the trace finished, if completed"
}
},
"additionalProperties": false,
"required": [
"trace_id",
"root_span_id",
"start_time"
],
"title": "Trace",
"description": "A trace representing the complete execution path of a request across multiple operations."
},
"QueryTracesResponse": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Trace"
},
"description": "List of traces matching the query criteria"
}
},
"additionalProperties": false,
"required": [
"data"
],
"title": "QueryTracesResponse",
"description": "Response containing a list of traces."
}
},
"responses": {
@@ -6416,10 +5524,6 @@
{
"name": "PostTraining (Coming Soon)",
"description": ""
},
{
"name": "Telemetry",
"description": ""
}
],
"x-tagGroups": [
@@ -6431,8 +5535,7 @@
"DatasetIO",
"Datasets",
"Eval",
-"PostTraining (Coming Soon)",
+"PostTraining (Coming Soon)"
"Telemetry"
]
}
]

@@ -1224,238 +1224,6 @@ paths:
$ref: '#/components/schemas/SupervisedFineTuneRequest'
required: true
deprecated: false
/v1alpha/telemetry/metrics/{metric_name}:
post:
responses:
'200':
description: A QueryMetricsResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/QueryMetricsResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Query metrics.
description: Query metrics.
parameters:
- name: metric_name
in: path
description: The name of the metric to query.
required: true
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/QueryMetricsRequest'
required: true
deprecated: false
/v1alpha/telemetry/spans:
post:
responses:
'200':
description: A QuerySpansResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/QuerySpansResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Query spans.
description: Query spans.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/QuerySpansRequest'
required: true
deprecated: false
/v1alpha/telemetry/spans/export:
post:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Save spans to a dataset.
description: Save spans to a dataset.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/SaveSpansToDatasetRequest'
required: true
deprecated: false
/v1alpha/telemetry/spans/{span_id}/tree:
post:
responses:
'200':
description: A QuerySpanTreeResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/QuerySpanTreeResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Get a span tree by its ID.
description: Get a span tree by its ID.
parameters:
- name: span_id
in: path
description: The ID of the span to get the tree from.
required: true
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/GetSpanTreeRequest'
required: true
deprecated: false
/v1alpha/telemetry/traces:
post:
responses:
'200':
description: A QueryTracesResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/QueryTracesResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Query traces.
description: Query traces.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/QueryTracesRequest'
required: true
deprecated: false
/v1alpha/telemetry/traces/{trace_id}:
get:
responses:
'200':
description: A Trace.
content:
application/json:
schema:
$ref: '#/components/schemas/Trace'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Get a trace by its ID.
description: Get a trace by its ID.
parameters:
- name: trace_id
in: path
description: The ID of the trace to get.
required: true
schema:
type: string
deprecated: false
/v1alpha/telemetry/traces/{trace_id}/spans/{span_id}:
get:
responses:
'200':
description: A Span.
content:
application/json:
schema:
$ref: '#/components/schemas/Span'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Get a span by its ID.
description: Get a span by its ID.
parameters:
- name: trace_id
in: path
description: >-
The ID of the trace to get the span from.
required: true
schema:
type: string
- name: span_id
in: path
description: The ID of the span to get.
required: true
schema:
type: string
deprecated: false
jsonSchemaDialect: >-
https://json-schema.org/draft/2020-12/schema
components:
@@ -4249,434 +4017,6 @@ components:
- hyperparam_search_config
- logger_config
title: SupervisedFineTuneRequest
QueryMetricsRequest:
type: object
properties:
start_time:
type: integer
description: The start time of the metric to query.
end_time:
type: integer
description: The end time of the metric to query.
granularity:
type: string
description: The granularity of the metric to query.
query_type:
type: string
enum:
- range
- instant
description: The type of query to perform.
label_matchers:
type: array
items:
type: object
properties:
name:
type: string
description: The name of the label to match
value:
type: string
description: The value to match against
operator:
type: string
enum:
- '='
- '!='
- =~
- '!~'
description: >-
The comparison operator to use for matching
default: '='
additionalProperties: false
required:
- name
- value
- operator
title: MetricLabelMatcher
description: >-
A matcher for filtering metrics by label values.
description: >-
The label matchers to apply to the metric.
additionalProperties: false
required:
- start_time
- query_type
title: QueryMetricsRequest
MetricDataPoint:
type: object
properties:
timestamp:
type: integer
description: >-
Unix timestamp when the metric value was recorded
value:
type: number
description: >-
The numeric value of the metric at this timestamp
unit:
type: string
additionalProperties: false
required:
- timestamp
- value
- unit
title: MetricDataPoint
description: >-
A single data point in a metric time series.
MetricLabel:
type: object
properties:
name:
type: string
description: The name of the label
value:
type: string
description: The value of the label
additionalProperties: false
required:
- name
- value
title: MetricLabel
description: A label associated with a metric.
MetricSeries:
type: object
properties:
metric:
type: string
description: The name of the metric
labels:
type: array
items:
$ref: '#/components/schemas/MetricLabel'
description: >-
List of labels associated with this metric series
values:
type: array
items:
$ref: '#/components/schemas/MetricDataPoint'
description: >-
List of data points in chronological order
additionalProperties: false
required:
- metric
- labels
- values
title: MetricSeries
description: A time series of metric data points.
QueryMetricsResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/MetricSeries'
description: >-
List of metric series matching the query criteria
additionalProperties: false
required:
- data
title: QueryMetricsResponse
description: >-
Response containing metric time series data.
QueryCondition:
type: object
properties:
key:
type: string
description: The attribute key to filter on
op:
$ref: '#/components/schemas/QueryConditionOp'
description: The comparison operator to apply
value:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: The value to compare against
additionalProperties: false
required:
- key
- op
- value
title: QueryCondition
description: A condition for filtering query results.
QueryConditionOp:
type: string
enum:
- eq
- ne
- gt
- lt
title: QueryConditionOp
description: >-
Comparison operators for query conditions.
QuerySpansRequest:
type: object
properties:
attribute_filters:
type: array
items:
$ref: '#/components/schemas/QueryCondition'
description: >-
The attribute filters to apply to the spans.
attributes_to_return:
type: array
items:
type: string
description: The attributes to return in the spans.
max_depth:
type: integer
description: The maximum depth of the tree.
additionalProperties: false
required:
- attribute_filters
- attributes_to_return
title: QuerySpansRequest
Span:
type: object
properties:
span_id:
type: string
description: Unique identifier for the span
trace_id:
type: string
description: >-
Unique identifier for the trace this span belongs to
parent_span_id:
type: string
description: >-
(Optional) Unique identifier for the parent span, if this is a child span
name:
type: string
description: >-
Human-readable name describing the operation this span represents
start_time:
type: string
format: date-time
description: Timestamp when the operation began
end_time:
type: string
format: date-time
description: >-
(Optional) Timestamp when the operation finished, if completed
attributes:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Key-value pairs containing additional metadata about the span
additionalProperties: false
required:
- span_id
- trace_id
- name
- start_time
title: Span
description: >-
A span representing a single operation within a trace.
QuerySpansResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/Span'
description: >-
List of spans matching the query criteria
additionalProperties: false
required:
- data
title: QuerySpansResponse
description: Response containing a list of spans.
SaveSpansToDatasetRequest:
type: object
properties:
attribute_filters:
type: array
items:
$ref: '#/components/schemas/QueryCondition'
description: >-
The attribute filters to apply to the spans.
attributes_to_save:
type: array
items:
type: string
description: The attributes to save to the dataset.
dataset_id:
type: string
description: >-
The ID of the dataset to save the spans to.
max_depth:
type: integer
description: The maximum depth of the tree.
additionalProperties: false
required:
- attribute_filters
- attributes_to_save
- dataset_id
title: SaveSpansToDatasetRequest
GetSpanTreeRequest:
type: object
properties:
attributes_to_return:
type: array
items:
type: string
description: The attributes to return in the tree.
max_depth:
type: integer
description: The maximum depth of the tree.
additionalProperties: false
title: GetSpanTreeRequest
SpanStatus:
type: string
enum:
- ok
- error
title: SpanStatus
description: >-
The status of a span indicating whether it completed successfully or with
an error.
SpanWithStatus:
type: object
properties:
span_id:
type: string
description: Unique identifier for the span
trace_id:
type: string
description: >-
Unique identifier for the trace this span belongs to
parent_span_id:
type: string
description: >-
(Optional) Unique identifier for the parent span, if this is a child span
name:
type: string
description: >-
Human-readable name describing the operation this span represents
start_time:
type: string
format: date-time
description: Timestamp when the operation began
end_time:
type: string
format: date-time
description: >-
(Optional) Timestamp when the operation finished, if completed
attributes:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Key-value pairs containing additional metadata about the span
status:
$ref: '#/components/schemas/SpanStatus'
description: >-
(Optional) The current status of the span
additionalProperties: false
required:
- span_id
- trace_id
- name
- start_time
title: SpanWithStatus
description: A span that includes status information.
QuerySpanTreeResponse:
type: object
properties:
data:
type: object
additionalProperties:
$ref: '#/components/schemas/SpanWithStatus'
description: >-
Dictionary mapping span IDs to spans with status information
additionalProperties: false
required:
- data
title: QuerySpanTreeResponse
description: >-
Response containing a tree structure of spans.
QueryTracesRequest:
type: object
properties:
attribute_filters:
type: array
items:
$ref: '#/components/schemas/QueryCondition'
description: >-
The attribute filters to apply to the traces.
limit:
type: integer
description: The limit of traces to return.
offset:
type: integer
description: The offset of the traces to return.
order_by:
type: array
items:
type: string
description: The sort order for the returned traces.
additionalProperties: false
title: QueryTracesRequest
Trace:
type: object
properties:
trace_id:
type: string
description: Unique identifier for the trace
root_span_id:
type: string
description: >-
Unique identifier for the root span that started this trace
start_time:
type: string
format: date-time
description: Timestamp when the trace began
end_time:
type: string
format: date-time
description: >-
(Optional) Timestamp when the trace finished, if completed
additionalProperties: false
required:
- trace_id
- root_span_id
- start_time
title: Trace
description: >-
A trace representing the complete execution path of a request across multiple
operations.
QueryTracesResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/Trace'
description: >-
List of traces matching the query criteria
additionalProperties: false
required:
- data
title: QueryTracesResponse
description: Response containing a list of traces.
responses: responses:
BadRequest400: BadRequest400:
description: The request was invalid or malformed description: The request was invalid or malformed
@@ -4784,8 +4124,6 @@ tags:
Llama Stack Evaluation API for running evaluations on model and agent candidates. Llama Stack Evaluation API for running evaluations on model and agent candidates.
- name: PostTraining (Coming Soon) - name: PostTraining (Coming Soon)
description: '' description: ''
- name: Telemetry
description: ''
x-tagGroups: x-tagGroups:
- name: Operations - name: Operations
tags: tags:
@@ -4795,4 +4133,3 @@ x-tagGroups:
- Datasets - Datasets
- Eval - Eval
- PostTraining (Coming Soon) - PostTraining (Coming Soon)
- Telemetry
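
The QueryTracesRequest/QueryTracesResponse pair defined earlier in this file maps onto a single POST call. A sketch of that request, with placeholder server address, token, and filter values:

import requests

body = {
    "attribute_filters": [
        {"key": "session_id", "op": "eq", "value": "sess-123"},  # hypothetical attribute key/value
    ],
    "limit": 20,
    "offset": 0,
    "order_by": ["start_time"],  # hypothetical sort key
}
resp = requests.post(
    "http://localhost:8321/v1alpha/telemetry/traces",  # assumption: local server address
    json=body,
    headers={"Authorization": "Bearer <token>"},  # assumption: token with telemetry read access
    timeout=30,
)
resp.raise_for_status()
for trace in resp.json()["data"]:  # QueryTracesResponse.data is a list of Trace objects
    print(trace["trace_id"], trace["root_span_id"], trace["start_time"])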

View file

@@ -2525,44 +2525,6 @@
"deprecated": false "deprecated": false
} }
}, },
"/v1/telemetry/events": {
"post": {
"responses": {
"200": {
"description": "OK"
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"summary": "Log an event.",
"description": "Log an event.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/LogEventRequest"
}
}
},
"required": true
},
"deprecated": false
}
},
"/v1/tool-runtime/invoke": { "/v1/tool-runtime/invoke": {
"post": { "post": {
"responses": { "responses": {
@@ -10364,354 +10326,6 @@
"title": "SyntheticDataGenerationResponse", "title": "SyntheticDataGenerationResponse",
"description": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold." "description": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold."
}, },
"Event": {
"oneOf": [
{
"$ref": "#/components/schemas/UnstructuredLogEvent"
},
{
"$ref": "#/components/schemas/MetricEvent"
},
{
"$ref": "#/components/schemas/StructuredLogEvent"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"unstructured_log": "#/components/schemas/UnstructuredLogEvent",
"metric": "#/components/schemas/MetricEvent",
"structured_log": "#/components/schemas/StructuredLogEvent"
}
}
},
"EventType": {
"type": "string",
"enum": [
"unstructured_log",
"structured_log",
"metric"
],
"title": "EventType",
"description": "The type of telemetry event being logged."
},
"LogSeverity": {
"type": "string",
"enum": [
"verbose",
"debug",
"info",
"warn",
"error",
"critical"
],
"title": "LogSeverity",
"description": "The severity level of a log message."
},
"MetricEvent": {
"type": "object",
"properties": {
"trace_id": {
"type": "string",
"description": "Unique identifier for the trace this event belongs to"
},
"span_id": {
"type": "string",
"description": "Unique identifier for the span this event belongs to"
},
"timestamp": {
"type": "string",
"format": "date-time",
"description": "Timestamp when the event occurred"
},
"attributes": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "string"
},
{
"type": "integer"
},
{
"type": "number"
},
{
"type": "boolean"
},
{
"type": "null"
}
]
},
"description": "(Optional) Key-value pairs containing additional metadata about the event"
},
"type": {
"$ref": "#/components/schemas/EventType",
"const": "metric",
"default": "metric",
"description": "Event type identifier set to METRIC"
},
"metric": {
"type": "string",
"description": "The name of the metric being measured"
},
"value": {
"oneOf": [
{
"type": "integer"
},
{
"type": "number"
}
],
"description": "The numeric value of the metric measurement"
},
"unit": {
"type": "string",
"description": "The unit of measurement for the metric value"
}
},
"additionalProperties": false,
"required": [
"trace_id",
"span_id",
"timestamp",
"type",
"metric",
"value",
"unit"
],
"title": "MetricEvent",
"description": "A metric event containing a measured value."
},
"SpanEndPayload": {
"type": "object",
"properties": {
"type": {
"$ref": "#/components/schemas/StructuredLogType",
"const": "span_end",
"default": "span_end",
"description": "Payload type identifier set to SPAN_END"
},
"status": {
"$ref": "#/components/schemas/SpanStatus",
"description": "The final status of the span indicating success or failure"
}
},
"additionalProperties": false,
"required": [
"type",
"status"
],
"title": "SpanEndPayload",
"description": "Payload for a span end event."
},
"SpanStartPayload": {
"type": "object",
"properties": {
"type": {
"$ref": "#/components/schemas/StructuredLogType",
"const": "span_start",
"default": "span_start",
"description": "Payload type identifier set to SPAN_START"
},
"name": {
"type": "string",
"description": "Human-readable name describing the operation this span represents"
},
"parent_span_id": {
"type": "string",
"description": "(Optional) Unique identifier for the parent span, if this is a child span"
}
},
"additionalProperties": false,
"required": [
"type",
"name"
],
"title": "SpanStartPayload",
"description": "Payload for a span start event."
},
"SpanStatus": {
"type": "string",
"enum": [
"ok",
"error"
],
"title": "SpanStatus",
"description": "The status of a span indicating whether it completed successfully or with an error."
},
"StructuredLogEvent": {
"type": "object",
"properties": {
"trace_id": {
"type": "string",
"description": "Unique identifier for the trace this event belongs to"
},
"span_id": {
"type": "string",
"description": "Unique identifier for the span this event belongs to"
},
"timestamp": {
"type": "string",
"format": "date-time",
"description": "Timestamp when the event occurred"
},
"attributes": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "string"
},
{
"type": "integer"
},
{
"type": "number"
},
{
"type": "boolean"
},
{
"type": "null"
}
]
},
"description": "(Optional) Key-value pairs containing additional metadata about the event"
},
"type": {
"$ref": "#/components/schemas/EventType",
"const": "structured_log",
"default": "structured_log",
"description": "Event type identifier set to STRUCTURED_LOG"
},
"payload": {
"oneOf": [
{
"$ref": "#/components/schemas/SpanStartPayload"
},
{
"$ref": "#/components/schemas/SpanEndPayload"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"span_start": "#/components/schemas/SpanStartPayload",
"span_end": "#/components/schemas/SpanEndPayload"
}
},
"description": "The structured payload data for the log event"
}
},
"additionalProperties": false,
"required": [
"trace_id",
"span_id",
"timestamp",
"type",
"payload"
],
"title": "StructuredLogEvent",
"description": "A structured log event containing typed payload data."
},
"StructuredLogType": {
"type": "string",
"enum": [
"span_start",
"span_end"
],
"title": "StructuredLogType",
"description": "The type of structured log event payload."
},
"UnstructuredLogEvent": {
"type": "object",
"properties": {
"trace_id": {
"type": "string",
"description": "Unique identifier for the trace this event belongs to"
},
"span_id": {
"type": "string",
"description": "Unique identifier for the span this event belongs to"
},
"timestamp": {
"type": "string",
"format": "date-time",
"description": "Timestamp when the event occurred"
},
"attributes": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "string"
},
{
"type": "integer"
},
{
"type": "number"
},
{
"type": "boolean"
},
{
"type": "null"
}
]
},
"description": "(Optional) Key-value pairs containing additional metadata about the event"
},
"type": {
"$ref": "#/components/schemas/EventType",
"const": "unstructured_log",
"default": "unstructured_log",
"description": "Event type identifier set to UNSTRUCTURED_LOG"
},
"message": {
"type": "string",
"description": "The log message text"
},
"severity": {
"$ref": "#/components/schemas/LogSeverity",
"description": "The severity level of the log message"
}
},
"additionalProperties": false,
"required": [
"trace_id",
"span_id",
"timestamp",
"type",
"message",
"severity"
],
"title": "UnstructuredLogEvent",
"description": "An unstructured log event containing a simple text message."
},
"LogEventRequest": {
"type": "object",
"properties": {
"event": {
"$ref": "#/components/schemas/Event",
"description": "The event to log."
},
"ttl_seconds": {
"type": "integer",
"description": "The time to live of the event."
}
},
"additionalProperties": false,
"required": [
"event",
"ttl_seconds"
],
"title": "LogEventRequest"
},
"InvokeToolRequest": { "InvokeToolRequest": {
"type": "object", "type": "object",
"properties": { "properties": {
@@ -12962,10 +12576,6 @@
"name": "SyntheticDataGeneration (Coming Soon)", "name": "SyntheticDataGeneration (Coming Soon)",
"description": "" "description": ""
}, },
{
"name": "Telemetry",
"description": ""
},
{ {
"name": "ToolGroups", "name": "ToolGroups",
"description": "" "description": ""
@@ -13000,7 +12610,6 @@
"ScoringFunctions", "ScoringFunctions",
"Shields", "Shields",
"SyntheticDataGeneration (Coming Soon)", "SyntheticDataGeneration (Coming Soon)",
"Telemetry",
"ToolGroups", "ToolGroups",
"ToolRuntime", "ToolRuntime",
"VectorDBs", "VectorDBs",

View file

@@ -1944,33 +1944,6 @@ paths:
$ref: '#/components/schemas/SyntheticDataGenerateRequest' $ref: '#/components/schemas/SyntheticDataGenerateRequest'
required: true required: true
deprecated: false deprecated: false
/v1/telemetry/events:
post:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Log an event.
description: Log an event.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/LogEventRequest'
required: true
deprecated: false
/v1/tool-runtime/invoke: /v1/tool-runtime/invoke:
post: post:
responses: responses:
@@ -7840,267 +7813,6 @@ components:
description: >- description: >-
Response from the synthetic data generation. Batch of (prompt, response, score) Response from the synthetic data generation. Batch of (prompt, response, score)
tuples that pass the threshold. tuples that pass the threshold.
Event:
oneOf:
- $ref: '#/components/schemas/UnstructuredLogEvent'
- $ref: '#/components/schemas/MetricEvent'
- $ref: '#/components/schemas/StructuredLogEvent'
discriminator:
propertyName: type
mapping:
unstructured_log: '#/components/schemas/UnstructuredLogEvent'
metric: '#/components/schemas/MetricEvent'
structured_log: '#/components/schemas/StructuredLogEvent'
EventType:
type: string
enum:
- unstructured_log
- structured_log
- metric
title: EventType
description: >-
The type of telemetry event being logged.
LogSeverity:
type: string
enum:
- verbose
- debug
- info
- warn
- error
- critical
title: LogSeverity
description: The severity level of a log message.
MetricEvent:
type: object
properties:
trace_id:
type: string
description: >-
Unique identifier for the trace this event belongs to
span_id:
type: string
description: >-
Unique identifier for the span this event belongs to
timestamp:
type: string
format: date-time
description: Timestamp when the event occurred
attributes:
type: object
additionalProperties:
oneOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
description: >-
(Optional) Key-value pairs containing additional metadata about the event
type:
$ref: '#/components/schemas/EventType'
const: metric
default: metric
description: Event type identifier set to METRIC
metric:
type: string
description: The name of the metric being measured
value:
oneOf:
- type: integer
- type: number
description: >-
The numeric value of the metric measurement
unit:
type: string
description: >-
The unit of measurement for the metric value
additionalProperties: false
required:
- trace_id
- span_id
- timestamp
- type
- metric
- value
- unit
title: MetricEvent
description: >-
A metric event containing a measured value.
SpanEndPayload:
type: object
properties:
type:
$ref: '#/components/schemas/StructuredLogType'
const: span_end
default: span_end
description: Payload type identifier set to SPAN_END
status:
$ref: '#/components/schemas/SpanStatus'
description: >-
The final status of the span indicating success or failure
additionalProperties: false
required:
- type
- status
title: SpanEndPayload
description: Payload for a span end event.
SpanStartPayload:
type: object
properties:
type:
$ref: '#/components/schemas/StructuredLogType'
const: span_start
default: span_start
description: >-
Payload type identifier set to SPAN_START
name:
type: string
description: >-
Human-readable name describing the operation this span represents
parent_span_id:
type: string
description: >-
(Optional) Unique identifier for the parent span, if this is a child span
additionalProperties: false
required:
- type
- name
title: SpanStartPayload
description: Payload for a span start event.
SpanStatus:
type: string
enum:
- ok
- error
title: SpanStatus
description: >-
The status of a span indicating whether it completed successfully or with
an error.
StructuredLogEvent:
type: object
properties:
trace_id:
type: string
description: >-
Unique identifier for the trace this event belongs to
span_id:
type: string
description: >-
Unique identifier for the span this event belongs to
timestamp:
type: string
format: date-time
description: Timestamp when the event occurred
attributes:
type: object
additionalProperties:
oneOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
description: >-
(Optional) Key-value pairs containing additional metadata about the event
type:
$ref: '#/components/schemas/EventType'
const: structured_log
default: structured_log
description: >-
Event type identifier set to STRUCTURED_LOG
payload:
oneOf:
- $ref: '#/components/schemas/SpanStartPayload'
- $ref: '#/components/schemas/SpanEndPayload'
discriminator:
propertyName: type
mapping:
span_start: '#/components/schemas/SpanStartPayload'
span_end: '#/components/schemas/SpanEndPayload'
description: >-
The structured payload data for the log event
additionalProperties: false
required:
- trace_id
- span_id
- timestamp
- type
- payload
title: StructuredLogEvent
description: >-
A structured log event containing typed payload data.
StructuredLogType:
type: string
enum:
- span_start
- span_end
title: StructuredLogType
description: >-
The type of structured log event payload.
UnstructuredLogEvent:
type: object
properties:
trace_id:
type: string
description: >-
Unique identifier for the trace this event belongs to
span_id:
type: string
description: >-
Unique identifier for the span this event belongs to
timestamp:
type: string
format: date-time
description: Timestamp when the event occurred
attributes:
type: object
additionalProperties:
oneOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
description: >-
(Optional) Key-value pairs containing additional metadata about the event
type:
$ref: '#/components/schemas/EventType'
const: unstructured_log
default: unstructured_log
description: >-
Event type identifier set to UNSTRUCTURED_LOG
message:
type: string
description: The log message text
severity:
$ref: '#/components/schemas/LogSeverity'
description: The severity level of the log message
additionalProperties: false
required:
- trace_id
- span_id
- timestamp
- type
- message
- severity
title: UnstructuredLogEvent
description: >-
An unstructured log event containing a simple text message.
LogEventRequest:
type: object
properties:
event:
$ref: '#/components/schemas/Event'
description: The event to log.
ttl_seconds:
type: integer
description: The time to live of the event.
additionalProperties: false
required:
- event
- ttl_seconds
title: LogEventRequest
InvokeToolRequest: InvokeToolRequest:
type: object type: object
properties: properties:
@@ -9833,8 +9545,6 @@ tags:
description: '' description: ''
- name: SyntheticDataGeneration (Coming Soon) - name: SyntheticDataGeneration (Coming Soon)
description: '' description: ''
- name: Telemetry
description: ''
- name: ToolGroups - name: ToolGroups
description: '' description: ''
- name: ToolRuntime - name: ToolRuntime
@@ -9859,7 +9569,6 @@ x-tagGroups:
- ScoringFunctions - ScoringFunctions
- Shields - Shields
- SyntheticDataGeneration (Coming Soon) - SyntheticDataGeneration (Coming Soon)
- Telemetry
- ToolGroups - ToolGroups
- ToolRuntime - ToolRuntime
- VectorDBs - VectorDBs
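
For comparison, the metric variant of the same Event union carries a named measurement rather than a message. A minimal sketch of that payload shape, with illustrative values only:

# Required keys mirror the MetricEvent schema above; the metric name, unit, and IDs are placeholders.
metric_event = {
    "type": "metric",
    "trace_id": "trace-123",
    "span_id": "span-456",
    "timestamp": "2025-10-08T18:39:41Z",
    "metric": "prompt_tokens",
    "value": 128,
    "unit": "tokens",
}
required = {"trace_id", "span_id", "timestamp", "type", "metric", "value", "unit"}
assert required <= metric_event.keys()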

File diff suppressed because it is too large

View file

@@ -1947,33 +1947,6 @@ paths:
$ref: '#/components/schemas/SyntheticDataGenerateRequest' $ref: '#/components/schemas/SyntheticDataGenerateRequest'
required: true required: true
deprecated: false deprecated: false
/v1/telemetry/events:
post:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Log an event.
description: Log an event.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/LogEventRequest'
required: true
deprecated: false
/v1/tool-runtime/invoke: /v1/tool-runtime/invoke:
post: post:
responses: responses:
@@ -4392,238 +4365,6 @@ paths:
$ref: '#/components/schemas/SupervisedFineTuneRequest' $ref: '#/components/schemas/SupervisedFineTuneRequest'
required: true required: true
deprecated: false deprecated: false
/v1alpha/telemetry/metrics/{metric_name}:
post:
responses:
'200':
description: A QueryMetricsResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/QueryMetricsResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Query metrics.
description: Query metrics.
parameters:
- name: metric_name
in: path
description: The name of the metric to query.
required: true
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/QueryMetricsRequest'
required: true
deprecated: false
/v1alpha/telemetry/spans:
post:
responses:
'200':
description: A QuerySpansResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/QuerySpansResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Query spans.
description: Query spans.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/QuerySpansRequest'
required: true
deprecated: false
/v1alpha/telemetry/spans/export:
post:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Save spans to a dataset.
description: Save spans to a dataset.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/SaveSpansToDatasetRequest'
required: true
deprecated: false
/v1alpha/telemetry/spans/{span_id}/tree:
post:
responses:
'200':
description: A QuerySpanTreeResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/QuerySpanTreeResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Get a span tree by its ID.
description: Get a span tree by its ID.
parameters:
- name: span_id
in: path
description: The ID of the span to get the tree from.
required: true
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/GetSpanTreeRequest'
required: true
deprecated: false
/v1alpha/telemetry/traces:
post:
responses:
'200':
description: A QueryTracesResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/QueryTracesResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Query traces.
description: Query traces.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/QueryTracesRequest'
required: true
deprecated: false
/v1alpha/telemetry/traces/{trace_id}:
get:
responses:
'200':
description: A Trace.
content:
application/json:
schema:
$ref: '#/components/schemas/Trace'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Get a trace by its ID.
description: Get a trace by its ID.
parameters:
- name: trace_id
in: path
description: The ID of the trace to get.
required: true
schema:
type: string
deprecated: false
/v1alpha/telemetry/traces/{trace_id}/spans/{span_id}:
get:
responses:
'200':
description: A Span.
content:
application/json:
schema:
$ref: '#/components/schemas/Span'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
summary: Get a span by its ID.
description: Get a span by its ID.
parameters:
- name: trace_id
in: path
description: >-
The ID of the trace to get the span from.
required: true
schema:
type: string
- name: span_id
in: path
description: The ID of the span to get.
required: true
schema:
type: string
deprecated: false
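
The span-tree route above takes the span ID in the path and a GetSpanTreeRequest body; the response maps span IDs to SpanWithStatus objects. A sketch, with placeholder server address, token, span ID, and attribute names:

import requests

resp = requests.post(
    "http://localhost:8321/v1alpha/telemetry/spans/span-456/tree",  # assumption: local server, placeholder span ID
    json={"attributes_to_return": ["model_id"], "max_depth": 3},    # hypothetical attribute name
    headers={"Authorization": "Bearer <token>"},
    timeout=30,
)
resp.raise_for_status()
tree = resp.json()["data"]  # dict: span_id -> SpanWithStatus
for span_id, span in tree.items():
    print(span_id, span["name"], span.get("status"))
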
jsonSchemaDialect: >- jsonSchemaDialect: >-
https://json-schema.org/draft/2020-12/schema https://json-schema.org/draft/2020-12/schema
components: components:
@@ -9285,267 +9026,6 @@ components:
description: >- description: >-
Response from the synthetic data generation. Batch of (prompt, response, score) Response from the synthetic data generation. Batch of (prompt, response, score)
tuples that pass the threshold. tuples that pass the threshold.
Event:
oneOf:
- $ref: '#/components/schemas/UnstructuredLogEvent'
- $ref: '#/components/schemas/MetricEvent'
- $ref: '#/components/schemas/StructuredLogEvent'
discriminator:
propertyName: type
mapping:
unstructured_log: '#/components/schemas/UnstructuredLogEvent'
metric: '#/components/schemas/MetricEvent'
structured_log: '#/components/schemas/StructuredLogEvent'
EventType:
type: string
enum:
- unstructured_log
- structured_log
- metric
title: EventType
description: >-
The type of telemetry event being logged.
LogSeverity:
type: string
enum:
- verbose
- debug
- info
- warn
- error
- critical
title: LogSeverity
description: The severity level of a log message.
MetricEvent:
type: object
properties:
trace_id:
type: string
description: >-
Unique identifier for the trace this event belongs to
span_id:
type: string
description: >-
Unique identifier for the span this event belongs to
timestamp:
type: string
format: date-time
description: Timestamp when the event occurred
attributes:
type: object
additionalProperties:
oneOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
description: >-
(Optional) Key-value pairs containing additional metadata about the event
type:
$ref: '#/components/schemas/EventType'
const: metric
default: metric
description: Event type identifier set to METRIC
metric:
type: string
description: The name of the metric being measured
value:
oneOf:
- type: integer
- type: number
description: >-
The numeric value of the metric measurement
unit:
type: string
description: >-
The unit of measurement for the metric value
additionalProperties: false
required:
- trace_id
- span_id
- timestamp
- type
- metric
- value
- unit
title: MetricEvent
description: >-
A metric event containing a measured value.
SpanEndPayload:
type: object
properties:
type:
$ref: '#/components/schemas/StructuredLogType'
const: span_end
default: span_end
description: Payload type identifier set to SPAN_END
status:
$ref: '#/components/schemas/SpanStatus'
description: >-
The final status of the span indicating success or failure
additionalProperties: false
required:
- type
- status
title: SpanEndPayload
description: Payload for a span end event.
SpanStartPayload:
type: object
properties:
type:
$ref: '#/components/schemas/StructuredLogType'
const: span_start
default: span_start
description: >-
Payload type identifier set to SPAN_START
name:
type: string
description: >-
Human-readable name describing the operation this span represents
parent_span_id:
type: string
description: >-
(Optional) Unique identifier for the parent span, if this is a child span
additionalProperties: false
required:
- type
- name
title: SpanStartPayload
description: Payload for a span start event.
SpanStatus:
type: string
enum:
- ok
- error
title: SpanStatus
description: >-
The status of a span indicating whether it completed successfully or with
an error.
StructuredLogEvent:
type: object
properties:
trace_id:
type: string
description: >-
Unique identifier for the trace this event belongs to
span_id:
type: string
description: >-
Unique identifier for the span this event belongs to
timestamp:
type: string
format: date-time
description: Timestamp when the event occurred
attributes:
type: object
additionalProperties:
oneOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
description: >-
(Optional) Key-value pairs containing additional metadata about the event
type:
$ref: '#/components/schemas/EventType'
const: structured_log
default: structured_log
description: >-
Event type identifier set to STRUCTURED_LOG
payload:
oneOf:
- $ref: '#/components/schemas/SpanStartPayload'
- $ref: '#/components/schemas/SpanEndPayload'
discriminator:
propertyName: type
mapping:
span_start: '#/components/schemas/SpanStartPayload'
span_end: '#/components/schemas/SpanEndPayload'
description: >-
The structured payload data for the log event
additionalProperties: false
required:
- trace_id
- span_id
- timestamp
- type
- payload
title: StructuredLogEvent
description: >-
A structured log event containing typed payload data.
StructuredLogType:
type: string
enum:
- span_start
- span_end
title: StructuredLogType
description: >-
The type of structured log event payload.
UnstructuredLogEvent:
type: object
properties:
trace_id:
type: string
description: >-
Unique identifier for the trace this event belongs to
span_id:
type: string
description: >-
Unique identifier for the span this event belongs to
timestamp:
type: string
format: date-time
description: Timestamp when the event occurred
attributes:
type: object
additionalProperties:
oneOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
description: >-
(Optional) Key-value pairs containing additional metadata about the event
type:
$ref: '#/components/schemas/EventType'
const: unstructured_log
default: unstructured_log
description: >-
Event type identifier set to UNSTRUCTURED_LOG
message:
type: string
description: The log message text
severity:
$ref: '#/components/schemas/LogSeverity'
description: The severity level of the log message
additionalProperties: false
required:
- trace_id
- span_id
- timestamp
- type
- message
- severity
title: UnstructuredLogEvent
description: >-
An unstructured log event containing a simple text message.
LogEventRequest:
type: object
properties:
event:
$ref: '#/components/schemas/Event'
description: The event to log.
ttl_seconds:
type: integer
description: The time to live of the event.
additionalProperties: false
required:
- event
- ttl_seconds
title: LogEventRequest
InvokeToolRequest: InvokeToolRequest:
type: object type: object
properties: properties:
@@ -13349,425 +12829,6 @@ components:
- hyperparam_search_config - hyperparam_search_config
- logger_config - logger_config
title: SupervisedFineTuneRequest title: SupervisedFineTuneRequest
QueryMetricsRequest:
type: object
properties:
start_time:
type: integer
description: The start time of the metric to query.
end_time:
type: integer
description: The end time of the metric to query.
granularity:
type: string
description: The granularity of the metric to query.
query_type:
type: string
enum:
- range
- instant
description: The type of query to perform.
label_matchers:
type: array
items:
type: object
properties:
name:
type: string
description: The name of the label to match
value:
type: string
description: The value to match against
operator:
type: string
enum:
- '='
- '!='
- =~
- '!~'
description: >-
The comparison operator to use for matching
default: '='
additionalProperties: false
required:
- name
- value
- operator
title: MetricLabelMatcher
description: >-
A matcher for filtering metrics by label values.
description: >-
The label matchers to apply to the metric.
additionalProperties: false
required:
- start_time
- query_type
title: QueryMetricsRequest
MetricDataPoint:
type: object
properties:
timestamp:
type: integer
description: >-
Unix timestamp when the metric value was recorded
value:
type: number
description: >-
The numeric value of the metric at this timestamp
unit:
type: string
additionalProperties: false
required:
- timestamp
- value
- unit
title: MetricDataPoint
description: >-
A single data point in a metric time series.
MetricLabel:
type: object
properties:
name:
type: string
description: The name of the label
value:
type: string
description: The value of the label
additionalProperties: false
required:
- name
- value
title: MetricLabel
description: A label associated with a metric.
MetricSeries:
type: object
properties:
metric:
type: string
description: The name of the metric
labels:
type: array
items:
$ref: '#/components/schemas/MetricLabel'
description: >-
List of labels associated with this metric series
values:
type: array
items:
$ref: '#/components/schemas/MetricDataPoint'
description: >-
List of data points in chronological order
additionalProperties: false
required:
- metric
- labels
- values
title: MetricSeries
description: A time series of metric data points.
QueryMetricsResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/MetricSeries'
description: >-
List of metric series matching the query criteria
additionalProperties: false
required:
- data
title: QueryMetricsResponse
description: >-
Response containing metric time series data.
QueryCondition:
type: object
properties:
key:
type: string
description: The attribute key to filter on
op:
$ref: '#/components/schemas/QueryConditionOp'
description: The comparison operator to apply
value:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: The value to compare against
additionalProperties: false
required:
- key
- op
- value
title: QueryCondition
description: A condition for filtering query results.
QueryConditionOp:
type: string
enum:
- eq
- ne
- gt
- lt
title: QueryConditionOp
description: >-
Comparison operators for query conditions.
QuerySpansRequest:
type: object
properties:
attribute_filters:
type: array
items:
$ref: '#/components/schemas/QueryCondition'
description: >-
The attribute filters to apply to the spans.
attributes_to_return:
type: array
items:
type: string
description: The attributes to return in the spans.
max_depth:
type: integer
description: The maximum depth of the tree.
additionalProperties: false
required:
- attribute_filters
- attributes_to_return
title: QuerySpansRequest
Span:
type: object
properties:
span_id:
type: string
description: Unique identifier for the span
trace_id:
type: string
description: >-
Unique identifier for the trace this span belongs to
parent_span_id:
type: string
description: >-
(Optional) Unique identifier for the parent span, if this is a child span
name:
type: string
description: >-
Human-readable name describing the operation this span represents
start_time:
type: string
format: date-time
description: Timestamp when the operation began
end_time:
type: string
format: date-time
description: >-
(Optional) Timestamp when the operation finished, if completed
attributes:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Key-value pairs containing additional metadata about the span
additionalProperties: false
required:
- span_id
- trace_id
- name
- start_time
title: Span
description: >-
A span representing a single operation within a trace.
QuerySpansResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/Span'
description: >-
List of spans matching the query criteria
additionalProperties: false
required:
- data
title: QuerySpansResponse
description: Response containing a list of spans.
SaveSpansToDatasetRequest:
type: object
properties:
attribute_filters:
type: array
items:
$ref: '#/components/schemas/QueryCondition'
description: >-
The attribute filters to apply to the spans.
attributes_to_save:
type: array
items:
type: string
description: The attributes to save to the dataset.
dataset_id:
type: string
description: >-
The ID of the dataset to save the spans to.
max_depth:
type: integer
description: The maximum depth of the tree.
additionalProperties: false
required:
- attribute_filters
- attributes_to_save
- dataset_id
title: SaveSpansToDatasetRequest
GetSpanTreeRequest:
type: object
properties:
attributes_to_return:
type: array
items:
type: string
description: The attributes to return in the tree.
max_depth:
type: integer
description: The maximum depth of the tree.
additionalProperties: false
title: GetSpanTreeRequest
SpanWithStatus:
type: object
properties:
span_id:
type: string
description: Unique identifier for the span
trace_id:
type: string
description: >-
Unique identifier for the trace this span belongs to
parent_span_id:
type: string
description: >-
(Optional) Unique identifier for the parent span, if this is a child span
name:
type: string
description: >-
Human-readable name describing the operation this span represents
start_time:
type: string
format: date-time
description: Timestamp when the operation began
end_time:
type: string
format: date-time
description: >-
(Optional) Timestamp when the operation finished, if completed
attributes:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Key-value pairs containing additional metadata about the span
status:
$ref: '#/components/schemas/SpanStatus'
description: >-
(Optional) The current status of the span
additionalProperties: false
required:
- span_id
- trace_id
- name
- start_time
title: SpanWithStatus
description: A span that includes status information.
QuerySpanTreeResponse:
type: object
properties:
data:
type: object
additionalProperties:
$ref: '#/components/schemas/SpanWithStatus'
description: >-
Dictionary mapping span IDs to spans with status information
additionalProperties: false
required:
- data
title: QuerySpanTreeResponse
description: >-
Response containing a tree structure of spans.
QueryTracesRequest:
type: object
properties:
attribute_filters:
type: array
items:
$ref: '#/components/schemas/QueryCondition'
description: >-
The attribute filters to apply to the traces.
limit:
type: integer
description: The limit of traces to return.
offset:
type: integer
description: The offset of the traces to return.
order_by:
type: array
items:
type: string
description: The sort order for the returned traces.
additionalProperties: false
title: QueryTracesRequest
Trace:
type: object
properties:
trace_id:
type: string
description: Unique identifier for the trace
root_span_id:
type: string
description: >-
Unique identifier for the root span that started this trace
start_time:
type: string
format: date-time
description: Timestamp when the trace began
end_time:
type: string
format: date-time
description: >-
(Optional) Timestamp when the trace finished, if completed
additionalProperties: false
required:
- trace_id
- root_span_id
- start_time
title: Trace
description: >-
A trace representing the complete execution path of a request across multiple
operations.
QueryTracesResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/Trace'
description: >-
List of traces matching the query criteria
additionalProperties: false
required:
- data
title: QueryTracesResponse
description: Response containing a list of traces.
responses: responses:
BadRequest400: BadRequest400:
description: The request was invalid or malformed description: The request was invalid or malformed
@@ -13881,8 +12942,6 @@ tags:
description: '' description: ''
- name: SyntheticDataGeneration (Coming Soon) - name: SyntheticDataGeneration (Coming Soon)
description: '' description: ''
- name: Telemetry
description: ''
- name: ToolGroups - name: ToolGroups
description: '' description: ''
- name: ToolRuntime - name: ToolRuntime
@@ -13912,7 +12971,6 @@ x-tagGroups:
- ScoringFunctions - ScoringFunctions
- Shields - Shields
- SyntheticDataGeneration (Coming Soon) - SyntheticDataGeneration (Coming Soon)
- Telemetry
- ToolGroups - ToolGroups
- ToolRuntime - ToolRuntime
- VectorDBs - VectorDBs
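
The QueryMetricsRequest schema above pairs a time window and query_type with optional label matchers, and the metric name travels in the path. A sketch of a range query, with placeholder server address, token, label, and metric name:

import time
import requests

body = {
    "start_time": int(time.time()) - 3600,
    "end_time": int(time.time()),
    "granularity": "1m",  # free-form string in the schema above
    "query_type": "range",
    "label_matchers": [
        {"name": "model_id", "value": "meta-llama/llama-3-3-70b-instruct", "operator": "="},  # hypothetical label
    ],
}
resp = requests.post(
    "http://localhost:8321/v1alpha/telemetry/metrics/prompt_tokens",  # assumption: local server, example metric name
    json=body,
    headers={"Authorization": "Bearer <token>"},
    timeout=30,
)
resp.raise_for_status()
for series in resp.json()["data"]:  # QueryMetricsResponse.data -> list of MetricSeries
    print(series["metric"], series["labels"], len(series["values"]))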

View file

@@ -16,15 +16,12 @@ from typing import (
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
from llama_stack.models.llama.datatypes import Primitive from llama_stack.models.llama.datatypes import Primitive
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod from llama_stack.schema_utils import json_schema_type, register_schema
# Add this constant near the top of the file, after the imports # Add this constant near the top of the file, after the imports
DEFAULT_TTL_DAYS = 7 DEFAULT_TTL_DAYS = 7
REQUIRED_SCOPE = "telemetry.read"
@json_schema_type @json_schema_type
class SpanStatus(Enum): class SpanStatus(Enum):
@@ -413,7 +410,6 @@ class QueryMetricsResponse(BaseModel):
@runtime_checkable @runtime_checkable
class Telemetry(Protocol): class Telemetry(Protocol):
@webmethod(route="/telemetry/events", method="POST", level=LLAMA_STACK_API_V1)
async def log_event( async def log_event(
self, self,
event: Event, event: Event,
@@ -426,14 +422,6 @@ class Telemetry(Protocol):
""" """
... ...
@webmethod(
route="/telemetry/traces",
method="POST",
required_scope=REQUIRED_SCOPE,
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod(route="/telemetry/traces", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1ALPHA)
async def query_traces( async def query_traces(
self, self,
attribute_filters: list[QueryCondition] | None = None, attribute_filters: list[QueryCondition] | None = None,
@@ -451,19 +439,6 @@ class Telemetry(Protocol):
""" """
... ...
@webmethod(
route="/telemetry/traces/{trace_id:path}",
method="GET",
required_scope=REQUIRED_SCOPE,
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod(
route="/telemetry/traces/{trace_id:path}",
method="GET",
required_scope=REQUIRED_SCOPE,
level=LLAMA_STACK_API_V1ALPHA,
)
async def get_trace(self, trace_id: str) -> Trace: async def get_trace(self, trace_id: str) -> Trace:
"""Get a trace by its ID. """Get a trace by its ID.
@@ -472,19 +447,6 @@ class Telemetry(Protocol):
""" """
... ...
@webmethod(
route="/telemetry/traces/{trace_id:path}/spans/{span_id:path}",
method="GET",
required_scope=REQUIRED_SCOPE,
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod(
route="/telemetry/traces/{trace_id:path}/spans/{span_id:path}",
method="GET",
required_scope=REQUIRED_SCOPE,
level=LLAMA_STACK_API_V1ALPHA,
)
async def get_span(self, trace_id: str, span_id: str) -> Span: async def get_span(self, trace_id: str, span_id: str) -> Span:
"""Get a span by its ID. """Get a span by its ID.
@@ -494,19 +456,6 @@ class Telemetry(Protocol):
""" """
... ...
@webmethod(
route="/telemetry/spans/{span_id:path}/tree",
method="POST",
deprecated=True,
required_scope=REQUIRED_SCOPE,
level=LLAMA_STACK_API_V1,
)
@webmethod(
route="/telemetry/spans/{span_id:path}/tree",
method="POST",
required_scope=REQUIRED_SCOPE,
level=LLAMA_STACK_API_V1ALPHA,
)
async def get_span_tree( async def get_span_tree(
self, self,
span_id: str, span_id: str,
@@ -522,14 +471,6 @@ class Telemetry(Protocol):
""" """
... ...
@webmethod(
route="/telemetry/spans",
method="POST",
required_scope=REQUIRED_SCOPE,
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod(route="/telemetry/spans", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1ALPHA)
async def query_spans( async def query_spans(
self, self,
attribute_filters: list[QueryCondition], attribute_filters: list[QueryCondition],
@@ -545,8 +486,6 @@ class Telemetry(Protocol):
""" """
... ...
@webmethod(route="/telemetry/spans/export", method="POST", deprecated=True, level=LLAMA_STACK_API_V1)
@webmethod(route="/telemetry/spans/export", method="POST", level=LLAMA_STACK_API_V1ALPHA)
async def save_spans_to_dataset( async def save_spans_to_dataset(
self, self,
attribute_filters: list[QueryCondition], attribute_filters: list[QueryCondition],
@@ -563,19 +502,6 @@ class Telemetry(Protocol):
""" """
... ...
@webmethod(
route="/telemetry/metrics/{metric_name}",
method="POST",
required_scope=REQUIRED_SCOPE,
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod(
route="/telemetry/metrics/{metric_name}",
method="POST",
required_scope=REQUIRED_SCOPE,
level=LLAMA_STACK_API_V1ALPHA,
)
async def query_metrics( async def query_metrics(
self, self,
metric_name: str, metric_name: str,
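
The decorators stripped in this file followed a consistent pattern: each telemetry route was registered twice, once under /v1 marked deprecated and once under /v1alpha, both gated by the telemetry.read scope. A sketch of that pattern on a reduced protocol, reusing only names that appear in the removed code; it is an illustration, not the actual Telemetry class:

from typing import Protocol, runtime_checkable

from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
from llama_stack.schema_utils import webmethod

REQUIRED_SCOPE = "telemetry.read"


@runtime_checkable
class TelemetrySketch(Protocol):
    # The v1 registration survives only as a deprecated alias; v1alpha is the canonical route.
    @webmethod(
        route="/telemetry/traces",
        method="POST",
        required_scope=REQUIRED_SCOPE,
        deprecated=True,
        level=LLAMA_STACK_API_V1,
    )
    @webmethod(route="/telemetry/traces", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1ALPHA)
    async def query_traces(self, limit: int | None = None, offset: int | None = None): ...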

View file

@@ -32,7 +32,7 @@ from llama_stack.providers.utils.sqlstore.sqlstore import (
sqlstore_impl, sqlstore_impl,
) )
logger = get_logger(name=__name__, category="openai::conversations") logger = get_logger(name=__name__, category="openai_conversations")
class ConversationServiceConfig(BaseModel): class ConversationServiceConfig(BaseModel):

View file

@@ -611,7 +611,7 @@ class InferenceRouter(Inference):
completion_text += "".join(choice_data["content_parts"]) completion_text += "".join(choice_data["content_parts"])
# Add metrics to the chunk # Add metrics to the chunk
if self.telemetry and chunk.usage: if self.telemetry and hasattr(chunk, "usage") and chunk.usage:
metrics = self._construct_metrics( metrics = self._construct_metrics(
prompt_tokens=chunk.usage.prompt_tokens, prompt_tokens=chunk.usage.prompt_tokens,
completion_tokens=chunk.usage.completion_tokens, completion_tokens=chunk.usage.completion_tokens,
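
The added hasattr guard matters because not every chunk type flowing through the router is guaranteed to expose a usage attribute; checking truthiness alone would raise AttributeError on those. A standalone illustration of the guard, using stand-in classes rather than the router's real chunk types:

class ChunkWithUsage:
    def __init__(self, usage):
        self.usage = usage


class ChunkWithoutUsage:
    pass


def has_reportable_usage(chunk) -> bool:
    # Mirrors the guard above: the attribute must exist and be truthy before metrics are built.
    return hasattr(chunk, "usage") and bool(chunk.usage)


assert has_reportable_usage(ChunkWithUsage(usage=object()))
assert not has_reportable_usage(ChunkWithUsage(usage=None))
assert not has_reportable_usage(ChunkWithoutUsage())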

View file

@@ -98,7 +98,10 @@ class DiskDistributionRegistry(DistributionRegistry):
existing_obj = await self.get(obj.type, obj.identifier) existing_obj = await self.get(obj.type, obj.identifier)
# don't register if the object's provider_id already exists # don't register if the object's provider_id already exists
if existing_obj and existing_obj.provider_id == obj.provider_id: if existing_obj and existing_obj.provider_id == obj.provider_id:
return False raise ValueError(
f"Provider '{obj.provider_id}' is already registered."
f"Unregister the existing provider first before registering it again."
)
await self.kvstore.set( await self.kvstore.set(
KEY_FORMAT.format(type=obj.type, identifier=obj.identifier), KEY_FORMAT.format(type=obj.type, identifier=obj.identifier),
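
From a caller's perspective the duplicate-registration case is now an explicit failure rather than a silent False. A sketch of handling it, with a hypothetical registry object exposing an async register method (the method name is assumed, not shown in this hunk):

async def register_or_skip(registry, obj) -> bool:
    # Returns True when the object was newly registered, False when the same
    # provider_id was already present and the registry raised.
    try:
        await registry.register(obj)  # assumed method name
        return True
    except ValueError as exc:
        print(f"skipping {obj.identifier}: {exc}")
        return False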

View file

@@ -3,3 +3,5 @@
# #
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from .watsonx import get_distribution_template # noqa: F401
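
With this re-export in place, callers can pull the template straight from the distribution package. A sketch, assuming the file above is llama_stack/distributions/watsonx/__init__.py and the surrounding package is importable:

from llama_stack.distributions.watsonx import get_distribution_template  # assumed package path

template = get_distribution_template()
print(template.name, template.description)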

View file

@@ -3,44 +3,33 @@ distribution_spec:
description: Use watsonx for running LLM inference description: Use watsonx for running LLM inference
providers: providers:
inference: inference:
- provider_id: watsonx - provider_type: remote::watsonx
provider_type: remote::watsonx - provider_type: inline::sentence-transformers
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
vector_io: vector_io:
- provider_id: faiss - provider_type: inline::faiss
provider_type: inline::faiss
safety: safety:
- provider_id: llama-guard - provider_type: inline::llama-guard
provider_type: inline::llama-guard
agents: agents:
- provider_id: meta-reference - provider_type: inline::meta-reference
provider_type: inline::meta-reference
telemetry: telemetry:
- provider_id: meta-reference - provider_type: inline::meta-reference
provider_type: inline::meta-reference
eval: eval:
- provider_id: meta-reference - provider_type: inline::meta-reference
provider_type: inline::meta-reference
datasetio: datasetio:
- provider_id: huggingface - provider_type: remote::huggingface
provider_type: remote::huggingface - provider_type: inline::localfs
- provider_id: localfs
provider_type: inline::localfs
scoring: scoring:
- provider_id: basic - provider_type: inline::basic
provider_type: inline::basic - provider_type: inline::llm-as-judge
- provider_id: llm-as-judge - provider_type: inline::braintrust
provider_type: inline::llm-as-judge
- provider_id: braintrust
provider_type: inline::braintrust
tool_runtime: tool_runtime:
- provider_type: remote::brave-search - provider_type: remote::brave-search
- provider_type: remote::tavily-search - provider_type: remote::tavily-search
- provider_type: inline::rag-runtime - provider_type: inline::rag-runtime
- provider_type: remote::model-context-protocol - provider_type: remote::model-context-protocol
files:
- provider_type: inline::localfs
image_type: venv image_type: venv
additional_pip_packages: additional_pip_packages:
- aiosqlite
- sqlalchemy[asyncio] - sqlalchemy[asyncio]
- aiosqlite
- aiosqlite

View file

@@ -4,13 +4,13 @@ apis:
- agents - agents
- datasetio - datasetio
- eval - eval
- files
- inference - inference
- safety - safety
- scoring - scoring
- telemetry - telemetry
- tool_runtime - tool_runtime
- vector_io - vector_io
- files
providers: providers:
inference: inference:
- provider_id: watsonx - provider_id: watsonx
@@ -19,8 +19,6 @@ providers:
url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com} url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
api_key: ${env.WATSONX_API_KEY:=} api_key: ${env.WATSONX_API_KEY:=}
project_id: ${env.WATSONX_PROJECT_ID:=} project_id: ${env.WATSONX_PROJECT_ID:=}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
vector_io: vector_io:
- provider_id: faiss - provider_id: faiss
provider_type: inline::faiss provider_type: inline::faiss
@@ -48,7 +46,7 @@ providers:
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
sinks: ${env.TELEMETRY_SINKS:=console,sqlite} sinks: ${env.TELEMETRY_SINKS:=sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/trace_store.db sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/trace_store.db
otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=} otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
eval: eval:
@@ -109,102 +107,7 @@ metadata_store:
inference_store: inference_store:
type: sqlite type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/inference_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/inference_store.db
models: models: []
- metadata: {}
model_id: meta-llama/llama-3-3-70b-instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-3-70b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-3-70b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/llama-2-13b-chat
provider_id: watsonx
provider_model_id: meta-llama/llama-2-13b-chat
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-2-13b
provider_id: watsonx
provider_model_id: meta-llama/llama-2-13b-chat
model_type: llm
- metadata: {}
model_id: meta-llama/llama-3-1-70b-instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-1-70b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-70B-Instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-1-70b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/llama-3-1-8b-instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-1-8b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-1-8b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/llama-3-2-11b-vision-instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-2-11b-vision-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-2-11b-vision-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/llama-3-2-1b-instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-2-1b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-1B-Instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-2-1b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/llama-3-2-3b-instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-2-3b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-2-3b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/llama-3-2-90b-vision-instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-2-90b-vision-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-2-90b-vision-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/llama-guard-3-11b-vision
provider_id: watsonx
provider_model_id: meta-llama/llama-guard-3-11b-vision
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-Guard-3-11B-Vision
provider_id: watsonx
provider_model_id: meta-llama/llama-guard-3-11b-vision
model_type: llm
- metadata:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
provider_id: sentence-transformers
model_type: embedding
shields: [] shields: []
vector_dbs: [] vector_dbs: []
datasets: [] datasets: []

View file

@ -4,17 +4,11 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from pathlib import Path
from llama_stack.apis.models import ModelType from llama_stack.core.datatypes import BuildProvider, Provider, ToolGroupInput
from llama_stack.core.datatypes import BuildProvider, ModelInput, Provider, ToolGroupInput from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings
from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings, get_model_registry
from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig
from llama_stack.providers.inline.inference.sentence_transformers import (
SentenceTransformersInferenceConfig,
)
from llama_stack.providers.remote.inference.watsonx import WatsonXConfig from llama_stack.providers.remote.inference.watsonx import WatsonXConfig
from llama_stack.providers.remote.inference.watsonx.models import MODEL_ENTRIES
def get_distribution_template(name: str = "watsonx") -> DistributionTemplate: def get_distribution_template(name: str = "watsonx") -> DistributionTemplate:
@ -52,15 +46,6 @@ def get_distribution_template(name: str = "watsonx") -> DistributionTemplate:
config=WatsonXConfig.sample_run_config(), config=WatsonXConfig.sample_run_config(),
) )
embedding_provider = Provider(
provider_id="sentence-transformers",
provider_type="inline::sentence-transformers",
config=SentenceTransformersInferenceConfig.sample_run_config(),
)
available_models = {
"watsonx": MODEL_ENTRIES,
}
default_tool_groups = [ default_tool_groups = [
ToolGroupInput( ToolGroupInput(
toolgroup_id="builtin::websearch", toolgroup_id="builtin::websearch",
@ -72,36 +57,25 @@ def get_distribution_template(name: str = "watsonx") -> DistributionTemplate:
), ),
] ]
embedding_model = ModelInput(
model_id="all-MiniLM-L6-v2",
provider_id="sentence-transformers",
model_type=ModelType.embedding,
metadata={
"embedding_dimension": 384,
},
)
files_provider = Provider( files_provider = Provider(
provider_id="meta-reference-files", provider_id="meta-reference-files",
provider_type="inline::localfs", provider_type="inline::localfs",
config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"), config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"),
) )
default_models, _ = get_model_registry(available_models)
return DistributionTemplate( return DistributionTemplate(
name=name, name=name,
distro_type="remote_hosted", distro_type="remote_hosted",
description="Use watsonx for running LLM inference", description="Use watsonx for running LLM inference",
container_image=None, container_image=None,
template_path=Path(__file__).parent / "doc_template.md", template_path=None,
providers=providers, providers=providers,
available_models_by_provider=available_models,
run_configs={ run_configs={
"run.yaml": RunConfigSettings( "run.yaml": RunConfigSettings(
provider_overrides={ provider_overrides={
"inference": [inference_provider, embedding_provider], "inference": [inference_provider],
"files": [files_provider], "files": [files_provider],
}, },
default_models=default_models + [embedding_model], default_models=[],
default_tool_groups=default_tool_groups, default_tool_groups=default_tool_groups,
), ),
}, },

View file

@ -31,12 +31,17 @@ CATEGORIES = [
"client", "client",
"telemetry", "telemetry",
"openai_responses", "openai_responses",
"openai_conversations",
"testing", "testing",
"providers", "providers",
"models", "models",
"files", "files",
"vector_io", "vector_io",
"tool_runtime", "tool_runtime",
"cli",
"post_training",
"scoring",
"tests",
] ]
UNCATEGORIZED = "uncategorized" UNCATEGORIZED = "uncategorized"
@ -261,11 +266,12 @@ def get_logger(
if root_category in _category_levels: if root_category in _category_levels:
log_level = _category_levels[root_category] log_level = _category_levels[root_category]
else: else:
log_level = _category_levels.get("root", DEFAULT_LOG_LEVEL)
if category != UNCATEGORIZED: if category != UNCATEGORIZED:
logging.warning( raise ValueError(
f"Unknown logging category: {category}. Falling back to default 'root' level: {log_level}" f"Unknown logging category: {category}. To resolve, choose a valid category from the CATEGORIES list "
f"or add it to the CATEGORIES list. Available categories: {CATEGORIES}"
) )
log_level = _category_levels.get("root", DEFAULT_LOG_LEVEL)
logger.setLevel(log_level) logger.setLevel(log_level)
return logging.LoggerAdapter(logger, {"category": category}) return logging.LoggerAdapter(logger, {"category": category})
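For illustration, a minimal self-contained sketch of the stricter category lookup above (names abbreviated; the real implementation derives `_category_levels` from `CATEGORIES` plus environment configuration, so treat the dictionaries here as assumptions):

```python
import logging

DEFAULT_LOG_LEVEL = logging.INFO
UNCATEGORIZED = "uncategorized"
CATEGORIES = ["core", "inference", "tool_runtime"]  # abbreviated for the sketch
_category_levels = {"root": logging.INFO, "inference": logging.DEBUG}


def resolve_level(category: str) -> int:
    # Same branch structure as the diff: known root categories resolve directly,
    # unknown categories now raise instead of silently falling back.
    root_category = category.split("::")[0]
    if root_category in _category_levels:
        return _category_levels[root_category]
    if category != UNCATEGORIZED:
        raise ValueError(
            f"Unknown logging category: {category}. Choose a valid category from {CATEGORIES} "
            "or add it to the CATEGORIES list."
        )
    return _category_levels.get("root", DEFAULT_LOG_LEVEL)


assert resolve_level("inference::watsonx") == logging.DEBUG
# resolve_level("not-a-category") now raises ValueError instead of logging a warning
```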

View file

@ -11,19 +11,13 @@
# top-level folder for each specific model found within the models/ directory at # top-level folder for each specific model found within the models/ directory at
# the top-level of this source tree. # the top-level of this source tree.
import json
import textwrap import textwrap
from pathlib import Path
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from llama_stack.models.llama.datatypes import ( from llama_stack.models.llama.datatypes import (
RawContent, RawContent,
RawMediaItem,
RawMessage, RawMessage,
RawTextItem,
StopReason,
ToolCall,
ToolPromptFormat, ToolPromptFormat,
) )
from llama_stack.models.llama.llama4.tokenizer import Tokenizer from llama_stack.models.llama.llama4.tokenizer import Tokenizer
@ -175,25 +169,6 @@ def llama3_1_builtin_code_interpreter_dialog(tool_prompt_format=ToolPromptFormat
return messages return messages
def llama3_1_builtin_tool_call_with_image_dialog(
tool_prompt_format=ToolPromptFormat.json,
):
this_dir = Path(__file__).parent
with open(this_dir / "llama3/dog.jpg", "rb") as f:
img = f.read()
interface = LLama31Interface(tool_prompt_format)
messages = interface.system_messages(**system_message_builtin_tools_only())
messages += interface.user_message(content=[RawMediaItem(data=img), RawTextItem(text="What is this dog breed?")])
messages += interface.assistant_response_messages(
"Based on the description of the dog in the image, it appears to be a small breed dog, possibly a terrier mix",
StopReason.end_of_turn,
)
messages += interface.user_message("Search the web for some food recommendations for the indentified breed")
return messages
def llama3_1_custom_tool_call_dialog(tool_prompt_format=ToolPromptFormat.json): def llama3_1_custom_tool_call_dialog(tool_prompt_format=ToolPromptFormat.json):
interface = LLama31Interface(tool_prompt_format) interface = LLama31Interface(tool_prompt_format)
@ -202,35 +177,6 @@ def llama3_1_custom_tool_call_dialog(tool_prompt_format=ToolPromptFormat.json):
return messages return messages
def llama3_1_e2e_tool_call_dialog(tool_prompt_format=ToolPromptFormat.json):
tool_response = json.dumps(["great song1", "awesome song2", "cool song3"])
interface = LLama31Interface(tool_prompt_format)
messages = interface.system_messages(**system_message_custom_tools_only())
messages += interface.user_message(content="Use tools to get latest trending songs")
messages.append(
RawMessage(
role="assistant",
content="",
stop_reason=StopReason.end_of_message,
tool_calls=[
ToolCall(
call_id="call_id",
tool_name="trending_songs",
arguments={"n": "10", "genre": "latest"},
)
],
),
)
messages.append(
RawMessage(
role="assistant",
content=tool_response,
)
)
return messages
def llama3_2_user_assistant_conversation(): def llama3_2_user_assistant_conversation():
return UseCase( return UseCase(
title="User and assistant conversation", title="User and assistant conversation",

View file

@ -7,8 +7,6 @@
import copy import copy
import json import json
import re import re
import secrets
import string
import uuid import uuid
import warnings import warnings
from collections.abc import AsyncGenerator from collections.abc import AsyncGenerator
@ -84,11 +82,6 @@ from llama_stack.providers.utils.telemetry import tracing
from .persistence import AgentPersistence from .persistence import AgentPersistence
from .safety import SafetyException, ShieldRunnerMixin from .safety import SafetyException, ShieldRunnerMixin
def make_random_string(length: int = 8):
return "".join(secrets.choice(string.ascii_letters + string.digits) for _ in range(length))
TOOLS_ATTACHMENT_KEY_REGEX = re.compile(r"__tools_attachment__=(\{.*?\})") TOOLS_ATTACHMENT_KEY_REGEX = re.compile(r"__tools_attachment__=(\{.*?\})")
MEMORY_QUERY_TOOL = "knowledge_search" MEMORY_QUERY_TOOL = "knowledge_search"
WEB_SEARCH_TOOL = "web_search" WEB_SEARCH_TOOL = "web_search"

View file

@ -269,7 +269,7 @@ class OpenAIResponsesImpl:
response_tools=tools, response_tools=tools,
temperature=temperature, temperature=temperature,
response_format=response_format, response_format=response_format,
inputs=input, inputs=all_input,
) )
# Create orchestrator and delegate streaming logic # Create orchestrator and delegate streaming logic

View file

@ -175,6 +175,8 @@ class StreamingResponseOrchestrator:
): ):
yield stream_event yield stream_event
messages = next_turn_messages
if not function_tool_calls and not non_function_tool_calls: if not function_tool_calls and not non_function_tool_calls:
break break
@ -187,9 +189,7 @@ class StreamingResponseOrchestrator:
logger.info(f"Exiting inference loop since iteration count({n_iter}) exceeds {self.max_infer_iters=}") logger.info(f"Exiting inference loop since iteration count({n_iter}) exceeds {self.max_infer_iters=}")
break break
messages = next_turn_messages self.final_messages = messages.copy()
self.final_messages = messages.copy() + [current_response.choices[0].message]
# Create final response # Create final response
final_response = OpenAIResponseObject( final_response = OpenAIResponseObject(
@ -232,9 +232,11 @@ class StreamingResponseOrchestrator:
non_function_tool_calls.append(tool_call) non_function_tool_calls.append(tool_call)
else: else:
logger.info(f"Approval denied for {tool_call.id} on {tool_call.function.name}") logger.info(f"Approval denied for {tool_call.id} on {tool_call.function.name}")
next_turn_messages.pop()
else: else:
logger.info(f"Requesting approval for {tool_call.id} on {tool_call.function.name}") logger.info(f"Requesting approval for {tool_call.id} on {tool_call.function.name}")
approvals.append(tool_call) approvals.append(tool_call)
next_turn_messages.pop()
else: else:
non_function_tool_calls.append(tool_call) non_function_tool_calls.append(tool_call)

View file

@ -8,8 +8,6 @@ import asyncio
import base64 import base64
import io import io
import mimetypes import mimetypes
import secrets
import string
from typing import Any from typing import Any
import httpx import httpx
@ -52,10 +50,6 @@ from .context_retriever import generate_rag_query
log = get_logger(name=__name__, category="tool_runtime") log = get_logger(name=__name__, category="tool_runtime")
def make_random_string(length: int = 8):
return "".join(secrets.choice(string.ascii_letters + string.digits) for _ in range(length))
async def raw_data_from_doc(doc: RAGDocument) -> tuple[bytes, str]: async def raw_data_from_doc(doc: RAGDocument) -> tuple[bytes, str]:
"""Get raw binary data and mime type from a RAGDocument for file upload.""" """Get raw binary data and mime type from a RAGDocument for file upload."""
if isinstance(doc.content, URL): if isinstance(doc.content, URL):

View file

@ -268,7 +268,7 @@ Available Models:
api=Api.inference, api=Api.inference,
adapter_type="watsonx", adapter_type="watsonx",
provider_type="remote::watsonx", provider_type="remote::watsonx",
pip_packages=["ibm_watsonx_ai"], pip_packages=["litellm"],
module="llama_stack.providers.remote.inference.watsonx", module="llama_stack.providers.remote.inference.watsonx",
config_class="llama_stack.providers.remote.inference.watsonx.WatsonXConfig", config_class="llama_stack.providers.remote.inference.watsonx.WatsonXConfig",
provider_data_validator="llama_stack.providers.remote.inference.watsonx.WatsonXProviderDataValidator", provider_data_validator="llama_stack.providers.remote.inference.watsonx.WatsonXProviderDataValidator",

View file

@ -1,217 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import warnings
from collections.abc import AsyncGenerator
from typing import Any
from openai import AsyncStream
from openai.types.chat.chat_completion import (
Choice as OpenAIChoice,
)
from openai.types.completion import Completion as OpenAICompletion
from openai.types.completion_choice import Logprobs as OpenAICompletionLogprobs
from llama_stack.apis.inference import (
ChatCompletionRequest,
CompletionRequest,
CompletionResponse,
CompletionResponseStreamChunk,
GreedySamplingStrategy,
JsonSchemaResponseFormat,
TokenLogProbs,
TopKSamplingStrategy,
TopPSamplingStrategy,
)
from llama_stack.providers.utils.inference.openai_compat import (
_convert_openai_finish_reason,
convert_message_to_openai_dict_new,
convert_tooldef_to_openai_tool,
)
async def convert_chat_completion_request(
request: ChatCompletionRequest,
n: int = 1,
) -> dict:
"""
Convert a ChatCompletionRequest to an OpenAI API-compatible dictionary.
"""
# model -> model
# messages -> messages
# sampling_params TODO(mattf): review strategy
# strategy=greedy -> nvext.top_k = -1, temperature = temperature
# strategy=top_p -> nvext.top_k = -1, top_p = top_p
# strategy=top_k -> nvext.top_k = top_k
# temperature -> temperature
# top_p -> top_p
# top_k -> nvext.top_k
# max_tokens -> max_tokens
# repetition_penalty -> nvext.repetition_penalty
# response_format -> GrammarResponseFormat TODO(mf)
# response_format -> JsonSchemaResponseFormat: response_format = "json_object" & nvext["guided_json"] = json_schema
# tools -> tools
# tool_choice ("auto", "required") -> tool_choice
# tool_prompt_format -> TBD
# stream -> stream
# logprobs -> logprobs
if request.response_format and not isinstance(request.response_format, JsonSchemaResponseFormat):
raise ValueError(
f"Unsupported response format: {request.response_format}. Only JsonSchemaResponseFormat is supported."
)
nvext = {}
payload: dict[str, Any] = dict(
model=request.model,
messages=[await convert_message_to_openai_dict_new(message) for message in request.messages],
stream=request.stream,
n=n,
extra_body=dict(nvext=nvext),
extra_headers={
b"User-Agent": b"llama-stack: nvidia-inference-adapter",
},
)
if request.response_format:
# server bug - setting guided_json changes the behavior of response_format resulting in an error
# payload.update(response_format="json_object")
nvext.update(guided_json=request.response_format.json_schema)
if request.tools:
payload.update(tools=[convert_tooldef_to_openai_tool(tool) for tool in request.tools])
if request.tool_config.tool_choice:
payload.update(
tool_choice=request.tool_config.tool_choice.value
) # we cannot include tool_choice w/o tools, server will complain
if request.logprobs:
payload.update(logprobs=True)
payload.update(top_logprobs=request.logprobs.top_k)
if request.sampling_params:
nvext.update(repetition_penalty=request.sampling_params.repetition_penalty)
if request.sampling_params.max_tokens:
payload.update(max_tokens=request.sampling_params.max_tokens)
strategy = request.sampling_params.strategy
if isinstance(strategy, TopPSamplingStrategy):
nvext.update(top_k=-1)
payload.update(top_p=strategy.top_p)
payload.update(temperature=strategy.temperature)
elif isinstance(strategy, TopKSamplingStrategy):
if strategy.top_k != -1 and strategy.top_k < 1:
warnings.warn("top_k must be -1 or >= 1", stacklevel=2)
nvext.update(top_k=strategy.top_k)
elif isinstance(strategy, GreedySamplingStrategy):
nvext.update(top_k=-1)
else:
raise ValueError(f"Unsupported sampling strategy: {strategy}")
return payload
def convert_completion_request(
request: CompletionRequest,
n: int = 1,
) -> dict:
"""
Convert a ChatCompletionRequest to an OpenAI API-compatible dictionary.
"""
# model -> model
# prompt -> prompt
# sampling_params TODO(mattf): review strategy
# strategy=greedy -> nvext.top_k = -1, temperature = temperature
# strategy=top_p -> nvext.top_k = -1, top_p = top_p
# strategy=top_k -> nvext.top_k = top_k
# temperature -> temperature
# top_p -> top_p
# top_k -> nvext.top_k
# max_tokens -> max_tokens
# repetition_penalty -> nvext.repetition_penalty
# response_format -> nvext.guided_json
# stream -> stream
# logprobs.top_k -> logprobs
nvext = {}
payload: dict[str, Any] = dict(
model=request.model,
prompt=request.content,
stream=request.stream,
extra_body=dict(nvext=nvext),
extra_headers={
b"User-Agent": b"llama-stack: nvidia-inference-adapter",
},
n=n,
)
if request.response_format:
# this is not openai compliant, it is a nim extension
nvext.update(guided_json=request.response_format.json_schema)
if request.logprobs:
payload.update(logprobs=request.logprobs.top_k)
if request.sampling_params:
nvext.update(repetition_penalty=request.sampling_params.repetition_penalty)
if request.sampling_params.max_tokens:
payload.update(max_tokens=request.sampling_params.max_tokens)
if request.sampling_params.strategy == "top_p":
nvext.update(top_k=-1)
payload.update(top_p=request.sampling_params.top_p)
elif request.sampling_params.strategy == "top_k":
if request.sampling_params.top_k != -1 and request.sampling_params.top_k < 1:
warnings.warn("top_k must be -1 or >= 1", stacklevel=2)
nvext.update(top_k=request.sampling_params.top_k)
elif request.sampling_params.strategy == "greedy":
nvext.update(top_k=-1)
payload.update(temperature=request.sampling_params.temperature)
return payload
def _convert_openai_completion_logprobs(
logprobs: OpenAICompletionLogprobs | None,
) -> list[TokenLogProbs] | None:
"""
Convert an OpenAI CompletionLogprobs into a list of TokenLogProbs.
"""
if not logprobs:
return None
return [TokenLogProbs(logprobs_by_token=logprobs) for logprobs in logprobs.top_logprobs]
def convert_openai_completion_choice(
choice: OpenAIChoice,
) -> CompletionResponse:
"""
Convert an OpenAI Completion Choice into a CompletionResponse.
"""
return CompletionResponse(
content=choice.text,
stop_reason=_convert_openai_finish_reason(choice.finish_reason),
logprobs=_convert_openai_completion_logprobs(choice.logprobs),
)
async def convert_openai_completion_stream(
stream: AsyncStream[OpenAICompletion],
) -> AsyncGenerator[CompletionResponse, None]:
"""
Convert a stream of OpenAI Completions into a stream
of ChatCompletionResponseStreamChunks.
"""
async for chunk in stream:
choice = chunk.choices[0]
yield CompletionResponseStreamChunk(
delta=choice.text,
stop_reason=_convert_openai_finish_reason(choice.finish_reason),
logprobs=_convert_openai_completion_logprobs(choice.logprobs),
)

View file

@ -4,53 +4,8 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import httpx
from llama_stack.log import get_logger
from . import NVIDIAConfig from . import NVIDIAConfig
logger = get_logger(name=__name__, category="inference::nvidia")
def _is_nvidia_hosted(config: NVIDIAConfig) -> bool: def _is_nvidia_hosted(config: NVIDIAConfig) -> bool:
return "integrate.api.nvidia.com" in config.url return "integrate.api.nvidia.com" in config.url
async def _get_health(url: str) -> tuple[bool, bool]:
"""
Query {url}/v1/health/{live,ready} to check if the server is running and ready
Args:
url (str): URL of the server
Returns:
Tuple[bool, bool]: (is_live, is_ready)
"""
async with httpx.AsyncClient() as client:
live = await client.get(f"{url}/v1/health/live")
ready = await client.get(f"{url}/v1/health/ready")
return live.status_code == 200, ready.status_code == 200
async def check_health(config: NVIDIAConfig) -> None:
"""
Check if the server is running and ready
Args:
url (str): URL of the server
Raises:
RuntimeError: If the server is not running or ready
"""
if not _is_nvidia_hosted(config):
logger.info("Checking NVIDIA NIM health...")
try:
is_live, is_ready = await _get_health(config.url)
if not is_live:
raise ConnectionError("NVIDIA NIM is not running")
if not is_ready:
raise ConnectionError("NVIDIA NIM is not ready")
# TODO(mf): should we wait for the server to be ready?
except httpx.ConnectError as e:
raise ConnectionError(f"Failed to connect to NVIDIA NIM: {e}") from e

View file

@ -4,19 +4,12 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from llama_stack.apis.inference import Inference
from .config import WatsonXConfig from .config import WatsonXConfig
async def get_adapter_impl(config: WatsonXConfig, _deps) -> Inference: async def get_adapter_impl(config: WatsonXConfig, _deps):
# import dynamically so `llama stack build` does not fail due to missing dependencies # import dynamically so the import is used only when it is needed
from .watsonx import WatsonXInferenceAdapter from .watsonx import WatsonXInferenceAdapter
if not isinstance(config, WatsonXConfig):
raise RuntimeError(f"Unexpected config type: {type(config)}")
adapter = WatsonXInferenceAdapter(config) adapter = WatsonXInferenceAdapter(config)
return adapter return adapter
__all__ = ["get_adapter_impl", "WatsonXConfig"]

View file

@ -7,16 +7,18 @@
import os import os
from typing import Any from typing import Any
from pydantic import BaseModel, Field, SecretStr from pydantic import BaseModel, ConfigDict, Field, SecretStr
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack.schema_utils import json_schema_type from llama_stack.schema_utils import json_schema_type
class WatsonXProviderDataValidator(BaseModel): class WatsonXProviderDataValidator(BaseModel):
url: str model_config = ConfigDict(
api_key: str from_attributes=True,
project_id: str extra="forbid",
)
watsonx_api_key: str | None
@json_schema_type @json_schema_type
@ -25,13 +27,17 @@ class WatsonXConfig(RemoteInferenceProviderConfig):
default_factory=lambda: os.getenv("WATSONX_BASE_URL", "https://us-south.ml.cloud.ibm.com"), default_factory=lambda: os.getenv("WATSONX_BASE_URL", "https://us-south.ml.cloud.ibm.com"),
description="A base url for accessing the watsonx.ai", description="A base url for accessing the watsonx.ai",
) )
# This seems like it should be required, but none of the other remote inference
# providers require it, so this is optional here too for consistency.
# The OpenAIConfig uses default=None instead, so this is following that precedent.
api_key: SecretStr | None = Field( api_key: SecretStr | None = Field(
default_factory=lambda: os.getenv("WATSONX_API_KEY"), default=None,
description="The watsonx API key", description="The watsonx.ai API key",
) )
# As above, this is optional here too for consistency.
project_id: str | None = Field( project_id: str | None = Field(
default_factory=lambda: os.getenv("WATSONX_PROJECT_ID"), default=None,
description="The Project ID key", description="The watsonx.ai project ID",
) )
timeout: int = Field( timeout: int = Field(
default=60, default=60,
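For reference, a hedged sketch of constructing the updated config directly. The field names follow the diff above; reading the WATSONX_* variables here is just one way for a caller to supply values now that the config class no longer pulls `api_key`/`project_id` from the environment itself.

```python
import os

from pydantic import SecretStr

from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig

# api_key and project_id no longer default from os.environ inside the config class,
# so the caller (or the run.yaml ${env...} substitution) supplies them explicitly.
config = WatsonXConfig(
    url=os.getenv("WATSONX_BASE_URL", "https://us-south.ml.cloud.ibm.com"),
    api_key=SecretStr(os.environ["WATSONX_API_KEY"]) if os.getenv("WATSONX_API_KEY") else None,
    project_id=os.getenv("WATSONX_PROJECT_ID"),
)
```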

View file

@ -1,47 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.models.llama.sku_types import CoreModelId
from llama_stack.providers.utils.inference.model_registry import build_hf_repo_model_entry
MODEL_ENTRIES = [
build_hf_repo_model_entry(
"meta-llama/llama-3-3-70b-instruct",
CoreModelId.llama3_3_70b_instruct.value,
),
build_hf_repo_model_entry(
"meta-llama/llama-2-13b-chat",
CoreModelId.llama2_13b.value,
),
build_hf_repo_model_entry(
"meta-llama/llama-3-1-70b-instruct",
CoreModelId.llama3_1_70b_instruct.value,
),
build_hf_repo_model_entry(
"meta-llama/llama-3-1-8b-instruct",
CoreModelId.llama3_1_8b_instruct.value,
),
build_hf_repo_model_entry(
"meta-llama/llama-3-2-11b-vision-instruct",
CoreModelId.llama3_2_11b_vision_instruct.value,
),
build_hf_repo_model_entry(
"meta-llama/llama-3-2-1b-instruct",
CoreModelId.llama3_2_1b_instruct.value,
),
build_hf_repo_model_entry(
"meta-llama/llama-3-2-3b-instruct",
CoreModelId.llama3_2_3b_instruct.value,
),
build_hf_repo_model_entry(
"meta-llama/llama-3-2-90b-vision-instruct",
CoreModelId.llama3_2_90b_vision_instruct.value,
),
build_hf_repo_model_entry(
"meta-llama/llama-guard-3-11b-vision",
CoreModelId.llama_guard_3_11b_vision.value,
),
]

View file

@ -4,240 +4,120 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from collections.abc import AsyncGenerator, AsyncIterator
from typing import Any from typing import Any
from ibm_watsonx_ai.foundation_models import Model import requests
from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams
from openai import AsyncOpenAI
from llama_stack.apis.inference import ( from llama_stack.apis.inference import ChatCompletionRequest
ChatCompletionRequest, from llama_stack.apis.models import Model
CompletionRequest, from llama_stack.apis.models.models import ModelType
GreedySamplingStrategy, from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig
Inference, from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
OpenAIChatCompletion,
OpenAIChatCompletionChunk,
OpenAICompletion,
OpenAIEmbeddingsResponse,
OpenAIMessageParam,
OpenAIResponseFormatParam,
TopKSamplingStrategy,
TopPSamplingStrategy,
)
from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
from llama_stack.providers.utils.inference.openai_compat import (
prepare_openai_completion_params,
)
from llama_stack.providers.utils.inference.prompt_adapter import (
chat_completion_request_to_prompt,
completion_request_to_prompt,
request_has_media,
)
from . import WatsonXConfig
from .models import MODEL_ENTRIES
logger = get_logger(name=__name__, category="inference::watsonx")
# Note on structured output class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
# WatsonX returns responses with a json embedded into a string. _model_cache: dict[str, Model] = {}
# Examples:
# ChatCompletionResponse(completion_message=CompletionMessage(content='```json\n{\n def __init__(self, config: WatsonXConfig):
# "first_name": "Michael",\n "last_name": "Jordan",\n'...) LiteLLMOpenAIMixin.__init__(
# Not even a valid JSON, but we can still extract the JSON from the content self,
litellm_provider_name="watsonx",
api_key_from_config=config.api_key.get_secret_value() if config.api_key else None,
provider_data_api_key_field="watsonx_api_key",
)
self.available_models = None
self.config = config
# CompletionResponse(content=' \nThe best answer is $\\boxed{\\{"name": "Michael Jordan", def get_base_url(self) -> str:
# "year_born": "1963", "year_retired": "2003"\\}}$') return self.config.url
# Find the start of the boxed content
async def _get_params(self, request: ChatCompletionRequest) -> dict[str, Any]:
# Get base parameters from parent
params = await super()._get_params(request)
class WatsonXInferenceAdapter(Inference, ModelRegistryHelper): # Add watsonx.ai specific parameters
def __init__(self, config: WatsonXConfig) -> None: params["project_id"] = self.config.project_id
ModelRegistryHelper.__init__(self, model_entries=MODEL_ENTRIES) params["time_limit"] = self.config.timeout
logger.info(f"Initializing watsonx InferenceAdapter({config.url})...")
self._config = config
self._openai_client: AsyncOpenAI | None = None
self._project_id = self._config.project_id
def _get_client(self, model_id) -> Model:
config_api_key = self._config.api_key.get_secret_value() if self._config.api_key else None
config_url = self._config.url
project_id = self._config.project_id
credentials = {"url": config_url, "apikey": config_api_key}
return Model(model_id=model_id, credentials=credentials, project_id=project_id)
def _get_openai_client(self) -> AsyncOpenAI:
if not self._openai_client:
self._openai_client = AsyncOpenAI(
base_url=f"{self._config.url}/openai/v1",
api_key=self._config.api_key,
)
return self._openai_client
async def _get_params(self, request: ChatCompletionRequest | CompletionRequest) -> dict:
input_dict = {"params": {}}
media_present = request_has_media(request)
llama_model = self.get_llama_model(request.model)
if isinstance(request, ChatCompletionRequest):
input_dict["prompt"] = await chat_completion_request_to_prompt(request, llama_model)
else:
assert not media_present, "Together does not support media for Completion requests"
input_dict["prompt"] = await completion_request_to_prompt(request)
if request.sampling_params:
if request.sampling_params.strategy:
input_dict["params"][GenParams.DECODING_METHOD] = request.sampling_params.strategy.type
if request.sampling_params.max_tokens:
input_dict["params"][GenParams.MAX_NEW_TOKENS] = request.sampling_params.max_tokens
if request.sampling_params.repetition_penalty:
input_dict["params"][GenParams.REPETITION_PENALTY] = request.sampling_params.repetition_penalty
if isinstance(request.sampling_params.strategy, TopPSamplingStrategy):
input_dict["params"][GenParams.TOP_P] = request.sampling_params.strategy.top_p
input_dict["params"][GenParams.TEMPERATURE] = request.sampling_params.strategy.temperature
if isinstance(request.sampling_params.strategy, TopKSamplingStrategy):
input_dict["params"][GenParams.TOP_K] = request.sampling_params.strategy.top_k
if isinstance(request.sampling_params.strategy, GreedySamplingStrategy):
input_dict["params"][GenParams.TEMPERATURE] = 0.0
input_dict["params"][GenParams.STOP_SEQUENCES] = ["<|endoftext|>"]
params = {
**input_dict,
}
return params return params
async def openai_embeddings( # Copied from OpenAIMixin
self, async def check_model_availability(self, model: str) -> bool:
model: str, """
input: str | list[str], Check if a specific model is available from the provider's /v1/models.
encoding_format: str | None = "float",
dimensions: int | None = None,
user: str | None = None,
) -> OpenAIEmbeddingsResponse:
raise NotImplementedError()
async def openai_completion( :param model: The model identifier to check.
self, :return: True if the model is available dynamically, False otherwise.
model: str, """
prompt: str | list[str] | list[int] | list[list[int]], if not self._model_cache:
best_of: int | None = None, await self.list_models()
echo: bool | None = None, return model in self._model_cache
frequency_penalty: float | None = None,
logit_bias: dict[str, float] | None = None,
logprobs: bool | None = None,
max_tokens: int | None = None,
n: int | None = None,
presence_penalty: float | None = None,
seed: int | None = None,
stop: str | list[str] | None = None,
stream: bool | None = None,
stream_options: dict[str, Any] | None = None,
temperature: float | None = None,
top_p: float | None = None,
user: str | None = None,
guided_choice: list[str] | None = None,
prompt_logprobs: int | None = None,
suffix: str | None = None,
) -> OpenAICompletion:
model_obj = await self.model_store.get_model(model)
params = await prepare_openai_completion_params(
model=model_obj.provider_resource_id,
prompt=prompt,
best_of=best_of,
echo=echo,
frequency_penalty=frequency_penalty,
logit_bias=logit_bias,
logprobs=logprobs,
max_tokens=max_tokens,
n=n,
presence_penalty=presence_penalty,
seed=seed,
stop=stop,
stream=stream,
stream_options=stream_options,
temperature=temperature,
top_p=top_p,
user=user,
)
return await self._get_openai_client().completions.create(**params) # type: ignore
async def openai_chat_completion( async def list_models(self) -> list[Model] | None:
self, self._model_cache = {}
model: str, models = []
messages: list[OpenAIMessageParam], for model_spec in self._get_model_specs():
frequency_penalty: float | None = None, functions = [f["id"] for f in model_spec.get("functions", [])]
function_call: str | dict[str, Any] | None = None, # Format: {"embedding_dimension": 1536, "context_length": 8192}
functions: list[dict[str, Any]] | None = None,
logit_bias: dict[str, float] | None = None,
logprobs: bool | None = None,
max_completion_tokens: int | None = None,
max_tokens: int | None = None,
n: int | None = None,
parallel_tool_calls: bool | None = None,
presence_penalty: float | None = None,
response_format: OpenAIResponseFormatParam | None = None,
seed: int | None = None,
stop: str | list[str] | None = None,
stream: bool | None = None,
stream_options: dict[str, Any] | None = None,
temperature: float | None = None,
tool_choice: str | dict[str, Any] | None = None,
tools: list[dict[str, Any]] | None = None,
top_logprobs: int | None = None,
top_p: float | None = None,
user: str | None = None,
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
model_obj = await self.model_store.get_model(model)
params = await prepare_openai_completion_params(
model=model_obj.provider_resource_id,
messages=messages,
frequency_penalty=frequency_penalty,
function_call=function_call,
functions=functions,
logit_bias=logit_bias,
logprobs=logprobs,
max_completion_tokens=max_completion_tokens,
max_tokens=max_tokens,
n=n,
parallel_tool_calls=parallel_tool_calls,
presence_penalty=presence_penalty,
response_format=response_format,
seed=seed,
stop=stop,
stream=stream,
stream_options=stream_options,
temperature=temperature,
tool_choice=tool_choice,
tools=tools,
top_logprobs=top_logprobs,
top_p=top_p,
user=user,
)
if params.get("stream", False):
return self._stream_openai_chat_completion(params)
return await self._get_openai_client().chat.completions.create(**params) # type: ignore
async def _stream_openai_chat_completion(self, params: dict) -> AsyncGenerator: # Example of an embedding model:
# watsonx.ai sometimes adds usage data to the stream # {'model_id': 'ibm/granite-embedding-278m-multilingual',
include_usage = False # 'label': 'granite-embedding-278m-multilingual',
if params.get("stream_options", None): # 'model_limits': {'max_sequence_length': 512, 'embedding_dimension': 768},
include_usage = params["stream_options"].get("include_usage", False) # ...
stream = await self._get_openai_client().chat.completions.create(**params) provider_resource_id = f"{self.__provider_id__}/{model_spec['model_id']}"
if "embedding" in functions:
embedding_dimension = model_spec["model_limits"]["embedding_dimension"]
context_length = model_spec["model_limits"]["max_sequence_length"]
embedding_metadata = {
"embedding_dimension": embedding_dimension,
"context_length": context_length,
}
model = Model(
identifier=model_spec["model_id"],
provider_resource_id=provider_resource_id,
provider_id=self.__provider_id__,
metadata=embedding_metadata,
model_type=ModelType.embedding,
)
self._model_cache[provider_resource_id] = model
models.append(model)
if "text_chat" in functions:
model = Model(
identifier=model_spec["model_id"],
provider_resource_id=provider_resource_id,
provider_id=self.__provider_id__,
metadata={},
model_type=ModelType.llm,
)
# In theory, a model could be both an embedding model and a text chat model.
# In that case, the cache ends up holding whichever Model object was registered last
# (the text chat one), while the returned list contains both the embedding Model
# object and the text chat Model object. That's fine because the cache is
# only used for check_model_availability() anyway.
self._model_cache[provider_resource_id] = model
models.append(model)
return models
seen_finish_reason = False # LiteLLM provides methods to list models for many providers, but not for watsonx.ai.
async for chunk in stream: # So we need to implement our own method to list models by calling the watsonx.ai API.
# Final usage chunk with no choices that the user didn't request, so discard def _get_model_specs(self) -> list[dict[str, Any]]:
if not include_usage and seen_finish_reason and len(chunk.choices) == 0: """
break Retrieves foundation model specifications from the watsonx.ai API.
yield chunk """
for choice in chunk.choices: url = f"{self.config.url}/ml/v1/foundation_model_specs?version=2023-10-25"
if choice.finish_reason: headers = {
seen_finish_reason = True # Note that there is no authorization header. Listing models does not require authentication.
break "Content-Type": "application/json",
}
response = requests.get(url, headers=headers)
# --- Process the Response ---
# Raise an exception for bad status codes (4xx or 5xx)
response.raise_for_status()
# If the request is successful, parse and return the JSON response.
# The response should contain a list of model specifications
response_data = response.json()
if "resources" not in response_data:
raise ValueError("Resources not found in response")
return response_data["resources"]

View file

@ -4,6 +4,8 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import base64
import struct
from collections.abc import AsyncIterator from collections.abc import AsyncIterator
from typing import Any from typing import Any
@ -16,6 +18,7 @@ from llama_stack.apis.inference import (
OpenAIChatCompletion, OpenAIChatCompletion,
OpenAIChatCompletionChunk, OpenAIChatCompletionChunk,
OpenAICompletion, OpenAICompletion,
OpenAIEmbeddingData,
OpenAIEmbeddingsResponse, OpenAIEmbeddingsResponse,
OpenAIEmbeddingUsage, OpenAIEmbeddingUsage,
OpenAIMessageParam, OpenAIMessageParam,
@ -26,7 +29,6 @@ from llama_stack.core.request_headers import NeedsRequestProviderData
from llama_stack.log import get_logger from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry
from llama_stack.providers.utils.inference.openai_compat import ( from llama_stack.providers.utils.inference.openai_compat import (
b64_encode_openai_embeddings_response,
convert_message_to_openai_dict_new, convert_message_to_openai_dict_new,
convert_tooldef_to_openai_tool, convert_tooldef_to_openai_tool,
get_sampling_options, get_sampling_options,
@ -349,3 +351,28 @@ class LiteLLMOpenAIMixin(
return False return False
return model in litellm.models_by_provider[self.litellm_provider_name] return model in litellm.models_by_provider[self.litellm_provider_name]
def b64_encode_openai_embeddings_response(
response_data: list[dict], encoding_format: str | None = "float"
) -> list[OpenAIEmbeddingData]:
"""
Process the OpenAI embeddings response to encode the embeddings in base64 format if specified.
"""
data = []
for i, embedding_data in enumerate(response_data):
if encoding_format == "base64":
byte_array = bytearray()
for embedding_value in embedding_data["embedding"]:
byte_array.extend(struct.pack("f", float(embedding_value)))
response_embedding = base64.b64encode(byte_array).decode("utf-8")
else:
response_embedding = embedding_data["embedding"]
data.append(
OpenAIEmbeddingData(
embedding=response_embedding,
index=i,
)
)
return data
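A quick round-trip sketch of the base64 packing used above, assuming native-endian float32 packing as in `struct.pack("f", ...)`:

```python
import base64
import struct

# Each float is packed as a 4-byte float32, the bytes are concatenated,
# then the whole buffer is base64-encoded.
floats = [0.1, -0.25, 0.5]
byte_array = bytearray()
for value in floats:
    byte_array.extend(struct.pack("f", float(value)))
encoded = base64.b64encode(byte_array).decode("utf-8")

# Decoding reverses the packing; values match up to float32 precision.
decoded = struct.unpack(f"{len(floats)}f", base64.b64decode(encoded))
```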

View file

@ -3,9 +3,7 @@
# #
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import base64
import json import json
import struct
import time import time
import uuid import uuid
import warnings import warnings
@ -103,7 +101,6 @@ from llama_stack.apis.inference import (
JsonSchemaResponseFormat, JsonSchemaResponseFormat,
Message, Message,
OpenAIChatCompletion, OpenAIChatCompletion,
OpenAIEmbeddingData,
OpenAIMessageParam, OpenAIMessageParam,
OpenAIResponseFormatParam, OpenAIResponseFormatParam,
SamplingParams, SamplingParams,
@ -1402,28 +1399,3 @@ def prepare_openai_embeddings_params(
params["user"] = user params["user"] = user
return params return params
def b64_encode_openai_embeddings_response(
response_data: dict, encoding_format: str | None = "float"
) -> list[OpenAIEmbeddingData]:
"""
Process the OpenAI embeddings response to encode the embeddings in base64 format if specified.
"""
data = []
for i, embedding_data in enumerate(response_data):
if encoding_format == "base64":
byte_array = bytearray()
for embedding_value in embedding_data.embedding:
byte_array.extend(struct.pack("f", float(embedding_value)))
response_embedding = base64.b64encode(byte_array).decode("utf-8")
else:
response_embedding = embedding_data.embedding
data.append(
OpenAIEmbeddingData(
embedding=response_embedding,
index=i,
)
)
return data

View file

@ -296,15 +296,14 @@ class OpenAIVectorStoreMixin(ABC):
async def shutdown(self) -> None: async def shutdown(self) -> None:
"""Clean up mixin resources including background tasks.""" """Clean up mixin resources including background tasks."""
# Cancel any running file batch tasks gracefully # Cancel any running file batch tasks gracefully
if hasattr(self, "_file_batch_tasks"): tasks_to_cancel = list(self._file_batch_tasks.items())
tasks_to_cancel = list(self._file_batch_tasks.items()) for _, task in tasks_to_cancel:
for _, task in tasks_to_cancel: if not task.done():
if not task.done(): task.cancel()
task.cancel() try:
try: await task
await task except asyncio.CancelledError:
except asyncio.CancelledError: pass
pass
@abstractmethod @abstractmethod
async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None: async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:

View file

@ -20,7 +20,6 @@ from pydantic import BaseModel
from llama_stack.apis.common.content_types import ( from llama_stack.apis.common.content_types import (
URL, URL,
InterleavedContent, InterleavedContent,
TextContentItem,
) )
from llama_stack.apis.tools import RAGDocument from llama_stack.apis.tools import RAGDocument
from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_dbs import VectorDB
@ -129,26 +128,6 @@ def content_from_data_and_mime_type(data: bytes | str, mime_type: str | None, en
return "" return ""
def concat_interleaved_content(content: list[InterleavedContent]) -> InterleavedContent:
"""concatenate interleaved content into a single list. ensure that 'str's are converted to TextContentItem when in a list"""
ret = []
def _process(c):
if isinstance(c, str):
ret.append(TextContentItem(text=c))
elif isinstance(c, list):
for item in c:
_process(item)
else:
ret.append(c)
for c in content:
_process(c)
return ret
async def content_from_doc(doc: RAGDocument) -> str: async def content_from_doc(doc: RAGDocument) -> str:
if isinstance(doc.content, URL): if isinstance(doc.content, URL):
if doc.content.uri.startswith("data:"): if doc.content.uri.startswith("data:"):

View file

@ -18,6 +18,8 @@ from llama_stack.providers.remote.inference.openai.config import OpenAIConfig
from llama_stack.providers.remote.inference.openai.openai import OpenAIInferenceAdapter from llama_stack.providers.remote.inference.openai.openai import OpenAIInferenceAdapter
from llama_stack.providers.remote.inference.together.config import TogetherImplConfig from llama_stack.providers.remote.inference.together.config import TogetherImplConfig
from llama_stack.providers.remote.inference.together.together import TogetherInferenceAdapter from llama_stack.providers.remote.inference.together.together import TogetherInferenceAdapter
from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig
from llama_stack.providers.remote.inference.watsonx.watsonx import WatsonXInferenceAdapter
@pytest.mark.parametrize( @pytest.mark.parametrize(
@ -58,3 +60,29 @@ def test_openai_provider_data_used(config_cls, adapter_cls, provider_data_valida
{"x-llamastack-provider-data": json.dumps({inference_adapter.provider_data_api_key_field: api_key})} {"x-llamastack-provider-data": json.dumps({inference_adapter.provider_data_api_key_field: api_key})}
): ):
assert inference_adapter.client.api_key == api_key assert inference_adapter.client.api_key == api_key
@pytest.mark.parametrize(
"config_cls,adapter_cls,provider_data_validator",
[
(
WatsonXConfig,
WatsonXInferenceAdapter,
"llama_stack.providers.remote.inference.watsonx.config.WatsonXProviderDataValidator",
),
],
)
def test_litellm_provider_data_used(config_cls, adapter_cls, provider_data_validator: str):
"""Validate data for LiteLLM-based providers. Similar to test_openai_provider_data_used, but without the
assumption that there is an OpenAI-compatible client object."""
inference_adapter = adapter_cls(config=config_cls())
inference_adapter.__provider_spec__ = MagicMock()
inference_adapter.__provider_spec__.provider_data_validator = provider_data_validator
for api_key in ["test1", "test2"]:
with request_provider_data_context(
{"x-llamastack-provider-data": json.dumps({inference_adapter.provider_data_api_key_field: api_key})}
):
assert inference_adapter.get_api_key() == api_key
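For reference, the shape of the per-request provider-data header exercised here; the field name matches `provider_data_api_key_field="watsonx_api_key"` set in the adapter, and the key value is a placeholder.

```python
import json

headers = {
    # Per-request override picked up by get_api_key() via the provider data validator.
    "x-llamastack-provider-data": json.dumps({"watsonx_api_key": "my-watsonx-key"})
}
```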

View file

@ -125,8 +125,15 @@ async def test_duplicate_provider_registration(cached_disk_dist_registry):
provider_resource_id="test_vector_db_2", provider_resource_id="test_vector_db_2",
provider_id="baz", # Same provider_id provider_id="baz", # Same provider_id
) )
await cached_disk_dist_registry.register(duplicate_vector_db)
# Now we expect a ValueError to be raised for duplicate registration
with pytest.raises(
ValueError,
match=r"Provider 'baz' is already registered.*Unregister the existing provider first before registering it again.",
):
await cached_disk_dist_registry.register(duplicate_vector_db)
# Verify the original registration is still intact
result = await cached_disk_dist_registry.get("vector_db", "test_vector_db_2") result = await cached_disk_dist_registry.get("vector_db", "test_vector_db_2")
assert result is not None assert result is not None
assert result.embedding_model == original_vector_db.embedding_model # Original values preserved assert result.embedding_model == original_vector_db.embedding_model # Original values preserved