mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-08 21:04:39 +00:00
Merge ed4e452de0
into sapling-pr-archive-ehhuang
This commit is contained in:
commit
08d46d6363
37 changed files with 242 additions and 6940 deletions
2
.github/workflows/stale_bot.yml
vendored
2
.github/workflows/stale_bot.yml
vendored
|
@ -24,7 +24,7 @@ jobs:
|
|||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stale Action
|
||||
uses: actions/stale@3a9db7e6a41a89f618792c92c0e97cc736e1b13f # v10.0.0
|
||||
uses: actions/stale@5f858e3efba33a5ca4407a664cc011ad407f2008 # v10.1.0
|
||||
with:
|
||||
stale-issue-label: 'stale'
|
||||
stale-issue-message: >
|
||||
|
|
|
@ -17,8 +17,8 @@ IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform
|
|||
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
|
||||
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
|
||||
| `url` | `<class 'str'>` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing the watsonx.ai |
|
||||
| `api_key` | `pydantic.types.SecretStr \| None` | No | | The watsonx API key |
|
||||
| `project_id` | `str \| None` | No | | The Project ID key |
|
||||
| `api_key` | `pydantic.types.SecretStr \| None` | No | | The watsonx.ai API key |
|
||||
| `project_id` | `str \| None` | No | | The watsonx.ai project ID |
|
||||
| `timeout` | `<class 'int'>` | No | 60 | Timeout for the HTTP requests |
|
||||
|
||||
## Sample Configuration
|
||||
|
|
897
docs/static/deprecated-llama-stack-spec.html
vendored
897
docs/static/deprecated-llama-stack-spec.html
vendored
|
@ -3526,343 +3526,6 @@
|
|||
},
|
||||
"deprecated": true
|
||||
}
|
||||
},
|
||||
"/v1/telemetry/metrics/{metric_name}": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "A QueryMetricsResponse.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/QueryMetricsResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Telemetry"
|
||||
],
|
||||
"summary": "Query metrics.",
|
||||
"description": "Query metrics.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "metric_name",
|
||||
"in": "path",
|
||||
"description": "The name of the metric to query.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/QueryMetricsRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
},
|
||||
"deprecated": true
|
||||
}
|
||||
},
|
||||
"/v1/telemetry/spans": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "A QuerySpansResponse.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/QuerySpansResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Telemetry"
|
||||
],
|
||||
"summary": "Query spans.",
|
||||
"description": "Query spans.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/QuerySpansRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
},
|
||||
"deprecated": true
|
||||
}
|
||||
},
|
||||
"/v1/telemetry/spans/export": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK"
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Telemetry"
|
||||
],
|
||||
"summary": "Save spans to a dataset.",
|
||||
"description": "Save spans to a dataset.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/SaveSpansToDatasetRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
},
|
||||
"deprecated": true
|
||||
}
|
||||
},
|
||||
"/v1/telemetry/spans/{span_id}/tree": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "A QuerySpanTreeResponse.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/QuerySpanTreeResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Telemetry"
|
||||
],
|
||||
"summary": "Get a span tree by its ID.",
|
||||
"description": "Get a span tree by its ID.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "span_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the span to get the tree from.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/GetSpanTreeRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
},
|
||||
"deprecated": true
|
||||
}
|
||||
},
|
||||
"/v1/telemetry/traces": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "A QueryTracesResponse.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/QueryTracesResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Telemetry"
|
||||
],
|
||||
"summary": "Query traces.",
|
||||
"description": "Query traces.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/QueryTracesRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
},
|
||||
"deprecated": true
|
||||
}
|
||||
},
|
||||
"/v1/telemetry/traces/{trace_id}": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "A Trace.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/Trace"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Telemetry"
|
||||
],
|
||||
"summary": "Get a trace by its ID.",
|
||||
"description": "Get a trace by its ID.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "trace_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the trace to get.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"deprecated": true
|
||||
}
|
||||
},
|
||||
"/v1/telemetry/traces/{trace_id}/spans/{span_id}": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "A Span.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/Span"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Telemetry"
|
||||
],
|
||||
"summary": "Get a span by its ID.",
|
||||
"description": "Get a span by its ID.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "trace_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the trace to get the span from.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "span_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the span to get.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"deprecated": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema",
|
||||
|
@ -12716,561 +12379,6 @@
|
|||
"logger_config"
|
||||
],
|
||||
"title": "SupervisedFineTuneRequest"
|
||||
},
|
||||
"QueryMetricsRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"start_time": {
|
||||
"type": "integer",
|
||||
"description": "The start time of the metric to query."
|
||||
},
|
||||
"end_time": {
|
||||
"type": "integer",
|
||||
"description": "The end time of the metric to query."
|
||||
},
|
||||
"granularity": {
|
||||
"type": "string",
|
||||
"description": "The granularity of the metric to query."
|
||||
},
|
||||
"query_type": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"range",
|
||||
"instant"
|
||||
],
|
||||
"description": "The type of query to perform."
|
||||
},
|
||||
"label_matchers": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "The name of the label to match"
|
||||
},
|
||||
"value": {
|
||||
"type": "string",
|
||||
"description": "The value to match against"
|
||||
},
|
||||
"operator": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"=",
|
||||
"!=",
|
||||
"=~",
|
||||
"!~"
|
||||
],
|
||||
"description": "The comparison operator to use for matching",
|
||||
"default": "="
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"name",
|
||||
"value",
|
||||
"operator"
|
||||
],
|
||||
"title": "MetricLabelMatcher",
|
||||
"description": "A matcher for filtering metrics by label values."
|
||||
},
|
||||
"description": "The label matchers to apply to the metric."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"start_time",
|
||||
"query_type"
|
||||
],
|
||||
"title": "QueryMetricsRequest"
|
||||
},
|
||||
"MetricDataPoint": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"timestamp": {
|
||||
"type": "integer",
|
||||
"description": "Unix timestamp when the metric value was recorded"
|
||||
},
|
||||
"value": {
|
||||
"type": "number",
|
||||
"description": "The numeric value of the metric at this timestamp"
|
||||
},
|
||||
"unit": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"timestamp",
|
||||
"value",
|
||||
"unit"
|
||||
],
|
||||
"title": "MetricDataPoint",
|
||||
"description": "A single data point in a metric time series."
|
||||
},
|
||||
"MetricLabel": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "The name of the label"
|
||||
},
|
||||
"value": {
|
||||
"type": "string",
|
||||
"description": "The value of the label"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"name",
|
||||
"value"
|
||||
],
|
||||
"title": "MetricLabel",
|
||||
"description": "A label associated with a metric."
|
||||
},
|
||||
"MetricSeries": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"metric": {
|
||||
"type": "string",
|
||||
"description": "The name of the metric"
|
||||
},
|
||||
"labels": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/MetricLabel"
|
||||
},
|
||||
"description": "List of labels associated with this metric series"
|
||||
},
|
||||
"values": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/MetricDataPoint"
|
||||
},
|
||||
"description": "List of data points in chronological order"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"metric",
|
||||
"labels",
|
||||
"values"
|
||||
],
|
||||
"title": "MetricSeries",
|
||||
"description": "A time series of metric data points."
|
||||
},
|
||||
"QueryMetricsResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"data": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/MetricSeries"
|
||||
},
|
||||
"description": "List of metric series matching the query criteria"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"data"
|
||||
],
|
||||
"title": "QueryMetricsResponse",
|
||||
"description": "Response containing metric time series data."
|
||||
},
|
||||
"QueryCondition": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"key": {
|
||||
"type": "string",
|
||||
"description": "The attribute key to filter on"
|
||||
},
|
||||
"op": {
|
||||
"$ref": "#/components/schemas/QueryConditionOp",
|
||||
"description": "The comparison operator to apply"
|
||||
},
|
||||
"value": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
],
|
||||
"description": "The value to compare against"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"key",
|
||||
"op",
|
||||
"value"
|
||||
],
|
||||
"title": "QueryCondition",
|
||||
"description": "A condition for filtering query results."
|
||||
},
|
||||
"QueryConditionOp": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"eq",
|
||||
"ne",
|
||||
"gt",
|
||||
"lt"
|
||||
],
|
||||
"title": "QueryConditionOp",
|
||||
"description": "Comparison operators for query conditions."
|
||||
},
|
||||
"QuerySpansRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"attribute_filters": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/QueryCondition"
|
||||
},
|
||||
"description": "The attribute filters to apply to the spans."
|
||||
},
|
||||
"attributes_to_return": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": "The attributes to return in the spans."
|
||||
},
|
||||
"max_depth": {
|
||||
"type": "integer",
|
||||
"description": "The maximum depth of the tree."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"attribute_filters",
|
||||
"attributes_to_return"
|
||||
],
|
||||
"title": "QuerySpansRequest"
|
||||
},
|
||||
"Span": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"span_id": {
|
||||
"type": "string",
|
||||
"description": "Unique identifier for the span"
|
||||
},
|
||||
"trace_id": {
|
||||
"type": "string",
|
||||
"description": "Unique identifier for the trace this span belongs to"
|
||||
},
|
||||
"parent_span_id": {
|
||||
"type": "string",
|
||||
"description": "(Optional) Unique identifier for the parent span, if this is a child span"
|
||||
},
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "Human-readable name describing the operation this span represents"
|
||||
},
|
||||
"start_time": {
|
||||
"type": "string",
|
||||
"format": "date-time",
|
||||
"description": "Timestamp when the operation began"
|
||||
},
|
||||
"end_time": {
|
||||
"type": "string",
|
||||
"format": "date-time",
|
||||
"description": "(Optional) Timestamp when the operation finished, if completed"
|
||||
},
|
||||
"attributes": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "(Optional) Key-value pairs containing additional metadata about the span"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"span_id",
|
||||
"trace_id",
|
||||
"name",
|
||||
"start_time"
|
||||
],
|
||||
"title": "Span",
|
||||
"description": "A span representing a single operation within a trace."
|
||||
},
|
||||
"QuerySpansResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"data": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/Span"
|
||||
},
|
||||
"description": "List of spans matching the query criteria"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"data"
|
||||
],
|
||||
"title": "QuerySpansResponse",
|
||||
"description": "Response containing a list of spans."
|
||||
},
|
||||
"SaveSpansToDatasetRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"attribute_filters": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/QueryCondition"
|
||||
},
|
||||
"description": "The attribute filters to apply to the spans."
|
||||
},
|
||||
"attributes_to_save": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": "The attributes to save to the dataset."
|
||||
},
|
||||
"dataset_id": {
|
||||
"type": "string",
|
||||
"description": "The ID of the dataset to save the spans to."
|
||||
},
|
||||
"max_depth": {
|
||||
"type": "integer",
|
||||
"description": "The maximum depth of the tree."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"attribute_filters",
|
||||
"attributes_to_save",
|
||||
"dataset_id"
|
||||
],
|
||||
"title": "SaveSpansToDatasetRequest"
|
||||
},
|
||||
"GetSpanTreeRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"attributes_to_return": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": "The attributes to return in the tree."
|
||||
},
|
||||
"max_depth": {
|
||||
"type": "integer",
|
||||
"description": "The maximum depth of the tree."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"title": "GetSpanTreeRequest"
|
||||
},
|
||||
"SpanStatus": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"ok",
|
||||
"error"
|
||||
],
|
||||
"title": "SpanStatus",
|
||||
"description": "The status of a span indicating whether it completed successfully or with an error."
|
||||
},
|
||||
"SpanWithStatus": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"span_id": {
|
||||
"type": "string",
|
||||
"description": "Unique identifier for the span"
|
||||
},
|
||||
"trace_id": {
|
||||
"type": "string",
|
||||
"description": "Unique identifier for the trace this span belongs to"
|
||||
},
|
||||
"parent_span_id": {
|
||||
"type": "string",
|
||||
"description": "(Optional) Unique identifier for the parent span, if this is a child span"
|
||||
},
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "Human-readable name describing the operation this span represents"
|
||||
},
|
||||
"start_time": {
|
||||
"type": "string",
|
||||
"format": "date-time",
|
||||
"description": "Timestamp when the operation began"
|
||||
},
|
||||
"end_time": {
|
||||
"type": "string",
|
||||
"format": "date-time",
|
||||
"description": "(Optional) Timestamp when the operation finished, if completed"
|
||||
},
|
||||
"attributes": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "(Optional) Key-value pairs containing additional metadata about the span"
|
||||
},
|
||||
"status": {
|
||||
"$ref": "#/components/schemas/SpanStatus",
|
||||
"description": "(Optional) The current status of the span"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"span_id",
|
||||
"trace_id",
|
||||
"name",
|
||||
"start_time"
|
||||
],
|
||||
"title": "SpanWithStatus",
|
||||
"description": "A span that includes status information."
|
||||
},
|
||||
"QuerySpanTreeResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"data": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"$ref": "#/components/schemas/SpanWithStatus"
|
||||
},
|
||||
"description": "Dictionary mapping span IDs to spans with status information"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"data"
|
||||
],
|
||||
"title": "QuerySpanTreeResponse",
|
||||
"description": "Response containing a tree structure of spans."
|
||||
},
|
||||
"QueryTracesRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"attribute_filters": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/QueryCondition"
|
||||
},
|
||||
"description": "The attribute filters to apply to the traces."
|
||||
},
|
||||
"limit": {
|
||||
"type": "integer",
|
||||
"description": "The limit of traces to return."
|
||||
},
|
||||
"offset": {
|
||||
"type": "integer",
|
||||
"description": "The offset of the traces to return."
|
||||
},
|
||||
"order_by": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": "The order by of the traces to return."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"title": "QueryTracesRequest"
|
||||
},
|
||||
"Trace": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"trace_id": {
|
||||
"type": "string",
|
||||
"description": "Unique identifier for the trace"
|
||||
},
|
||||
"root_span_id": {
|
||||
"type": "string",
|
||||
"description": "Unique identifier for the root span that started this trace"
|
||||
},
|
||||
"start_time": {
|
||||
"type": "string",
|
||||
"format": "date-time",
|
||||
"description": "Timestamp when the trace began"
|
||||
},
|
||||
"end_time": {
|
||||
"type": "string",
|
||||
"format": "date-time",
|
||||
"description": "(Optional) Timestamp when the trace finished, if completed"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"trace_id",
|
||||
"root_span_id",
|
||||
"start_time"
|
||||
],
|
||||
"title": "Trace",
|
||||
"description": "A trace representing the complete execution path of a request across multiple operations."
|
||||
},
|
||||
"QueryTracesResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"data": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/Trace"
|
||||
},
|
||||
"description": "List of traces matching the query criteria"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"data"
|
||||
],
|
||||
"title": "QueryTracesResponse",
|
||||
"description": "Response containing a list of traces."
|
||||
}
|
||||
},
|
||||
"responses": {
|
||||
|
@ -13387,10 +12495,6 @@
|
|||
"description": "OpenAI-compatible Moderations API.",
|
||||
"x-displayName": "Safety"
|
||||
},
|
||||
{
|
||||
"name": "Telemetry",
|
||||
"description": ""
|
||||
},
|
||||
{
|
||||
"name": "VectorIO",
|
||||
"description": ""
|
||||
|
@ -13410,7 +12514,6 @@
|
|||
"Models",
|
||||
"PostTraining (Coming Soon)",
|
||||
"Safety",
|
||||
"Telemetry",
|
||||
"VectorIO"
|
||||
]
|
||||
}
|
||||
|
|
663
docs/static/deprecated-llama-stack-spec.yaml
vendored
663
docs/static/deprecated-llama-stack-spec.yaml
vendored
|
@ -2593,238 +2593,6 @@ paths:
|
|||
$ref: '#/components/schemas/SupervisedFineTuneRequest'
|
||||
required: true
|
||||
deprecated: true
|
||||
/v1/telemetry/metrics/{metric_name}:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: A QueryMetricsResponse.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/QueryMetricsResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Telemetry
|
||||
summary: Query metrics.
|
||||
description: Query metrics.
|
||||
parameters:
|
||||
- name: metric_name
|
||||
in: path
|
||||
description: The name of the metric to query.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/QueryMetricsRequest'
|
||||
required: true
|
||||
deprecated: true
|
||||
/v1/telemetry/spans:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: A QuerySpansResponse.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/QuerySpansResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Telemetry
|
||||
summary: Query spans.
|
||||
description: Query spans.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/QuerySpansRequest'
|
||||
required: true
|
||||
deprecated: true
|
||||
/v1/telemetry/spans/export:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Telemetry
|
||||
summary: Save spans to a dataset.
|
||||
description: Save spans to a dataset.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/SaveSpansToDatasetRequest'
|
||||
required: true
|
||||
deprecated: true
|
||||
/v1/telemetry/spans/{span_id}/tree:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: A QuerySpanTreeResponse.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/QuerySpanTreeResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Telemetry
|
||||
summary: Get a span tree by its ID.
|
||||
description: Get a span tree by its ID.
|
||||
parameters:
|
||||
- name: span_id
|
||||
in: path
|
||||
description: The ID of the span to get the tree from.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/GetSpanTreeRequest'
|
||||
required: true
|
||||
deprecated: true
|
||||
/v1/telemetry/traces:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: A QueryTracesResponse.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/QueryTracesResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Telemetry
|
||||
summary: Query traces.
|
||||
description: Query traces.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/QueryTracesRequest'
|
||||
required: true
|
||||
deprecated: true
|
||||
/v1/telemetry/traces/{trace_id}:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: A Trace.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/Trace'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Telemetry
|
||||
summary: Get a trace by its ID.
|
||||
description: Get a trace by its ID.
|
||||
parameters:
|
||||
- name: trace_id
|
||||
in: path
|
||||
description: The ID of the trace to get.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
deprecated: true
|
||||
/v1/telemetry/traces/{trace_id}/spans/{span_id}:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: A Span.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/Span'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Telemetry
|
||||
summary: Get a span by its ID.
|
||||
description: Get a span by its ID.
|
||||
parameters:
|
||||
- name: trace_id
|
||||
in: path
|
||||
description: >-
|
||||
The ID of the trace to get the span from.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: span_id
|
||||
in: path
|
||||
description: The ID of the span to get.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
deprecated: true
|
||||
jsonSchemaDialect: >-
|
||||
https://json-schema.org/draft/2020-12/schema
|
||||
components:
|
||||
|
@ -9510,434 +9278,6 @@ components:
|
|||
- hyperparam_search_config
|
||||
- logger_config
|
||||
title: SupervisedFineTuneRequest
|
||||
QueryMetricsRequest:
|
||||
type: object
|
||||
properties:
|
||||
start_time:
|
||||
type: integer
|
||||
description: The start time of the metric to query.
|
||||
end_time:
|
||||
type: integer
|
||||
description: The end time of the metric to query.
|
||||
granularity:
|
||||
type: string
|
||||
description: The granularity of the metric to query.
|
||||
query_type:
|
||||
type: string
|
||||
enum:
|
||||
- range
|
||||
- instant
|
||||
description: The type of query to perform.
|
||||
label_matchers:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
description: The name of the label to match
|
||||
value:
|
||||
type: string
|
||||
description: The value to match against
|
||||
operator:
|
||||
type: string
|
||||
enum:
|
||||
- '='
|
||||
- '!='
|
||||
- =~
|
||||
- '!~'
|
||||
description: >-
|
||||
The comparison operator to use for matching
|
||||
default: '='
|
||||
additionalProperties: false
|
||||
required:
|
||||
- name
|
||||
- value
|
||||
- operator
|
||||
title: MetricLabelMatcher
|
||||
description: >-
|
||||
A matcher for filtering metrics by label values.
|
||||
description: >-
|
||||
The label matchers to apply to the metric.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- start_time
|
||||
- query_type
|
||||
title: QueryMetricsRequest
|
||||
MetricDataPoint:
|
||||
type: object
|
||||
properties:
|
||||
timestamp:
|
||||
type: integer
|
||||
description: >-
|
||||
Unix timestamp when the metric value was recorded
|
||||
value:
|
||||
type: number
|
||||
description: >-
|
||||
The numeric value of the metric at this timestamp
|
||||
unit:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- timestamp
|
||||
- value
|
||||
- unit
|
||||
title: MetricDataPoint
|
||||
description: >-
|
||||
A single data point in a metric time series.
|
||||
MetricLabel:
|
||||
type: object
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
description: The name of the label
|
||||
value:
|
||||
type: string
|
||||
description: The value of the label
|
||||
additionalProperties: false
|
||||
required:
|
||||
- name
|
||||
- value
|
||||
title: MetricLabel
|
||||
description: A label associated with a metric.
|
||||
MetricSeries:
|
||||
type: object
|
||||
properties:
|
||||
metric:
|
||||
type: string
|
||||
description: The name of the metric
|
||||
labels:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/MetricLabel'
|
||||
description: >-
|
||||
List of labels associated with this metric series
|
||||
values:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/MetricDataPoint'
|
||||
description: >-
|
||||
List of data points in chronological order
|
||||
additionalProperties: false
|
||||
required:
|
||||
- metric
|
||||
- labels
|
||||
- values
|
||||
title: MetricSeries
|
||||
description: A time series of metric data points.
|
||||
QueryMetricsResponse:
|
||||
type: object
|
||||
properties:
|
||||
data:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/MetricSeries'
|
||||
description: >-
|
||||
List of metric series matching the query criteria
|
||||
additionalProperties: false
|
||||
required:
|
||||
- data
|
||||
title: QueryMetricsResponse
|
||||
description: >-
|
||||
Response containing metric time series data.
|
||||
QueryCondition:
|
||||
type: object
|
||||
properties:
|
||||
key:
|
||||
type: string
|
||||
description: The attribute key to filter on
|
||||
op:
|
||||
$ref: '#/components/schemas/QueryConditionOp'
|
||||
description: The comparison operator to apply
|
||||
value:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: The value to compare against
|
||||
additionalProperties: false
|
||||
required:
|
||||
- key
|
||||
- op
|
||||
- value
|
||||
title: QueryCondition
|
||||
description: A condition for filtering query results.
|
||||
QueryConditionOp:
|
||||
type: string
|
||||
enum:
|
||||
- eq
|
||||
- ne
|
||||
- gt
|
||||
- lt
|
||||
title: QueryConditionOp
|
||||
description: >-
|
||||
Comparison operators for query conditions.
|
||||
QuerySpansRequest:
|
||||
type: object
|
||||
properties:
|
||||
attribute_filters:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/QueryCondition'
|
||||
description: >-
|
||||
The attribute filters to apply to the spans.
|
||||
attributes_to_return:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
description: The attributes to return in the spans.
|
||||
max_depth:
|
||||
type: integer
|
||||
description: The maximum depth of the tree.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- attribute_filters
|
||||
- attributes_to_return
|
||||
title: QuerySpansRequest
|
||||
Span:
|
||||
type: object
|
||||
properties:
|
||||
span_id:
|
||||
type: string
|
||||
description: Unique identifier for the span
|
||||
trace_id:
|
||||
type: string
|
||||
description: >-
|
||||
Unique identifier for the trace this span belongs to
|
||||
parent_span_id:
|
||||
type: string
|
||||
description: >-
|
||||
(Optional) Unique identifier for the parent span, if this is a child span
|
||||
name:
|
||||
type: string
|
||||
description: >-
|
||||
Human-readable name describing the operation this span represents
|
||||
start_time:
|
||||
type: string
|
||||
format: date-time
|
||||
description: Timestamp when the operation began
|
||||
end_time:
|
||||
type: string
|
||||
format: date-time
|
||||
description: >-
|
||||
(Optional) Timestamp when the operation finished, if completed
|
||||
attributes:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: >-
|
||||
(Optional) Key-value pairs containing additional metadata about the span
|
||||
additionalProperties: false
|
||||
required:
|
||||
- span_id
|
||||
- trace_id
|
||||
- name
|
||||
- start_time
|
||||
title: Span
|
||||
description: >-
|
||||
A span representing a single operation within a trace.
|
||||
QuerySpansResponse:
|
||||
type: object
|
||||
properties:
|
||||
data:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/Span'
|
||||
description: >-
|
||||
List of spans matching the query criteria
|
||||
additionalProperties: false
|
||||
required:
|
||||
- data
|
||||
title: QuerySpansResponse
|
||||
description: Response containing a list of spans.
|
||||
SaveSpansToDatasetRequest:
|
||||
type: object
|
||||
properties:
|
||||
attribute_filters:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/QueryCondition'
|
||||
description: >-
|
||||
The attribute filters to apply to the spans.
|
||||
attributes_to_save:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
description: The attributes to save to the dataset.
|
||||
dataset_id:
|
||||
type: string
|
||||
description: >-
|
||||
The ID of the dataset to save the spans to.
|
||||
max_depth:
|
||||
type: integer
|
||||
description: The maximum depth of the tree.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- attribute_filters
|
||||
- attributes_to_save
|
||||
- dataset_id
|
||||
title: SaveSpansToDatasetRequest
|
||||
GetSpanTreeRequest:
|
||||
type: object
|
||||
properties:
|
||||
attributes_to_return:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
description: The attributes to return in the tree.
|
||||
max_depth:
|
||||
type: integer
|
||||
description: The maximum depth of the tree.
|
||||
additionalProperties: false
|
||||
title: GetSpanTreeRequest
|
||||
SpanStatus:
|
||||
type: string
|
||||
enum:
|
||||
- ok
|
||||
- error
|
||||
title: SpanStatus
|
||||
description: >-
|
||||
The status of a span indicating whether it completed successfully or with
|
||||
an error.
|
||||
SpanWithStatus:
|
||||
type: object
|
||||
properties:
|
||||
span_id:
|
||||
type: string
|
||||
description: Unique identifier for the span
|
||||
trace_id:
|
||||
type: string
|
||||
description: >-
|
||||
Unique identifier for the trace this span belongs to
|
||||
parent_span_id:
|
||||
type: string
|
||||
description: >-
|
||||
(Optional) Unique identifier for the parent span, if this is a child span
|
||||
name:
|
||||
type: string
|
||||
description: >-
|
||||
Human-readable name describing the operation this span represents
|
||||
start_time:
|
||||
type: string
|
||||
format: date-time
|
||||
description: Timestamp when the operation began
|
||||
end_time:
|
||||
type: string
|
||||
format: date-time
|
||||
description: >-
|
||||
(Optional) Timestamp when the operation finished, if completed
|
||||
attributes:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: >-
|
||||
(Optional) Key-value pairs containing additional metadata about the span
|
||||
status:
|
||||
$ref: '#/components/schemas/SpanStatus'
|
||||
description: >-
|
||||
(Optional) The current status of the span
|
||||
additionalProperties: false
|
||||
required:
|
||||
- span_id
|
||||
- trace_id
|
||||
- name
|
||||
- start_time
|
||||
title: SpanWithStatus
|
||||
description: A span that includes status information.
|
||||
QuerySpanTreeResponse:
|
||||
type: object
|
||||
properties:
|
||||
data:
|
||||
type: object
|
||||
additionalProperties:
|
||||
$ref: '#/components/schemas/SpanWithStatus'
|
||||
description: >-
|
||||
Dictionary mapping span IDs to spans with status information
|
||||
additionalProperties: false
|
||||
required:
|
||||
- data
|
||||
title: QuerySpanTreeResponse
|
||||
description: >-
|
||||
Response containing a tree structure of spans.
|
||||
QueryTracesRequest:
|
||||
type: object
|
||||
properties:
|
||||
attribute_filters:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/QueryCondition'
|
||||
description: >-
|
||||
The attribute filters to apply to the traces.
|
||||
limit:
|
||||
type: integer
|
||||
description: The limit of traces to return.
|
||||
offset:
|
||||
type: integer
|
||||
description: The offset of the traces to return.
|
||||
order_by:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
description: The order by of the traces to return.
|
||||
additionalProperties: false
|
||||
title: QueryTracesRequest
|
||||
Trace:
|
||||
type: object
|
||||
properties:
|
||||
trace_id:
|
||||
type: string
|
||||
description: Unique identifier for the trace
|
||||
root_span_id:
|
||||
type: string
|
||||
description: >-
|
||||
Unique identifier for the root span that started this trace
|
||||
start_time:
|
||||
type: string
|
||||
format: date-time
|
||||
description: Timestamp when the trace began
|
||||
end_time:
|
||||
type: string
|
||||
format: date-time
|
||||
description: >-
|
||||
(Optional) Timestamp when the trace finished, if completed
|
||||
additionalProperties: false
|
||||
required:
|
||||
- trace_id
|
||||
- root_span_id
|
||||
- start_time
|
||||
title: Trace
|
||||
description: >-
|
||||
A trace representing the complete execution path of a request across multiple
|
||||
operations.
|
||||
QueryTracesResponse:
|
||||
type: object
|
||||
properties:
|
||||
data:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/Trace'
|
||||
description: >-
|
||||
List of traces matching the query criteria
|
||||
additionalProperties: false
|
||||
required:
|
||||
- data
|
||||
title: QueryTracesResponse
|
||||
description: Response containing a list of traces.
|
||||
responses:
|
||||
BadRequest400:
|
||||
description: The request was invalid or malformed
|
||||
|
@ -10043,8 +9383,6 @@ tags:
|
|||
- name: Safety
|
||||
description: OpenAI-compatible Moderations API.
|
||||
x-displayName: Safety
|
||||
- name: Telemetry
|
||||
description: ''
|
||||
- name: VectorIO
|
||||
description: ''
|
||||
x-tagGroups:
|
||||
|
@ -10060,5 +9398,4 @@ x-tagGroups:
|
|||
- Models
|
||||
- PostTraining (Coming Soon)
|
||||
- Safety
|
||||
- Telemetry
|
||||
- VectorIO
|
||||
|
|
899
docs/static/experimental-llama-stack-spec.html
vendored
899
docs/static/experimental-llama-stack-spec.html
vendored
|
@ -1711,343 +1711,6 @@
|
|||
},
|
||||
"deprecated": false
|
||||
}
|
||||
},
|
||||
"/v1alpha/telemetry/metrics/{metric_name}": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "A QueryMetricsResponse.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/QueryMetricsResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Telemetry"
|
||||
],
|
||||
"summary": "Query metrics.",
|
||||
"description": "Query metrics.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "metric_name",
|
||||
"in": "path",
|
||||
"description": "The name of the metric to query.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/QueryMetricsRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
},
|
||||
"deprecated": false
|
||||
}
|
||||
},
|
||||
"/v1alpha/telemetry/spans": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "A QuerySpansResponse.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/QuerySpansResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Telemetry"
|
||||
],
|
||||
"summary": "Query spans.",
|
||||
"description": "Query spans.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/QuerySpansRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
},
|
||||
"deprecated": false
|
||||
}
|
||||
},
|
||||
"/v1alpha/telemetry/spans/export": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK"
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Telemetry"
|
||||
],
|
||||
"summary": "Save spans to a dataset.",
|
||||
"description": "Save spans to a dataset.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/SaveSpansToDatasetRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
},
|
||||
"deprecated": false
|
||||
}
|
||||
},
|
||||
"/v1alpha/telemetry/spans/{span_id}/tree": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "A QuerySpanTreeResponse.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/QuerySpanTreeResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Telemetry"
|
||||
],
|
||||
"summary": "Get a span tree by its ID.",
|
||||
"description": "Get a span tree by its ID.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "span_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the span to get the tree from.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/GetSpanTreeRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
},
|
||||
"deprecated": false
|
||||
}
|
||||
},
|
||||
"/v1alpha/telemetry/traces": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "A QueryTracesResponse.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/QueryTracesResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Telemetry"
|
||||
],
|
||||
"summary": "Query traces.",
|
||||
"description": "Query traces.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/QueryTracesRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
},
|
||||
"deprecated": false
|
||||
}
|
||||
},
|
||||
"/v1alpha/telemetry/traces/{trace_id}": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "A Trace.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/Trace"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Telemetry"
|
||||
],
|
||||
"summary": "Get a trace by its ID.",
|
||||
"description": "Get a trace by its ID.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "trace_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the trace to get.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"deprecated": false
|
||||
}
|
||||
},
|
||||
"/v1alpha/telemetry/traces/{trace_id}/spans/{span_id}": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "A Span.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/Span"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Telemetry"
|
||||
],
|
||||
"summary": "Get a span by its ID.",
|
||||
"description": "Get a span by its ID.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "trace_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the trace to get the span from.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "span_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the span to get.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"deprecated": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema",
|
||||
|
@ -5765,561 +5428,6 @@
|
|||
"logger_config"
|
||||
],
|
||||
"title": "SupervisedFineTuneRequest"
|
||||
},
|
||||
"QueryMetricsRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"start_time": {
|
||||
"type": "integer",
|
||||
"description": "The start time of the metric to query."
|
||||
},
|
||||
"end_time": {
|
||||
"type": "integer",
|
||||
"description": "The end time of the metric to query."
|
||||
},
|
||||
"granularity": {
|
||||
"type": "string",
|
||||
"description": "The granularity of the metric to query."
|
||||
},
|
||||
"query_type": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"range",
|
||||
"instant"
|
||||
],
|
||||
"description": "The type of query to perform."
|
||||
},
|
||||
"label_matchers": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "The name of the label to match"
|
||||
},
|
||||
"value": {
|
||||
"type": "string",
|
||||
"description": "The value to match against"
|
||||
},
|
||||
"operator": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"=",
|
||||
"!=",
|
||||
"=~",
|
||||
"!~"
|
||||
],
|
||||
"description": "The comparison operator to use for matching",
|
||||
"default": "="
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"name",
|
||||
"value",
|
||||
"operator"
|
||||
],
|
||||
"title": "MetricLabelMatcher",
|
||||
"description": "A matcher for filtering metrics by label values."
|
||||
},
|
||||
"description": "The label matchers to apply to the metric."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"start_time",
|
||||
"query_type"
|
||||
],
|
||||
"title": "QueryMetricsRequest"
|
||||
},
|
||||
"MetricDataPoint": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"timestamp": {
|
||||
"type": "integer",
|
||||
"description": "Unix timestamp when the metric value was recorded"
|
||||
},
|
||||
"value": {
|
||||
"type": "number",
|
||||
"description": "The numeric value of the metric at this timestamp"
|
||||
},
|
||||
"unit": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"timestamp",
|
||||
"value",
|
||||
"unit"
|
||||
],
|
||||
"title": "MetricDataPoint",
|
||||
"description": "A single data point in a metric time series."
|
||||
},
|
||||
"MetricLabel": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "The name of the label"
|
||||
},
|
||||
"value": {
|
||||
"type": "string",
|
||||
"description": "The value of the label"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"name",
|
||||
"value"
|
||||
],
|
||||
"title": "MetricLabel",
|
||||
"description": "A label associated with a metric."
|
||||
},
|
||||
"MetricSeries": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"metric": {
|
||||
"type": "string",
|
||||
"description": "The name of the metric"
|
||||
},
|
||||
"labels": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/MetricLabel"
|
||||
},
|
||||
"description": "List of labels associated with this metric series"
|
||||
},
|
||||
"values": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/MetricDataPoint"
|
||||
},
|
||||
"description": "List of data points in chronological order"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"metric",
|
||||
"labels",
|
||||
"values"
|
||||
],
|
||||
"title": "MetricSeries",
|
||||
"description": "A time series of metric data points."
|
||||
},
|
||||
"QueryMetricsResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"data": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/MetricSeries"
|
||||
},
|
||||
"description": "List of metric series matching the query criteria"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"data"
|
||||
],
|
||||
"title": "QueryMetricsResponse",
|
||||
"description": "Response containing metric time series data."
|
||||
},
|
||||
"QueryCondition": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"key": {
|
||||
"type": "string",
|
||||
"description": "The attribute key to filter on"
|
||||
},
|
||||
"op": {
|
||||
"$ref": "#/components/schemas/QueryConditionOp",
|
||||
"description": "The comparison operator to apply"
|
||||
},
|
||||
"value": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
],
|
||||
"description": "The value to compare against"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"key",
|
||||
"op",
|
||||
"value"
|
||||
],
|
||||
"title": "QueryCondition",
|
||||
"description": "A condition for filtering query results."
|
||||
},
|
||||
"QueryConditionOp": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"eq",
|
||||
"ne",
|
||||
"gt",
|
||||
"lt"
|
||||
],
|
||||
"title": "QueryConditionOp",
|
||||
"description": "Comparison operators for query conditions."
|
||||
},
|
||||
"QuerySpansRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"attribute_filters": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/QueryCondition"
|
||||
},
|
||||
"description": "The attribute filters to apply to the spans."
|
||||
},
|
||||
"attributes_to_return": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": "The attributes to return in the spans."
|
||||
},
|
||||
"max_depth": {
|
||||
"type": "integer",
|
||||
"description": "The maximum depth of the tree."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"attribute_filters",
|
||||
"attributes_to_return"
|
||||
],
|
||||
"title": "QuerySpansRequest"
|
||||
},
|
||||
"Span": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"span_id": {
|
||||
"type": "string",
|
||||
"description": "Unique identifier for the span"
|
||||
},
|
||||
"trace_id": {
|
||||
"type": "string",
|
||||
"description": "Unique identifier for the trace this span belongs to"
|
||||
},
|
||||
"parent_span_id": {
|
||||
"type": "string",
|
||||
"description": "(Optional) Unique identifier for the parent span, if this is a child span"
|
||||
},
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "Human-readable name describing the operation this span represents"
|
||||
},
|
||||
"start_time": {
|
||||
"type": "string",
|
||||
"format": "date-time",
|
||||
"description": "Timestamp when the operation began"
|
||||
},
|
||||
"end_time": {
|
||||
"type": "string",
|
||||
"format": "date-time",
|
||||
"description": "(Optional) Timestamp when the operation finished, if completed"
|
||||
},
|
||||
"attributes": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "(Optional) Key-value pairs containing additional metadata about the span"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"span_id",
|
||||
"trace_id",
|
||||
"name",
|
||||
"start_time"
|
||||
],
|
||||
"title": "Span",
|
||||
"description": "A span representing a single operation within a trace."
|
||||
},
|
||||
"QuerySpansResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"data": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/Span"
|
||||
},
|
||||
"description": "List of spans matching the query criteria"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"data"
|
||||
],
|
||||
"title": "QuerySpansResponse",
|
||||
"description": "Response containing a list of spans."
|
||||
},
|
||||
"SaveSpansToDatasetRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"attribute_filters": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/QueryCondition"
|
||||
},
|
||||
"description": "The attribute filters to apply to the spans."
|
||||
},
|
||||
"attributes_to_save": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": "The attributes to save to the dataset."
|
||||
},
|
||||
"dataset_id": {
|
||||
"type": "string",
|
||||
"description": "The ID of the dataset to save the spans to."
|
||||
},
|
||||
"max_depth": {
|
||||
"type": "integer",
|
||||
"description": "The maximum depth of the tree."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"attribute_filters",
|
||||
"attributes_to_save",
|
||||
"dataset_id"
|
||||
],
|
||||
"title": "SaveSpansToDatasetRequest"
|
||||
},
|
||||
"GetSpanTreeRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"attributes_to_return": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": "The attributes to return in the tree."
|
||||
},
|
||||
"max_depth": {
|
||||
"type": "integer",
|
||||
"description": "The maximum depth of the tree."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"title": "GetSpanTreeRequest"
|
||||
},
|
||||
"SpanStatus": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"ok",
|
||||
"error"
|
||||
],
|
||||
"title": "SpanStatus",
|
||||
"description": "The status of a span indicating whether it completed successfully or with an error."
|
||||
},
|
||||
"SpanWithStatus": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"span_id": {
|
||||
"type": "string",
|
||||
"description": "Unique identifier for the span"
|
||||
},
|
||||
"trace_id": {
|
||||
"type": "string",
|
||||
"description": "Unique identifier for the trace this span belongs to"
|
||||
},
|
||||
"parent_span_id": {
|
||||
"type": "string",
|
||||
"description": "(Optional) Unique identifier for the parent span, if this is a child span"
|
||||
},
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "Human-readable name describing the operation this span represents"
|
||||
},
|
||||
"start_time": {
|
||||
"type": "string",
|
||||
"format": "date-time",
|
||||
"description": "Timestamp when the operation began"
|
||||
},
|
||||
"end_time": {
|
||||
"type": "string",
|
||||
"format": "date-time",
|
||||
"description": "(Optional) Timestamp when the operation finished, if completed"
|
||||
},
|
||||
"attributes": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "(Optional) Key-value pairs containing additional metadata about the span"
|
||||
},
|
||||
"status": {
|
||||
"$ref": "#/components/schemas/SpanStatus",
|
||||
"description": "(Optional) The current status of the span"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"span_id",
|
||||
"trace_id",
|
||||
"name",
|
||||
"start_time"
|
||||
],
|
||||
"title": "SpanWithStatus",
|
||||
"description": "A span that includes status information."
|
||||
},
|
||||
"QuerySpanTreeResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"data": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"$ref": "#/components/schemas/SpanWithStatus"
|
||||
},
|
||||
"description": "Dictionary mapping span IDs to spans with status information"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"data"
|
||||
],
|
||||
"title": "QuerySpanTreeResponse",
|
||||
"description": "Response containing a tree structure of spans."
|
||||
},
|
||||
"QueryTracesRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"attribute_filters": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/QueryCondition"
|
||||
},
|
||||
"description": "The attribute filters to apply to the traces."
|
||||
},
|
||||
"limit": {
|
||||
"type": "integer",
|
||||
"description": "The limit of traces to return."
|
||||
},
|
||||
"offset": {
|
||||
"type": "integer",
|
||||
"description": "The offset of the traces to return."
|
||||
},
|
||||
"order_by": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": "The order by of the traces to return."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"title": "QueryTracesRequest"
|
||||
},
|
||||
"Trace": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"trace_id": {
|
||||
"type": "string",
|
||||
"description": "Unique identifier for the trace"
|
||||
},
|
||||
"root_span_id": {
|
||||
"type": "string",
|
||||
"description": "Unique identifier for the root span that started this trace"
|
||||
},
|
||||
"start_time": {
|
||||
"type": "string",
|
||||
"format": "date-time",
|
||||
"description": "Timestamp when the trace began"
|
||||
},
|
||||
"end_time": {
|
||||
"type": "string",
|
||||
"format": "date-time",
|
||||
"description": "(Optional) Timestamp when the trace finished, if completed"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"trace_id",
|
||||
"root_span_id",
|
||||
"start_time"
|
||||
],
|
||||
"title": "Trace",
|
||||
"description": "A trace representing the complete execution path of a request across multiple operations."
|
||||
},
|
||||
"QueryTracesResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"data": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/Trace"
|
||||
},
|
||||
"description": "List of traces matching the query criteria"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"data"
|
||||
],
|
||||
"title": "QueryTracesResponse",
|
||||
"description": "Response containing a list of traces."
|
||||
}
|
||||
},
|
||||
"responses": {
|
||||
|
@ -6416,10 +5524,6 @@
|
|||
{
|
||||
"name": "PostTraining (Coming Soon)",
|
||||
"description": ""
|
||||
},
|
||||
{
|
||||
"name": "Telemetry",
|
||||
"description": ""
|
||||
}
|
||||
],
|
||||
"x-tagGroups": [
|
||||
|
@ -6431,8 +5535,7 @@
|
|||
"DatasetIO",
|
||||
"Datasets",
|
||||
"Eval",
|
||||
"PostTraining (Coming Soon)",
|
||||
"Telemetry"
|
||||
"PostTraining (Coming Soon)"
|
||||
]
|
||||
}
|
||||
]
|
||||
|
|
663
docs/static/experimental-llama-stack-spec.yaml
vendored
663
docs/static/experimental-llama-stack-spec.yaml
vendored
|
@ -1224,238 +1224,6 @@ paths:
|
|||
$ref: '#/components/schemas/SupervisedFineTuneRequest'
|
||||
required: true
|
||||
deprecated: false
|
||||
/v1alpha/telemetry/metrics/{metric_name}:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: A QueryMetricsResponse.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/QueryMetricsResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Telemetry
|
||||
summary: Query metrics.
|
||||
description: Query metrics.
|
||||
parameters:
|
||||
- name: metric_name
|
||||
in: path
|
||||
description: The name of the metric to query.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/QueryMetricsRequest'
|
||||
required: true
|
||||
deprecated: false
|
||||
/v1alpha/telemetry/spans:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: A QuerySpansResponse.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/QuerySpansResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Telemetry
|
||||
summary: Query spans.
|
||||
description: Query spans.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/QuerySpansRequest'
|
||||
required: true
|
||||
deprecated: false
|
||||
/v1alpha/telemetry/spans/export:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Telemetry
|
||||
summary: Save spans to a dataset.
|
||||
description: Save spans to a dataset.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/SaveSpansToDatasetRequest'
|
||||
required: true
|
||||
deprecated: false
|
||||
/v1alpha/telemetry/spans/{span_id}/tree:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: A QuerySpanTreeResponse.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/QuerySpanTreeResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Telemetry
|
||||
summary: Get a span tree by its ID.
|
||||
description: Get a span tree by its ID.
|
||||
parameters:
|
||||
- name: span_id
|
||||
in: path
|
||||
description: The ID of the span to get the tree from.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/GetSpanTreeRequest'
|
||||
required: true
|
||||
deprecated: false
|
||||
/v1alpha/telemetry/traces:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: A QueryTracesResponse.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/QueryTracesResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Telemetry
|
||||
summary: Query traces.
|
||||
description: Query traces.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/QueryTracesRequest'
|
||||
required: true
|
||||
deprecated: false
|
||||
/v1alpha/telemetry/traces/{trace_id}:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: A Trace.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/Trace'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Telemetry
|
||||
summary: Get a trace by its ID.
|
||||
description: Get a trace by its ID.
|
||||
parameters:
|
||||
- name: trace_id
|
||||
in: path
|
||||
description: The ID of the trace to get.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
deprecated: false
|
||||
/v1alpha/telemetry/traces/{trace_id}/spans/{span_id}:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: A Span.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/Span'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Telemetry
|
||||
summary: Get a span by its ID.
|
||||
description: Get a span by its ID.
|
||||
parameters:
|
||||
- name: trace_id
|
||||
in: path
|
||||
description: >-
|
||||
The ID of the trace to get the span from.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: span_id
|
||||
in: path
|
||||
description: The ID of the span to get.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
deprecated: false
|
||||
jsonSchemaDialect: >-
|
||||
https://json-schema.org/draft/2020-12/schema
|
||||
components:
|
||||
|
@ -4249,434 +4017,6 @@ components:
|
|||
- hyperparam_search_config
|
||||
- logger_config
|
||||
title: SupervisedFineTuneRequest
|
||||
QueryMetricsRequest:
|
||||
type: object
|
||||
properties:
|
||||
start_time:
|
||||
type: integer
|
||||
description: The start time of the metric to query.
|
||||
end_time:
|
||||
type: integer
|
||||
description: The end time of the metric to query.
|
||||
granularity:
|
||||
type: string
|
||||
description: The granularity of the metric to query.
|
||||
query_type:
|
||||
type: string
|
||||
enum:
|
||||
- range
|
||||
- instant
|
||||
description: The type of query to perform.
|
||||
label_matchers:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
description: The name of the label to match
|
||||
value:
|
||||
type: string
|
||||
description: The value to match against
|
||||
operator:
|
||||
type: string
|
||||
enum:
|
||||
- '='
|
||||
- '!='
|
||||
- =~
|
||||
- '!~'
|
||||
description: >-
|
||||
The comparison operator to use for matching
|
||||
default: '='
|
||||
additionalProperties: false
|
||||
required:
|
||||
- name
|
||||
- value
|
||||
- operator
|
||||
title: MetricLabelMatcher
|
||||
description: >-
|
||||
A matcher for filtering metrics by label values.
|
||||
description: >-
|
||||
The label matchers to apply to the metric.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- start_time
|
||||
- query_type
|
||||
title: QueryMetricsRequest
|
||||
MetricDataPoint:
|
||||
type: object
|
||||
properties:
|
||||
timestamp:
|
||||
type: integer
|
||||
description: >-
|
||||
Unix timestamp when the metric value was recorded
|
||||
value:
|
||||
type: number
|
||||
description: >-
|
||||
The numeric value of the metric at this timestamp
|
||||
unit:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- timestamp
|
||||
- value
|
||||
- unit
|
||||
title: MetricDataPoint
|
||||
description: >-
|
||||
A single data point in a metric time series.
|
||||
MetricLabel:
|
||||
type: object
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
description: The name of the label
|
||||
value:
|
||||
type: string
|
||||
description: The value of the label
|
||||
additionalProperties: false
|
||||
required:
|
||||
- name
|
||||
- value
|
||||
title: MetricLabel
|
||||
description: A label associated with a metric.
|
||||
MetricSeries:
|
||||
type: object
|
||||
properties:
|
||||
metric:
|
||||
type: string
|
||||
description: The name of the metric
|
||||
labels:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/MetricLabel'
|
||||
description: >-
|
||||
List of labels associated with this metric series
|
||||
values:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/MetricDataPoint'
|
||||
description: >-
|
||||
List of data points in chronological order
|
||||
additionalProperties: false
|
||||
required:
|
||||
- metric
|
||||
- labels
|
||||
- values
|
||||
title: MetricSeries
|
||||
description: A time series of metric data points.
|
||||
QueryMetricsResponse:
|
||||
type: object
|
||||
properties:
|
||||
data:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/MetricSeries'
|
||||
description: >-
|
||||
List of metric series matching the query criteria
|
||||
additionalProperties: false
|
||||
required:
|
||||
- data
|
||||
title: QueryMetricsResponse
|
||||
description: >-
|
||||
Response containing metric time series data.
|
||||
QueryCondition:
|
||||
type: object
|
||||
properties:
|
||||
key:
|
||||
type: string
|
||||
description: The attribute key to filter on
|
||||
op:
|
||||
$ref: '#/components/schemas/QueryConditionOp'
|
||||
description: The comparison operator to apply
|
||||
value:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: The value to compare against
|
||||
additionalProperties: false
|
||||
required:
|
||||
- key
|
||||
- op
|
||||
- value
|
||||
title: QueryCondition
|
||||
description: A condition for filtering query results.
|
||||
QueryConditionOp:
|
||||
type: string
|
||||
enum:
|
||||
- eq
|
||||
- ne
|
||||
- gt
|
||||
- lt
|
||||
title: QueryConditionOp
|
||||
description: >-
|
||||
Comparison operators for query conditions.
|
||||
QuerySpansRequest:
|
||||
type: object
|
||||
properties:
|
||||
attribute_filters:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/QueryCondition'
|
||||
description: >-
|
||||
The attribute filters to apply to the spans.
|
||||
attributes_to_return:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
description: The attributes to return in the spans.
|
||||
max_depth:
|
||||
type: integer
|
||||
description: The maximum depth of the tree.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- attribute_filters
|
||||
- attributes_to_return
|
||||
title: QuerySpansRequest
|
||||
Span:
|
||||
type: object
|
||||
properties:
|
||||
span_id:
|
||||
type: string
|
||||
description: Unique identifier for the span
|
||||
trace_id:
|
||||
type: string
|
||||
description: >-
|
||||
Unique identifier for the trace this span belongs to
|
||||
parent_span_id:
|
||||
type: string
|
||||
description: >-
|
||||
(Optional) Unique identifier for the parent span, if this is a child span
|
||||
name:
|
||||
type: string
|
||||
description: >-
|
||||
Human-readable name describing the operation this span represents
|
||||
start_time:
|
||||
type: string
|
||||
format: date-time
|
||||
description: Timestamp when the operation began
|
||||
end_time:
|
||||
type: string
|
||||
format: date-time
|
||||
description: >-
|
||||
(Optional) Timestamp when the operation finished, if completed
|
||||
attributes:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: >-
|
||||
(Optional) Key-value pairs containing additional metadata about the span
|
||||
additionalProperties: false
|
||||
required:
|
||||
- span_id
|
||||
- trace_id
|
||||
- name
|
||||
- start_time
|
||||
title: Span
|
||||
description: >-
|
||||
A span representing a single operation within a trace.
|
||||
QuerySpansResponse:
|
||||
type: object
|
||||
properties:
|
||||
data:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/Span'
|
||||
description: >-
|
||||
List of spans matching the query criteria
|
||||
additionalProperties: false
|
||||
required:
|
||||
- data
|
||||
title: QuerySpansResponse
|
||||
description: Response containing a list of spans.
|
||||
SaveSpansToDatasetRequest:
|
||||
type: object
|
||||
properties:
|
||||
attribute_filters:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/QueryCondition'
|
||||
description: >-
|
||||
The attribute filters to apply to the spans.
|
||||
attributes_to_save:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
description: The attributes to save to the dataset.
|
||||
dataset_id:
|
||||
type: string
|
||||
description: >-
|
||||
The ID of the dataset to save the spans to.
|
||||
max_depth:
|
||||
type: integer
|
||||
description: The maximum depth of the tree.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- attribute_filters
|
||||
- attributes_to_save
|
||||
- dataset_id
|
||||
title: SaveSpansToDatasetRequest
|
||||
GetSpanTreeRequest:
|
||||
type: object
|
||||
properties:
|
||||
attributes_to_return:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
description: The attributes to return in the tree.
|
||||
max_depth:
|
||||
type: integer
|
||||
description: The maximum depth of the tree.
|
||||
additionalProperties: false
|
||||
title: GetSpanTreeRequest
|
||||
SpanStatus:
|
||||
type: string
|
||||
enum:
|
||||
- ok
|
||||
- error
|
||||
title: SpanStatus
|
||||
description: >-
|
||||
The status of a span indicating whether it completed successfully or with
|
||||
an error.
|
||||
SpanWithStatus:
|
||||
type: object
|
||||
properties:
|
||||
span_id:
|
||||
type: string
|
||||
description: Unique identifier for the span
|
||||
trace_id:
|
||||
type: string
|
||||
description: >-
|
||||
Unique identifier for the trace this span belongs to
|
||||
parent_span_id:
|
||||
type: string
|
||||
description: >-
|
||||
(Optional) Unique identifier for the parent span, if this is a child span
|
||||
name:
|
||||
type: string
|
||||
description: >-
|
||||
Human-readable name describing the operation this span represents
|
||||
start_time:
|
||||
type: string
|
||||
format: date-time
|
||||
description: Timestamp when the operation began
|
||||
end_time:
|
||||
type: string
|
||||
format: date-time
|
||||
description: >-
|
||||
(Optional) Timestamp when the operation finished, if completed
|
||||
attributes:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: >-
|
||||
(Optional) Key-value pairs containing additional metadata about the span
|
||||
status:
|
||||
$ref: '#/components/schemas/SpanStatus'
|
||||
description: >-
|
||||
(Optional) The current status of the span
|
||||
additionalProperties: false
|
||||
required:
|
||||
- span_id
|
||||
- trace_id
|
||||
- name
|
||||
- start_time
|
||||
title: SpanWithStatus
|
||||
description: A span that includes status information.
|
||||
QuerySpanTreeResponse:
|
||||
type: object
|
||||
properties:
|
||||
data:
|
||||
type: object
|
||||
additionalProperties:
|
||||
$ref: '#/components/schemas/SpanWithStatus'
|
||||
description: >-
|
||||
Dictionary mapping span IDs to spans with status information
|
||||
additionalProperties: false
|
||||
required:
|
||||
- data
|
||||
title: QuerySpanTreeResponse
|
||||
description: >-
|
||||
Response containing a tree structure of spans.
|
||||
QueryTracesRequest:
|
||||
type: object
|
||||
properties:
|
||||
attribute_filters:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/QueryCondition'
|
||||
description: >-
|
||||
The attribute filters to apply to the traces.
|
||||
limit:
|
||||
type: integer
|
||||
description: The limit of traces to return.
|
||||
offset:
|
||||
type: integer
|
||||
description: The offset of the traces to return.
|
||||
order_by:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
description: The order by of the traces to return.
|
||||
additionalProperties: false
|
||||
title: QueryTracesRequest
|
||||
Trace:
|
||||
type: object
|
||||
properties:
|
||||
trace_id:
|
||||
type: string
|
||||
description: Unique identifier for the trace
|
||||
root_span_id:
|
||||
type: string
|
||||
description: >-
|
||||
Unique identifier for the root span that started this trace
|
||||
start_time:
|
||||
type: string
|
||||
format: date-time
|
||||
description: Timestamp when the trace began
|
||||
end_time:
|
||||
type: string
|
||||
format: date-time
|
||||
description: >-
|
||||
(Optional) Timestamp when the trace finished, if completed
|
||||
additionalProperties: false
|
||||
required:
|
||||
- trace_id
|
||||
- root_span_id
|
||||
- start_time
|
||||
title: Trace
|
||||
description: >-
|
||||
A trace representing the complete execution path of a request across multiple
|
||||
operations.
|
||||
QueryTracesResponse:
|
||||
type: object
|
||||
properties:
|
||||
data:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/Trace'
|
||||
description: >-
|
||||
List of traces matching the query criteria
|
||||
additionalProperties: false
|
||||
required:
|
||||
- data
|
||||
title: QueryTracesResponse
|
||||
description: Response containing a list of traces.
|
||||
responses:
|
||||
BadRequest400:
|
||||
description: The request was invalid or malformed
|
||||
|
@ -4784,8 +4124,6 @@ tags:
|
|||
Llama Stack Evaluation API for running evaluations on model and agent candidates.
|
||||
- name: PostTraining (Coming Soon)
|
||||
description: ''
|
||||
- name: Telemetry
|
||||
description: ''
|
||||
x-tagGroups:
|
||||
- name: Operations
|
||||
tags:
|
||||
|
@ -4795,4 +4133,3 @@ x-tagGroups:
|
|||
- Datasets
|
||||
- Eval
|
||||
- PostTraining (Coming Soon)
|
||||
- Telemetry
|
||||
|
|
391
docs/static/llama-stack-spec.html
vendored
391
docs/static/llama-stack-spec.html
vendored
|
@ -2525,44 +2525,6 @@
|
|||
"deprecated": false
|
||||
}
|
||||
},
|
||||
"/v1/telemetry/events": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK"
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Telemetry"
|
||||
],
|
||||
"summary": "Log an event.",
|
||||
"description": "Log an event.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/LogEventRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
},
|
||||
"deprecated": false
|
||||
}
|
||||
},
|
||||
"/v1/tool-runtime/invoke": {
|
||||
"post": {
|
||||
"responses": {
|
||||
|
@ -10364,354 +10326,6 @@
|
|||
"title": "SyntheticDataGenerationResponse",
|
||||
"description": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold."
|
||||
},
|
||||
"Event": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/UnstructuredLogEvent"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/MetricEvent"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/StructuredLogEvent"
|
||||
}
|
||||
],
|
||||
"discriminator": {
|
||||
"propertyName": "type",
|
||||
"mapping": {
|
||||
"unstructured_log": "#/components/schemas/UnstructuredLogEvent",
|
||||
"metric": "#/components/schemas/MetricEvent",
|
||||
"structured_log": "#/components/schemas/StructuredLogEvent"
|
||||
}
|
||||
}
|
||||
},
|
||||
"EventType": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"unstructured_log",
|
||||
"structured_log",
|
||||
"metric"
|
||||
],
|
||||
"title": "EventType",
|
||||
"description": "The type of telemetry event being logged."
|
||||
},
|
||||
"LogSeverity": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"verbose",
|
||||
"debug",
|
||||
"info",
|
||||
"warn",
|
||||
"error",
|
||||
"critical"
|
||||
],
|
||||
"title": "LogSeverity",
|
||||
"description": "The severity level of a log message."
|
||||
},
|
||||
"MetricEvent": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"trace_id": {
|
||||
"type": "string",
|
||||
"description": "Unique identifier for the trace this event belongs to"
|
||||
},
|
||||
"span_id": {
|
||||
"type": "string",
|
||||
"description": "Unique identifier for the span this event belongs to"
|
||||
},
|
||||
"timestamp": {
|
||||
"type": "string",
|
||||
"format": "date-time",
|
||||
"description": "Timestamp when the event occurred"
|
||||
},
|
||||
"attributes": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "integer"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "(Optional) Key-value pairs containing additional metadata about the event"
|
||||
},
|
||||
"type": {
|
||||
"$ref": "#/components/schemas/EventType",
|
||||
"const": "metric",
|
||||
"default": "metric",
|
||||
"description": "Event type identifier set to METRIC"
|
||||
},
|
||||
"metric": {
|
||||
"type": "string",
|
||||
"description": "The name of the metric being measured"
|
||||
},
|
||||
"value": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "integer"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
}
|
||||
],
|
||||
"description": "The numeric value of the metric measurement"
|
||||
},
|
||||
"unit": {
|
||||
"type": "string",
|
||||
"description": "The unit of measurement for the metric value"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"trace_id",
|
||||
"span_id",
|
||||
"timestamp",
|
||||
"type",
|
||||
"metric",
|
||||
"value",
|
||||
"unit"
|
||||
],
|
||||
"title": "MetricEvent",
|
||||
"description": "A metric event containing a measured value."
|
||||
},
|
||||
"SpanEndPayload": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"$ref": "#/components/schemas/StructuredLogType",
|
||||
"const": "span_end",
|
||||
"default": "span_end",
|
||||
"description": "Payload type identifier set to SPAN_END"
|
||||
},
|
||||
"status": {
|
||||
"$ref": "#/components/schemas/SpanStatus",
|
||||
"description": "The final status of the span indicating success or failure"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type",
|
||||
"status"
|
||||
],
|
||||
"title": "SpanEndPayload",
|
||||
"description": "Payload for a span end event."
|
||||
},
|
||||
"SpanStartPayload": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"$ref": "#/components/schemas/StructuredLogType",
|
||||
"const": "span_start",
|
||||
"default": "span_start",
|
||||
"description": "Payload type identifier set to SPAN_START"
|
||||
},
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "Human-readable name describing the operation this span represents"
|
||||
},
|
||||
"parent_span_id": {
|
||||
"type": "string",
|
||||
"description": "(Optional) Unique identifier for the parent span, if this is a child span"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type",
|
||||
"name"
|
||||
],
|
||||
"title": "SpanStartPayload",
|
||||
"description": "Payload for a span start event."
|
||||
},
|
||||
"SpanStatus": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"ok",
|
||||
"error"
|
||||
],
|
||||
"title": "SpanStatus",
|
||||
"description": "The status of a span indicating whether it completed successfully or with an error."
|
||||
},
|
||||
"StructuredLogEvent": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"trace_id": {
|
||||
"type": "string",
|
||||
"description": "Unique identifier for the trace this event belongs to"
|
||||
},
|
||||
"span_id": {
|
||||
"type": "string",
|
||||
"description": "Unique identifier for the span this event belongs to"
|
||||
},
|
||||
"timestamp": {
|
||||
"type": "string",
|
||||
"format": "date-time",
|
||||
"description": "Timestamp when the event occurred"
|
||||
},
|
||||
"attributes": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "integer"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "(Optional) Key-value pairs containing additional metadata about the event"
|
||||
},
|
||||
"type": {
|
||||
"$ref": "#/components/schemas/EventType",
|
||||
"const": "structured_log",
|
||||
"default": "structured_log",
|
||||
"description": "Event type identifier set to STRUCTURED_LOG"
|
||||
},
|
||||
"payload": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/SpanStartPayload"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/SpanEndPayload"
|
||||
}
|
||||
],
|
||||
"discriminator": {
|
||||
"propertyName": "type",
|
||||
"mapping": {
|
||||
"span_start": "#/components/schemas/SpanStartPayload",
|
||||
"span_end": "#/components/schemas/SpanEndPayload"
|
||||
}
|
||||
},
|
||||
"description": "The structured payload data for the log event"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"trace_id",
|
||||
"span_id",
|
||||
"timestamp",
|
||||
"type",
|
||||
"payload"
|
||||
],
|
||||
"title": "StructuredLogEvent",
|
||||
"description": "A structured log event containing typed payload data."
|
||||
},
|
||||
"StructuredLogType": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"span_start",
|
||||
"span_end"
|
||||
],
|
||||
"title": "StructuredLogType",
|
||||
"description": "The type of structured log event payload."
|
||||
},
|
||||
"UnstructuredLogEvent": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"trace_id": {
|
||||
"type": "string",
|
||||
"description": "Unique identifier for the trace this event belongs to"
|
||||
},
|
||||
"span_id": {
|
||||
"type": "string",
|
||||
"description": "Unique identifier for the span this event belongs to"
|
||||
},
|
||||
"timestamp": {
|
||||
"type": "string",
|
||||
"format": "date-time",
|
||||
"description": "Timestamp when the event occurred"
|
||||
},
|
||||
"attributes": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "integer"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "(Optional) Key-value pairs containing additional metadata about the event"
|
||||
},
|
||||
"type": {
|
||||
"$ref": "#/components/schemas/EventType",
|
||||
"const": "unstructured_log",
|
||||
"default": "unstructured_log",
|
||||
"description": "Event type identifier set to UNSTRUCTURED_LOG"
|
||||
},
|
||||
"message": {
|
||||
"type": "string",
|
||||
"description": "The log message text"
|
||||
},
|
||||
"severity": {
|
||||
"$ref": "#/components/schemas/LogSeverity",
|
||||
"description": "The severity level of the log message"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"trace_id",
|
||||
"span_id",
|
||||
"timestamp",
|
||||
"type",
|
||||
"message",
|
||||
"severity"
|
||||
],
|
||||
"title": "UnstructuredLogEvent",
|
||||
"description": "An unstructured log event containing a simple text message."
|
||||
},
|
||||
"LogEventRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"event": {
|
||||
"$ref": "#/components/schemas/Event",
|
||||
"description": "The event to log."
|
||||
},
|
||||
"ttl_seconds": {
|
||||
"type": "integer",
|
||||
"description": "The time to live of the event."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"event",
|
||||
"ttl_seconds"
|
||||
],
|
||||
"title": "LogEventRequest"
|
||||
},
|
||||
"InvokeToolRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -12962,10 +12576,6 @@
|
|||
"name": "SyntheticDataGeneration (Coming Soon)",
|
||||
"description": ""
|
||||
},
|
||||
{
|
||||
"name": "Telemetry",
|
||||
"description": ""
|
||||
},
|
||||
{
|
||||
"name": "ToolGroups",
|
||||
"description": ""
|
||||
|
@ -13000,7 +12610,6 @@
|
|||
"ScoringFunctions",
|
||||
"Shields",
|
||||
"SyntheticDataGeneration (Coming Soon)",
|
||||
"Telemetry",
|
||||
"ToolGroups",
|
||||
"ToolRuntime",
|
||||
"VectorDBs",
|
||||
|
|
291
docs/static/llama-stack-spec.yaml
vendored
291
docs/static/llama-stack-spec.yaml
vendored
|
@ -1944,33 +1944,6 @@ paths:
|
|||
$ref: '#/components/schemas/SyntheticDataGenerateRequest'
|
||||
required: true
|
||||
deprecated: false
|
||||
/v1/telemetry/events:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Telemetry
|
||||
summary: Log an event.
|
||||
description: Log an event.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/LogEventRequest'
|
||||
required: true
|
||||
deprecated: false
|
||||
/v1/tool-runtime/invoke:
|
||||
post:
|
||||
responses:
|
||||
|
@ -7840,267 +7813,6 @@ components:
|
|||
description: >-
|
||||
Response from the synthetic data generation. Batch of (prompt, response, score)
|
||||
tuples that pass the threshold.
|
||||
Event:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/UnstructuredLogEvent'
|
||||
- $ref: '#/components/schemas/MetricEvent'
|
||||
- $ref: '#/components/schemas/StructuredLogEvent'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
unstructured_log: '#/components/schemas/UnstructuredLogEvent'
|
||||
metric: '#/components/schemas/MetricEvent'
|
||||
structured_log: '#/components/schemas/StructuredLogEvent'
|
||||
EventType:
|
||||
type: string
|
||||
enum:
|
||||
- unstructured_log
|
||||
- structured_log
|
||||
- metric
|
||||
title: EventType
|
||||
description: >-
|
||||
The type of telemetry event being logged.
|
||||
LogSeverity:
|
||||
type: string
|
||||
enum:
|
||||
- verbose
|
||||
- debug
|
||||
- info
|
||||
- warn
|
||||
- error
|
||||
- critical
|
||||
title: LogSeverity
|
||||
description: The severity level of a log message.
|
||||
MetricEvent:
|
||||
type: object
|
||||
properties:
|
||||
trace_id:
|
||||
type: string
|
||||
description: >-
|
||||
Unique identifier for the trace this event belongs to
|
||||
span_id:
|
||||
type: string
|
||||
description: >-
|
||||
Unique identifier for the span this event belongs to
|
||||
timestamp:
|
||||
type: string
|
||||
format: date-time
|
||||
description: Timestamp when the event occurred
|
||||
attributes:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: string
|
||||
- type: integer
|
||||
- type: number
|
||||
- type: boolean
|
||||
- type: 'null'
|
||||
description: >-
|
||||
(Optional) Key-value pairs containing additional metadata about the event
|
||||
type:
|
||||
$ref: '#/components/schemas/EventType'
|
||||
const: metric
|
||||
default: metric
|
||||
description: Event type identifier set to METRIC
|
||||
metric:
|
||||
type: string
|
||||
description: The name of the metric being measured
|
||||
value:
|
||||
oneOf:
|
||||
- type: integer
|
||||
- type: number
|
||||
description: >-
|
||||
The numeric value of the metric measurement
|
||||
unit:
|
||||
type: string
|
||||
description: >-
|
||||
The unit of measurement for the metric value
|
||||
additionalProperties: false
|
||||
required:
|
||||
- trace_id
|
||||
- span_id
|
||||
- timestamp
|
||||
- type
|
||||
- metric
|
||||
- value
|
||||
- unit
|
||||
title: MetricEvent
|
||||
description: >-
|
||||
A metric event containing a measured value.
|
||||
SpanEndPayload:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
$ref: '#/components/schemas/StructuredLogType'
|
||||
const: span_end
|
||||
default: span_end
|
||||
description: Payload type identifier set to SPAN_END
|
||||
status:
|
||||
$ref: '#/components/schemas/SpanStatus'
|
||||
description: >-
|
||||
The final status of the span indicating success or failure
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
- status
|
||||
title: SpanEndPayload
|
||||
description: Payload for a span end event.
|
||||
SpanStartPayload:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
$ref: '#/components/schemas/StructuredLogType'
|
||||
const: span_start
|
||||
default: span_start
|
||||
description: >-
|
||||
Payload type identifier set to SPAN_START
|
||||
name:
|
||||
type: string
|
||||
description: >-
|
||||
Human-readable name describing the operation this span represents
|
||||
parent_span_id:
|
||||
type: string
|
||||
description: >-
|
||||
(Optional) Unique identifier for the parent span, if this is a child span
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
- name
|
||||
title: SpanStartPayload
|
||||
description: Payload for a span start event.
|
||||
SpanStatus:
|
||||
type: string
|
||||
enum:
|
||||
- ok
|
||||
- error
|
||||
title: SpanStatus
|
||||
description: >-
|
||||
The status of a span indicating whether it completed successfully or with
|
||||
an error.
|
||||
StructuredLogEvent:
|
||||
type: object
|
||||
properties:
|
||||
trace_id:
|
||||
type: string
|
||||
description: >-
|
||||
Unique identifier for the trace this event belongs to
|
||||
span_id:
|
||||
type: string
|
||||
description: >-
|
||||
Unique identifier for the span this event belongs to
|
||||
timestamp:
|
||||
type: string
|
||||
format: date-time
|
||||
description: Timestamp when the event occurred
|
||||
attributes:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: string
|
||||
- type: integer
|
||||
- type: number
|
||||
- type: boolean
|
||||
- type: 'null'
|
||||
description: >-
|
||||
(Optional) Key-value pairs containing additional metadata about the event
|
||||
type:
|
||||
$ref: '#/components/schemas/EventType'
|
||||
const: structured_log
|
||||
default: structured_log
|
||||
description: >-
|
||||
Event type identifier set to STRUCTURED_LOG
|
||||
payload:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/SpanStartPayload'
|
||||
- $ref: '#/components/schemas/SpanEndPayload'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
span_start: '#/components/schemas/SpanStartPayload'
|
||||
span_end: '#/components/schemas/SpanEndPayload'
|
||||
description: >-
|
||||
The structured payload data for the log event
|
||||
additionalProperties: false
|
||||
required:
|
||||
- trace_id
|
||||
- span_id
|
||||
- timestamp
|
||||
- type
|
||||
- payload
|
||||
title: StructuredLogEvent
|
||||
description: >-
|
||||
A structured log event containing typed payload data.
|
||||
StructuredLogType:
|
||||
type: string
|
||||
enum:
|
||||
- span_start
|
||||
- span_end
|
||||
title: StructuredLogType
|
||||
description: >-
|
||||
The type of structured log event payload.
|
||||
UnstructuredLogEvent:
|
||||
type: object
|
||||
properties:
|
||||
trace_id:
|
||||
type: string
|
||||
description: >-
|
||||
Unique identifier for the trace this event belongs to
|
||||
span_id:
|
||||
type: string
|
||||
description: >-
|
||||
Unique identifier for the span this event belongs to
|
||||
timestamp:
|
||||
type: string
|
||||
format: date-time
|
||||
description: Timestamp when the event occurred
|
||||
attributes:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: string
|
||||
- type: integer
|
||||
- type: number
|
||||
- type: boolean
|
||||
- type: 'null'
|
||||
description: >-
|
||||
(Optional) Key-value pairs containing additional metadata about the event
|
||||
type:
|
||||
$ref: '#/components/schemas/EventType'
|
||||
const: unstructured_log
|
||||
default: unstructured_log
|
||||
description: >-
|
||||
Event type identifier set to UNSTRUCTURED_LOG
|
||||
message:
|
||||
type: string
|
||||
description: The log message text
|
||||
severity:
|
||||
$ref: '#/components/schemas/LogSeverity'
|
||||
description: The severity level of the log message
|
||||
additionalProperties: false
|
||||
required:
|
||||
- trace_id
|
||||
- span_id
|
||||
- timestamp
|
||||
- type
|
||||
- message
|
||||
- severity
|
||||
title: UnstructuredLogEvent
|
||||
description: >-
|
||||
An unstructured log event containing a simple text message.
|
||||
LogEventRequest:
|
||||
type: object
|
||||
properties:
|
||||
event:
|
||||
$ref: '#/components/schemas/Event'
|
||||
description: The event to log.
|
||||
ttl_seconds:
|
||||
type: integer
|
||||
description: The time to live of the event.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- event
|
||||
- ttl_seconds
|
||||
title: LogEventRequest
|
||||
InvokeToolRequest:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -9833,8 +9545,6 @@ tags:
|
|||
description: ''
|
||||
- name: SyntheticDataGeneration (Coming Soon)
|
||||
description: ''
|
||||
- name: Telemetry
|
||||
description: ''
|
||||
- name: ToolGroups
|
||||
description: ''
|
||||
- name: ToolRuntime
|
||||
|
@ -9859,7 +9569,6 @@ x-tagGroups:
|
|||
- ScoringFunctions
|
||||
- Shields
|
||||
- SyntheticDataGeneration (Coming Soon)
|
||||
- Telemetry
|
||||
- ToolGroups
|
||||
- ToolRuntime
|
||||
- VectorDBs
|
||||
|
|
1274
docs/static/stainless-llama-stack-spec.html
vendored
1274
docs/static/stainless-llama-stack-spec.html
vendored
File diff suppressed because it is too large
Load diff
942
docs/static/stainless-llama-stack-spec.yaml
vendored
942
docs/static/stainless-llama-stack-spec.yaml
vendored
|
@ -1947,33 +1947,6 @@ paths:
|
|||
$ref: '#/components/schemas/SyntheticDataGenerateRequest'
|
||||
required: true
|
||||
deprecated: false
|
||||
/v1/telemetry/events:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Telemetry
|
||||
summary: Log an event.
|
||||
description: Log an event.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/LogEventRequest'
|
||||
required: true
|
||||
deprecated: false
|
||||
/v1/tool-runtime/invoke:
|
||||
post:
|
||||
responses:
|
||||
|
@ -4392,238 +4365,6 @@ paths:
|
|||
$ref: '#/components/schemas/SupervisedFineTuneRequest'
|
||||
required: true
|
||||
deprecated: false
|
||||
/v1alpha/telemetry/metrics/{metric_name}:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: A QueryMetricsResponse.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/QueryMetricsResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Telemetry
|
||||
summary: Query metrics.
|
||||
description: Query metrics.
|
||||
parameters:
|
||||
- name: metric_name
|
||||
in: path
|
||||
description: The name of the metric to query.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/QueryMetricsRequest'
|
||||
required: true
|
||||
deprecated: false
|
||||
/v1alpha/telemetry/spans:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: A QuerySpansResponse.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/QuerySpansResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Telemetry
|
||||
summary: Query spans.
|
||||
description: Query spans.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/QuerySpansRequest'
|
||||
required: true
|
||||
deprecated: false
|
||||
/v1alpha/telemetry/spans/export:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Telemetry
|
||||
summary: Save spans to a dataset.
|
||||
description: Save spans to a dataset.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/SaveSpansToDatasetRequest'
|
||||
required: true
|
||||
deprecated: false
|
||||
/v1alpha/telemetry/spans/{span_id}/tree:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: A QuerySpanTreeResponse.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/QuerySpanTreeResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Telemetry
|
||||
summary: Get a span tree by its ID.
|
||||
description: Get a span tree by its ID.
|
||||
parameters:
|
||||
- name: span_id
|
||||
in: path
|
||||
description: The ID of the span to get the tree from.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/GetSpanTreeRequest'
|
||||
required: true
|
||||
deprecated: false
|
||||
/v1alpha/telemetry/traces:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: A QueryTracesResponse.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/QueryTracesResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Telemetry
|
||||
summary: Query traces.
|
||||
description: Query traces.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/QueryTracesRequest'
|
||||
required: true
|
||||
deprecated: false
|
||||
/v1alpha/telemetry/traces/{trace_id}:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: A Trace.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/Trace'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Telemetry
|
||||
summary: Get a trace by its ID.
|
||||
description: Get a trace by its ID.
|
||||
parameters:
|
||||
- name: trace_id
|
||||
in: path
|
||||
description: The ID of the trace to get.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
deprecated: false
|
||||
/v1alpha/telemetry/traces/{trace_id}/spans/{span_id}:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: A Span.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/Span'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Telemetry
|
||||
summary: Get a span by its ID.
|
||||
description: Get a span by its ID.
|
||||
parameters:
|
||||
- name: trace_id
|
||||
in: path
|
||||
description: >-
|
||||
The ID of the trace to get the span from.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: span_id
|
||||
in: path
|
||||
description: The ID of the span to get.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
deprecated: false
|
||||
jsonSchemaDialect: >-
|
||||
https://json-schema.org/draft/2020-12/schema
|
||||
components:
|
||||
|
@ -9285,267 +9026,6 @@ components:
|
|||
description: >-
|
||||
Response from the synthetic data generation. Batch of (prompt, response, score)
|
||||
tuples that pass the threshold.
|
||||
Event:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/UnstructuredLogEvent'
|
||||
- $ref: '#/components/schemas/MetricEvent'
|
||||
- $ref: '#/components/schemas/StructuredLogEvent'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
unstructured_log: '#/components/schemas/UnstructuredLogEvent'
|
||||
metric: '#/components/schemas/MetricEvent'
|
||||
structured_log: '#/components/schemas/StructuredLogEvent'
|
||||
EventType:
|
||||
type: string
|
||||
enum:
|
||||
- unstructured_log
|
||||
- structured_log
|
||||
- metric
|
||||
title: EventType
|
||||
description: >-
|
||||
The type of telemetry event being logged.
|
||||
LogSeverity:
|
||||
type: string
|
||||
enum:
|
||||
- verbose
|
||||
- debug
|
||||
- info
|
||||
- warn
|
||||
- error
|
||||
- critical
|
||||
title: LogSeverity
|
||||
description: The severity level of a log message.
|
||||
MetricEvent:
|
||||
type: object
|
||||
properties:
|
||||
trace_id:
|
||||
type: string
|
||||
description: >-
|
||||
Unique identifier for the trace this event belongs to
|
||||
span_id:
|
||||
type: string
|
||||
description: >-
|
||||
Unique identifier for the span this event belongs to
|
||||
timestamp:
|
||||
type: string
|
||||
format: date-time
|
||||
description: Timestamp when the event occurred
|
||||
attributes:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: string
|
||||
- type: integer
|
||||
- type: number
|
||||
- type: boolean
|
||||
- type: 'null'
|
||||
description: >-
|
||||
(Optional) Key-value pairs containing additional metadata about the event
|
||||
type:
|
||||
$ref: '#/components/schemas/EventType'
|
||||
const: metric
|
||||
default: metric
|
||||
description: Event type identifier set to METRIC
|
||||
metric:
|
||||
type: string
|
||||
description: The name of the metric being measured
|
||||
value:
|
||||
oneOf:
|
||||
- type: integer
|
||||
- type: number
|
||||
description: >-
|
||||
The numeric value of the metric measurement
|
||||
unit:
|
||||
type: string
|
||||
description: >-
|
||||
The unit of measurement for the metric value
|
||||
additionalProperties: false
|
||||
required:
|
||||
- trace_id
|
||||
- span_id
|
||||
- timestamp
|
||||
- type
|
||||
- metric
|
||||
- value
|
||||
- unit
|
||||
title: MetricEvent
|
||||
description: >-
|
||||
A metric event containing a measured value.
|
||||
SpanEndPayload:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
$ref: '#/components/schemas/StructuredLogType'
|
||||
const: span_end
|
||||
default: span_end
|
||||
description: Payload type identifier set to SPAN_END
|
||||
status:
|
||||
$ref: '#/components/schemas/SpanStatus'
|
||||
description: >-
|
||||
The final status of the span indicating success or failure
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
- status
|
||||
title: SpanEndPayload
|
||||
description: Payload for a span end event.
|
||||
SpanStartPayload:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
$ref: '#/components/schemas/StructuredLogType'
|
||||
const: span_start
|
||||
default: span_start
|
||||
description: >-
|
||||
Payload type identifier set to SPAN_START
|
||||
name:
|
||||
type: string
|
||||
description: >-
|
||||
Human-readable name describing the operation this span represents
|
||||
parent_span_id:
|
||||
type: string
|
||||
description: >-
|
||||
(Optional) Unique identifier for the parent span, if this is a child span
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
- name
|
||||
title: SpanStartPayload
|
||||
description: Payload for a span start event.
|
||||
SpanStatus:
|
||||
type: string
|
||||
enum:
|
||||
- ok
|
||||
- error
|
||||
title: SpanStatus
|
||||
description: >-
|
||||
The status of a span indicating whether it completed successfully or with
|
||||
an error.
|
||||
StructuredLogEvent:
|
||||
type: object
|
||||
properties:
|
||||
trace_id:
|
||||
type: string
|
||||
description: >-
|
||||
Unique identifier for the trace this event belongs to
|
||||
span_id:
|
||||
type: string
|
||||
description: >-
|
||||
Unique identifier for the span this event belongs to
|
||||
timestamp:
|
||||
type: string
|
||||
format: date-time
|
||||
description: Timestamp when the event occurred
|
||||
attributes:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: string
|
||||
- type: integer
|
||||
- type: number
|
||||
- type: boolean
|
||||
- type: 'null'
|
||||
description: >-
|
||||
(Optional) Key-value pairs containing additional metadata about the event
|
||||
type:
|
||||
$ref: '#/components/schemas/EventType'
|
||||
const: structured_log
|
||||
default: structured_log
|
||||
description: >-
|
||||
Event type identifier set to STRUCTURED_LOG
|
||||
payload:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/SpanStartPayload'
|
||||
- $ref: '#/components/schemas/SpanEndPayload'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
span_start: '#/components/schemas/SpanStartPayload'
|
||||
span_end: '#/components/schemas/SpanEndPayload'
|
||||
description: >-
|
||||
The structured payload data for the log event
|
||||
additionalProperties: false
|
||||
required:
|
||||
- trace_id
|
||||
- span_id
|
||||
- timestamp
|
||||
- type
|
||||
- payload
|
||||
title: StructuredLogEvent
|
||||
description: >-
|
||||
A structured log event containing typed payload data.
|
||||
StructuredLogType:
|
||||
type: string
|
||||
enum:
|
||||
- span_start
|
||||
- span_end
|
||||
title: StructuredLogType
|
||||
description: >-
|
||||
The type of structured log event payload.
|
||||
UnstructuredLogEvent:
|
||||
type: object
|
||||
properties:
|
||||
trace_id:
|
||||
type: string
|
||||
description: >-
|
||||
Unique identifier for the trace this event belongs to
|
||||
span_id:
|
||||
type: string
|
||||
description: >-
|
||||
Unique identifier for the span this event belongs to
|
||||
timestamp:
|
||||
type: string
|
||||
format: date-time
|
||||
description: Timestamp when the event occurred
|
||||
attributes:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: string
|
||||
- type: integer
|
||||
- type: number
|
||||
- type: boolean
|
||||
- type: 'null'
|
||||
description: >-
|
||||
(Optional) Key-value pairs containing additional metadata about the event
|
||||
type:
|
||||
$ref: '#/components/schemas/EventType'
|
||||
const: unstructured_log
|
||||
default: unstructured_log
|
||||
description: >-
|
||||
Event type identifier set to UNSTRUCTURED_LOG
|
||||
message:
|
||||
type: string
|
||||
description: The log message text
|
||||
severity:
|
||||
$ref: '#/components/schemas/LogSeverity'
|
||||
description: The severity level of the log message
|
||||
additionalProperties: false
|
||||
required:
|
||||
- trace_id
|
||||
- span_id
|
||||
- timestamp
|
||||
- type
|
||||
- message
|
||||
- severity
|
||||
title: UnstructuredLogEvent
|
||||
description: >-
|
||||
An unstructured log event containing a simple text message.
|
||||
LogEventRequest:
|
||||
type: object
|
||||
properties:
|
||||
event:
|
||||
$ref: '#/components/schemas/Event'
|
||||
description: The event to log.
|
||||
ttl_seconds:
|
||||
type: integer
|
||||
description: The time to live of the event.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- event
|
||||
- ttl_seconds
|
||||
title: LogEventRequest
|
||||
InvokeToolRequest:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -13349,425 +12829,6 @@ components:
|
|||
- hyperparam_search_config
|
||||
- logger_config
|
||||
title: SupervisedFineTuneRequest
|
||||
QueryMetricsRequest:
|
||||
type: object
|
||||
properties:
|
||||
start_time:
|
||||
type: integer
|
||||
description: The start time of the metric to query.
|
||||
end_time:
|
||||
type: integer
|
||||
description: The end time of the metric to query.
|
||||
granularity:
|
||||
type: string
|
||||
description: The granularity of the metric to query.
|
||||
query_type:
|
||||
type: string
|
||||
enum:
|
||||
- range
|
||||
- instant
|
||||
description: The type of query to perform.
|
||||
label_matchers:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
description: The name of the label to match
|
||||
value:
|
||||
type: string
|
||||
description: The value to match against
|
||||
operator:
|
||||
type: string
|
||||
enum:
|
||||
- '='
|
||||
- '!='
|
||||
- =~
|
||||
- '!~'
|
||||
description: >-
|
||||
The comparison operator to use for matching
|
||||
default: '='
|
||||
additionalProperties: false
|
||||
required:
|
||||
- name
|
||||
- value
|
||||
- operator
|
||||
title: MetricLabelMatcher
|
||||
description: >-
|
||||
A matcher for filtering metrics by label values.
|
||||
description: >-
|
||||
The label matchers to apply to the metric.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- start_time
|
||||
- query_type
|
||||
title: QueryMetricsRequest
|
||||
MetricDataPoint:
|
||||
type: object
|
||||
properties:
|
||||
timestamp:
|
||||
type: integer
|
||||
description: >-
|
||||
Unix timestamp when the metric value was recorded
|
||||
value:
|
||||
type: number
|
||||
description: >-
|
||||
The numeric value of the metric at this timestamp
|
||||
unit:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- timestamp
|
||||
- value
|
||||
- unit
|
||||
title: MetricDataPoint
|
||||
description: >-
|
||||
A single data point in a metric time series.
|
||||
MetricLabel:
|
||||
type: object
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
description: The name of the label
|
||||
value:
|
||||
type: string
|
||||
description: The value of the label
|
||||
additionalProperties: false
|
||||
required:
|
||||
- name
|
||||
- value
|
||||
title: MetricLabel
|
||||
description: A label associated with a metric.
|
||||
MetricSeries:
|
||||
type: object
|
||||
properties:
|
||||
metric:
|
||||
type: string
|
||||
description: The name of the metric
|
||||
labels:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/MetricLabel'
|
||||
description: >-
|
||||
List of labels associated with this metric series
|
||||
values:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/MetricDataPoint'
|
||||
description: >-
|
||||
List of data points in chronological order
|
||||
additionalProperties: false
|
||||
required:
|
||||
- metric
|
||||
- labels
|
||||
- values
|
||||
title: MetricSeries
|
||||
description: A time series of metric data points.
|
||||
QueryMetricsResponse:
|
||||
type: object
|
||||
properties:
|
||||
data:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/MetricSeries'
|
||||
description: >-
|
||||
List of metric series matching the query criteria
|
||||
additionalProperties: false
|
||||
required:
|
||||
- data
|
||||
title: QueryMetricsResponse
|
||||
description: >-
|
||||
Response containing metric time series data.
|
||||
QueryCondition:
|
||||
type: object
|
||||
properties:
|
||||
key:
|
||||
type: string
|
||||
description: The attribute key to filter on
|
||||
op:
|
||||
$ref: '#/components/schemas/QueryConditionOp'
|
||||
description: The comparison operator to apply
|
||||
value:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: The value to compare against
|
||||
additionalProperties: false
|
||||
required:
|
||||
- key
|
||||
- op
|
||||
- value
|
||||
title: QueryCondition
|
||||
description: A condition for filtering query results.
|
||||
QueryConditionOp:
|
||||
type: string
|
||||
enum:
|
||||
- eq
|
||||
- ne
|
||||
- gt
|
||||
- lt
|
||||
title: QueryConditionOp
|
||||
description: >-
|
||||
Comparison operators for query conditions.
|
||||
QuerySpansRequest:
|
||||
type: object
|
||||
properties:
|
||||
attribute_filters:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/QueryCondition'
|
||||
description: >-
|
||||
The attribute filters to apply to the spans.
|
||||
attributes_to_return:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
description: The attributes to return in the spans.
|
||||
max_depth:
|
||||
type: integer
|
||||
description: The maximum depth of the tree.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- attribute_filters
|
||||
- attributes_to_return
|
||||
title: QuerySpansRequest
|
||||
Span:
|
||||
type: object
|
||||
properties:
|
||||
span_id:
|
||||
type: string
|
||||
description: Unique identifier for the span
|
||||
trace_id:
|
||||
type: string
|
||||
description: >-
|
||||
Unique identifier for the trace this span belongs to
|
||||
parent_span_id:
|
||||
type: string
|
||||
description: >-
|
||||
(Optional) Unique identifier for the parent span, if this is a child span
|
||||
name:
|
||||
type: string
|
||||
description: >-
|
||||
Human-readable name describing the operation this span represents
|
||||
start_time:
|
||||
type: string
|
||||
format: date-time
|
||||
description: Timestamp when the operation began
|
||||
end_time:
|
||||
type: string
|
||||
format: date-time
|
||||
description: >-
|
||||
(Optional) Timestamp when the operation finished, if completed
|
||||
attributes:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: >-
|
||||
(Optional) Key-value pairs containing additional metadata about the span
|
||||
additionalProperties: false
|
||||
required:
|
||||
- span_id
|
||||
- trace_id
|
||||
- name
|
||||
- start_time
|
||||
title: Span
|
||||
description: >-
|
||||
A span representing a single operation within a trace.
|
||||
QuerySpansResponse:
|
||||
type: object
|
||||
properties:
|
||||
data:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/Span'
|
||||
description: >-
|
||||
List of spans matching the query criteria
|
||||
additionalProperties: false
|
||||
required:
|
||||
- data
|
||||
title: QuerySpansResponse
|
||||
description: Response containing a list of spans.
|
||||
SaveSpansToDatasetRequest:
|
||||
type: object
|
||||
properties:
|
||||
attribute_filters:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/QueryCondition'
|
||||
description: >-
|
||||
The attribute filters to apply to the spans.
|
||||
attributes_to_save:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
description: The attributes to save to the dataset.
|
||||
dataset_id:
|
||||
type: string
|
||||
description: >-
|
||||
The ID of the dataset to save the spans to.
|
||||
max_depth:
|
||||
type: integer
|
||||
description: The maximum depth of the tree.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- attribute_filters
|
||||
- attributes_to_save
|
||||
- dataset_id
|
||||
title: SaveSpansToDatasetRequest
|
||||
GetSpanTreeRequest:
|
||||
type: object
|
||||
properties:
|
||||
attributes_to_return:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
description: The attributes to return in the tree.
|
||||
max_depth:
|
||||
type: integer
|
||||
description: The maximum depth of the tree.
|
||||
additionalProperties: false
|
||||
title: GetSpanTreeRequest
|
||||
SpanWithStatus:
|
||||
type: object
|
||||
properties:
|
||||
span_id:
|
||||
type: string
|
||||
description: Unique identifier for the span
|
||||
trace_id:
|
||||
type: string
|
||||
description: >-
|
||||
Unique identifier for the trace this span belongs to
|
||||
parent_span_id:
|
||||
type: string
|
||||
description: >-
|
||||
(Optional) Unique identifier for the parent span, if this is a child span
|
||||
name:
|
||||
type: string
|
||||
description: >-
|
||||
Human-readable name describing the operation this span represents
|
||||
start_time:
|
||||
type: string
|
||||
format: date-time
|
||||
description: Timestamp when the operation began
|
||||
end_time:
|
||||
type: string
|
||||
format: date-time
|
||||
description: >-
|
||||
(Optional) Timestamp when the operation finished, if completed
|
||||
attributes:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: >-
|
||||
(Optional) Key-value pairs containing additional metadata about the span
|
||||
status:
|
||||
$ref: '#/components/schemas/SpanStatus'
|
||||
description: >-
|
||||
(Optional) The current status of the span
|
||||
additionalProperties: false
|
||||
required:
|
||||
- span_id
|
||||
- trace_id
|
||||
- name
|
||||
- start_time
|
||||
title: SpanWithStatus
|
||||
description: A span that includes status information.
|
||||
QuerySpanTreeResponse:
|
||||
type: object
|
||||
properties:
|
||||
data:
|
||||
type: object
|
||||
additionalProperties:
|
||||
$ref: '#/components/schemas/SpanWithStatus'
|
||||
description: >-
|
||||
Dictionary mapping span IDs to spans with status information
|
||||
additionalProperties: false
|
||||
required:
|
||||
- data
|
||||
title: QuerySpanTreeResponse
|
||||
description: >-
|
||||
Response containing a tree structure of spans.
|
||||
QueryTracesRequest:
|
||||
type: object
|
||||
properties:
|
||||
attribute_filters:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/QueryCondition'
|
||||
description: >-
|
||||
The attribute filters to apply to the traces.
|
||||
limit:
|
||||
type: integer
|
||||
description: The limit of traces to return.
|
||||
offset:
|
||||
type: integer
|
||||
description: The offset of the traces to return.
|
||||
order_by:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
description: The order by of the traces to return.
|
||||
additionalProperties: false
|
||||
title: QueryTracesRequest
|
||||
Trace:
|
||||
type: object
|
||||
properties:
|
||||
trace_id:
|
||||
type: string
|
||||
description: Unique identifier for the trace
|
||||
root_span_id:
|
||||
type: string
|
||||
description: >-
|
||||
Unique identifier for the root span that started this trace
|
||||
start_time:
|
||||
type: string
|
||||
format: date-time
|
||||
description: Timestamp when the trace began
|
||||
end_time:
|
||||
type: string
|
||||
format: date-time
|
||||
description: >-
|
||||
(Optional) Timestamp when the trace finished, if completed
|
||||
additionalProperties: false
|
||||
required:
|
||||
- trace_id
|
||||
- root_span_id
|
||||
- start_time
|
||||
title: Trace
|
||||
description: >-
|
||||
A trace representing the complete execution path of a request across multiple
|
||||
operations.
|
||||
QueryTracesResponse:
|
||||
type: object
|
||||
properties:
|
||||
data:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/Trace'
|
||||
description: >-
|
||||
List of traces matching the query criteria
|
||||
additionalProperties: false
|
||||
required:
|
||||
- data
|
||||
title: QueryTracesResponse
|
||||
description: Response containing a list of traces.
|
||||
responses:
|
||||
BadRequest400:
|
||||
description: The request was invalid or malformed
|
||||
|
@ -13881,8 +12942,6 @@ tags:
|
|||
description: ''
|
||||
- name: SyntheticDataGeneration (Coming Soon)
|
||||
description: ''
|
||||
- name: Telemetry
|
||||
description: ''
|
||||
- name: ToolGroups
|
||||
description: ''
|
||||
- name: ToolRuntime
|
||||
|
@ -13912,7 +12971,6 @@ x-tagGroups:
|
|||
- ScoringFunctions
|
||||
- Shields
|
||||
- SyntheticDataGeneration (Coming Soon)
|
||||
- Telemetry
|
||||
- ToolGroups
|
||||
- ToolRuntime
|
||||
- VectorDBs
|
||||
|
|
|
@ -16,15 +16,12 @@ from typing import (
|
|||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
|
||||
from llama_stack.models.llama.datatypes import Primitive
|
||||
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
|
||||
from llama_stack.schema_utils import json_schema_type, register_schema
|
||||
|
||||
# Add this constant near the top of the file, after the imports
|
||||
DEFAULT_TTL_DAYS = 7
|
||||
|
||||
REQUIRED_SCOPE = "telemetry.read"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class SpanStatus(Enum):
|
||||
|
@ -413,7 +410,6 @@ class QueryMetricsResponse(BaseModel):
|
|||
|
||||
@runtime_checkable
|
||||
class Telemetry(Protocol):
|
||||
@webmethod(route="/telemetry/events", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def log_event(
|
||||
self,
|
||||
event: Event,
|
||||
|
@ -426,14 +422,6 @@ class Telemetry(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/telemetry/traces",
|
||||
method="POST",
|
||||
required_scope=REQUIRED_SCOPE,
|
||||
deprecated=True,
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
@webmethod(route="/telemetry/traces", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def query_traces(
|
||||
self,
|
||||
attribute_filters: list[QueryCondition] | None = None,
|
||||
|
@ -451,19 +439,6 @@ class Telemetry(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/telemetry/traces/{trace_id:path}",
|
||||
method="GET",
|
||||
required_scope=REQUIRED_SCOPE,
|
||||
deprecated=True,
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
@webmethod(
|
||||
route="/telemetry/traces/{trace_id:path}",
|
||||
method="GET",
|
||||
required_scope=REQUIRED_SCOPE,
|
||||
level=LLAMA_STACK_API_V1ALPHA,
|
||||
)
|
||||
async def get_trace(self, trace_id: str) -> Trace:
|
||||
"""Get a trace by its ID.
|
||||
|
||||
|
@ -472,19 +447,6 @@ class Telemetry(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/telemetry/traces/{trace_id:path}/spans/{span_id:path}",
|
||||
method="GET",
|
||||
required_scope=REQUIRED_SCOPE,
|
||||
deprecated=True,
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
@webmethod(
|
||||
route="/telemetry/traces/{trace_id:path}/spans/{span_id:path}",
|
||||
method="GET",
|
||||
required_scope=REQUIRED_SCOPE,
|
||||
level=LLAMA_STACK_API_V1ALPHA,
|
||||
)
|
||||
async def get_span(self, trace_id: str, span_id: str) -> Span:
|
||||
"""Get a span by its ID.
|
||||
|
||||
|
@ -494,19 +456,6 @@ class Telemetry(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/telemetry/spans/{span_id:path}/tree",
|
||||
method="POST",
|
||||
deprecated=True,
|
||||
required_scope=REQUIRED_SCOPE,
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
@webmethod(
|
||||
route="/telemetry/spans/{span_id:path}/tree",
|
||||
method="POST",
|
||||
required_scope=REQUIRED_SCOPE,
|
||||
level=LLAMA_STACK_API_V1ALPHA,
|
||||
)
|
||||
async def get_span_tree(
|
||||
self,
|
||||
span_id: str,
|
||||
|
@ -522,14 +471,6 @@ class Telemetry(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/telemetry/spans",
|
||||
method="POST",
|
||||
required_scope=REQUIRED_SCOPE,
|
||||
deprecated=True,
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
@webmethod(route="/telemetry/spans", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def query_spans(
|
||||
self,
|
||||
attribute_filters: list[QueryCondition],
|
||||
|
@ -545,8 +486,6 @@ class Telemetry(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/telemetry/spans/export", method="POST", deprecated=True, level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/telemetry/spans/export", method="POST", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def save_spans_to_dataset(
|
||||
self,
|
||||
attribute_filters: list[QueryCondition],
|
||||
|
@ -563,19 +502,6 @@ class Telemetry(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/telemetry/metrics/{metric_name}",
|
||||
method="POST",
|
||||
required_scope=REQUIRED_SCOPE,
|
||||
deprecated=True,
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
@webmethod(
|
||||
route="/telemetry/metrics/{metric_name}",
|
||||
method="POST",
|
||||
required_scope=REQUIRED_SCOPE,
|
||||
level=LLAMA_STACK_API_V1ALPHA,
|
||||
)
|
||||
async def query_metrics(
|
||||
self,
|
||||
metric_name: str,
|
||||
|
|
|
@ -32,7 +32,7 @@ from llama_stack.providers.utils.sqlstore.sqlstore import (
|
|||
sqlstore_impl,
|
||||
)
|
||||
|
||||
logger = get_logger(name=__name__, category="openai::conversations")
|
||||
logger = get_logger(name=__name__, category="openai_conversations")
|
||||
|
||||
|
||||
class ConversationServiceConfig(BaseModel):
|
||||
|
|
|
@ -611,7 +611,7 @@ class InferenceRouter(Inference):
|
|||
completion_text += "".join(choice_data["content_parts"])
|
||||
|
||||
# Add metrics to the chunk
|
||||
if self.telemetry and chunk.usage:
|
||||
if self.telemetry and hasattr(chunk, "usage") and chunk.usage:
|
||||
metrics = self._construct_metrics(
|
||||
prompt_tokens=chunk.usage.prompt_tokens,
|
||||
completion_tokens=chunk.usage.completion_tokens,
|
||||
|
|
|
@ -98,7 +98,10 @@ class DiskDistributionRegistry(DistributionRegistry):
|
|||
existing_obj = await self.get(obj.type, obj.identifier)
|
||||
# dont register if the object's providerid already exists
|
||||
if existing_obj and existing_obj.provider_id == obj.provider_id:
|
||||
return False
|
||||
raise ValueError(
|
||||
f"Provider '{obj.provider_id}' is already registered."
|
||||
f"Unregister the existing provider first before registering it again."
|
||||
)
|
||||
|
||||
await self.kvstore.set(
|
||||
KEY_FORMAT.format(type=obj.type, identifier=obj.identifier),
|
||||
|
|
|
@ -3,3 +3,5 @@
|
|||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .watsonx import get_distribution_template # noqa: F401
|
||||
|
|
|
@ -3,44 +3,33 @@ distribution_spec:
|
|||
description: Use watsonx for running LLM inference
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: watsonx
|
||||
provider_type: remote::watsonx
|
||||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
- provider_type: remote::watsonx
|
||||
- provider_type: inline::sentence-transformers
|
||||
vector_io:
|
||||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
- provider_type: inline::faiss
|
||||
safety:
|
||||
- provider_id: llama-guard
|
||||
provider_type: inline::llama-guard
|
||||
- provider_type: inline::llama-guard
|
||||
agents:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
- provider_type: inline::meta-reference
|
||||
telemetry:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
- provider_type: inline::meta-reference
|
||||
eval:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
- provider_type: inline::meta-reference
|
||||
datasetio:
|
||||
- provider_id: huggingface
|
||||
provider_type: remote::huggingface
|
||||
- provider_id: localfs
|
||||
provider_type: inline::localfs
|
||||
- provider_type: remote::huggingface
|
||||
- provider_type: inline::localfs
|
||||
scoring:
|
||||
- provider_id: basic
|
||||
provider_type: inline::basic
|
||||
- provider_id: llm-as-judge
|
||||
provider_type: inline::llm-as-judge
|
||||
- provider_id: braintrust
|
||||
provider_type: inline::braintrust
|
||||
- provider_type: inline::basic
|
||||
- provider_type: inline::llm-as-judge
|
||||
- provider_type: inline::braintrust
|
||||
tool_runtime:
|
||||
- provider_type: remote::brave-search
|
||||
- provider_type: remote::tavily-search
|
||||
- provider_type: inline::rag-runtime
|
||||
- provider_type: remote::model-context-protocol
|
||||
files:
|
||||
- provider_type: inline::localfs
|
||||
image_type: venv
|
||||
additional_pip_packages:
|
||||
- aiosqlite
|
||||
- sqlalchemy[asyncio]
|
||||
- aiosqlite
|
||||
- aiosqlite
|
||||
|
|
|
@ -4,13 +4,13 @@ apis:
|
|||
- agents
|
||||
- datasetio
|
||||
- eval
|
||||
- files
|
||||
- inference
|
||||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
- files
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: watsonx
|
||||
|
@ -19,8 +19,6 @@ providers:
|
|||
url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
|
||||
api_key: ${env.WATSONX_API_KEY:=}
|
||||
project_id: ${env.WATSONX_PROJECT_ID:=}
|
||||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
vector_io:
|
||||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
|
@ -48,7 +46,7 @@ providers:
|
|||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
|
||||
sinks: ${env.TELEMETRY_SINKS:=sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/trace_store.db
|
||||
otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
|
||||
eval:
|
||||
|
@ -109,102 +107,7 @@ metadata_store:
|
|||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/inference_store.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: meta-llama/llama-3-3-70b-instruct
|
||||
provider_id: watsonx
|
||||
provider_model_id: meta-llama/llama-3-3-70b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.3-70B-Instruct
|
||||
provider_id: watsonx
|
||||
provider_model_id: meta-llama/llama-3-3-70b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/llama-2-13b-chat
|
||||
provider_id: watsonx
|
||||
provider_model_id: meta-llama/llama-2-13b-chat
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-2-13b
|
||||
provider_id: watsonx
|
||||
provider_model_id: meta-llama/llama-2-13b-chat
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/llama-3-1-70b-instruct
|
||||
provider_id: watsonx
|
||||
provider_model_id: meta-llama/llama-3-1-70b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.1-70B-Instruct
|
||||
provider_id: watsonx
|
||||
provider_model_id: meta-llama/llama-3-1-70b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/llama-3-1-8b-instruct
|
||||
provider_id: watsonx
|
||||
provider_model_id: meta-llama/llama-3-1-8b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.1-8B-Instruct
|
||||
provider_id: watsonx
|
||||
provider_model_id: meta-llama/llama-3-1-8b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/llama-3-2-11b-vision-instruct
|
||||
provider_id: watsonx
|
||||
provider_model_id: meta-llama/llama-3-2-11b-vision-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
|
||||
provider_id: watsonx
|
||||
provider_model_id: meta-llama/llama-3-2-11b-vision-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/llama-3-2-1b-instruct
|
||||
provider_id: watsonx
|
||||
provider_model_id: meta-llama/llama-3-2-1b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.2-1B-Instruct
|
||||
provider_id: watsonx
|
||||
provider_model_id: meta-llama/llama-3-2-1b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/llama-3-2-3b-instruct
|
||||
provider_id: watsonx
|
||||
provider_model_id: meta-llama/llama-3-2-3b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.2-3B-Instruct
|
||||
provider_id: watsonx
|
||||
provider_model_id: meta-llama/llama-3-2-3b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/llama-3-2-90b-vision-instruct
|
||||
provider_id: watsonx
|
||||
provider_model_id: meta-llama/llama-3-2-90b-vision-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
|
||||
provider_id: watsonx
|
||||
provider_model_id: meta-llama/llama-3-2-90b-vision-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/llama-guard-3-11b-vision
|
||||
provider_id: watsonx
|
||||
provider_model_id: meta-llama/llama-guard-3-11b-vision
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-Guard-3-11B-Vision
|
||||
provider_id: watsonx
|
||||
provider_model_id: meta-llama/llama-guard-3-11b-vision
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 384
|
||||
model_id: all-MiniLM-L6-v2
|
||||
provider_id: sentence-transformers
|
||||
model_type: embedding
|
||||
models: []
|
||||
shields: []
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
|
|
|
@ -4,17 +4,11 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from llama_stack.apis.models import ModelType
|
||||
from llama_stack.core.datatypes import BuildProvider, ModelInput, Provider, ToolGroupInput
|
||||
from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings, get_model_registry
|
||||
from llama_stack.core.datatypes import BuildProvider, Provider, ToolGroupInput
|
||||
from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings
|
||||
from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig
|
||||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||
SentenceTransformersInferenceConfig,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.watsonx import WatsonXConfig
|
||||
from llama_stack.providers.remote.inference.watsonx.models import MODEL_ENTRIES
|
||||
|
||||
|
||||
def get_distribution_template(name: str = "watsonx") -> DistributionTemplate:
|
||||
|
@ -52,15 +46,6 @@ def get_distribution_template(name: str = "watsonx") -> DistributionTemplate:
|
|||
config=WatsonXConfig.sample_run_config(),
|
||||
)
|
||||
|
||||
embedding_provider = Provider(
|
||||
provider_id="sentence-transformers",
|
||||
provider_type="inline::sentence-transformers",
|
||||
config=SentenceTransformersInferenceConfig.sample_run_config(),
|
||||
)
|
||||
|
||||
available_models = {
|
||||
"watsonx": MODEL_ENTRIES,
|
||||
}
|
||||
default_tool_groups = [
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::websearch",
|
||||
|
@ -72,36 +57,25 @@ def get_distribution_template(name: str = "watsonx") -> DistributionTemplate:
|
|||
),
|
||||
]
|
||||
|
||||
embedding_model = ModelInput(
|
||||
model_id="all-MiniLM-L6-v2",
|
||||
provider_id="sentence-transformers",
|
||||
model_type=ModelType.embedding,
|
||||
metadata={
|
||||
"embedding_dimension": 384,
|
||||
},
|
||||
)
|
||||
|
||||
files_provider = Provider(
|
||||
provider_id="meta-reference-files",
|
||||
provider_type="inline::localfs",
|
||||
config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"),
|
||||
)
|
||||
default_models, _ = get_model_registry(available_models)
|
||||
return DistributionTemplate(
|
||||
name=name,
|
||||
distro_type="remote_hosted",
|
||||
description="Use watsonx for running LLM inference",
|
||||
container_image=None,
|
||||
template_path=Path(__file__).parent / "doc_template.md",
|
||||
template_path=None,
|
||||
providers=providers,
|
||||
available_models_by_provider=available_models,
|
||||
run_configs={
|
||||
"run.yaml": RunConfigSettings(
|
||||
provider_overrides={
|
||||
"inference": [inference_provider, embedding_provider],
|
||||
"inference": [inference_provider],
|
||||
"files": [files_provider],
|
||||
},
|
||||
default_models=default_models + [embedding_model],
|
||||
default_models=[],
|
||||
default_tool_groups=default_tool_groups,
|
||||
),
|
||||
},
|
||||
|
|
|
@ -31,12 +31,17 @@ CATEGORIES = [
|
|||
"client",
|
||||
"telemetry",
|
||||
"openai_responses",
|
||||
"openai_conversations",
|
||||
"testing",
|
||||
"providers",
|
||||
"models",
|
||||
"files",
|
||||
"vector_io",
|
||||
"tool_runtime",
|
||||
"cli",
|
||||
"post_training",
|
||||
"scoring",
|
||||
"tests",
|
||||
]
|
||||
UNCATEGORIZED = "uncategorized"
|
||||
|
||||
|
@ -261,11 +266,12 @@ def get_logger(
|
|||
if root_category in _category_levels:
|
||||
log_level = _category_levels[root_category]
|
||||
else:
|
||||
log_level = _category_levels.get("root", DEFAULT_LOG_LEVEL)
|
||||
if category != UNCATEGORIZED:
|
||||
logging.warning(
|
||||
f"Unknown logging category: {category}. Falling back to default 'root' level: {log_level}"
|
||||
raise ValueError(
|
||||
f"Unknown logging category: {category}. To resolve, choose a valid category from the CATEGORIES list "
|
||||
f"or add it to the CATEGORIES list. Available categories: {CATEGORIES}"
|
||||
)
|
||||
log_level = _category_levels.get("root", DEFAULT_LOG_LEVEL)
|
||||
logger.setLevel(log_level)
|
||||
return logging.LoggerAdapter(logger, {"category": category})
|
||||
|
||||
|
|
|
@ -11,19 +11,13 @@
|
|||
# top-level folder for each specific model found within the models/ directory at
|
||||
# the top-level of this source tree.
|
||||
|
||||
import json
|
||||
import textwrap
|
||||
from pathlib import Path
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.models.llama.datatypes import (
|
||||
RawContent,
|
||||
RawMediaItem,
|
||||
RawMessage,
|
||||
RawTextItem,
|
||||
StopReason,
|
||||
ToolCall,
|
||||
ToolPromptFormat,
|
||||
)
|
||||
from llama_stack.models.llama.llama4.tokenizer import Tokenizer
|
||||
|
@ -175,25 +169,6 @@ def llama3_1_builtin_code_interpreter_dialog(tool_prompt_format=ToolPromptFormat
|
|||
return messages
|
||||
|
||||
|
||||
def llama3_1_builtin_tool_call_with_image_dialog(
|
||||
tool_prompt_format=ToolPromptFormat.json,
|
||||
):
|
||||
this_dir = Path(__file__).parent
|
||||
with open(this_dir / "llama3/dog.jpg", "rb") as f:
|
||||
img = f.read()
|
||||
|
||||
interface = LLama31Interface(tool_prompt_format)
|
||||
|
||||
messages = interface.system_messages(**system_message_builtin_tools_only())
|
||||
messages += interface.user_message(content=[RawMediaItem(data=img), RawTextItem(text="What is this dog breed?")])
|
||||
messages += interface.assistant_response_messages(
|
||||
"Based on the description of the dog in the image, it appears to be a small breed dog, possibly a terrier mix",
|
||||
StopReason.end_of_turn,
|
||||
)
|
||||
messages += interface.user_message("Search the web for some food recommendations for the indentified breed")
|
||||
return messages
|
||||
|
||||
|
||||
def llama3_1_custom_tool_call_dialog(tool_prompt_format=ToolPromptFormat.json):
|
||||
interface = LLama31Interface(tool_prompt_format)
|
||||
|
||||
|
@ -202,35 +177,6 @@ def llama3_1_custom_tool_call_dialog(tool_prompt_format=ToolPromptFormat.json):
|
|||
return messages
|
||||
|
||||
|
||||
def llama3_1_e2e_tool_call_dialog(tool_prompt_format=ToolPromptFormat.json):
|
||||
tool_response = json.dumps(["great song1", "awesome song2", "cool song3"])
|
||||
interface = LLama31Interface(tool_prompt_format)
|
||||
|
||||
messages = interface.system_messages(**system_message_custom_tools_only())
|
||||
messages += interface.user_message(content="Use tools to get latest trending songs")
|
||||
messages.append(
|
||||
RawMessage(
|
||||
role="assistant",
|
||||
content="",
|
||||
stop_reason=StopReason.end_of_message,
|
||||
tool_calls=[
|
||||
ToolCall(
|
||||
call_id="call_id",
|
||||
tool_name="trending_songs",
|
||||
arguments={"n": "10", "genre": "latest"},
|
||||
)
|
||||
],
|
||||
),
|
||||
)
|
||||
messages.append(
|
||||
RawMessage(
|
||||
role="assistant",
|
||||
content=tool_response,
|
||||
)
|
||||
)
|
||||
return messages
|
||||
|
||||
|
||||
def llama3_2_user_assistant_conversation():
|
||||
return UseCase(
|
||||
title="User and assistant conversation",
|
||||
|
|
|
@ -7,8 +7,6 @@
|
|||
import copy
|
||||
import json
|
||||
import re
|
||||
import secrets
|
||||
import string
|
||||
import uuid
|
||||
import warnings
|
||||
from collections.abc import AsyncGenerator
|
||||
|
@ -84,11 +82,6 @@ from llama_stack.providers.utils.telemetry import tracing
|
|||
from .persistence import AgentPersistence
|
||||
from .safety import SafetyException, ShieldRunnerMixin
|
||||
|
||||
|
||||
def make_random_string(length: int = 8):
|
||||
return "".join(secrets.choice(string.ascii_letters + string.digits) for _ in range(length))
|
||||
|
||||
|
||||
TOOLS_ATTACHMENT_KEY_REGEX = re.compile(r"__tools_attachment__=(\{.*?\})")
|
||||
MEMORY_QUERY_TOOL = "knowledge_search"
|
||||
WEB_SEARCH_TOOL = "web_search"
|
||||
|
|
|
@ -269,7 +269,7 @@ class OpenAIResponsesImpl:
|
|||
response_tools=tools,
|
||||
temperature=temperature,
|
||||
response_format=response_format,
|
||||
inputs=input,
|
||||
inputs=all_input,
|
||||
)
|
||||
|
||||
# Create orchestrator and delegate streaming logic
|
||||
|
|
|
@ -175,6 +175,8 @@ class StreamingResponseOrchestrator:
|
|||
):
|
||||
yield stream_event
|
||||
|
||||
messages = next_turn_messages
|
||||
|
||||
if not function_tool_calls and not non_function_tool_calls:
|
||||
break
|
||||
|
||||
|
@ -187,9 +189,7 @@ class StreamingResponseOrchestrator:
|
|||
logger.info(f"Exiting inference loop since iteration count({n_iter}) exceeds {self.max_infer_iters=}")
|
||||
break
|
||||
|
||||
messages = next_turn_messages
|
||||
|
||||
self.final_messages = messages.copy() + [current_response.choices[0].message]
|
||||
self.final_messages = messages.copy()
|
||||
|
||||
# Create final response
|
||||
final_response = OpenAIResponseObject(
|
||||
|
@ -232,9 +232,11 @@ class StreamingResponseOrchestrator:
|
|||
non_function_tool_calls.append(tool_call)
|
||||
else:
|
||||
logger.info(f"Approval denied for {tool_call.id} on {tool_call.function.name}")
|
||||
next_turn_messages.pop()
|
||||
else:
|
||||
logger.info(f"Requesting approval for {tool_call.id} on {tool_call.function.name}")
|
||||
approvals.append(tool_call)
|
||||
next_turn_messages.pop()
|
||||
else:
|
||||
non_function_tool_calls.append(tool_call)
|
||||
|
||||
|
|
|
@ -8,8 +8,6 @@ import asyncio
|
|||
import base64
|
||||
import io
|
||||
import mimetypes
|
||||
import secrets
|
||||
import string
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
|
@ -52,10 +50,6 @@ from .context_retriever import generate_rag_query
|
|||
log = get_logger(name=__name__, category="tool_runtime")
|
||||
|
||||
|
||||
def make_random_string(length: int = 8):
|
||||
return "".join(secrets.choice(string.ascii_letters + string.digits) for _ in range(length))
|
||||
|
||||
|
||||
async def raw_data_from_doc(doc: RAGDocument) -> tuple[bytes, str]:
|
||||
"""Get raw binary data and mime type from a RAGDocument for file upload."""
|
||||
if isinstance(doc.content, URL):
|
||||
|
|
|
@ -268,7 +268,7 @@ Available Models:
|
|||
api=Api.inference,
|
||||
adapter_type="watsonx",
|
||||
provider_type="remote::watsonx",
|
||||
pip_packages=["ibm_watsonx_ai"],
|
||||
pip_packages=["litellm"],
|
||||
module="llama_stack.providers.remote.inference.watsonx",
|
||||
config_class="llama_stack.providers.remote.inference.watsonx.WatsonXConfig",
|
||||
provider_data_validator="llama_stack.providers.remote.inference.watsonx.WatsonXProviderDataValidator",
|
||||
|
|
|
@ -1,217 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import warnings
|
||||
from collections.abc import AsyncGenerator
|
||||
from typing import Any
|
||||
|
||||
from openai import AsyncStream
|
||||
from openai.types.chat.chat_completion import (
|
||||
Choice as OpenAIChoice,
|
||||
)
|
||||
from openai.types.completion import Completion as OpenAICompletion
|
||||
from openai.types.completion_choice import Logprobs as OpenAICompletionLogprobs
|
||||
|
||||
from llama_stack.apis.inference import (
|
||||
ChatCompletionRequest,
|
||||
CompletionRequest,
|
||||
CompletionResponse,
|
||||
CompletionResponseStreamChunk,
|
||||
GreedySamplingStrategy,
|
||||
JsonSchemaResponseFormat,
|
||||
TokenLogProbs,
|
||||
TopKSamplingStrategy,
|
||||
TopPSamplingStrategy,
|
||||
)
|
||||
from llama_stack.providers.utils.inference.openai_compat import (
|
||||
_convert_openai_finish_reason,
|
||||
convert_message_to_openai_dict_new,
|
||||
convert_tooldef_to_openai_tool,
|
||||
)
|
||||
|
||||
|
||||
async def convert_chat_completion_request(
|
||||
request: ChatCompletionRequest,
|
||||
n: int = 1,
|
||||
) -> dict:
|
||||
"""
|
||||
Convert a ChatCompletionRequest to an OpenAI API-compatible dictionary.
|
||||
"""
|
||||
# model -> model
|
||||
# messages -> messages
|
||||
# sampling_params TODO(mattf): review strategy
|
||||
# strategy=greedy -> nvext.top_k = -1, temperature = temperature
|
||||
# strategy=top_p -> nvext.top_k = -1, top_p = top_p
|
||||
# strategy=top_k -> nvext.top_k = top_k
|
||||
# temperature -> temperature
|
||||
# top_p -> top_p
|
||||
# top_k -> nvext.top_k
|
||||
# max_tokens -> max_tokens
|
||||
# repetition_penalty -> nvext.repetition_penalty
|
||||
# response_format -> GrammarResponseFormat TODO(mf)
|
||||
# response_format -> JsonSchemaResponseFormat: response_format = "json_object" & nvext["guided_json"] = json_schema
|
||||
# tools -> tools
|
||||
# tool_choice ("auto", "required") -> tool_choice
|
||||
# tool_prompt_format -> TBD
|
||||
# stream -> stream
|
||||
# logprobs -> logprobs
|
||||
|
||||
if request.response_format and not isinstance(request.response_format, JsonSchemaResponseFormat):
|
||||
raise ValueError(
|
||||
f"Unsupported response format: {request.response_format}. Only JsonSchemaResponseFormat is supported."
|
||||
)
|
||||
|
||||
nvext = {}
|
||||
payload: dict[str, Any] = dict(
|
||||
model=request.model,
|
||||
messages=[await convert_message_to_openai_dict_new(message) for message in request.messages],
|
||||
stream=request.stream,
|
||||
n=n,
|
||||
extra_body=dict(nvext=nvext),
|
||||
extra_headers={
|
||||
b"User-Agent": b"llama-stack: nvidia-inference-adapter",
|
||||
},
|
||||
)
|
||||
|
||||
if request.response_format:
|
||||
# server bug - setting guided_json changes the behavior of response_format resulting in an error
|
||||
# payload.update(response_format="json_object")
|
||||
nvext.update(guided_json=request.response_format.json_schema)
|
||||
|
||||
if request.tools:
|
||||
payload.update(tools=[convert_tooldef_to_openai_tool(tool) for tool in request.tools])
|
||||
if request.tool_config.tool_choice:
|
||||
payload.update(
|
||||
tool_choice=request.tool_config.tool_choice.value
|
||||
) # we cannot include tool_choice w/o tools, server will complain
|
||||
|
||||
if request.logprobs:
|
||||
payload.update(logprobs=True)
|
||||
payload.update(top_logprobs=request.logprobs.top_k)
|
||||
|
||||
if request.sampling_params:
|
||||
nvext.update(repetition_penalty=request.sampling_params.repetition_penalty)
|
||||
|
||||
if request.sampling_params.max_tokens:
|
||||
payload.update(max_tokens=request.sampling_params.max_tokens)
|
||||
|
||||
strategy = request.sampling_params.strategy
|
||||
if isinstance(strategy, TopPSamplingStrategy):
|
||||
nvext.update(top_k=-1)
|
||||
payload.update(top_p=strategy.top_p)
|
||||
payload.update(temperature=strategy.temperature)
|
||||
elif isinstance(strategy, TopKSamplingStrategy):
|
||||
if strategy.top_k != -1 and strategy.top_k < 1:
|
||||
warnings.warn("top_k must be -1 or >= 1", stacklevel=2)
|
||||
nvext.update(top_k=strategy.top_k)
|
||||
elif isinstance(strategy, GreedySamplingStrategy):
|
||||
nvext.update(top_k=-1)
|
||||
else:
|
||||
raise ValueError(f"Unsupported sampling strategy: {strategy}")
|
||||
|
||||
return payload
|
||||
|
||||
|
||||
def convert_completion_request(
|
||||
request: CompletionRequest,
|
||||
n: int = 1,
|
||||
) -> dict:
|
||||
"""
|
||||
Convert a ChatCompletionRequest to an OpenAI API-compatible dictionary.
|
||||
"""
|
||||
# model -> model
|
||||
# prompt -> prompt
|
||||
# sampling_params TODO(mattf): review strategy
|
||||
# strategy=greedy -> nvext.top_k = -1, temperature = temperature
|
||||
# strategy=top_p -> nvext.top_k = -1, top_p = top_p
|
||||
# strategy=top_k -> nvext.top_k = top_k
|
||||
# temperature -> temperature
|
||||
# top_p -> top_p
|
||||
# top_k -> nvext.top_k
|
||||
# max_tokens -> max_tokens
|
||||
# repetition_penalty -> nvext.repetition_penalty
|
||||
# response_format -> nvext.guided_json
|
||||
# stream -> stream
|
||||
# logprobs.top_k -> logprobs
|
||||
|
||||
nvext = {}
|
||||
payload: dict[str, Any] = dict(
|
||||
model=request.model,
|
||||
prompt=request.content,
|
||||
stream=request.stream,
|
||||
extra_body=dict(nvext=nvext),
|
||||
extra_headers={
|
||||
b"User-Agent": b"llama-stack: nvidia-inference-adapter",
|
||||
},
|
||||
n=n,
|
||||
)
|
||||
|
||||
if request.response_format:
|
||||
# this is not openai compliant, it is a nim extension
|
||||
nvext.update(guided_json=request.response_format.json_schema)
|
||||
|
||||
if request.logprobs:
|
||||
payload.update(logprobs=request.logprobs.top_k)
|
||||
|
||||
if request.sampling_params:
|
||||
nvext.update(repetition_penalty=request.sampling_params.repetition_penalty)
|
||||
|
||||
if request.sampling_params.max_tokens:
|
||||
payload.update(max_tokens=request.sampling_params.max_tokens)
|
||||
|
||||
if request.sampling_params.strategy == "top_p":
|
||||
nvext.update(top_k=-1)
|
||||
payload.update(top_p=request.sampling_params.top_p)
|
||||
elif request.sampling_params.strategy == "top_k":
|
||||
if request.sampling_params.top_k != -1 and request.sampling_params.top_k < 1:
|
||||
warnings.warn("top_k must be -1 or >= 1", stacklevel=2)
|
||||
nvext.update(top_k=request.sampling_params.top_k)
|
||||
elif request.sampling_params.strategy == "greedy":
|
||||
nvext.update(top_k=-1)
|
||||
payload.update(temperature=request.sampling_params.temperature)
|
||||
|
||||
return payload
|
||||
|
||||
|
||||
def _convert_openai_completion_logprobs(
|
||||
logprobs: OpenAICompletionLogprobs | None,
|
||||
) -> list[TokenLogProbs] | None:
|
||||
"""
|
||||
Convert an OpenAI CompletionLogprobs into a list of TokenLogProbs.
|
||||
"""
|
||||
if not logprobs:
|
||||
return None
|
||||
|
||||
return [TokenLogProbs(logprobs_by_token=logprobs) for logprobs in logprobs.top_logprobs]
|
||||
|
||||
|
||||
def convert_openai_completion_choice(
|
||||
choice: OpenAIChoice,
|
||||
) -> CompletionResponse:
|
||||
"""
|
||||
Convert an OpenAI Completion Choice into a CompletionResponse.
|
||||
"""
|
||||
return CompletionResponse(
|
||||
content=choice.text,
|
||||
stop_reason=_convert_openai_finish_reason(choice.finish_reason),
|
||||
logprobs=_convert_openai_completion_logprobs(choice.logprobs),
|
||||
)
|
||||
|
||||
|
||||
async def convert_openai_completion_stream(
    stream: AsyncStream[OpenAICompletion],
) -> AsyncGenerator[CompletionResponseStreamChunk, None]:
    """
    Convert a stream of OpenAI Completions into a stream
    of CompletionResponseStreamChunks.

    Only the first choice of each chunk is converted. Chunks that carry no
    choices are skipped.
    """
    async for chunk in stream:
        # A chunk may arrive with an empty ``choices`` list (for example a
        # trailing usage-only chunk); indexing it would raise IndexError.
        if not chunk.choices:
            continue

        choice = chunk.choices[0]
        yield CompletionResponseStreamChunk(
            delta=choice.text,
            stop_reason=_convert_openai_finish_reason(choice.finish_reason),
            logprobs=_convert_openai_completion_logprobs(choice.logprobs),
        )
|
|
@ -4,53 +4,8 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import httpx
|
||||
|
||||
from llama_stack.log import get_logger
|
||||
|
||||
from . import NVIDIAConfig
|
||||
|
||||
logger = get_logger(name=__name__, category="inference::nvidia")
|
||||
|
||||
|
||||
def _is_nvidia_hosted(config: NVIDIAConfig) -> bool:
|
||||
return "integrate.api.nvidia.com" in config.url
|
||||
|
||||
|
||||
async def _get_health(url: str) -> tuple[bool, bool]:
    """
    Probe the liveness and readiness endpoints of a server.

    Sends a GET to {url}/v1/health/live and then to {url}/v1/health/ready.

    Args:
        url (str): URL of the server

    Returns:
        Tuple[bool, bool]: (is_live, is_ready); each flag is True only when the
        corresponding endpoint answered with HTTP status 200
    """
    async with httpx.AsyncClient() as client:
        status_codes = []
        for probe in ("live", "ready"):
            response = await client.get(f"{url}/v1/health/{probe}")
            status_codes.append(response.status_code)
    live_code, ready_code = status_codes
    return live_code == 200, ready_code == 200
|
||||
|
||||
|
||||
async def check_health(config: NVIDIAConfig) -> None:
    """
    Check that a non-hosted NVIDIA NIM server is running and ready.

    Nothing is checked when the configuration points at the NVIDIA-hosted endpoint.

    Args:
        config (NVIDIAConfig): Provider configuration; its url is the server that is probed

    Raises:
        ConnectionError: If the server cannot be reached, is not running, or is not ready
    """
    if _is_nvidia_hosted(config):
        return

    logger.info("Checking NVIDIA NIM health...")
    try:
        is_live, is_ready = await _get_health(config.url)
    except httpx.ConnectError as e:
        raise ConnectionError(f"Failed to connect to NVIDIA NIM: {e}") from e

    if not is_live:
        raise ConnectionError("NVIDIA NIM is not running")
    if not is_ready:
        raise ConnectionError("NVIDIA NIM is not ready")
    # TODO(mf): should we wait for the server to be ready?
|
||||
|
|
|
@ -4,19 +4,12 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from llama_stack.apis.inference import Inference
|
||||
|
||||
from .config import WatsonXConfig
|
||||
|
||||
|
||||
async def get_adapter_impl(config: WatsonXConfig, _deps) -> Inference:
|
||||
# import dynamically so `llama stack build` does not fail due to missing dependencies
|
||||
async def get_adapter_impl(config: WatsonXConfig, _deps):
|
||||
# import dynamically so the import is used only when it is needed
|
||||
from .watsonx import WatsonXInferenceAdapter
|
||||
|
||||
if not isinstance(config, WatsonXConfig):
|
||||
raise RuntimeError(f"Unexpected config type: {type(config)}")
|
||||
adapter = WatsonXInferenceAdapter(config)
|
||||
return adapter
|
||||
|
||||
|
||||
__all__ = ["get_adapter_impl", "WatsonXConfig"]
|
||||
|
|
|
@ -7,16 +7,18 @@
|
|||
import os
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field, SecretStr
|
||||
from pydantic import BaseModel, ConfigDict, Field, SecretStr
|
||||
|
||||
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
|
||||
from llama_stack.schema_utils import json_schema_type
|
||||
|
||||
|
||||
class WatsonXProviderDataValidator(BaseModel):
|
||||
url: str
|
||||
api_key: str
|
||||
project_id: str
|
||||
model_config = ConfigDict(
|
||||
from_attributes=True,
|
||||
extra="forbid",
|
||||
)
|
||||
watsonx_api_key: str | None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
|
@ -25,13 +27,17 @@ class WatsonXConfig(RemoteInferenceProviderConfig):
|
|||
default_factory=lambda: os.getenv("WATSONX_BASE_URL", "https://us-south.ml.cloud.ibm.com"),
|
||||
description="A base url for accessing the watsonx.ai",
|
||||
)
|
||||
# This seems like it should be required, but none of the other remote inference
|
||||
# providers require it, so this is optional here too for consistency.
|
||||
# The OpenAIConfig uses default=None instead, so this is following that precedent.
|
||||
api_key: SecretStr | None = Field(
|
||||
default_factory=lambda: os.getenv("WATSONX_API_KEY"),
|
||||
description="The watsonx API key",
|
||||
default=None,
|
||||
description="The watsonx.ai API key",
|
||||
)
|
||||
# As above, this is optional here too for consistency.
|
||||
project_id: str | None = Field(
|
||||
default_factory=lambda: os.getenv("WATSONX_PROJECT_ID"),
|
||||
description="The Project ID key",
|
||||
default=None,
|
||||
description="The watsonx.ai project ID",
|
||||
)
|
||||
timeout: int = Field(
|
||||
default=60,
|
||||
|
|
|
@ -1,47 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from llama_stack.models.llama.sku_types import CoreModelId
|
||||
from llama_stack.providers.utils.inference.model_registry import build_hf_repo_model_entry
|
||||
|
||||
# Each entry pairs a provider model id string with the CoreModelId value it is built from.
MODEL_ENTRIES = [
    build_hf_repo_model_entry(provider_model_id, core_model.value)
    for provider_model_id, core_model in (
        ("meta-llama/llama-3-3-70b-instruct", CoreModelId.llama3_3_70b_instruct),
        ("meta-llama/llama-2-13b-chat", CoreModelId.llama2_13b),
        ("meta-llama/llama-3-1-70b-instruct", CoreModelId.llama3_1_70b_instruct),
        ("meta-llama/llama-3-1-8b-instruct", CoreModelId.llama3_1_8b_instruct),
        ("meta-llama/llama-3-2-11b-vision-instruct", CoreModelId.llama3_2_11b_vision_instruct),
        ("meta-llama/llama-3-2-1b-instruct", CoreModelId.llama3_2_1b_instruct),
        ("meta-llama/llama-3-2-3b-instruct", CoreModelId.llama3_2_3b_instruct),
        ("meta-llama/llama-3-2-90b-vision-instruct", CoreModelId.llama3_2_90b_vision_instruct),
        ("meta-llama/llama-guard-3-11b-vision", CoreModelId.llama_guard_3_11b_vision),
    )
]
|
|
@ -4,240 +4,120 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from collections.abc import AsyncGenerator, AsyncIterator
|
||||
from typing import Any
|
||||
|
||||
from ibm_watsonx_ai.foundation_models import Model
|
||||
from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams
|
||||
from openai import AsyncOpenAI
|
||||
import requests
|
||||
|
||||
from llama_stack.apis.inference import (
|
||||
ChatCompletionRequest,
|
||||
CompletionRequest,
|
||||
GreedySamplingStrategy,
|
||||
Inference,
|
||||
OpenAIChatCompletion,
|
||||
OpenAIChatCompletionChunk,
|
||||
OpenAICompletion,
|
||||
OpenAIEmbeddingsResponse,
|
||||
OpenAIMessageParam,
|
||||
OpenAIResponseFormatParam,
|
||||
TopKSamplingStrategy,
|
||||
TopPSamplingStrategy,
|
||||
)
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
|
||||
from llama_stack.providers.utils.inference.openai_compat import (
|
||||
prepare_openai_completion_params,
|
||||
)
|
||||
from llama_stack.providers.utils.inference.prompt_adapter import (
|
||||
chat_completion_request_to_prompt,
|
||||
completion_request_to_prompt,
|
||||
request_has_media,
|
||||
)
|
||||
|
||||
from . import WatsonXConfig
|
||||
from .models import MODEL_ENTRIES
|
||||
|
||||
logger = get_logger(name=__name__, category="inference::watsonx")
|
||||
from llama_stack.apis.inference import ChatCompletionRequest
|
||||
from llama_stack.apis.models import Model
|
||||
from llama_stack.apis.models.models import ModelType
|
||||
from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig
|
||||
from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
|
||||
|
||||
|
||||
# Note on structured output
|
||||
# WatsonX returns responses with a json embedded into a string.
|
||||
# Examples:
|
||||
class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
|
||||
_model_cache: dict[str, Model] = {}
|
||||
|
||||
# ChatCompletionResponse(completion_message=CompletionMessage(content='```json\n{\n
|
||||
# "first_name": "Michael",\n "last_name": "Jordan",\n'...)
|
||||
# Not even a valid JSON, but we can still extract the JSON from the content
|
||||
def __init__(self, config: WatsonXConfig):
|
||||
LiteLLMOpenAIMixin.__init__(
|
||||
self,
|
||||
litellm_provider_name="watsonx",
|
||||
api_key_from_config=config.api_key.get_secret_value() if config.api_key else None,
|
||||
provider_data_api_key_field="watsonx_api_key",
|
||||
)
|
||||
self.available_models = None
|
||||
self.config = config
|
||||
|
||||
# CompletionResponse(content=' \nThe best answer is $\\boxed{\\{"name": "Michael Jordan",
|
||||
# "year_born": "1963", "year_retired": "2003"\\}}$')
|
||||
# Find the start of the boxed content
|
||||
def get_base_url(self) -> str:
|
||||
return self.config.url
|
||||
|
||||
async def _get_params(self, request: ChatCompletionRequest) -> dict[str, Any]:
|
||||
# Get base parameters from parent
|
||||
params = await super()._get_params(request)
|
||||
|
||||
class WatsonXInferenceAdapter(Inference, ModelRegistryHelper):
|
||||
def __init__(self, config: WatsonXConfig) -> None:
|
||||
ModelRegistryHelper.__init__(self, model_entries=MODEL_ENTRIES)
|
||||
|
||||
logger.info(f"Initializing watsonx InferenceAdapter({config.url})...")
|
||||
self._config = config
|
||||
self._openai_client: AsyncOpenAI | None = None
|
||||
|
||||
self._project_id = self._config.project_id
|
||||
|
||||
def _get_client(self, model_id) -> Model:
|
||||
config_api_key = self._config.api_key.get_secret_value() if self._config.api_key else None
|
||||
config_url = self._config.url
|
||||
project_id = self._config.project_id
|
||||
credentials = {"url": config_url, "apikey": config_api_key}
|
||||
|
||||
return Model(model_id=model_id, credentials=credentials, project_id=project_id)
|
||||
|
||||
def _get_openai_client(self) -> AsyncOpenAI:
|
||||
if not self._openai_client:
|
||||
self._openai_client = AsyncOpenAI(
|
||||
base_url=f"{self._config.url}/openai/v1",
|
||||
api_key=self._config.api_key,
|
||||
)
|
||||
return self._openai_client
|
||||
|
||||
async def _get_params(self, request: ChatCompletionRequest | CompletionRequest) -> dict:
|
||||
input_dict = {"params": {}}
|
||||
media_present = request_has_media(request)
|
||||
llama_model = self.get_llama_model(request.model)
|
||||
if isinstance(request, ChatCompletionRequest):
|
||||
input_dict["prompt"] = await chat_completion_request_to_prompt(request, llama_model)
|
||||
else:
|
||||
assert not media_present, "Together does not support media for Completion requests"
|
||||
input_dict["prompt"] = await completion_request_to_prompt(request)
|
||||
if request.sampling_params:
|
||||
if request.sampling_params.strategy:
|
||||
input_dict["params"][GenParams.DECODING_METHOD] = request.sampling_params.strategy.type
|
||||
if request.sampling_params.max_tokens:
|
||||
input_dict["params"][GenParams.MAX_NEW_TOKENS] = request.sampling_params.max_tokens
|
||||
if request.sampling_params.repetition_penalty:
|
||||
input_dict["params"][GenParams.REPETITION_PENALTY] = request.sampling_params.repetition_penalty
|
||||
|
||||
if isinstance(request.sampling_params.strategy, TopPSamplingStrategy):
|
||||
input_dict["params"][GenParams.TOP_P] = request.sampling_params.strategy.top_p
|
||||
input_dict["params"][GenParams.TEMPERATURE] = request.sampling_params.strategy.temperature
|
||||
if isinstance(request.sampling_params.strategy, TopKSamplingStrategy):
|
||||
input_dict["params"][GenParams.TOP_K] = request.sampling_params.strategy.top_k
|
||||
if isinstance(request.sampling_params.strategy, GreedySamplingStrategy):
|
||||
input_dict["params"][GenParams.TEMPERATURE] = 0.0
|
||||
|
||||
input_dict["params"][GenParams.STOP_SEQUENCES] = ["<|endoftext|>"]
|
||||
|
||||
params = {
|
||||
**input_dict,
|
||||
}
|
||||
# Add watsonx.ai specific parameters
|
||||
params["project_id"] = self.config.project_id
|
||||
params["time_limit"] = self.config.timeout
|
||||
return params
|
||||
|
||||
async def openai_embeddings(
|
||||
self,
|
||||
model: str,
|
||||
input: str | list[str],
|
||||
encoding_format: str | None = "float",
|
||||
dimensions: int | None = None,
|
||||
user: str | None = None,
|
||||
) -> OpenAIEmbeddingsResponse:
|
||||
raise NotImplementedError()
|
||||
# Copied from OpenAIMixin
|
||||
async def check_model_availability(self, model: str) -> bool:
|
||||
"""
|
||||
Check if a specific model is available from the provider's /v1/models.
|
||||
|
||||
async def openai_completion(
|
||||
self,
|
||||
model: str,
|
||||
prompt: str | list[str] | list[int] | list[list[int]],
|
||||
best_of: int | None = None,
|
||||
echo: bool | None = None,
|
||||
frequency_penalty: float | None = None,
|
||||
logit_bias: dict[str, float] | None = None,
|
||||
logprobs: bool | None = None,
|
||||
max_tokens: int | None = None,
|
||||
n: int | None = None,
|
||||
presence_penalty: float | None = None,
|
||||
seed: int | None = None,
|
||||
stop: str | list[str] | None = None,
|
||||
stream: bool | None = None,
|
||||
stream_options: dict[str, Any] | None = None,
|
||||
temperature: float | None = None,
|
||||
top_p: float | None = None,
|
||||
user: str | None = None,
|
||||
guided_choice: list[str] | None = None,
|
||||
prompt_logprobs: int | None = None,
|
||||
suffix: str | None = None,
|
||||
) -> OpenAICompletion:
|
||||
model_obj = await self.model_store.get_model(model)
|
||||
params = await prepare_openai_completion_params(
|
||||
model=model_obj.provider_resource_id,
|
||||
prompt=prompt,
|
||||
best_of=best_of,
|
||||
echo=echo,
|
||||
frequency_penalty=frequency_penalty,
|
||||
logit_bias=logit_bias,
|
||||
logprobs=logprobs,
|
||||
max_tokens=max_tokens,
|
||||
n=n,
|
||||
presence_penalty=presence_penalty,
|
||||
seed=seed,
|
||||
stop=stop,
|
||||
stream=stream,
|
||||
stream_options=stream_options,
|
||||
temperature=temperature,
|
||||
top_p=top_p,
|
||||
user=user,
|
||||
)
|
||||
return await self._get_openai_client().completions.create(**params) # type: ignore
|
||||
:param model: The model identifier to check.
|
||||
:return: True if the model is available dynamically, False otherwise.
|
||||
"""
|
||||
if not self._model_cache:
|
||||
await self.list_models()
|
||||
return model in self._model_cache
|
||||
|
||||
async def openai_chat_completion(
|
||||
self,
|
||||
model: str,
|
||||
messages: list[OpenAIMessageParam],
|
||||
frequency_penalty: float | None = None,
|
||||
function_call: str | dict[str, Any] | None = None,
|
||||
functions: list[dict[str, Any]] | None = None,
|
||||
logit_bias: dict[str, float] | None = None,
|
||||
logprobs: bool | None = None,
|
||||
max_completion_tokens: int | None = None,
|
||||
max_tokens: int | None = None,
|
||||
n: int | None = None,
|
||||
parallel_tool_calls: bool | None = None,
|
||||
presence_penalty: float | None = None,
|
||||
response_format: OpenAIResponseFormatParam | None = None,
|
||||
seed: int | None = None,
|
||||
stop: str | list[str] | None = None,
|
||||
stream: bool | None = None,
|
||||
stream_options: dict[str, Any] | None = None,
|
||||
temperature: float | None = None,
|
||||
tool_choice: str | dict[str, Any] | None = None,
|
||||
tools: list[dict[str, Any]] | None = None,
|
||||
top_logprobs: int | None = None,
|
||||
top_p: float | None = None,
|
||||
user: str | None = None,
|
||||
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
|
||||
model_obj = await self.model_store.get_model(model)
|
||||
params = await prepare_openai_completion_params(
|
||||
model=model_obj.provider_resource_id,
|
||||
messages=messages,
|
||||
frequency_penalty=frequency_penalty,
|
||||
function_call=function_call,
|
||||
functions=functions,
|
||||
logit_bias=logit_bias,
|
||||
logprobs=logprobs,
|
||||
max_completion_tokens=max_completion_tokens,
|
||||
max_tokens=max_tokens,
|
||||
n=n,
|
||||
parallel_tool_calls=parallel_tool_calls,
|
||||
presence_penalty=presence_penalty,
|
||||
response_format=response_format,
|
||||
seed=seed,
|
||||
stop=stop,
|
||||
stream=stream,
|
||||
stream_options=stream_options,
|
||||
temperature=temperature,
|
||||
tool_choice=tool_choice,
|
||||
tools=tools,
|
||||
top_logprobs=top_logprobs,
|
||||
top_p=top_p,
|
||||
user=user,
|
||||
)
|
||||
if params.get("stream", False):
|
||||
return self._stream_openai_chat_completion(params)
|
||||
return await self._get_openai_client().chat.completions.create(**params) # type: ignore
|
||||
async def list_models(self) -> list[Model] | None:
|
||||
self._model_cache = {}
|
||||
models = []
|
||||
for model_spec in self._get_model_specs():
|
||||
functions = [f["id"] for f in model_spec.get("functions", [])]
|
||||
# Format: {"embedding_dimension": 1536, "context_length": 8192}
|
||||
|
||||
async def _stream_openai_chat_completion(self, params: dict) -> AsyncGenerator:
|
||||
# watsonx.ai sometimes adds usage data to the stream
|
||||
include_usage = False
|
||||
if params.get("stream_options", None):
|
||||
include_usage = params["stream_options"].get("include_usage", False)
|
||||
stream = await self._get_openai_client().chat.completions.create(**params)
|
||||
# Example of an embedding model:
|
||||
# {'model_id': 'ibm/granite-embedding-278m-multilingual',
|
||||
# 'label': 'granite-embedding-278m-multilingual',
|
||||
# 'model_limits': {'max_sequence_length': 512, 'embedding_dimension': 768},
|
||||
# ...
|
||||
provider_resource_id = f"{self.__provider_id__}/{model_spec['model_id']}"
|
||||
if "embedding" in functions:
|
||||
embedding_dimension = model_spec["model_limits"]["embedding_dimension"]
|
||||
context_length = model_spec["model_limits"]["max_sequence_length"]
|
||||
embedding_metadata = {
|
||||
"embedding_dimension": embedding_dimension,
|
||||
"context_length": context_length,
|
||||
}
|
||||
model = Model(
|
||||
identifier=model_spec["model_id"],
|
||||
provider_resource_id=provider_resource_id,
|
||||
provider_id=self.__provider_id__,
|
||||
metadata=embedding_metadata,
|
||||
model_type=ModelType.embedding,
|
||||
)
|
||||
self._model_cache[provider_resource_id] = model
|
||||
models.append(model)
|
||||
if "text_chat" in functions:
|
||||
model = Model(
|
||||
identifier=model_spec["model_id"],
|
||||
provider_resource_id=provider_resource_id,
|
||||
provider_id=self.__provider_id__,
|
||||
metadata={},
|
||||
model_type=ModelType.llm,
|
||||
)
|
||||
# In theory, I guess it is possible that a model could be both an embedding model and a text chat model.
|
||||
# In that case, the cache will record the text chat Model object, and the list which we return will have
|
||||
# both the embedding Model object and the text chat Model object. That's fine because the cache is
|
||||
# only used for check_model_availability() anyway.
|
||||
self._model_cache[provider_resource_id] = model
|
||||
models.append(model)
|
||||
return models
|
||||
|
||||
seen_finish_reason = False
|
||||
async for chunk in stream:
|
||||
# Final usage chunk with no choices that the user didn't request, so discard
|
||||
if not include_usage and seen_finish_reason and len(chunk.choices) == 0:
|
||||
break
|
||||
yield chunk
|
||||
for choice in chunk.choices:
|
||||
if choice.finish_reason:
|
||||
seen_finish_reason = True
|
||||
break
|
||||
# LiteLLM provides methods to list models for many providers, but not for watsonx.ai.
|
||||
# So we need to implement our own method to list models by calling the watsonx.ai API.
|
||||
def _get_model_specs(self) -> list[dict[str, Any]]:
    """
    Retrieves foundation model specifications from the watsonx.ai API.

    :return: The list stored under the "resources" key of the JSON response.
    :raises requests.HTTPError: If the API answers with a 4xx or 5xx status.
    :raises requests.Timeout: If the API does not answer within the configured timeout.
    :raises ValueError: If the response body has no "resources" key.
    """
    url = f"{self.config.url}/ml/v1/foundation_model_specs?version=2023-10-25"
    headers = {
        # Note that there is no authorization header. Listing models does not require authentication.
        "Content-Type": "application/json",
    }

    # requests applies no timeout unless one is given, so an unresponsive server
    # would block the caller forever; bound the call with the configured timeout.
    response = requests.get(url, headers=headers, timeout=self.config.timeout)

    # Raise an exception for bad status codes (4xx or 5xx)
    response.raise_for_status()

    # The response should contain a list of model specifications
    response_data = response.json()
    if "resources" not in response_data:
        raise ValueError("Resources not found in response")
    return response_data["resources"]
|
||||
|
|
|
@ -4,6 +4,8 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import base64
|
||||
import struct
|
||||
from collections.abc import AsyncIterator
|
||||
from typing import Any
|
||||
|
||||
|
@ -16,6 +18,7 @@ from llama_stack.apis.inference import (
|
|||
OpenAIChatCompletion,
|
||||
OpenAIChatCompletionChunk,
|
||||
OpenAICompletion,
|
||||
OpenAIEmbeddingData,
|
||||
OpenAIEmbeddingsResponse,
|
||||
OpenAIEmbeddingUsage,
|
||||
OpenAIMessageParam,
|
||||
|
@ -26,7 +29,6 @@ from llama_stack.core.request_headers import NeedsRequestProviderData
|
|||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry
|
||||
from llama_stack.providers.utils.inference.openai_compat import (
|
||||
b64_encode_openai_embeddings_response,
|
||||
convert_message_to_openai_dict_new,
|
||||
convert_tooldef_to_openai_tool,
|
||||
get_sampling_options,
|
||||
|
@ -349,3 +351,28 @@ class LiteLLMOpenAIMixin(
|
|||
return False
|
||||
|
||||
return model in litellm.models_by_provider[self.litellm_provider_name]
|
||||
|
||||
|
||||
def b64_encode_openai_embeddings_response(
    response_data: list[dict], encoding_format: str | None = "float"
) -> list[OpenAIEmbeddingData]:
    """
    Process the OpenAI embeddings response to encode the embeddings in base64 format if specified.

    :param response_data: Items that each hold their vector under the "embedding" key.
    :param encoding_format: "base64" to emit each vector as a base64 string of packed
        32-bit floats; any other value passes the vector through unchanged.
    :return: One OpenAIEmbeddingData per input item, indexed by input position.
    """
    data = []
    for i, embedding_data in enumerate(response_data):
        embedding = embedding_data["embedding"]
        if encoding_format == "base64":
            # Pack the whole vector in one call with an explicit little-endian
            # layout so the encoded bytes do not depend on the host byte order.
            # NOTE(review): consumers are presumed to decode little-endian float32 - confirm.
            values = [float(embedding_value) for embedding_value in embedding]
            packed = struct.pack(f"<{len(values)}f", *values)
            response_embedding = base64.b64encode(packed).decode("utf-8")
        else:
            response_embedding = embedding
        data.append(
            OpenAIEmbeddingData(
                embedding=response_embedding,
                index=i,
            )
        )
    return data
|
||||
|
|
|
@ -3,9 +3,7 @@
|
|||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
import base64
|
||||
import json
|
||||
import struct
|
||||
import time
|
||||
import uuid
|
||||
import warnings
|
||||
|
@ -103,7 +101,6 @@ from llama_stack.apis.inference import (
|
|||
JsonSchemaResponseFormat,
|
||||
Message,
|
||||
OpenAIChatCompletion,
|
||||
OpenAIEmbeddingData,
|
||||
OpenAIMessageParam,
|
||||
OpenAIResponseFormatParam,
|
||||
SamplingParams,
|
||||
|
@ -1402,28 +1399,3 @@ def prepare_openai_embeddings_params(
|
|||
params["user"] = user
|
||||
|
||||
return params
|
||||
|
||||
|
||||
def b64_encode_openai_embeddings_response(
    response_data: dict, encoding_format: str | None = "float"
) -> list[OpenAIEmbeddingData]:
    """
    Process the OpenAI embeddings response to encode the embeddings in base64 format if specified.

    :param response_data: Iterable of items exposing their vector as an ``embedding``
        attribute (the ``dict`` annotation is kept for compatibility; the body
        enumerates the value and reads ``.embedding`` from each item).
    :param encoding_format: "base64" to emit each vector as a base64 string of packed
        32-bit floats; any other value passes the vector through unchanged.
    :return: One OpenAIEmbeddingData per input item, indexed by input position.
    """
    data = []
    for i, embedding_data in enumerate(response_data):
        embedding = embedding_data.embedding
        if encoding_format == "base64":
            # Pack the whole vector in one call with an explicit little-endian
            # layout so the encoded bytes do not depend on the host byte order.
            # NOTE(review): consumers are presumed to decode little-endian float32 - confirm.
            values = [float(embedding_value) for embedding_value in embedding]
            packed = struct.pack(f"<{len(values)}f", *values)
            response_embedding = base64.b64encode(packed).decode("utf-8")
        else:
            response_embedding = embedding
        data.append(
            OpenAIEmbeddingData(
                embedding=response_embedding,
                index=i,
            )
        )
    return data
|
||||
|
|
|
@ -296,15 +296,14 @@ class OpenAIVectorStoreMixin(ABC):
|
|||
async def shutdown(self) -> None:
|
||||
"""Clean up mixin resources including background tasks."""
|
||||
# Cancel any running file batch tasks gracefully
|
||||
if hasattr(self, "_file_batch_tasks"):
|
||||
tasks_to_cancel = list(self._file_batch_tasks.items())
|
||||
for _, task in tasks_to_cancel:
|
||||
if not task.done():
|
||||
task.cancel()
|
||||
try:
|
||||
await task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
tasks_to_cancel = list(self._file_batch_tasks.items())
|
||||
for _, task in tasks_to_cancel:
|
||||
if not task.done():
|
||||
task.cancel()
|
||||
try:
|
||||
await task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
|
||||
|
|
|
@ -20,7 +20,6 @@ from pydantic import BaseModel
|
|||
from llama_stack.apis.common.content_types import (
|
||||
URL,
|
||||
InterleavedContent,
|
||||
TextContentItem,
|
||||
)
|
||||
from llama_stack.apis.tools import RAGDocument
|
||||
from llama_stack.apis.vector_dbs import VectorDB
|
||||
|
@ -129,26 +128,6 @@ def content_from_data_and_mime_type(data: bytes | str, mime_type: str | None, en
|
|||
return ""
|
||||
|
||||
|
||||
def concat_interleaved_content(content: list[InterleavedContent]) -> InterleavedContent:
    """Flatten interleaved content into one list, wrapping every 'str' in a TextContentItem.

    Nested lists are expanded depth-first in their original order; anything that
    is neither a str nor a list is kept as-is.
    """

    def _flatten(node):
        # Lists are expanded recursively, strings are wrapped, the rest passes through.
        if isinstance(node, list):
            for child in node:
                yield from _flatten(child)
        elif isinstance(node, str):
            yield TextContentItem(text=node)
        else:
            yield node

    return [piece for entry in content for piece in _flatten(entry)]
|
||||
|
||||
|
||||
async def content_from_doc(doc: RAGDocument) -> str:
|
||||
if isinstance(doc.content, URL):
|
||||
if doc.content.uri.startswith("data:"):
|
||||
|
|
|
@ -18,6 +18,8 @@ from llama_stack.providers.remote.inference.openai.config import OpenAIConfig
|
|||
from llama_stack.providers.remote.inference.openai.openai import OpenAIInferenceAdapter
|
||||
from llama_stack.providers.remote.inference.together.config import TogetherImplConfig
|
||||
from llama_stack.providers.remote.inference.together.together import TogetherInferenceAdapter
|
||||
from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig
|
||||
from llama_stack.providers.remote.inference.watsonx.watsonx import WatsonXInferenceAdapter
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
|
@ -58,3 +60,29 @@ def test_openai_provider_data_used(config_cls, adapter_cls, provider_data_valida
|
|||
{"x-llamastack-provider-data": json.dumps({inference_adapter.provider_data_api_key_field: api_key})}
|
||||
):
|
||||
assert inference_adapter.client.api_key == api_key
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "config_cls,adapter_cls,provider_data_validator",
    [
        (
            WatsonXConfig,
            WatsonXInferenceAdapter,
            "llama_stack.providers.remote.inference.watsonx.config.WatsonXProviderDataValidator",
        ),
    ],
)
def test_litellm_provider_data_used(config_cls, adapter_cls, provider_data_validator: str):
    """Check that LiteLLM-based adapters read their API key from request provider data.

    Mirrors test_openai_provider_data_used, but asserts on get_api_key() instead of
    assuming the adapter exposes an OpenAI-compatible client object.
    """
    adapter = adapter_cls(config=config_cls())

    # Attach a stand-in provider spec that names the validator under test.
    provider_spec = MagicMock()
    provider_spec.provider_data_validator = provider_data_validator
    adapter.__provider_spec__ = provider_spec

    for expected_key in ("test1", "test2"):
        header_payload = json.dumps({adapter.provider_data_api_key_field: expected_key})
        with request_provider_data_context({"x-llamastack-provider-data": header_payload}):
            assert adapter.get_api_key() == expected_key
|
||||
|
|
|
@ -125,8 +125,15 @@ async def test_duplicate_provider_registration(cached_disk_dist_registry):
|
|||
provider_resource_id="test_vector_db_2",
|
||||
provider_id="baz", # Same provider_id
|
||||
)
|
||||
await cached_disk_dist_registry.register(duplicate_vector_db)
|
||||
|
||||
# Now we expect a ValueError to be raised for duplicate registration
|
||||
with pytest.raises(
|
||||
ValueError,
|
||||
match=r"Provider 'baz' is already registered.*Unregister the existing provider first before registering it again.",
|
||||
):
|
||||
await cached_disk_dist_registry.register(duplicate_vector_db)
|
||||
|
||||
# Verify the original registration is still intact
|
||||
result = await cached_disk_dist_registry.get("vector_db", "test_vector_db_2")
|
||||
assert result is not None
|
||||
assert result.embedding_model == original_vector_db.embedding_model # Original values preserved
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue