diff --git a/.github/workflows/stale_bot.yml b/.github/workflows/stale_bot.yml index 502a78f8e..c5a1ba9e5 100644 --- a/.github/workflows/stale_bot.yml +++ b/.github/workflows/stale_bot.yml @@ -24,7 +24,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Stale Action - uses: actions/stale@3a9db7e6a41a89f618792c92c0e97cc736e1b13f # v10.0.0 + uses: actions/stale@5f858e3efba33a5ca4407a664cc011ad407f2008 # v10.1.0 with: stale-issue-label: 'stale' stale-issue-message: > diff --git a/docs/docs/providers/inference/remote_watsonx.mdx b/docs/docs/providers/inference/remote_watsonx.mdx index 33bc5bbc3..f081703ab 100644 --- a/docs/docs/providers/inference/remote_watsonx.mdx +++ b/docs/docs/providers/inference/remote_watsonx.mdx @@ -17,8 +17,8 @@ IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | | `url` | `` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing the watsonx.ai | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | The watsonx API key | -| `project_id` | `str \| None` | No | | The Project ID key | +| `api_key` | `pydantic.types.SecretStr \| None` | No | | The watsonx.ai API key | +| `project_id` | `str \| None` | No | | The watsonx.ai project ID | | `timeout` | `` | No | 60 | Timeout for the HTTP requests | ## Sample Configuration diff --git a/docs/static/deprecated-llama-stack-spec.html b/docs/static/deprecated-llama-stack-spec.html index 04a3dca9b..1b8e78446 100644 --- a/docs/static/deprecated-llama-stack-spec.html +++ b/docs/static/deprecated-llama-stack-spec.html @@ -3526,343 +3526,6 @@ }, "deprecated": true } - }, - "/v1/telemetry/metrics/{metric_name}": { - "post": { - "responses": { - "200": { - "description": "A QueryMetricsResponse.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QueryMetricsResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Query metrics.", - "description": "Query metrics.", - "parameters": [ - { - "name": "metric_name", - "in": "path", - "description": "The name of the metric to query.", - "required": true, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QueryMetricsRequest" - } - } - }, - "required": true - }, - "deprecated": true - } - }, - "/v1/telemetry/spans": { - "post": { - "responses": { - "200": { - "description": "A QuerySpansResponse.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QuerySpansResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Query spans.", - "description": "Query spans.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QuerySpansRequest" - } - } - }, - "required": true - }, - "deprecated": true - } - }, - "/v1/telemetry/spans/export": { - "post": { - "responses": { - "200": { - "description": "OK" - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Save spans to a dataset.", - "description": "Save spans to a dataset.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/SaveSpansToDatasetRequest" - } - } - }, - "required": true - }, - "deprecated": true - } - }, - "/v1/telemetry/spans/{span_id}/tree": { - "post": { - "responses": { - "200": { - "description": "A QuerySpanTreeResponse.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QuerySpanTreeResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Get a span tree by its ID.", - "description": "Get a span tree by its ID.", - "parameters": [ - { - "name": "span_id", - "in": "path", - "description": "The ID of the span to get the tree from.", - "required": true, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/GetSpanTreeRequest" - } - } - }, - "required": true - }, - "deprecated": true - } - }, - "/v1/telemetry/traces": { - "post": { - "responses": { - "200": { - "description": "A QueryTracesResponse.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QueryTracesResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Query traces.", - "description": "Query traces.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QueryTracesRequest" - } - } - }, - "required": true - }, - "deprecated": true - } - }, - "/v1/telemetry/traces/{trace_id}": { - "get": { - "responses": { - "200": { - "description": "A Trace.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Trace" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Get a trace by its ID.", - "description": "Get a trace by its ID.", - "parameters": [ - { - "name": "trace_id", - "in": "path", - "description": "The ID of the trace to get.", - "required": true, - "schema": { - "type": "string" - } - } - ], - "deprecated": true - } - }, - "/v1/telemetry/traces/{trace_id}/spans/{span_id}": { - "get": { - "responses": { - "200": { - "description": "A Span.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Span" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Get a span by its ID.", - "description": "Get a span by its ID.", - "parameters": [ - { - "name": "trace_id", - "in": "path", - "description": "The ID of the trace to get the span from.", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "span_id", - "in": "path", - "description": "The ID of the span to get.", - "required": true, - "schema": { - "type": "string" - } - } - ], - "deprecated": true - } } }, "jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema", @@ -12716,561 +12379,6 @@ "logger_config" ], "title": "SupervisedFineTuneRequest" - }, - "QueryMetricsRequest": { - "type": "object", - "properties": { - "start_time": { - "type": "integer", - "description": "The start time of the metric to query." - }, - "end_time": { - "type": "integer", - "description": "The end time of the metric to query." - }, - "granularity": { - "type": "string", - "description": "The granularity of the metric to query." - }, - "query_type": { - "type": "string", - "enum": [ - "range", - "instant" - ], - "description": "The type of query to perform." - }, - "label_matchers": { - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string", - "description": "The name of the label to match" - }, - "value": { - "type": "string", - "description": "The value to match against" - }, - "operator": { - "type": "string", - "enum": [ - "=", - "!=", - "=~", - "!~" - ], - "description": "The comparison operator to use for matching", - "default": "=" - } - }, - "additionalProperties": false, - "required": [ - "name", - "value", - "operator" - ], - "title": "MetricLabelMatcher", - "description": "A matcher for filtering metrics by label values." - }, - "description": "The label matchers to apply to the metric." - } - }, - "additionalProperties": false, - "required": [ - "start_time", - "query_type" - ], - "title": "QueryMetricsRequest" - }, - "MetricDataPoint": { - "type": "object", - "properties": { - "timestamp": { - "type": "integer", - "description": "Unix timestamp when the metric value was recorded" - }, - "value": { - "type": "number", - "description": "The numeric value of the metric at this timestamp" - }, - "unit": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "timestamp", - "value", - "unit" - ], - "title": "MetricDataPoint", - "description": "A single data point in a metric time series." - }, - "MetricLabel": { - "type": "object", - "properties": { - "name": { - "type": "string", - "description": "The name of the label" - }, - "value": { - "type": "string", - "description": "The value of the label" - } - }, - "additionalProperties": false, - "required": [ - "name", - "value" - ], - "title": "MetricLabel", - "description": "A label associated with a metric." - }, - "MetricSeries": { - "type": "object", - "properties": { - "metric": { - "type": "string", - "description": "The name of the metric" - }, - "labels": { - "type": "array", - "items": { - "$ref": "#/components/schemas/MetricLabel" - }, - "description": "List of labels associated with this metric series" - }, - "values": { - "type": "array", - "items": { - "$ref": "#/components/schemas/MetricDataPoint" - }, - "description": "List of data points in chronological order" - } - }, - "additionalProperties": false, - "required": [ - "metric", - "labels", - "values" - ], - "title": "MetricSeries", - "description": "A time series of metric data points." - }, - "QueryMetricsResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/MetricSeries" - }, - "description": "List of metric series matching the query criteria" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "QueryMetricsResponse", - "description": "Response containing metric time series data." - }, - "QueryCondition": { - "type": "object", - "properties": { - "key": { - "type": "string", - "description": "The attribute key to filter on" - }, - "op": { - "$ref": "#/components/schemas/QueryConditionOp", - "description": "The comparison operator to apply" - }, - "value": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ], - "description": "The value to compare against" - } - }, - "additionalProperties": false, - "required": [ - "key", - "op", - "value" - ], - "title": "QueryCondition", - "description": "A condition for filtering query results." - }, - "QueryConditionOp": { - "type": "string", - "enum": [ - "eq", - "ne", - "gt", - "lt" - ], - "title": "QueryConditionOp", - "description": "Comparison operators for query conditions." - }, - "QuerySpansRequest": { - "type": "object", - "properties": { - "attribute_filters": { - "type": "array", - "items": { - "$ref": "#/components/schemas/QueryCondition" - }, - "description": "The attribute filters to apply to the spans." - }, - "attributes_to_return": { - "type": "array", - "items": { - "type": "string" - }, - "description": "The attributes to return in the spans." - }, - "max_depth": { - "type": "integer", - "description": "The maximum depth of the tree." - } - }, - "additionalProperties": false, - "required": [ - "attribute_filters", - "attributes_to_return" - ], - "title": "QuerySpansRequest" - }, - "Span": { - "type": "object", - "properties": { - "span_id": { - "type": "string", - "description": "Unique identifier for the span" - }, - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace this span belongs to" - }, - "parent_span_id": { - "type": "string", - "description": "(Optional) Unique identifier for the parent span, if this is a child span" - }, - "name": { - "type": "string", - "description": "Human-readable name describing the operation this span represents" - }, - "start_time": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the operation began" - }, - "end_time": { - "type": "string", - "format": "date-time", - "description": "(Optional) Timestamp when the operation finished, if completed" - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "(Optional) Key-value pairs containing additional metadata about the span" - } - }, - "additionalProperties": false, - "required": [ - "span_id", - "trace_id", - "name", - "start_time" - ], - "title": "Span", - "description": "A span representing a single operation within a trace." - }, - "QuerySpansResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Span" - }, - "description": "List of spans matching the query criteria" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "QuerySpansResponse", - "description": "Response containing a list of spans." - }, - "SaveSpansToDatasetRequest": { - "type": "object", - "properties": { - "attribute_filters": { - "type": "array", - "items": { - "$ref": "#/components/schemas/QueryCondition" - }, - "description": "The attribute filters to apply to the spans." - }, - "attributes_to_save": { - "type": "array", - "items": { - "type": "string" - }, - "description": "The attributes to save to the dataset." - }, - "dataset_id": { - "type": "string", - "description": "The ID of the dataset to save the spans to." - }, - "max_depth": { - "type": "integer", - "description": "The maximum depth of the tree." - } - }, - "additionalProperties": false, - "required": [ - "attribute_filters", - "attributes_to_save", - "dataset_id" - ], - "title": "SaveSpansToDatasetRequest" - }, - "GetSpanTreeRequest": { - "type": "object", - "properties": { - "attributes_to_return": { - "type": "array", - "items": { - "type": "string" - }, - "description": "The attributes to return in the tree." - }, - "max_depth": { - "type": "integer", - "description": "The maximum depth of the tree." - } - }, - "additionalProperties": false, - "title": "GetSpanTreeRequest" - }, - "SpanStatus": { - "type": "string", - "enum": [ - "ok", - "error" - ], - "title": "SpanStatus", - "description": "The status of a span indicating whether it completed successfully or with an error." - }, - "SpanWithStatus": { - "type": "object", - "properties": { - "span_id": { - "type": "string", - "description": "Unique identifier for the span" - }, - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace this span belongs to" - }, - "parent_span_id": { - "type": "string", - "description": "(Optional) Unique identifier for the parent span, if this is a child span" - }, - "name": { - "type": "string", - "description": "Human-readable name describing the operation this span represents" - }, - "start_time": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the operation began" - }, - "end_time": { - "type": "string", - "format": "date-time", - "description": "(Optional) Timestamp when the operation finished, if completed" - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "(Optional) Key-value pairs containing additional metadata about the span" - }, - "status": { - "$ref": "#/components/schemas/SpanStatus", - "description": "(Optional) The current status of the span" - } - }, - "additionalProperties": false, - "required": [ - "span_id", - "trace_id", - "name", - "start_time" - ], - "title": "SpanWithStatus", - "description": "A span that includes status information." - }, - "QuerySpanTreeResponse": { - "type": "object", - "properties": { - "data": { - "type": "object", - "additionalProperties": { - "$ref": "#/components/schemas/SpanWithStatus" - }, - "description": "Dictionary mapping span IDs to spans with status information" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "QuerySpanTreeResponse", - "description": "Response containing a tree structure of spans." - }, - "QueryTracesRequest": { - "type": "object", - "properties": { - "attribute_filters": { - "type": "array", - "items": { - "$ref": "#/components/schemas/QueryCondition" - }, - "description": "The attribute filters to apply to the traces." - }, - "limit": { - "type": "integer", - "description": "The limit of traces to return." - }, - "offset": { - "type": "integer", - "description": "The offset of the traces to return." - }, - "order_by": { - "type": "array", - "items": { - "type": "string" - }, - "description": "The order by of the traces to return." - } - }, - "additionalProperties": false, - "title": "QueryTracesRequest" - }, - "Trace": { - "type": "object", - "properties": { - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace" - }, - "root_span_id": { - "type": "string", - "description": "Unique identifier for the root span that started this trace" - }, - "start_time": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the trace began" - }, - "end_time": { - "type": "string", - "format": "date-time", - "description": "(Optional) Timestamp when the trace finished, if completed" - } - }, - "additionalProperties": false, - "required": [ - "trace_id", - "root_span_id", - "start_time" - ], - "title": "Trace", - "description": "A trace representing the complete execution path of a request across multiple operations." - }, - "QueryTracesResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Trace" - }, - "description": "List of traces matching the query criteria" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "QueryTracesResponse", - "description": "Response containing a list of traces." } }, "responses": { @@ -13387,10 +12495,6 @@ "description": "OpenAI-compatible Moderations API.", "x-displayName": "Safety" }, - { - "name": "Telemetry", - "description": "" - }, { "name": "VectorIO", "description": "" @@ -13410,7 +12514,6 @@ "Models", "PostTraining (Coming Soon)", "Safety", - "Telemetry", "VectorIO" ] } diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml index 1a215b877..349f54224 100644 --- a/docs/static/deprecated-llama-stack-spec.yaml +++ b/docs/static/deprecated-llama-stack-spec.yaml @@ -2593,238 +2593,6 @@ paths: $ref: '#/components/schemas/SupervisedFineTuneRequest' required: true deprecated: true - /v1/telemetry/metrics/{metric_name}: - post: - responses: - '200': - description: A QueryMetricsResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/QueryMetricsResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Query metrics. - description: Query metrics. - parameters: - - name: metric_name - in: path - description: The name of the metric to query. - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/QueryMetricsRequest' - required: true - deprecated: true - /v1/telemetry/spans: - post: - responses: - '200': - description: A QuerySpansResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/QuerySpansResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Query spans. - description: Query spans. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/QuerySpansRequest' - required: true - deprecated: true - /v1/telemetry/spans/export: - post: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Save spans to a dataset. - description: Save spans to a dataset. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/SaveSpansToDatasetRequest' - required: true - deprecated: true - /v1/telemetry/spans/{span_id}/tree: - post: - responses: - '200': - description: A QuerySpanTreeResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/QuerySpanTreeResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Get a span tree by its ID. - description: Get a span tree by its ID. - parameters: - - name: span_id - in: path - description: The ID of the span to get the tree from. - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/GetSpanTreeRequest' - required: true - deprecated: true - /v1/telemetry/traces: - post: - responses: - '200': - description: A QueryTracesResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/QueryTracesResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Query traces. - description: Query traces. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/QueryTracesRequest' - required: true - deprecated: true - /v1/telemetry/traces/{trace_id}: - get: - responses: - '200': - description: A Trace. - content: - application/json: - schema: - $ref: '#/components/schemas/Trace' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Get a trace by its ID. - description: Get a trace by its ID. - parameters: - - name: trace_id - in: path - description: The ID of the trace to get. - required: true - schema: - type: string - deprecated: true - /v1/telemetry/traces/{trace_id}/spans/{span_id}: - get: - responses: - '200': - description: A Span. - content: - application/json: - schema: - $ref: '#/components/schemas/Span' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Get a span by its ID. - description: Get a span by its ID. - parameters: - - name: trace_id - in: path - description: >- - The ID of the trace to get the span from. - required: true - schema: - type: string - - name: span_id - in: path - description: The ID of the span to get. - required: true - schema: - type: string - deprecated: true jsonSchemaDialect: >- https://json-schema.org/draft/2020-12/schema components: @@ -9510,434 +9278,6 @@ components: - hyperparam_search_config - logger_config title: SupervisedFineTuneRequest - QueryMetricsRequest: - type: object - properties: - start_time: - type: integer - description: The start time of the metric to query. - end_time: - type: integer - description: The end time of the metric to query. - granularity: - type: string - description: The granularity of the metric to query. - query_type: - type: string - enum: - - range - - instant - description: The type of query to perform. - label_matchers: - type: array - items: - type: object - properties: - name: - type: string - description: The name of the label to match - value: - type: string - description: The value to match against - operator: - type: string - enum: - - '=' - - '!=' - - =~ - - '!~' - description: >- - The comparison operator to use for matching - default: '=' - additionalProperties: false - required: - - name - - value - - operator - title: MetricLabelMatcher - description: >- - A matcher for filtering metrics by label values. - description: >- - The label matchers to apply to the metric. - additionalProperties: false - required: - - start_time - - query_type - title: QueryMetricsRequest - MetricDataPoint: - type: object - properties: - timestamp: - type: integer - description: >- - Unix timestamp when the metric value was recorded - value: - type: number - description: >- - The numeric value of the metric at this timestamp - unit: - type: string - additionalProperties: false - required: - - timestamp - - value - - unit - title: MetricDataPoint - description: >- - A single data point in a metric time series. - MetricLabel: - type: object - properties: - name: - type: string - description: The name of the label - value: - type: string - description: The value of the label - additionalProperties: false - required: - - name - - value - title: MetricLabel - description: A label associated with a metric. - MetricSeries: - type: object - properties: - metric: - type: string - description: The name of the metric - labels: - type: array - items: - $ref: '#/components/schemas/MetricLabel' - description: >- - List of labels associated with this metric series - values: - type: array - items: - $ref: '#/components/schemas/MetricDataPoint' - description: >- - List of data points in chronological order - additionalProperties: false - required: - - metric - - labels - - values - title: MetricSeries - description: A time series of metric data points. - QueryMetricsResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/MetricSeries' - description: >- - List of metric series matching the query criteria - additionalProperties: false - required: - - data - title: QueryMetricsResponse - description: >- - Response containing metric time series data. - QueryCondition: - type: object - properties: - key: - type: string - description: The attribute key to filter on - op: - $ref: '#/components/schemas/QueryConditionOp' - description: The comparison operator to apply - value: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The value to compare against - additionalProperties: false - required: - - key - - op - - value - title: QueryCondition - description: A condition for filtering query results. - QueryConditionOp: - type: string - enum: - - eq - - ne - - gt - - lt - title: QueryConditionOp - description: >- - Comparison operators for query conditions. - QuerySpansRequest: - type: object - properties: - attribute_filters: - type: array - items: - $ref: '#/components/schemas/QueryCondition' - description: >- - The attribute filters to apply to the spans. - attributes_to_return: - type: array - items: - type: string - description: The attributes to return in the spans. - max_depth: - type: integer - description: The maximum depth of the tree. - additionalProperties: false - required: - - attribute_filters - - attributes_to_return - title: QuerySpansRequest - Span: - type: object - properties: - span_id: - type: string - description: Unique identifier for the span - trace_id: - type: string - description: >- - Unique identifier for the trace this span belongs to - parent_span_id: - type: string - description: >- - (Optional) Unique identifier for the parent span, if this is a child span - name: - type: string - description: >- - Human-readable name describing the operation this span represents - start_time: - type: string - format: date-time - description: Timestamp when the operation began - end_time: - type: string - format: date-time - description: >- - (Optional) Timestamp when the operation finished, if completed - attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Key-value pairs containing additional metadata about the span - additionalProperties: false - required: - - span_id - - trace_id - - name - - start_time - title: Span - description: >- - A span representing a single operation within a trace. - QuerySpansResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/Span' - description: >- - List of spans matching the query criteria - additionalProperties: false - required: - - data - title: QuerySpansResponse - description: Response containing a list of spans. - SaveSpansToDatasetRequest: - type: object - properties: - attribute_filters: - type: array - items: - $ref: '#/components/schemas/QueryCondition' - description: >- - The attribute filters to apply to the spans. - attributes_to_save: - type: array - items: - type: string - description: The attributes to save to the dataset. - dataset_id: - type: string - description: >- - The ID of the dataset to save the spans to. - max_depth: - type: integer - description: The maximum depth of the tree. - additionalProperties: false - required: - - attribute_filters - - attributes_to_save - - dataset_id - title: SaveSpansToDatasetRequest - GetSpanTreeRequest: - type: object - properties: - attributes_to_return: - type: array - items: - type: string - description: The attributes to return in the tree. - max_depth: - type: integer - description: The maximum depth of the tree. - additionalProperties: false - title: GetSpanTreeRequest - SpanStatus: - type: string - enum: - - ok - - error - title: SpanStatus - description: >- - The status of a span indicating whether it completed successfully or with - an error. - SpanWithStatus: - type: object - properties: - span_id: - type: string - description: Unique identifier for the span - trace_id: - type: string - description: >- - Unique identifier for the trace this span belongs to - parent_span_id: - type: string - description: >- - (Optional) Unique identifier for the parent span, if this is a child span - name: - type: string - description: >- - Human-readable name describing the operation this span represents - start_time: - type: string - format: date-time - description: Timestamp when the operation began - end_time: - type: string - format: date-time - description: >- - (Optional) Timestamp when the operation finished, if completed - attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Key-value pairs containing additional metadata about the span - status: - $ref: '#/components/schemas/SpanStatus' - description: >- - (Optional) The current status of the span - additionalProperties: false - required: - - span_id - - trace_id - - name - - start_time - title: SpanWithStatus - description: A span that includes status information. - QuerySpanTreeResponse: - type: object - properties: - data: - type: object - additionalProperties: - $ref: '#/components/schemas/SpanWithStatus' - description: >- - Dictionary mapping span IDs to spans with status information - additionalProperties: false - required: - - data - title: QuerySpanTreeResponse - description: >- - Response containing a tree structure of spans. - QueryTracesRequest: - type: object - properties: - attribute_filters: - type: array - items: - $ref: '#/components/schemas/QueryCondition' - description: >- - The attribute filters to apply to the traces. - limit: - type: integer - description: The limit of traces to return. - offset: - type: integer - description: The offset of the traces to return. - order_by: - type: array - items: - type: string - description: The order by of the traces to return. - additionalProperties: false - title: QueryTracesRequest - Trace: - type: object - properties: - trace_id: - type: string - description: Unique identifier for the trace - root_span_id: - type: string - description: >- - Unique identifier for the root span that started this trace - start_time: - type: string - format: date-time - description: Timestamp when the trace began - end_time: - type: string - format: date-time - description: >- - (Optional) Timestamp when the trace finished, if completed - additionalProperties: false - required: - - trace_id - - root_span_id - - start_time - title: Trace - description: >- - A trace representing the complete execution path of a request across multiple - operations. - QueryTracesResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/Trace' - description: >- - List of traces matching the query criteria - additionalProperties: false - required: - - data - title: QueryTracesResponse - description: Response containing a list of traces. responses: BadRequest400: description: The request was invalid or malformed @@ -10043,8 +9383,6 @@ tags: - name: Safety description: OpenAI-compatible Moderations API. x-displayName: Safety - - name: Telemetry - description: '' - name: VectorIO description: '' x-tagGroups: @@ -10060,5 +9398,4 @@ x-tagGroups: - Models - PostTraining (Coming Soon) - Safety - - Telemetry - VectorIO diff --git a/docs/static/experimental-llama-stack-spec.html b/docs/static/experimental-llama-stack-spec.html index a84226c05..e3edf2ffc 100644 --- a/docs/static/experimental-llama-stack-spec.html +++ b/docs/static/experimental-llama-stack-spec.html @@ -1711,343 +1711,6 @@ }, "deprecated": false } - }, - "/v1alpha/telemetry/metrics/{metric_name}": { - "post": { - "responses": { - "200": { - "description": "A QueryMetricsResponse.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QueryMetricsResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Query metrics.", - "description": "Query metrics.", - "parameters": [ - { - "name": "metric_name", - "in": "path", - "description": "The name of the metric to query.", - "required": true, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QueryMetricsRequest" - } - } - }, - "required": true - }, - "deprecated": false - } - }, - "/v1alpha/telemetry/spans": { - "post": { - "responses": { - "200": { - "description": "A QuerySpansResponse.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QuerySpansResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Query spans.", - "description": "Query spans.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QuerySpansRequest" - } - } - }, - "required": true - }, - "deprecated": false - } - }, - "/v1alpha/telemetry/spans/export": { - "post": { - "responses": { - "200": { - "description": "OK" - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Save spans to a dataset.", - "description": "Save spans to a dataset.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/SaveSpansToDatasetRequest" - } - } - }, - "required": true - }, - "deprecated": false - } - }, - "/v1alpha/telemetry/spans/{span_id}/tree": { - "post": { - "responses": { - "200": { - "description": "A QuerySpanTreeResponse.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QuerySpanTreeResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Get a span tree by its ID.", - "description": "Get a span tree by its ID.", - "parameters": [ - { - "name": "span_id", - "in": "path", - "description": "The ID of the span to get the tree from.", - "required": true, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/GetSpanTreeRequest" - } - } - }, - "required": true - }, - "deprecated": false - } - }, - "/v1alpha/telemetry/traces": { - "post": { - "responses": { - "200": { - "description": "A QueryTracesResponse.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QueryTracesResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Query traces.", - "description": "Query traces.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QueryTracesRequest" - } - } - }, - "required": true - }, - "deprecated": false - } - }, - "/v1alpha/telemetry/traces/{trace_id}": { - "get": { - "responses": { - "200": { - "description": "A Trace.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Trace" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Get a trace by its ID.", - "description": "Get a trace by its ID.", - "parameters": [ - { - "name": "trace_id", - "in": "path", - "description": "The ID of the trace to get.", - "required": true, - "schema": { - "type": "string" - } - } - ], - "deprecated": false - } - }, - "/v1alpha/telemetry/traces/{trace_id}/spans/{span_id}": { - "get": { - "responses": { - "200": { - "description": "A Span.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Span" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Get a span by its ID.", - "description": "Get a span by its ID.", - "parameters": [ - { - "name": "trace_id", - "in": "path", - "description": "The ID of the trace to get the span from.", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "span_id", - "in": "path", - "description": "The ID of the span to get.", - "required": true, - "schema": { - "type": "string" - } - } - ], - "deprecated": false - } } }, "jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema", @@ -5765,561 +5428,6 @@ "logger_config" ], "title": "SupervisedFineTuneRequest" - }, - "QueryMetricsRequest": { - "type": "object", - "properties": { - "start_time": { - "type": "integer", - "description": "The start time of the metric to query." - }, - "end_time": { - "type": "integer", - "description": "The end time of the metric to query." - }, - "granularity": { - "type": "string", - "description": "The granularity of the metric to query." - }, - "query_type": { - "type": "string", - "enum": [ - "range", - "instant" - ], - "description": "The type of query to perform." - }, - "label_matchers": { - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string", - "description": "The name of the label to match" - }, - "value": { - "type": "string", - "description": "The value to match against" - }, - "operator": { - "type": "string", - "enum": [ - "=", - "!=", - "=~", - "!~" - ], - "description": "The comparison operator to use for matching", - "default": "=" - } - }, - "additionalProperties": false, - "required": [ - "name", - "value", - "operator" - ], - "title": "MetricLabelMatcher", - "description": "A matcher for filtering metrics by label values." - }, - "description": "The label matchers to apply to the metric." - } - }, - "additionalProperties": false, - "required": [ - "start_time", - "query_type" - ], - "title": "QueryMetricsRequest" - }, - "MetricDataPoint": { - "type": "object", - "properties": { - "timestamp": { - "type": "integer", - "description": "Unix timestamp when the metric value was recorded" - }, - "value": { - "type": "number", - "description": "The numeric value of the metric at this timestamp" - }, - "unit": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "timestamp", - "value", - "unit" - ], - "title": "MetricDataPoint", - "description": "A single data point in a metric time series." - }, - "MetricLabel": { - "type": "object", - "properties": { - "name": { - "type": "string", - "description": "The name of the label" - }, - "value": { - "type": "string", - "description": "The value of the label" - } - }, - "additionalProperties": false, - "required": [ - "name", - "value" - ], - "title": "MetricLabel", - "description": "A label associated with a metric." - }, - "MetricSeries": { - "type": "object", - "properties": { - "metric": { - "type": "string", - "description": "The name of the metric" - }, - "labels": { - "type": "array", - "items": { - "$ref": "#/components/schemas/MetricLabel" - }, - "description": "List of labels associated with this metric series" - }, - "values": { - "type": "array", - "items": { - "$ref": "#/components/schemas/MetricDataPoint" - }, - "description": "List of data points in chronological order" - } - }, - "additionalProperties": false, - "required": [ - "metric", - "labels", - "values" - ], - "title": "MetricSeries", - "description": "A time series of metric data points." - }, - "QueryMetricsResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/MetricSeries" - }, - "description": "List of metric series matching the query criteria" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "QueryMetricsResponse", - "description": "Response containing metric time series data." - }, - "QueryCondition": { - "type": "object", - "properties": { - "key": { - "type": "string", - "description": "The attribute key to filter on" - }, - "op": { - "$ref": "#/components/schemas/QueryConditionOp", - "description": "The comparison operator to apply" - }, - "value": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ], - "description": "The value to compare against" - } - }, - "additionalProperties": false, - "required": [ - "key", - "op", - "value" - ], - "title": "QueryCondition", - "description": "A condition for filtering query results." - }, - "QueryConditionOp": { - "type": "string", - "enum": [ - "eq", - "ne", - "gt", - "lt" - ], - "title": "QueryConditionOp", - "description": "Comparison operators for query conditions." - }, - "QuerySpansRequest": { - "type": "object", - "properties": { - "attribute_filters": { - "type": "array", - "items": { - "$ref": "#/components/schemas/QueryCondition" - }, - "description": "The attribute filters to apply to the spans." - }, - "attributes_to_return": { - "type": "array", - "items": { - "type": "string" - }, - "description": "The attributes to return in the spans." - }, - "max_depth": { - "type": "integer", - "description": "The maximum depth of the tree." - } - }, - "additionalProperties": false, - "required": [ - "attribute_filters", - "attributes_to_return" - ], - "title": "QuerySpansRequest" - }, - "Span": { - "type": "object", - "properties": { - "span_id": { - "type": "string", - "description": "Unique identifier for the span" - }, - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace this span belongs to" - }, - "parent_span_id": { - "type": "string", - "description": "(Optional) Unique identifier for the parent span, if this is a child span" - }, - "name": { - "type": "string", - "description": "Human-readable name describing the operation this span represents" - }, - "start_time": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the operation began" - }, - "end_time": { - "type": "string", - "format": "date-time", - "description": "(Optional) Timestamp when the operation finished, if completed" - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "(Optional) Key-value pairs containing additional metadata about the span" - } - }, - "additionalProperties": false, - "required": [ - "span_id", - "trace_id", - "name", - "start_time" - ], - "title": "Span", - "description": "A span representing a single operation within a trace." - }, - "QuerySpansResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Span" - }, - "description": "List of spans matching the query criteria" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "QuerySpansResponse", - "description": "Response containing a list of spans." - }, - "SaveSpansToDatasetRequest": { - "type": "object", - "properties": { - "attribute_filters": { - "type": "array", - "items": { - "$ref": "#/components/schemas/QueryCondition" - }, - "description": "The attribute filters to apply to the spans." - }, - "attributes_to_save": { - "type": "array", - "items": { - "type": "string" - }, - "description": "The attributes to save to the dataset." - }, - "dataset_id": { - "type": "string", - "description": "The ID of the dataset to save the spans to." - }, - "max_depth": { - "type": "integer", - "description": "The maximum depth of the tree." - } - }, - "additionalProperties": false, - "required": [ - "attribute_filters", - "attributes_to_save", - "dataset_id" - ], - "title": "SaveSpansToDatasetRequest" - }, - "GetSpanTreeRequest": { - "type": "object", - "properties": { - "attributes_to_return": { - "type": "array", - "items": { - "type": "string" - }, - "description": "The attributes to return in the tree." - }, - "max_depth": { - "type": "integer", - "description": "The maximum depth of the tree." - } - }, - "additionalProperties": false, - "title": "GetSpanTreeRequest" - }, - "SpanStatus": { - "type": "string", - "enum": [ - "ok", - "error" - ], - "title": "SpanStatus", - "description": "The status of a span indicating whether it completed successfully or with an error." - }, - "SpanWithStatus": { - "type": "object", - "properties": { - "span_id": { - "type": "string", - "description": "Unique identifier for the span" - }, - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace this span belongs to" - }, - "parent_span_id": { - "type": "string", - "description": "(Optional) Unique identifier for the parent span, if this is a child span" - }, - "name": { - "type": "string", - "description": "Human-readable name describing the operation this span represents" - }, - "start_time": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the operation began" - }, - "end_time": { - "type": "string", - "format": "date-time", - "description": "(Optional) Timestamp when the operation finished, if completed" - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "(Optional) Key-value pairs containing additional metadata about the span" - }, - "status": { - "$ref": "#/components/schemas/SpanStatus", - "description": "(Optional) The current status of the span" - } - }, - "additionalProperties": false, - "required": [ - "span_id", - "trace_id", - "name", - "start_time" - ], - "title": "SpanWithStatus", - "description": "A span that includes status information." - }, - "QuerySpanTreeResponse": { - "type": "object", - "properties": { - "data": { - "type": "object", - "additionalProperties": { - "$ref": "#/components/schemas/SpanWithStatus" - }, - "description": "Dictionary mapping span IDs to spans with status information" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "QuerySpanTreeResponse", - "description": "Response containing a tree structure of spans." - }, - "QueryTracesRequest": { - "type": "object", - "properties": { - "attribute_filters": { - "type": "array", - "items": { - "$ref": "#/components/schemas/QueryCondition" - }, - "description": "The attribute filters to apply to the traces." - }, - "limit": { - "type": "integer", - "description": "The limit of traces to return." - }, - "offset": { - "type": "integer", - "description": "The offset of the traces to return." - }, - "order_by": { - "type": "array", - "items": { - "type": "string" - }, - "description": "The order by of the traces to return." - } - }, - "additionalProperties": false, - "title": "QueryTracesRequest" - }, - "Trace": { - "type": "object", - "properties": { - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace" - }, - "root_span_id": { - "type": "string", - "description": "Unique identifier for the root span that started this trace" - }, - "start_time": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the trace began" - }, - "end_time": { - "type": "string", - "format": "date-time", - "description": "(Optional) Timestamp when the trace finished, if completed" - } - }, - "additionalProperties": false, - "required": [ - "trace_id", - "root_span_id", - "start_time" - ], - "title": "Trace", - "description": "A trace representing the complete execution path of a request across multiple operations." - }, - "QueryTracesResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Trace" - }, - "description": "List of traces matching the query criteria" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "QueryTracesResponse", - "description": "Response containing a list of traces." } }, "responses": { @@ -6416,10 +5524,6 @@ { "name": "PostTraining (Coming Soon)", "description": "" - }, - { - "name": "Telemetry", - "description": "" } ], "x-tagGroups": [ @@ -6431,8 +5535,7 @@ "DatasetIO", "Datasets", "Eval", - "PostTraining (Coming Soon)", - "Telemetry" + "PostTraining (Coming Soon)" ] } ] diff --git a/docs/static/experimental-llama-stack-spec.yaml b/docs/static/experimental-llama-stack-spec.yaml index a08c0cc87..7ee5a6cdf 100644 --- a/docs/static/experimental-llama-stack-spec.yaml +++ b/docs/static/experimental-llama-stack-spec.yaml @@ -1224,238 +1224,6 @@ paths: $ref: '#/components/schemas/SupervisedFineTuneRequest' required: true deprecated: false - /v1alpha/telemetry/metrics/{metric_name}: - post: - responses: - '200': - description: A QueryMetricsResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/QueryMetricsResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Query metrics. - description: Query metrics. - parameters: - - name: metric_name - in: path - description: The name of the metric to query. - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/QueryMetricsRequest' - required: true - deprecated: false - /v1alpha/telemetry/spans: - post: - responses: - '200': - description: A QuerySpansResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/QuerySpansResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Query spans. - description: Query spans. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/QuerySpansRequest' - required: true - deprecated: false - /v1alpha/telemetry/spans/export: - post: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Save spans to a dataset. - description: Save spans to a dataset. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/SaveSpansToDatasetRequest' - required: true - deprecated: false - /v1alpha/telemetry/spans/{span_id}/tree: - post: - responses: - '200': - description: A QuerySpanTreeResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/QuerySpanTreeResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Get a span tree by its ID. - description: Get a span tree by its ID. - parameters: - - name: span_id - in: path - description: The ID of the span to get the tree from. - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/GetSpanTreeRequest' - required: true - deprecated: false - /v1alpha/telemetry/traces: - post: - responses: - '200': - description: A QueryTracesResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/QueryTracesResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Query traces. - description: Query traces. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/QueryTracesRequest' - required: true - deprecated: false - /v1alpha/telemetry/traces/{trace_id}: - get: - responses: - '200': - description: A Trace. - content: - application/json: - schema: - $ref: '#/components/schemas/Trace' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Get a trace by its ID. - description: Get a trace by its ID. - parameters: - - name: trace_id - in: path - description: The ID of the trace to get. - required: true - schema: - type: string - deprecated: false - /v1alpha/telemetry/traces/{trace_id}/spans/{span_id}: - get: - responses: - '200': - description: A Span. - content: - application/json: - schema: - $ref: '#/components/schemas/Span' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Get a span by its ID. - description: Get a span by its ID. - parameters: - - name: trace_id - in: path - description: >- - The ID of the trace to get the span from. - required: true - schema: - type: string - - name: span_id - in: path - description: The ID of the span to get. - required: true - schema: - type: string - deprecated: false jsonSchemaDialect: >- https://json-schema.org/draft/2020-12/schema components: @@ -4249,434 +4017,6 @@ components: - hyperparam_search_config - logger_config title: SupervisedFineTuneRequest - QueryMetricsRequest: - type: object - properties: - start_time: - type: integer - description: The start time of the metric to query. - end_time: - type: integer - description: The end time of the metric to query. - granularity: - type: string - description: The granularity of the metric to query. - query_type: - type: string - enum: - - range - - instant - description: The type of query to perform. - label_matchers: - type: array - items: - type: object - properties: - name: - type: string - description: The name of the label to match - value: - type: string - description: The value to match against - operator: - type: string - enum: - - '=' - - '!=' - - =~ - - '!~' - description: >- - The comparison operator to use for matching - default: '=' - additionalProperties: false - required: - - name - - value - - operator - title: MetricLabelMatcher - description: >- - A matcher for filtering metrics by label values. - description: >- - The label matchers to apply to the metric. - additionalProperties: false - required: - - start_time - - query_type - title: QueryMetricsRequest - MetricDataPoint: - type: object - properties: - timestamp: - type: integer - description: >- - Unix timestamp when the metric value was recorded - value: - type: number - description: >- - The numeric value of the metric at this timestamp - unit: - type: string - additionalProperties: false - required: - - timestamp - - value - - unit - title: MetricDataPoint - description: >- - A single data point in a metric time series. - MetricLabel: - type: object - properties: - name: - type: string - description: The name of the label - value: - type: string - description: The value of the label - additionalProperties: false - required: - - name - - value - title: MetricLabel - description: A label associated with a metric. - MetricSeries: - type: object - properties: - metric: - type: string - description: The name of the metric - labels: - type: array - items: - $ref: '#/components/schemas/MetricLabel' - description: >- - List of labels associated with this metric series - values: - type: array - items: - $ref: '#/components/schemas/MetricDataPoint' - description: >- - List of data points in chronological order - additionalProperties: false - required: - - metric - - labels - - values - title: MetricSeries - description: A time series of metric data points. - QueryMetricsResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/MetricSeries' - description: >- - List of metric series matching the query criteria - additionalProperties: false - required: - - data - title: QueryMetricsResponse - description: >- - Response containing metric time series data. - QueryCondition: - type: object - properties: - key: - type: string - description: The attribute key to filter on - op: - $ref: '#/components/schemas/QueryConditionOp' - description: The comparison operator to apply - value: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The value to compare against - additionalProperties: false - required: - - key - - op - - value - title: QueryCondition - description: A condition for filtering query results. - QueryConditionOp: - type: string - enum: - - eq - - ne - - gt - - lt - title: QueryConditionOp - description: >- - Comparison operators for query conditions. - QuerySpansRequest: - type: object - properties: - attribute_filters: - type: array - items: - $ref: '#/components/schemas/QueryCondition' - description: >- - The attribute filters to apply to the spans. - attributes_to_return: - type: array - items: - type: string - description: The attributes to return in the spans. - max_depth: - type: integer - description: The maximum depth of the tree. - additionalProperties: false - required: - - attribute_filters - - attributes_to_return - title: QuerySpansRequest - Span: - type: object - properties: - span_id: - type: string - description: Unique identifier for the span - trace_id: - type: string - description: >- - Unique identifier for the trace this span belongs to - parent_span_id: - type: string - description: >- - (Optional) Unique identifier for the parent span, if this is a child span - name: - type: string - description: >- - Human-readable name describing the operation this span represents - start_time: - type: string - format: date-time - description: Timestamp when the operation began - end_time: - type: string - format: date-time - description: >- - (Optional) Timestamp when the operation finished, if completed - attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Key-value pairs containing additional metadata about the span - additionalProperties: false - required: - - span_id - - trace_id - - name - - start_time - title: Span - description: >- - A span representing a single operation within a trace. - QuerySpansResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/Span' - description: >- - List of spans matching the query criteria - additionalProperties: false - required: - - data - title: QuerySpansResponse - description: Response containing a list of spans. - SaveSpansToDatasetRequest: - type: object - properties: - attribute_filters: - type: array - items: - $ref: '#/components/schemas/QueryCondition' - description: >- - The attribute filters to apply to the spans. - attributes_to_save: - type: array - items: - type: string - description: The attributes to save to the dataset. - dataset_id: - type: string - description: >- - The ID of the dataset to save the spans to. - max_depth: - type: integer - description: The maximum depth of the tree. - additionalProperties: false - required: - - attribute_filters - - attributes_to_save - - dataset_id - title: SaveSpansToDatasetRequest - GetSpanTreeRequest: - type: object - properties: - attributes_to_return: - type: array - items: - type: string - description: The attributes to return in the tree. - max_depth: - type: integer - description: The maximum depth of the tree. - additionalProperties: false - title: GetSpanTreeRequest - SpanStatus: - type: string - enum: - - ok - - error - title: SpanStatus - description: >- - The status of a span indicating whether it completed successfully or with - an error. - SpanWithStatus: - type: object - properties: - span_id: - type: string - description: Unique identifier for the span - trace_id: - type: string - description: >- - Unique identifier for the trace this span belongs to - parent_span_id: - type: string - description: >- - (Optional) Unique identifier for the parent span, if this is a child span - name: - type: string - description: >- - Human-readable name describing the operation this span represents - start_time: - type: string - format: date-time - description: Timestamp when the operation began - end_time: - type: string - format: date-time - description: >- - (Optional) Timestamp when the operation finished, if completed - attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Key-value pairs containing additional metadata about the span - status: - $ref: '#/components/schemas/SpanStatus' - description: >- - (Optional) The current status of the span - additionalProperties: false - required: - - span_id - - trace_id - - name - - start_time - title: SpanWithStatus - description: A span that includes status information. - QuerySpanTreeResponse: - type: object - properties: - data: - type: object - additionalProperties: - $ref: '#/components/schemas/SpanWithStatus' - description: >- - Dictionary mapping span IDs to spans with status information - additionalProperties: false - required: - - data - title: QuerySpanTreeResponse - description: >- - Response containing a tree structure of spans. - QueryTracesRequest: - type: object - properties: - attribute_filters: - type: array - items: - $ref: '#/components/schemas/QueryCondition' - description: >- - The attribute filters to apply to the traces. - limit: - type: integer - description: The limit of traces to return. - offset: - type: integer - description: The offset of the traces to return. - order_by: - type: array - items: - type: string - description: The order by of the traces to return. - additionalProperties: false - title: QueryTracesRequest - Trace: - type: object - properties: - trace_id: - type: string - description: Unique identifier for the trace - root_span_id: - type: string - description: >- - Unique identifier for the root span that started this trace - start_time: - type: string - format: date-time - description: Timestamp when the trace began - end_time: - type: string - format: date-time - description: >- - (Optional) Timestamp when the trace finished, if completed - additionalProperties: false - required: - - trace_id - - root_span_id - - start_time - title: Trace - description: >- - A trace representing the complete execution path of a request across multiple - operations. - QueryTracesResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/Trace' - description: >- - List of traces matching the query criteria - additionalProperties: false - required: - - data - title: QueryTracesResponse - description: Response containing a list of traces. responses: BadRequest400: description: The request was invalid or malformed @@ -4784,8 +4124,6 @@ tags: Llama Stack Evaluation API for running evaluations on model and agent candidates. - name: PostTraining (Coming Soon) description: '' - - name: Telemetry - description: '' x-tagGroups: - name: Operations tags: @@ -4795,4 +4133,3 @@ x-tagGroups: - Datasets - Eval - PostTraining (Coming Soon) - - Telemetry diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html index 9cd526176..8ac30d548 100644 --- a/docs/static/llama-stack-spec.html +++ b/docs/static/llama-stack-spec.html @@ -2525,44 +2525,6 @@ "deprecated": false } }, - "/v1/telemetry/events": { - "post": { - "responses": { - "200": { - "description": "OK" - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Log an event.", - "description": "Log an event.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/LogEventRequest" - } - } - }, - "required": true - }, - "deprecated": false - } - }, "/v1/tool-runtime/invoke": { "post": { "responses": { @@ -10364,354 +10326,6 @@ "title": "SyntheticDataGenerationResponse", "description": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold." }, - "Event": { - "oneOf": [ - { - "$ref": "#/components/schemas/UnstructuredLogEvent" - }, - { - "$ref": "#/components/schemas/MetricEvent" - }, - { - "$ref": "#/components/schemas/StructuredLogEvent" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "unstructured_log": "#/components/schemas/UnstructuredLogEvent", - "metric": "#/components/schemas/MetricEvent", - "structured_log": "#/components/schemas/StructuredLogEvent" - } - } - }, - "EventType": { - "type": "string", - "enum": [ - "unstructured_log", - "structured_log", - "metric" - ], - "title": "EventType", - "description": "The type of telemetry event being logged." - }, - "LogSeverity": { - "type": "string", - "enum": [ - "verbose", - "debug", - "info", - "warn", - "error", - "critical" - ], - "title": "LogSeverity", - "description": "The severity level of a log message." - }, - "MetricEvent": { - "type": "object", - "properties": { - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace this event belongs to" - }, - "span_id": { - "type": "string", - "description": "Unique identifier for the span this event belongs to" - }, - "timestamp": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the event occurred" - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "integer" - }, - { - "type": "number" - }, - { - "type": "boolean" - }, - { - "type": "null" - } - ] - }, - "description": "(Optional) Key-value pairs containing additional metadata about the event" - }, - "type": { - "$ref": "#/components/schemas/EventType", - "const": "metric", - "default": "metric", - "description": "Event type identifier set to METRIC" - }, - "metric": { - "type": "string", - "description": "The name of the metric being measured" - }, - "value": { - "oneOf": [ - { - "type": "integer" - }, - { - "type": "number" - } - ], - "description": "The numeric value of the metric measurement" - }, - "unit": { - "type": "string", - "description": "The unit of measurement for the metric value" - } - }, - "additionalProperties": false, - "required": [ - "trace_id", - "span_id", - "timestamp", - "type", - "metric", - "value", - "unit" - ], - "title": "MetricEvent", - "description": "A metric event containing a measured value." - }, - "SpanEndPayload": { - "type": "object", - "properties": { - "type": { - "$ref": "#/components/schemas/StructuredLogType", - "const": "span_end", - "default": "span_end", - "description": "Payload type identifier set to SPAN_END" - }, - "status": { - "$ref": "#/components/schemas/SpanStatus", - "description": "The final status of the span indicating success or failure" - } - }, - "additionalProperties": false, - "required": [ - "type", - "status" - ], - "title": "SpanEndPayload", - "description": "Payload for a span end event." - }, - "SpanStartPayload": { - "type": "object", - "properties": { - "type": { - "$ref": "#/components/schemas/StructuredLogType", - "const": "span_start", - "default": "span_start", - "description": "Payload type identifier set to SPAN_START" - }, - "name": { - "type": "string", - "description": "Human-readable name describing the operation this span represents" - }, - "parent_span_id": { - "type": "string", - "description": "(Optional) Unique identifier for the parent span, if this is a child span" - } - }, - "additionalProperties": false, - "required": [ - "type", - "name" - ], - "title": "SpanStartPayload", - "description": "Payload for a span start event." - }, - "SpanStatus": { - "type": "string", - "enum": [ - "ok", - "error" - ], - "title": "SpanStatus", - "description": "The status of a span indicating whether it completed successfully or with an error." - }, - "StructuredLogEvent": { - "type": "object", - "properties": { - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace this event belongs to" - }, - "span_id": { - "type": "string", - "description": "Unique identifier for the span this event belongs to" - }, - "timestamp": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the event occurred" - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "integer" - }, - { - "type": "number" - }, - { - "type": "boolean" - }, - { - "type": "null" - } - ] - }, - "description": "(Optional) Key-value pairs containing additional metadata about the event" - }, - "type": { - "$ref": "#/components/schemas/EventType", - "const": "structured_log", - "default": "structured_log", - "description": "Event type identifier set to STRUCTURED_LOG" - }, - "payload": { - "oneOf": [ - { - "$ref": "#/components/schemas/SpanStartPayload" - }, - { - "$ref": "#/components/schemas/SpanEndPayload" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "span_start": "#/components/schemas/SpanStartPayload", - "span_end": "#/components/schemas/SpanEndPayload" - } - }, - "description": "The structured payload data for the log event" - } - }, - "additionalProperties": false, - "required": [ - "trace_id", - "span_id", - "timestamp", - "type", - "payload" - ], - "title": "StructuredLogEvent", - "description": "A structured log event containing typed payload data." - }, - "StructuredLogType": { - "type": "string", - "enum": [ - "span_start", - "span_end" - ], - "title": "StructuredLogType", - "description": "The type of structured log event payload." - }, - "UnstructuredLogEvent": { - "type": "object", - "properties": { - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace this event belongs to" - }, - "span_id": { - "type": "string", - "description": "Unique identifier for the span this event belongs to" - }, - "timestamp": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the event occurred" - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "integer" - }, - { - "type": "number" - }, - { - "type": "boolean" - }, - { - "type": "null" - } - ] - }, - "description": "(Optional) Key-value pairs containing additional metadata about the event" - }, - "type": { - "$ref": "#/components/schemas/EventType", - "const": "unstructured_log", - "default": "unstructured_log", - "description": "Event type identifier set to UNSTRUCTURED_LOG" - }, - "message": { - "type": "string", - "description": "The log message text" - }, - "severity": { - "$ref": "#/components/schemas/LogSeverity", - "description": "The severity level of the log message" - } - }, - "additionalProperties": false, - "required": [ - "trace_id", - "span_id", - "timestamp", - "type", - "message", - "severity" - ], - "title": "UnstructuredLogEvent", - "description": "An unstructured log event containing a simple text message." - }, - "LogEventRequest": { - "type": "object", - "properties": { - "event": { - "$ref": "#/components/schemas/Event", - "description": "The event to log." - }, - "ttl_seconds": { - "type": "integer", - "description": "The time to live of the event." - } - }, - "additionalProperties": false, - "required": [ - "event", - "ttl_seconds" - ], - "title": "LogEventRequest" - }, "InvokeToolRequest": { "type": "object", "properties": { @@ -12962,10 +12576,6 @@ "name": "SyntheticDataGeneration (Coming Soon)", "description": "" }, - { - "name": "Telemetry", - "description": "" - }, { "name": "ToolGroups", "description": "" @@ -13000,7 +12610,6 @@ "ScoringFunctions", "Shields", "SyntheticDataGeneration (Coming Soon)", - "Telemetry", "ToolGroups", "ToolRuntime", "VectorDBs", diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index 66ce8e38a..6b6d8a83d 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -1944,33 +1944,6 @@ paths: $ref: '#/components/schemas/SyntheticDataGenerateRequest' required: true deprecated: false - /v1/telemetry/events: - post: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Log an event. - description: Log an event. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/LogEventRequest' - required: true - deprecated: false /v1/tool-runtime/invoke: post: responses: @@ -7840,267 +7813,6 @@ components: description: >- Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold. - Event: - oneOf: - - $ref: '#/components/schemas/UnstructuredLogEvent' - - $ref: '#/components/schemas/MetricEvent' - - $ref: '#/components/schemas/StructuredLogEvent' - discriminator: - propertyName: type - mapping: - unstructured_log: '#/components/schemas/UnstructuredLogEvent' - metric: '#/components/schemas/MetricEvent' - structured_log: '#/components/schemas/StructuredLogEvent' - EventType: - type: string - enum: - - unstructured_log - - structured_log - - metric - title: EventType - description: >- - The type of telemetry event being logged. - LogSeverity: - type: string - enum: - - verbose - - debug - - info - - warn - - error - - critical - title: LogSeverity - description: The severity level of a log message. - MetricEvent: - type: object - properties: - trace_id: - type: string - description: >- - Unique identifier for the trace this event belongs to - span_id: - type: string - description: >- - Unique identifier for the span this event belongs to - timestamp: - type: string - format: date-time - description: Timestamp when the event occurred - attributes: - type: object - additionalProperties: - oneOf: - - type: string - - type: integer - - type: number - - type: boolean - - type: 'null' - description: >- - (Optional) Key-value pairs containing additional metadata about the event - type: - $ref: '#/components/schemas/EventType' - const: metric - default: metric - description: Event type identifier set to METRIC - metric: - type: string - description: The name of the metric being measured - value: - oneOf: - - type: integer - - type: number - description: >- - The numeric value of the metric measurement - unit: - type: string - description: >- - The unit of measurement for the metric value - additionalProperties: false - required: - - trace_id - - span_id - - timestamp - - type - - metric - - value - - unit - title: MetricEvent - description: >- - A metric event containing a measured value. - SpanEndPayload: - type: object - properties: - type: - $ref: '#/components/schemas/StructuredLogType' - const: span_end - default: span_end - description: Payload type identifier set to SPAN_END - status: - $ref: '#/components/schemas/SpanStatus' - description: >- - The final status of the span indicating success or failure - additionalProperties: false - required: - - type - - status - title: SpanEndPayload - description: Payload for a span end event. - SpanStartPayload: - type: object - properties: - type: - $ref: '#/components/schemas/StructuredLogType' - const: span_start - default: span_start - description: >- - Payload type identifier set to SPAN_START - name: - type: string - description: >- - Human-readable name describing the operation this span represents - parent_span_id: - type: string - description: >- - (Optional) Unique identifier for the parent span, if this is a child span - additionalProperties: false - required: - - type - - name - title: SpanStartPayload - description: Payload for a span start event. - SpanStatus: - type: string - enum: - - ok - - error - title: SpanStatus - description: >- - The status of a span indicating whether it completed successfully or with - an error. - StructuredLogEvent: - type: object - properties: - trace_id: - type: string - description: >- - Unique identifier for the trace this event belongs to - span_id: - type: string - description: >- - Unique identifier for the span this event belongs to - timestamp: - type: string - format: date-time - description: Timestamp when the event occurred - attributes: - type: object - additionalProperties: - oneOf: - - type: string - - type: integer - - type: number - - type: boolean - - type: 'null' - description: >- - (Optional) Key-value pairs containing additional metadata about the event - type: - $ref: '#/components/schemas/EventType' - const: structured_log - default: structured_log - description: >- - Event type identifier set to STRUCTURED_LOG - payload: - oneOf: - - $ref: '#/components/schemas/SpanStartPayload' - - $ref: '#/components/schemas/SpanEndPayload' - discriminator: - propertyName: type - mapping: - span_start: '#/components/schemas/SpanStartPayload' - span_end: '#/components/schemas/SpanEndPayload' - description: >- - The structured payload data for the log event - additionalProperties: false - required: - - trace_id - - span_id - - timestamp - - type - - payload - title: StructuredLogEvent - description: >- - A structured log event containing typed payload data. - StructuredLogType: - type: string - enum: - - span_start - - span_end - title: StructuredLogType - description: >- - The type of structured log event payload. - UnstructuredLogEvent: - type: object - properties: - trace_id: - type: string - description: >- - Unique identifier for the trace this event belongs to - span_id: - type: string - description: >- - Unique identifier for the span this event belongs to - timestamp: - type: string - format: date-time - description: Timestamp when the event occurred - attributes: - type: object - additionalProperties: - oneOf: - - type: string - - type: integer - - type: number - - type: boolean - - type: 'null' - description: >- - (Optional) Key-value pairs containing additional metadata about the event - type: - $ref: '#/components/schemas/EventType' - const: unstructured_log - default: unstructured_log - description: >- - Event type identifier set to UNSTRUCTURED_LOG - message: - type: string - description: The log message text - severity: - $ref: '#/components/schemas/LogSeverity' - description: The severity level of the log message - additionalProperties: false - required: - - trace_id - - span_id - - timestamp - - type - - message - - severity - title: UnstructuredLogEvent - description: >- - An unstructured log event containing a simple text message. - LogEventRequest: - type: object - properties: - event: - $ref: '#/components/schemas/Event' - description: The event to log. - ttl_seconds: - type: integer - description: The time to live of the event. - additionalProperties: false - required: - - event - - ttl_seconds - title: LogEventRequest InvokeToolRequest: type: object properties: @@ -9833,8 +9545,6 @@ tags: description: '' - name: SyntheticDataGeneration (Coming Soon) description: '' - - name: Telemetry - description: '' - name: ToolGroups description: '' - name: ToolRuntime @@ -9859,7 +9569,6 @@ x-tagGroups: - ScoringFunctions - Shields - SyntheticDataGeneration (Coming Soon) - - Telemetry - ToolGroups - ToolRuntime - VectorDBs diff --git a/docs/static/stainless-llama-stack-spec.html b/docs/static/stainless-llama-stack-spec.html index 3478d3338..2bda06ea5 100644 --- a/docs/static/stainless-llama-stack-spec.html +++ b/docs/static/stainless-llama-stack-spec.html @@ -2525,44 +2525,6 @@ "deprecated": false } }, - "/v1/telemetry/events": { - "post": { - "responses": { - "200": { - "description": "OK" - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Log an event.", - "description": "Log an event.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/LogEventRequest" - } - } - }, - "required": true - }, - "deprecated": false - } - }, "/v1/tool-runtime/invoke": { "post": { "responses": { @@ -5873,343 +5835,6 @@ }, "deprecated": false } - }, - "/v1alpha/telemetry/metrics/{metric_name}": { - "post": { - "responses": { - "200": { - "description": "A QueryMetricsResponse.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QueryMetricsResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Query metrics.", - "description": "Query metrics.", - "parameters": [ - { - "name": "metric_name", - "in": "path", - "description": "The name of the metric to query.", - "required": true, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QueryMetricsRequest" - } - } - }, - "required": true - }, - "deprecated": false - } - }, - "/v1alpha/telemetry/spans": { - "post": { - "responses": { - "200": { - "description": "A QuerySpansResponse.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QuerySpansResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Query spans.", - "description": "Query spans.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QuerySpansRequest" - } - } - }, - "required": true - }, - "deprecated": false - } - }, - "/v1alpha/telemetry/spans/export": { - "post": { - "responses": { - "200": { - "description": "OK" - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Save spans to a dataset.", - "description": "Save spans to a dataset.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/SaveSpansToDatasetRequest" - } - } - }, - "required": true - }, - "deprecated": false - } - }, - "/v1alpha/telemetry/spans/{span_id}/tree": { - "post": { - "responses": { - "200": { - "description": "A QuerySpanTreeResponse.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QuerySpanTreeResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Get a span tree by its ID.", - "description": "Get a span tree by its ID.", - "parameters": [ - { - "name": "span_id", - "in": "path", - "description": "The ID of the span to get the tree from.", - "required": true, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/GetSpanTreeRequest" - } - } - }, - "required": true - }, - "deprecated": false - } - }, - "/v1alpha/telemetry/traces": { - "post": { - "responses": { - "200": { - "description": "A QueryTracesResponse.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QueryTracesResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Query traces.", - "description": "Query traces.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QueryTracesRequest" - } - } - }, - "required": true - }, - "deprecated": false - } - }, - "/v1alpha/telemetry/traces/{trace_id}": { - "get": { - "responses": { - "200": { - "description": "A Trace.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Trace" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Get a trace by its ID.", - "description": "Get a trace by its ID.", - "parameters": [ - { - "name": "trace_id", - "in": "path", - "description": "The ID of the trace to get.", - "required": true, - "schema": { - "type": "string" - } - } - ], - "deprecated": false - } - }, - "/v1alpha/telemetry/traces/{trace_id}/spans/{span_id}": { - "get": { - "responses": { - "200": { - "description": "A Span.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Span" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Get a span by its ID.", - "description": "Get a span by its ID.", - "parameters": [ - { - "name": "trace_id", - "in": "path", - "description": "The ID of the trace to get the span from.", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "span_id", - "in": "path", - "description": "The ID of the span to get.", - "required": true, - "schema": { - "type": "string" - } - } - ], - "deprecated": false - } } }, "jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema", @@ -12373,354 +11998,6 @@ "title": "SyntheticDataGenerationResponse", "description": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold." }, - "Event": { - "oneOf": [ - { - "$ref": "#/components/schemas/UnstructuredLogEvent" - }, - { - "$ref": "#/components/schemas/MetricEvent" - }, - { - "$ref": "#/components/schemas/StructuredLogEvent" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "unstructured_log": "#/components/schemas/UnstructuredLogEvent", - "metric": "#/components/schemas/MetricEvent", - "structured_log": "#/components/schemas/StructuredLogEvent" - } - } - }, - "EventType": { - "type": "string", - "enum": [ - "unstructured_log", - "structured_log", - "metric" - ], - "title": "EventType", - "description": "The type of telemetry event being logged." - }, - "LogSeverity": { - "type": "string", - "enum": [ - "verbose", - "debug", - "info", - "warn", - "error", - "critical" - ], - "title": "LogSeverity", - "description": "The severity level of a log message." - }, - "MetricEvent": { - "type": "object", - "properties": { - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace this event belongs to" - }, - "span_id": { - "type": "string", - "description": "Unique identifier for the span this event belongs to" - }, - "timestamp": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the event occurred" - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "integer" - }, - { - "type": "number" - }, - { - "type": "boolean" - }, - { - "type": "null" - } - ] - }, - "description": "(Optional) Key-value pairs containing additional metadata about the event" - }, - "type": { - "$ref": "#/components/schemas/EventType", - "const": "metric", - "default": "metric", - "description": "Event type identifier set to METRIC" - }, - "metric": { - "type": "string", - "description": "The name of the metric being measured" - }, - "value": { - "oneOf": [ - { - "type": "integer" - }, - { - "type": "number" - } - ], - "description": "The numeric value of the metric measurement" - }, - "unit": { - "type": "string", - "description": "The unit of measurement for the metric value" - } - }, - "additionalProperties": false, - "required": [ - "trace_id", - "span_id", - "timestamp", - "type", - "metric", - "value", - "unit" - ], - "title": "MetricEvent", - "description": "A metric event containing a measured value." - }, - "SpanEndPayload": { - "type": "object", - "properties": { - "type": { - "$ref": "#/components/schemas/StructuredLogType", - "const": "span_end", - "default": "span_end", - "description": "Payload type identifier set to SPAN_END" - }, - "status": { - "$ref": "#/components/schemas/SpanStatus", - "description": "The final status of the span indicating success or failure" - } - }, - "additionalProperties": false, - "required": [ - "type", - "status" - ], - "title": "SpanEndPayload", - "description": "Payload for a span end event." - }, - "SpanStartPayload": { - "type": "object", - "properties": { - "type": { - "$ref": "#/components/schemas/StructuredLogType", - "const": "span_start", - "default": "span_start", - "description": "Payload type identifier set to SPAN_START" - }, - "name": { - "type": "string", - "description": "Human-readable name describing the operation this span represents" - }, - "parent_span_id": { - "type": "string", - "description": "(Optional) Unique identifier for the parent span, if this is a child span" - } - }, - "additionalProperties": false, - "required": [ - "type", - "name" - ], - "title": "SpanStartPayload", - "description": "Payload for a span start event." - }, - "SpanStatus": { - "type": "string", - "enum": [ - "ok", - "error" - ], - "title": "SpanStatus", - "description": "The status of a span indicating whether it completed successfully or with an error." - }, - "StructuredLogEvent": { - "type": "object", - "properties": { - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace this event belongs to" - }, - "span_id": { - "type": "string", - "description": "Unique identifier for the span this event belongs to" - }, - "timestamp": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the event occurred" - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "integer" - }, - { - "type": "number" - }, - { - "type": "boolean" - }, - { - "type": "null" - } - ] - }, - "description": "(Optional) Key-value pairs containing additional metadata about the event" - }, - "type": { - "$ref": "#/components/schemas/EventType", - "const": "structured_log", - "default": "structured_log", - "description": "Event type identifier set to STRUCTURED_LOG" - }, - "payload": { - "oneOf": [ - { - "$ref": "#/components/schemas/SpanStartPayload" - }, - { - "$ref": "#/components/schemas/SpanEndPayload" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "span_start": "#/components/schemas/SpanStartPayload", - "span_end": "#/components/schemas/SpanEndPayload" - } - }, - "description": "The structured payload data for the log event" - } - }, - "additionalProperties": false, - "required": [ - "trace_id", - "span_id", - "timestamp", - "type", - "payload" - ], - "title": "StructuredLogEvent", - "description": "A structured log event containing typed payload data." - }, - "StructuredLogType": { - "type": "string", - "enum": [ - "span_start", - "span_end" - ], - "title": "StructuredLogType", - "description": "The type of structured log event payload." - }, - "UnstructuredLogEvent": { - "type": "object", - "properties": { - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace this event belongs to" - }, - "span_id": { - "type": "string", - "description": "Unique identifier for the span this event belongs to" - }, - "timestamp": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the event occurred" - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "integer" - }, - { - "type": "number" - }, - { - "type": "boolean" - }, - { - "type": "null" - } - ] - }, - "description": "(Optional) Key-value pairs containing additional metadata about the event" - }, - "type": { - "$ref": "#/components/schemas/EventType", - "const": "unstructured_log", - "default": "unstructured_log", - "description": "Event type identifier set to UNSTRUCTURED_LOG" - }, - "message": { - "type": "string", - "description": "The log message text" - }, - "severity": { - "$ref": "#/components/schemas/LogSeverity", - "description": "The severity level of the log message" - } - }, - "additionalProperties": false, - "required": [ - "trace_id", - "span_id", - "timestamp", - "type", - "message", - "severity" - ], - "title": "UnstructuredLogEvent", - "description": "An unstructured log event containing a simple text message." - }, - "LogEventRequest": { - "type": "object", - "properties": { - "event": { - "$ref": "#/components/schemas/Event", - "description": "The event to log." - }, - "ttl_seconds": { - "type": "integer", - "description": "The time to live of the event." - } - }, - "additionalProperties": false, - "required": [ - "event", - "ttl_seconds" - ], - "title": "LogEventRequest" - }, "InvokeToolRequest": { "type": "object", "properties": { @@ -17841,552 +17118,6 @@ "logger_config" ], "title": "SupervisedFineTuneRequest" - }, - "QueryMetricsRequest": { - "type": "object", - "properties": { - "start_time": { - "type": "integer", - "description": "The start time of the metric to query." - }, - "end_time": { - "type": "integer", - "description": "The end time of the metric to query." - }, - "granularity": { - "type": "string", - "description": "The granularity of the metric to query." - }, - "query_type": { - "type": "string", - "enum": [ - "range", - "instant" - ], - "description": "The type of query to perform." - }, - "label_matchers": { - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string", - "description": "The name of the label to match" - }, - "value": { - "type": "string", - "description": "The value to match against" - }, - "operator": { - "type": "string", - "enum": [ - "=", - "!=", - "=~", - "!~" - ], - "description": "The comparison operator to use for matching", - "default": "=" - } - }, - "additionalProperties": false, - "required": [ - "name", - "value", - "operator" - ], - "title": "MetricLabelMatcher", - "description": "A matcher for filtering metrics by label values." - }, - "description": "The label matchers to apply to the metric." - } - }, - "additionalProperties": false, - "required": [ - "start_time", - "query_type" - ], - "title": "QueryMetricsRequest" - }, - "MetricDataPoint": { - "type": "object", - "properties": { - "timestamp": { - "type": "integer", - "description": "Unix timestamp when the metric value was recorded" - }, - "value": { - "type": "number", - "description": "The numeric value of the metric at this timestamp" - }, - "unit": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "timestamp", - "value", - "unit" - ], - "title": "MetricDataPoint", - "description": "A single data point in a metric time series." - }, - "MetricLabel": { - "type": "object", - "properties": { - "name": { - "type": "string", - "description": "The name of the label" - }, - "value": { - "type": "string", - "description": "The value of the label" - } - }, - "additionalProperties": false, - "required": [ - "name", - "value" - ], - "title": "MetricLabel", - "description": "A label associated with a metric." - }, - "MetricSeries": { - "type": "object", - "properties": { - "metric": { - "type": "string", - "description": "The name of the metric" - }, - "labels": { - "type": "array", - "items": { - "$ref": "#/components/schemas/MetricLabel" - }, - "description": "List of labels associated with this metric series" - }, - "values": { - "type": "array", - "items": { - "$ref": "#/components/schemas/MetricDataPoint" - }, - "description": "List of data points in chronological order" - } - }, - "additionalProperties": false, - "required": [ - "metric", - "labels", - "values" - ], - "title": "MetricSeries", - "description": "A time series of metric data points." - }, - "QueryMetricsResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/MetricSeries" - }, - "description": "List of metric series matching the query criteria" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "QueryMetricsResponse", - "description": "Response containing metric time series data." - }, - "QueryCondition": { - "type": "object", - "properties": { - "key": { - "type": "string", - "description": "The attribute key to filter on" - }, - "op": { - "$ref": "#/components/schemas/QueryConditionOp", - "description": "The comparison operator to apply" - }, - "value": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ], - "description": "The value to compare against" - } - }, - "additionalProperties": false, - "required": [ - "key", - "op", - "value" - ], - "title": "QueryCondition", - "description": "A condition for filtering query results." - }, - "QueryConditionOp": { - "type": "string", - "enum": [ - "eq", - "ne", - "gt", - "lt" - ], - "title": "QueryConditionOp", - "description": "Comparison operators for query conditions." - }, - "QuerySpansRequest": { - "type": "object", - "properties": { - "attribute_filters": { - "type": "array", - "items": { - "$ref": "#/components/schemas/QueryCondition" - }, - "description": "The attribute filters to apply to the spans." - }, - "attributes_to_return": { - "type": "array", - "items": { - "type": "string" - }, - "description": "The attributes to return in the spans." - }, - "max_depth": { - "type": "integer", - "description": "The maximum depth of the tree." - } - }, - "additionalProperties": false, - "required": [ - "attribute_filters", - "attributes_to_return" - ], - "title": "QuerySpansRequest" - }, - "Span": { - "type": "object", - "properties": { - "span_id": { - "type": "string", - "description": "Unique identifier for the span" - }, - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace this span belongs to" - }, - "parent_span_id": { - "type": "string", - "description": "(Optional) Unique identifier for the parent span, if this is a child span" - }, - "name": { - "type": "string", - "description": "Human-readable name describing the operation this span represents" - }, - "start_time": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the operation began" - }, - "end_time": { - "type": "string", - "format": "date-time", - "description": "(Optional) Timestamp when the operation finished, if completed" - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "(Optional) Key-value pairs containing additional metadata about the span" - } - }, - "additionalProperties": false, - "required": [ - "span_id", - "trace_id", - "name", - "start_time" - ], - "title": "Span", - "description": "A span representing a single operation within a trace." - }, - "QuerySpansResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Span" - }, - "description": "List of spans matching the query criteria" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "QuerySpansResponse", - "description": "Response containing a list of spans." - }, - "SaveSpansToDatasetRequest": { - "type": "object", - "properties": { - "attribute_filters": { - "type": "array", - "items": { - "$ref": "#/components/schemas/QueryCondition" - }, - "description": "The attribute filters to apply to the spans." - }, - "attributes_to_save": { - "type": "array", - "items": { - "type": "string" - }, - "description": "The attributes to save to the dataset." - }, - "dataset_id": { - "type": "string", - "description": "The ID of the dataset to save the spans to." - }, - "max_depth": { - "type": "integer", - "description": "The maximum depth of the tree." - } - }, - "additionalProperties": false, - "required": [ - "attribute_filters", - "attributes_to_save", - "dataset_id" - ], - "title": "SaveSpansToDatasetRequest" - }, - "GetSpanTreeRequest": { - "type": "object", - "properties": { - "attributes_to_return": { - "type": "array", - "items": { - "type": "string" - }, - "description": "The attributes to return in the tree." - }, - "max_depth": { - "type": "integer", - "description": "The maximum depth of the tree." - } - }, - "additionalProperties": false, - "title": "GetSpanTreeRequest" - }, - "SpanWithStatus": { - "type": "object", - "properties": { - "span_id": { - "type": "string", - "description": "Unique identifier for the span" - }, - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace this span belongs to" - }, - "parent_span_id": { - "type": "string", - "description": "(Optional) Unique identifier for the parent span, if this is a child span" - }, - "name": { - "type": "string", - "description": "Human-readable name describing the operation this span represents" - }, - "start_time": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the operation began" - }, - "end_time": { - "type": "string", - "format": "date-time", - "description": "(Optional) Timestamp when the operation finished, if completed" - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "(Optional) Key-value pairs containing additional metadata about the span" - }, - "status": { - "$ref": "#/components/schemas/SpanStatus", - "description": "(Optional) The current status of the span" - } - }, - "additionalProperties": false, - "required": [ - "span_id", - "trace_id", - "name", - "start_time" - ], - "title": "SpanWithStatus", - "description": "A span that includes status information." - }, - "QuerySpanTreeResponse": { - "type": "object", - "properties": { - "data": { - "type": "object", - "additionalProperties": { - "$ref": "#/components/schemas/SpanWithStatus" - }, - "description": "Dictionary mapping span IDs to spans with status information" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "QuerySpanTreeResponse", - "description": "Response containing a tree structure of spans." - }, - "QueryTracesRequest": { - "type": "object", - "properties": { - "attribute_filters": { - "type": "array", - "items": { - "$ref": "#/components/schemas/QueryCondition" - }, - "description": "The attribute filters to apply to the traces." - }, - "limit": { - "type": "integer", - "description": "The limit of traces to return." - }, - "offset": { - "type": "integer", - "description": "The offset of the traces to return." - }, - "order_by": { - "type": "array", - "items": { - "type": "string" - }, - "description": "The order by of the traces to return." - } - }, - "additionalProperties": false, - "title": "QueryTracesRequest" - }, - "Trace": { - "type": "object", - "properties": { - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace" - }, - "root_span_id": { - "type": "string", - "description": "Unique identifier for the root span that started this trace" - }, - "start_time": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the trace began" - }, - "end_time": { - "type": "string", - "format": "date-time", - "description": "(Optional) Timestamp when the trace finished, if completed" - } - }, - "additionalProperties": false, - "required": [ - "trace_id", - "root_span_id", - "start_time" - ], - "title": "Trace", - "description": "A trace representing the complete execution path of a request across multiple operations." - }, - "QueryTracesResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Trace" - }, - "description": "List of traces matching the query criteria" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "QueryTracesResponse", - "description": "Response containing a list of traces." } }, "responses": { @@ -18539,10 +17270,6 @@ "name": "SyntheticDataGeneration (Coming Soon)", "description": "" }, - { - "name": "Telemetry", - "description": "" - }, { "name": "ToolGroups", "description": "" @@ -18582,7 +17309,6 @@ "ScoringFunctions", "Shields", "SyntheticDataGeneration (Coming Soon)", - "Telemetry", "ToolGroups", "ToolRuntime", "VectorDBs", diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index 6c04542bf..56035ad5f 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -1947,33 +1947,6 @@ paths: $ref: '#/components/schemas/SyntheticDataGenerateRequest' required: true deprecated: false - /v1/telemetry/events: - post: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Log an event. - description: Log an event. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/LogEventRequest' - required: true - deprecated: false /v1/tool-runtime/invoke: post: responses: @@ -4392,238 +4365,6 @@ paths: $ref: '#/components/schemas/SupervisedFineTuneRequest' required: true deprecated: false - /v1alpha/telemetry/metrics/{metric_name}: - post: - responses: - '200': - description: A QueryMetricsResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/QueryMetricsResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Query metrics. - description: Query metrics. - parameters: - - name: metric_name - in: path - description: The name of the metric to query. - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/QueryMetricsRequest' - required: true - deprecated: false - /v1alpha/telemetry/spans: - post: - responses: - '200': - description: A QuerySpansResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/QuerySpansResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Query spans. - description: Query spans. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/QuerySpansRequest' - required: true - deprecated: false - /v1alpha/telemetry/spans/export: - post: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Save spans to a dataset. - description: Save spans to a dataset. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/SaveSpansToDatasetRequest' - required: true - deprecated: false - /v1alpha/telemetry/spans/{span_id}/tree: - post: - responses: - '200': - description: A QuerySpanTreeResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/QuerySpanTreeResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Get a span tree by its ID. - description: Get a span tree by its ID. - parameters: - - name: span_id - in: path - description: The ID of the span to get the tree from. - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/GetSpanTreeRequest' - required: true - deprecated: false - /v1alpha/telemetry/traces: - post: - responses: - '200': - description: A QueryTracesResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/QueryTracesResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Query traces. - description: Query traces. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/QueryTracesRequest' - required: true - deprecated: false - /v1alpha/telemetry/traces/{trace_id}: - get: - responses: - '200': - description: A Trace. - content: - application/json: - schema: - $ref: '#/components/schemas/Trace' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Get a trace by its ID. - description: Get a trace by its ID. - parameters: - - name: trace_id - in: path - description: The ID of the trace to get. - required: true - schema: - type: string - deprecated: false - /v1alpha/telemetry/traces/{trace_id}/spans/{span_id}: - get: - responses: - '200': - description: A Span. - content: - application/json: - schema: - $ref: '#/components/schemas/Span' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Get a span by its ID. - description: Get a span by its ID. - parameters: - - name: trace_id - in: path - description: >- - The ID of the trace to get the span from. - required: true - schema: - type: string - - name: span_id - in: path - description: The ID of the span to get. - required: true - schema: - type: string - deprecated: false jsonSchemaDialect: >- https://json-schema.org/draft/2020-12/schema components: @@ -9285,267 +9026,6 @@ components: description: >- Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold. - Event: - oneOf: - - $ref: '#/components/schemas/UnstructuredLogEvent' - - $ref: '#/components/schemas/MetricEvent' - - $ref: '#/components/schemas/StructuredLogEvent' - discriminator: - propertyName: type - mapping: - unstructured_log: '#/components/schemas/UnstructuredLogEvent' - metric: '#/components/schemas/MetricEvent' - structured_log: '#/components/schemas/StructuredLogEvent' - EventType: - type: string - enum: - - unstructured_log - - structured_log - - metric - title: EventType - description: >- - The type of telemetry event being logged. - LogSeverity: - type: string - enum: - - verbose - - debug - - info - - warn - - error - - critical - title: LogSeverity - description: The severity level of a log message. - MetricEvent: - type: object - properties: - trace_id: - type: string - description: >- - Unique identifier for the trace this event belongs to - span_id: - type: string - description: >- - Unique identifier for the span this event belongs to - timestamp: - type: string - format: date-time - description: Timestamp when the event occurred - attributes: - type: object - additionalProperties: - oneOf: - - type: string - - type: integer - - type: number - - type: boolean - - type: 'null' - description: >- - (Optional) Key-value pairs containing additional metadata about the event - type: - $ref: '#/components/schemas/EventType' - const: metric - default: metric - description: Event type identifier set to METRIC - metric: - type: string - description: The name of the metric being measured - value: - oneOf: - - type: integer - - type: number - description: >- - The numeric value of the metric measurement - unit: - type: string - description: >- - The unit of measurement for the metric value - additionalProperties: false - required: - - trace_id - - span_id - - timestamp - - type - - metric - - value - - unit - title: MetricEvent - description: >- - A metric event containing a measured value. - SpanEndPayload: - type: object - properties: - type: - $ref: '#/components/schemas/StructuredLogType' - const: span_end - default: span_end - description: Payload type identifier set to SPAN_END - status: - $ref: '#/components/schemas/SpanStatus' - description: >- - The final status of the span indicating success or failure - additionalProperties: false - required: - - type - - status - title: SpanEndPayload - description: Payload for a span end event. - SpanStartPayload: - type: object - properties: - type: - $ref: '#/components/schemas/StructuredLogType' - const: span_start - default: span_start - description: >- - Payload type identifier set to SPAN_START - name: - type: string - description: >- - Human-readable name describing the operation this span represents - parent_span_id: - type: string - description: >- - (Optional) Unique identifier for the parent span, if this is a child span - additionalProperties: false - required: - - type - - name - title: SpanStartPayload - description: Payload for a span start event. - SpanStatus: - type: string - enum: - - ok - - error - title: SpanStatus - description: >- - The status of a span indicating whether it completed successfully or with - an error. - StructuredLogEvent: - type: object - properties: - trace_id: - type: string - description: >- - Unique identifier for the trace this event belongs to - span_id: - type: string - description: >- - Unique identifier for the span this event belongs to - timestamp: - type: string - format: date-time - description: Timestamp when the event occurred - attributes: - type: object - additionalProperties: - oneOf: - - type: string - - type: integer - - type: number - - type: boolean - - type: 'null' - description: >- - (Optional) Key-value pairs containing additional metadata about the event - type: - $ref: '#/components/schemas/EventType' - const: structured_log - default: structured_log - description: >- - Event type identifier set to STRUCTURED_LOG - payload: - oneOf: - - $ref: '#/components/schemas/SpanStartPayload' - - $ref: '#/components/schemas/SpanEndPayload' - discriminator: - propertyName: type - mapping: - span_start: '#/components/schemas/SpanStartPayload' - span_end: '#/components/schemas/SpanEndPayload' - description: >- - The structured payload data for the log event - additionalProperties: false - required: - - trace_id - - span_id - - timestamp - - type - - payload - title: StructuredLogEvent - description: >- - A structured log event containing typed payload data. - StructuredLogType: - type: string - enum: - - span_start - - span_end - title: StructuredLogType - description: >- - The type of structured log event payload. - UnstructuredLogEvent: - type: object - properties: - trace_id: - type: string - description: >- - Unique identifier for the trace this event belongs to - span_id: - type: string - description: >- - Unique identifier for the span this event belongs to - timestamp: - type: string - format: date-time - description: Timestamp when the event occurred - attributes: - type: object - additionalProperties: - oneOf: - - type: string - - type: integer - - type: number - - type: boolean - - type: 'null' - description: >- - (Optional) Key-value pairs containing additional metadata about the event - type: - $ref: '#/components/schemas/EventType' - const: unstructured_log - default: unstructured_log - description: >- - Event type identifier set to UNSTRUCTURED_LOG - message: - type: string - description: The log message text - severity: - $ref: '#/components/schemas/LogSeverity' - description: The severity level of the log message - additionalProperties: false - required: - - trace_id - - span_id - - timestamp - - type - - message - - severity - title: UnstructuredLogEvent - description: >- - An unstructured log event containing a simple text message. - LogEventRequest: - type: object - properties: - event: - $ref: '#/components/schemas/Event' - description: The event to log. - ttl_seconds: - type: integer - description: The time to live of the event. - additionalProperties: false - required: - - event - - ttl_seconds - title: LogEventRequest InvokeToolRequest: type: object properties: @@ -13349,425 +12829,6 @@ components: - hyperparam_search_config - logger_config title: SupervisedFineTuneRequest - QueryMetricsRequest: - type: object - properties: - start_time: - type: integer - description: The start time of the metric to query. - end_time: - type: integer - description: The end time of the metric to query. - granularity: - type: string - description: The granularity of the metric to query. - query_type: - type: string - enum: - - range - - instant - description: The type of query to perform. - label_matchers: - type: array - items: - type: object - properties: - name: - type: string - description: The name of the label to match - value: - type: string - description: The value to match against - operator: - type: string - enum: - - '=' - - '!=' - - =~ - - '!~' - description: >- - The comparison operator to use for matching - default: '=' - additionalProperties: false - required: - - name - - value - - operator - title: MetricLabelMatcher - description: >- - A matcher for filtering metrics by label values. - description: >- - The label matchers to apply to the metric. - additionalProperties: false - required: - - start_time - - query_type - title: QueryMetricsRequest - MetricDataPoint: - type: object - properties: - timestamp: - type: integer - description: >- - Unix timestamp when the metric value was recorded - value: - type: number - description: >- - The numeric value of the metric at this timestamp - unit: - type: string - additionalProperties: false - required: - - timestamp - - value - - unit - title: MetricDataPoint - description: >- - A single data point in a metric time series. - MetricLabel: - type: object - properties: - name: - type: string - description: The name of the label - value: - type: string - description: The value of the label - additionalProperties: false - required: - - name - - value - title: MetricLabel - description: A label associated with a metric. - MetricSeries: - type: object - properties: - metric: - type: string - description: The name of the metric - labels: - type: array - items: - $ref: '#/components/schemas/MetricLabel' - description: >- - List of labels associated with this metric series - values: - type: array - items: - $ref: '#/components/schemas/MetricDataPoint' - description: >- - List of data points in chronological order - additionalProperties: false - required: - - metric - - labels - - values - title: MetricSeries - description: A time series of metric data points. - QueryMetricsResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/MetricSeries' - description: >- - List of metric series matching the query criteria - additionalProperties: false - required: - - data - title: QueryMetricsResponse - description: >- - Response containing metric time series data. - QueryCondition: - type: object - properties: - key: - type: string - description: The attribute key to filter on - op: - $ref: '#/components/schemas/QueryConditionOp' - description: The comparison operator to apply - value: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The value to compare against - additionalProperties: false - required: - - key - - op - - value - title: QueryCondition - description: A condition for filtering query results. - QueryConditionOp: - type: string - enum: - - eq - - ne - - gt - - lt - title: QueryConditionOp - description: >- - Comparison operators for query conditions. - QuerySpansRequest: - type: object - properties: - attribute_filters: - type: array - items: - $ref: '#/components/schemas/QueryCondition' - description: >- - The attribute filters to apply to the spans. - attributes_to_return: - type: array - items: - type: string - description: The attributes to return in the spans. - max_depth: - type: integer - description: The maximum depth of the tree. - additionalProperties: false - required: - - attribute_filters - - attributes_to_return - title: QuerySpansRequest - Span: - type: object - properties: - span_id: - type: string - description: Unique identifier for the span - trace_id: - type: string - description: >- - Unique identifier for the trace this span belongs to - parent_span_id: - type: string - description: >- - (Optional) Unique identifier for the parent span, if this is a child span - name: - type: string - description: >- - Human-readable name describing the operation this span represents - start_time: - type: string - format: date-time - description: Timestamp when the operation began - end_time: - type: string - format: date-time - description: >- - (Optional) Timestamp when the operation finished, if completed - attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Key-value pairs containing additional metadata about the span - additionalProperties: false - required: - - span_id - - trace_id - - name - - start_time - title: Span - description: >- - A span representing a single operation within a trace. - QuerySpansResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/Span' - description: >- - List of spans matching the query criteria - additionalProperties: false - required: - - data - title: QuerySpansResponse - description: Response containing a list of spans. - SaveSpansToDatasetRequest: - type: object - properties: - attribute_filters: - type: array - items: - $ref: '#/components/schemas/QueryCondition' - description: >- - The attribute filters to apply to the spans. - attributes_to_save: - type: array - items: - type: string - description: The attributes to save to the dataset. - dataset_id: - type: string - description: >- - The ID of the dataset to save the spans to. - max_depth: - type: integer - description: The maximum depth of the tree. - additionalProperties: false - required: - - attribute_filters - - attributes_to_save - - dataset_id - title: SaveSpansToDatasetRequest - GetSpanTreeRequest: - type: object - properties: - attributes_to_return: - type: array - items: - type: string - description: The attributes to return in the tree. - max_depth: - type: integer - description: The maximum depth of the tree. - additionalProperties: false - title: GetSpanTreeRequest - SpanWithStatus: - type: object - properties: - span_id: - type: string - description: Unique identifier for the span - trace_id: - type: string - description: >- - Unique identifier for the trace this span belongs to - parent_span_id: - type: string - description: >- - (Optional) Unique identifier for the parent span, if this is a child span - name: - type: string - description: >- - Human-readable name describing the operation this span represents - start_time: - type: string - format: date-time - description: Timestamp when the operation began - end_time: - type: string - format: date-time - description: >- - (Optional) Timestamp when the operation finished, if completed - attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Key-value pairs containing additional metadata about the span - status: - $ref: '#/components/schemas/SpanStatus' - description: >- - (Optional) The current status of the span - additionalProperties: false - required: - - span_id - - trace_id - - name - - start_time - title: SpanWithStatus - description: A span that includes status information. - QuerySpanTreeResponse: - type: object - properties: - data: - type: object - additionalProperties: - $ref: '#/components/schemas/SpanWithStatus' - description: >- - Dictionary mapping span IDs to spans with status information - additionalProperties: false - required: - - data - title: QuerySpanTreeResponse - description: >- - Response containing a tree structure of spans. - QueryTracesRequest: - type: object - properties: - attribute_filters: - type: array - items: - $ref: '#/components/schemas/QueryCondition' - description: >- - The attribute filters to apply to the traces. - limit: - type: integer - description: The limit of traces to return. - offset: - type: integer - description: The offset of the traces to return. - order_by: - type: array - items: - type: string - description: The order by of the traces to return. - additionalProperties: false - title: QueryTracesRequest - Trace: - type: object - properties: - trace_id: - type: string - description: Unique identifier for the trace - root_span_id: - type: string - description: >- - Unique identifier for the root span that started this trace - start_time: - type: string - format: date-time - description: Timestamp when the trace began - end_time: - type: string - format: date-time - description: >- - (Optional) Timestamp when the trace finished, if completed - additionalProperties: false - required: - - trace_id - - root_span_id - - start_time - title: Trace - description: >- - A trace representing the complete execution path of a request across multiple - operations. - QueryTracesResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/Trace' - description: >- - List of traces matching the query criteria - additionalProperties: false - required: - - data - title: QueryTracesResponse - description: Response containing a list of traces. responses: BadRequest400: description: The request was invalid or malformed @@ -13881,8 +12942,6 @@ tags: description: '' - name: SyntheticDataGeneration (Coming Soon) description: '' - - name: Telemetry - description: '' - name: ToolGroups description: '' - name: ToolRuntime @@ -13912,7 +12971,6 @@ x-tagGroups: - ScoringFunctions - Shields - SyntheticDataGeneration (Coming Soon) - - Telemetry - ToolGroups - ToolRuntime - VectorDBs diff --git a/llama_stack/apis/telemetry/telemetry.py b/llama_stack/apis/telemetry/telemetry.py index 0e772da6a..b2999ad33 100644 --- a/llama_stack/apis/telemetry/telemetry.py +++ b/llama_stack/apis/telemetry/telemetry.py @@ -16,15 +16,12 @@ from typing import ( from pydantic import BaseModel, Field -from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA from llama_stack.models.llama.datatypes import Primitive -from llama_stack.schema_utils import json_schema_type, register_schema, webmethod +from llama_stack.schema_utils import json_schema_type, register_schema # Add this constant near the top of the file, after the imports DEFAULT_TTL_DAYS = 7 -REQUIRED_SCOPE = "telemetry.read" - @json_schema_type class SpanStatus(Enum): @@ -413,7 +410,6 @@ class QueryMetricsResponse(BaseModel): @runtime_checkable class Telemetry(Protocol): - @webmethod(route="/telemetry/events", method="POST", level=LLAMA_STACK_API_V1) async def log_event( self, event: Event, @@ -426,14 +422,6 @@ class Telemetry(Protocol): """ ... - @webmethod( - route="/telemetry/traces", - method="POST", - required_scope=REQUIRED_SCOPE, - deprecated=True, - level=LLAMA_STACK_API_V1, - ) - @webmethod(route="/telemetry/traces", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1ALPHA) async def query_traces( self, attribute_filters: list[QueryCondition] | None = None, @@ -451,19 +439,6 @@ class Telemetry(Protocol): """ ... - @webmethod( - route="/telemetry/traces/{trace_id:path}", - method="GET", - required_scope=REQUIRED_SCOPE, - deprecated=True, - level=LLAMA_STACK_API_V1, - ) - @webmethod( - route="/telemetry/traces/{trace_id:path}", - method="GET", - required_scope=REQUIRED_SCOPE, - level=LLAMA_STACK_API_V1ALPHA, - ) async def get_trace(self, trace_id: str) -> Trace: """Get a trace by its ID. @@ -472,19 +447,6 @@ class Telemetry(Protocol): """ ... - @webmethod( - route="/telemetry/traces/{trace_id:path}/spans/{span_id:path}", - method="GET", - required_scope=REQUIRED_SCOPE, - deprecated=True, - level=LLAMA_STACK_API_V1, - ) - @webmethod( - route="/telemetry/traces/{trace_id:path}/spans/{span_id:path}", - method="GET", - required_scope=REQUIRED_SCOPE, - level=LLAMA_STACK_API_V1ALPHA, - ) async def get_span(self, trace_id: str, span_id: str) -> Span: """Get a span by its ID. @@ -494,19 +456,6 @@ class Telemetry(Protocol): """ ... - @webmethod( - route="/telemetry/spans/{span_id:path}/tree", - method="POST", - deprecated=True, - required_scope=REQUIRED_SCOPE, - level=LLAMA_STACK_API_V1, - ) - @webmethod( - route="/telemetry/spans/{span_id:path}/tree", - method="POST", - required_scope=REQUIRED_SCOPE, - level=LLAMA_STACK_API_V1ALPHA, - ) async def get_span_tree( self, span_id: str, @@ -522,14 +471,6 @@ class Telemetry(Protocol): """ ... - @webmethod( - route="/telemetry/spans", - method="POST", - required_scope=REQUIRED_SCOPE, - deprecated=True, - level=LLAMA_STACK_API_V1, - ) - @webmethod(route="/telemetry/spans", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1ALPHA) async def query_spans( self, attribute_filters: list[QueryCondition], @@ -545,8 +486,6 @@ class Telemetry(Protocol): """ ... - @webmethod(route="/telemetry/spans/export", method="POST", deprecated=True, level=LLAMA_STACK_API_V1) - @webmethod(route="/telemetry/spans/export", method="POST", level=LLAMA_STACK_API_V1ALPHA) async def save_spans_to_dataset( self, attribute_filters: list[QueryCondition], @@ -563,19 +502,6 @@ class Telemetry(Protocol): """ ... - @webmethod( - route="/telemetry/metrics/{metric_name}", - method="POST", - required_scope=REQUIRED_SCOPE, - deprecated=True, - level=LLAMA_STACK_API_V1, - ) - @webmethod( - route="/telemetry/metrics/{metric_name}", - method="POST", - required_scope=REQUIRED_SCOPE, - level=LLAMA_STACK_API_V1ALPHA, - ) async def query_metrics( self, metric_name: str, diff --git a/llama_stack/core/conversations/conversations.py b/llama_stack/core/conversations/conversations.py index bef138e69..612b2f68e 100644 --- a/llama_stack/core/conversations/conversations.py +++ b/llama_stack/core/conversations/conversations.py @@ -32,7 +32,7 @@ from llama_stack.providers.utils.sqlstore.sqlstore import ( sqlstore_impl, ) -logger = get_logger(name=__name__, category="openai::conversations") +logger = get_logger(name=__name__, category="openai_conversations") class ConversationServiceConfig(BaseModel): diff --git a/llama_stack/core/routers/inference.py b/llama_stack/core/routers/inference.py index c4338e614..847f6a2d2 100644 --- a/llama_stack/core/routers/inference.py +++ b/llama_stack/core/routers/inference.py @@ -611,7 +611,7 @@ class InferenceRouter(Inference): completion_text += "".join(choice_data["content_parts"]) # Add metrics to the chunk - if self.telemetry and chunk.usage: + if self.telemetry and hasattr(chunk, "usage") and chunk.usage: metrics = self._construct_metrics( prompt_tokens=chunk.usage.prompt_tokens, completion_tokens=chunk.usage.completion_tokens, diff --git a/llama_stack/core/store/registry.py b/llama_stack/core/store/registry.py index 624dbd176..0486553d5 100644 --- a/llama_stack/core/store/registry.py +++ b/llama_stack/core/store/registry.py @@ -98,7 +98,10 @@ class DiskDistributionRegistry(DistributionRegistry): existing_obj = await self.get(obj.type, obj.identifier) # dont register if the object's providerid already exists if existing_obj and existing_obj.provider_id == obj.provider_id: - return False + raise ValueError( + f"Provider '{obj.provider_id}' is already registered." + f"Unregister the existing provider first before registering it again." + ) await self.kvstore.set( KEY_FORMAT.format(type=obj.type, identifier=obj.identifier), diff --git a/llama_stack/distributions/watsonx/__init__.py b/llama_stack/distributions/watsonx/__init__.py index 756f351d8..078d86144 100644 --- a/llama_stack/distributions/watsonx/__init__.py +++ b/llama_stack/distributions/watsonx/__init__.py @@ -3,3 +3,5 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. + +from .watsonx import get_distribution_template # noqa: F401 diff --git a/llama_stack/distributions/watsonx/build.yaml b/llama_stack/distributions/watsonx/build.yaml index bf4be7eaf..06349a741 100644 --- a/llama_stack/distributions/watsonx/build.yaml +++ b/llama_stack/distributions/watsonx/build.yaml @@ -3,44 +3,33 @@ distribution_spec: description: Use watsonx for running LLM inference providers: inference: - - provider_id: watsonx - provider_type: remote::watsonx - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers + - provider_type: remote::watsonx + - provider_type: inline::sentence-transformers vector_io: - - provider_id: faiss - provider_type: inline::faiss + - provider_type: inline::faiss safety: - - provider_id: llama-guard - provider_type: inline::llama-guard + - provider_type: inline::llama-guard agents: - - provider_id: meta-reference - provider_type: inline::meta-reference + - provider_type: inline::meta-reference telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference + - provider_type: inline::meta-reference eval: - - provider_id: meta-reference - provider_type: inline::meta-reference + - provider_type: inline::meta-reference datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - - provider_id: localfs - provider_type: inline::localfs + - provider_type: remote::huggingface + - provider_type: inline::localfs scoring: - - provider_id: basic - provider_type: inline::basic - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - - provider_id: braintrust - provider_type: inline::braintrust + - provider_type: inline::basic + - provider_type: inline::llm-as-judge + - provider_type: inline::braintrust tool_runtime: - provider_type: remote::brave-search - provider_type: remote::tavily-search - provider_type: inline::rag-runtime - provider_type: remote::model-context-protocol + files: + - provider_type: inline::localfs image_type: venv additional_pip_packages: +- aiosqlite - sqlalchemy[asyncio] -- aiosqlite -- aiosqlite diff --git a/llama_stack/distributions/watsonx/run.yaml b/llama_stack/distributions/watsonx/run.yaml index 92f367910..e0c337f9d 100644 --- a/llama_stack/distributions/watsonx/run.yaml +++ b/llama_stack/distributions/watsonx/run.yaml @@ -4,13 +4,13 @@ apis: - agents - datasetio - eval +- files - inference - safety - scoring - telemetry - tool_runtime - vector_io -- files providers: inference: - provider_id: watsonx @@ -19,8 +19,6 @@ providers: url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com} api_key: ${env.WATSONX_API_KEY:=} project_id: ${env.WATSONX_PROJECT_ID:=} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers vector_io: - provider_id: faiss provider_type: inline::faiss @@ -48,7 +46,7 @@ providers: provider_type: inline::meta-reference config: service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" - sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sinks: ${env.TELEMETRY_SINKS:=sqlite} sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/trace_store.db otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=} eval: @@ -109,102 +107,7 @@ metadata_store: inference_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/inference_store.db -models: -- metadata: {} - model_id: meta-llama/llama-3-3-70b-instruct - provider_id: watsonx - provider_model_id: meta-llama/llama-3-3-70b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.3-70B-Instruct - provider_id: watsonx - provider_model_id: meta-llama/llama-3-3-70b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/llama-2-13b-chat - provider_id: watsonx - provider_model_id: meta-llama/llama-2-13b-chat - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-2-13b - provider_id: watsonx - provider_model_id: meta-llama/llama-2-13b-chat - model_type: llm -- metadata: {} - model_id: meta-llama/llama-3-1-70b-instruct - provider_id: watsonx - provider_model_id: meta-llama/llama-3-1-70b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-70B-Instruct - provider_id: watsonx - provider_model_id: meta-llama/llama-3-1-70b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/llama-3-1-8b-instruct - provider_id: watsonx - provider_model_id: meta-llama/llama-3-1-8b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-8B-Instruct - provider_id: watsonx - provider_model_id: meta-llama/llama-3-1-8b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/llama-3-2-11b-vision-instruct - provider_id: watsonx - provider_model_id: meta-llama/llama-3-2-11b-vision-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-11B-Vision-Instruct - provider_id: watsonx - provider_model_id: meta-llama/llama-3-2-11b-vision-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/llama-3-2-1b-instruct - provider_id: watsonx - provider_model_id: meta-llama/llama-3-2-1b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-1B-Instruct - provider_id: watsonx - provider_model_id: meta-llama/llama-3-2-1b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/llama-3-2-3b-instruct - provider_id: watsonx - provider_model_id: meta-llama/llama-3-2-3b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-3B-Instruct - provider_id: watsonx - provider_model_id: meta-llama/llama-3-2-3b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/llama-3-2-90b-vision-instruct - provider_id: watsonx - provider_model_id: meta-llama/llama-3-2-90b-vision-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-90B-Vision-Instruct - provider_id: watsonx - provider_model_id: meta-llama/llama-3-2-90b-vision-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/llama-guard-3-11b-vision - provider_id: watsonx - provider_model_id: meta-llama/llama-guard-3-11b-vision - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-Guard-3-11B-Vision - provider_id: watsonx - provider_model_id: meta-llama/llama-guard-3-11b-vision - model_type: llm -- metadata: - embedding_dimension: 384 - model_id: all-MiniLM-L6-v2 - provider_id: sentence-transformers - model_type: embedding +models: [] shields: [] vector_dbs: [] datasets: [] diff --git a/llama_stack/distributions/watsonx/watsonx.py b/llama_stack/distributions/watsonx/watsonx.py index c3cab5d1b..645770612 100644 --- a/llama_stack/distributions/watsonx/watsonx.py +++ b/llama_stack/distributions/watsonx/watsonx.py @@ -4,17 +4,11 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from pathlib import Path -from llama_stack.apis.models import ModelType -from llama_stack.core.datatypes import BuildProvider, ModelInput, Provider, ToolGroupInput -from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings, get_model_registry +from llama_stack.core.datatypes import BuildProvider, Provider, ToolGroupInput +from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig -from llama_stack.providers.inline.inference.sentence_transformers import ( - SentenceTransformersInferenceConfig, -) from llama_stack.providers.remote.inference.watsonx import WatsonXConfig -from llama_stack.providers.remote.inference.watsonx.models import MODEL_ENTRIES def get_distribution_template(name: str = "watsonx") -> DistributionTemplate: @@ -52,15 +46,6 @@ def get_distribution_template(name: str = "watsonx") -> DistributionTemplate: config=WatsonXConfig.sample_run_config(), ) - embedding_provider = Provider( - provider_id="sentence-transformers", - provider_type="inline::sentence-transformers", - config=SentenceTransformersInferenceConfig.sample_run_config(), - ) - - available_models = { - "watsonx": MODEL_ENTRIES, - } default_tool_groups = [ ToolGroupInput( toolgroup_id="builtin::websearch", @@ -72,36 +57,25 @@ def get_distribution_template(name: str = "watsonx") -> DistributionTemplate: ), ] - embedding_model = ModelInput( - model_id="all-MiniLM-L6-v2", - provider_id="sentence-transformers", - model_type=ModelType.embedding, - metadata={ - "embedding_dimension": 384, - }, - ) - files_provider = Provider( provider_id="meta-reference-files", provider_type="inline::localfs", config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"), ) - default_models, _ = get_model_registry(available_models) return DistributionTemplate( name=name, distro_type="remote_hosted", description="Use watsonx for running LLM inference", container_image=None, - template_path=Path(__file__).parent / "doc_template.md", + template_path=None, providers=providers, - available_models_by_provider=available_models, run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ - "inference": [inference_provider, embedding_provider], + "inference": [inference_provider], "files": [files_provider], }, - default_models=default_models + [embedding_model], + default_models=[], default_tool_groups=default_tool_groups, ), }, diff --git a/llama_stack/log.py b/llama_stack/log.py index 6f751b21d..191f158e3 100644 --- a/llama_stack/log.py +++ b/llama_stack/log.py @@ -31,12 +31,17 @@ CATEGORIES = [ "client", "telemetry", "openai_responses", + "openai_conversations", "testing", "providers", "models", "files", "vector_io", "tool_runtime", + "cli", + "post_training", + "scoring", + "tests", ] UNCATEGORIZED = "uncategorized" @@ -261,11 +266,12 @@ def get_logger( if root_category in _category_levels: log_level = _category_levels[root_category] else: - log_level = _category_levels.get("root", DEFAULT_LOG_LEVEL) if category != UNCATEGORIZED: - logging.warning( - f"Unknown logging category: {category}. Falling back to default 'root' level: {log_level}" + raise ValueError( + f"Unknown logging category: {category}. To resolve, choose a valid category from the CATEGORIES list " + f"or add it to the CATEGORIES list. Available categories: {CATEGORIES}" ) + log_level = _category_levels.get("root", DEFAULT_LOG_LEVEL) logger.setLevel(log_level) return logging.LoggerAdapter(logger, {"category": category}) diff --git a/llama_stack/models/llama/prompt_format.py b/llama_stack/models/llama/prompt_format.py index 6191df61a..16e4068d7 100644 --- a/llama_stack/models/llama/prompt_format.py +++ b/llama_stack/models/llama/prompt_format.py @@ -11,19 +11,13 @@ # top-level folder for each specific model found within the models/ directory at # the top-level of this source tree. -import json import textwrap -from pathlib import Path from pydantic import BaseModel, Field from llama_stack.models.llama.datatypes import ( RawContent, - RawMediaItem, RawMessage, - RawTextItem, - StopReason, - ToolCall, ToolPromptFormat, ) from llama_stack.models.llama.llama4.tokenizer import Tokenizer @@ -175,25 +169,6 @@ def llama3_1_builtin_code_interpreter_dialog(tool_prompt_format=ToolPromptFormat return messages -def llama3_1_builtin_tool_call_with_image_dialog( - tool_prompt_format=ToolPromptFormat.json, -): - this_dir = Path(__file__).parent - with open(this_dir / "llama3/dog.jpg", "rb") as f: - img = f.read() - - interface = LLama31Interface(tool_prompt_format) - - messages = interface.system_messages(**system_message_builtin_tools_only()) - messages += interface.user_message(content=[RawMediaItem(data=img), RawTextItem(text="What is this dog breed?")]) - messages += interface.assistant_response_messages( - "Based on the description of the dog in the image, it appears to be a small breed dog, possibly a terrier mix", - StopReason.end_of_turn, - ) - messages += interface.user_message("Search the web for some food recommendations for the indentified breed") - return messages - - def llama3_1_custom_tool_call_dialog(tool_prompt_format=ToolPromptFormat.json): interface = LLama31Interface(tool_prompt_format) @@ -202,35 +177,6 @@ def llama3_1_custom_tool_call_dialog(tool_prompt_format=ToolPromptFormat.json): return messages -def llama3_1_e2e_tool_call_dialog(tool_prompt_format=ToolPromptFormat.json): - tool_response = json.dumps(["great song1", "awesome song2", "cool song3"]) - interface = LLama31Interface(tool_prompt_format) - - messages = interface.system_messages(**system_message_custom_tools_only()) - messages += interface.user_message(content="Use tools to get latest trending songs") - messages.append( - RawMessage( - role="assistant", - content="", - stop_reason=StopReason.end_of_message, - tool_calls=[ - ToolCall( - call_id="call_id", - tool_name="trending_songs", - arguments={"n": "10", "genre": "latest"}, - ) - ], - ), - ) - messages.append( - RawMessage( - role="assistant", - content=tool_response, - ) - ) - return messages - - def llama3_2_user_assistant_conversation(): return UseCase( title="User and assistant conversation", diff --git a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py index c2ce9aa7b..b17c720e9 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py +++ b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py @@ -7,8 +7,6 @@ import copy import json import re -import secrets -import string import uuid import warnings from collections.abc import AsyncGenerator @@ -84,11 +82,6 @@ from llama_stack.providers.utils.telemetry import tracing from .persistence import AgentPersistence from .safety import SafetyException, ShieldRunnerMixin - -def make_random_string(length: int = 8): - return "".join(secrets.choice(string.ascii_letters + string.digits) for _ in range(length)) - - TOOLS_ATTACHMENT_KEY_REGEX = re.compile(r"__tools_attachment__=(\{.*?\})") MEMORY_QUERY_TOOL = "knowledge_search" WEB_SEARCH_TOOL = "web_search" diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py index 8ccdcb0e1..245203f10 100644 --- a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +++ b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py @@ -269,7 +269,7 @@ class OpenAIResponsesImpl: response_tools=tools, temperature=temperature, response_format=response_format, - inputs=input, + inputs=all_input, ) # Create orchestrator and delegate streaming logic diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py index 8a662e6db..895d13a7f 100644 --- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py @@ -175,6 +175,8 @@ class StreamingResponseOrchestrator: ): yield stream_event + messages = next_turn_messages + if not function_tool_calls and not non_function_tool_calls: break @@ -187,9 +189,7 @@ class StreamingResponseOrchestrator: logger.info(f"Exiting inference loop since iteration count({n_iter}) exceeds {self.max_infer_iters=}") break - messages = next_turn_messages - - self.final_messages = messages.copy() + [current_response.choices[0].message] + self.final_messages = messages.copy() # Create final response final_response = OpenAIResponseObject( @@ -232,9 +232,11 @@ class StreamingResponseOrchestrator: non_function_tool_calls.append(tool_call) else: logger.info(f"Approval denied for {tool_call.id} on {tool_call.function.name}") + next_turn_messages.pop() else: logger.info(f"Requesting approval for {tool_call.id} on {tool_call.function.name}") approvals.append(tool_call) + next_turn_messages.pop() else: non_function_tool_calls.append(tool_call) diff --git a/llama_stack/providers/inline/tool_runtime/rag/memory.py b/llama_stack/providers/inline/tool_runtime/rag/memory.py index aac86a056..3ccfd0bcb 100644 --- a/llama_stack/providers/inline/tool_runtime/rag/memory.py +++ b/llama_stack/providers/inline/tool_runtime/rag/memory.py @@ -8,8 +8,6 @@ import asyncio import base64 import io import mimetypes -import secrets -import string from typing import Any import httpx @@ -52,10 +50,6 @@ from .context_retriever import generate_rag_query log = get_logger(name=__name__, category="tool_runtime") -def make_random_string(length: int = 8): - return "".join(secrets.choice(string.ascii_letters + string.digits) for _ in range(length)) - - async def raw_data_from_doc(doc: RAGDocument) -> tuple[bytes, str]: """Get raw binary data and mime type from a RAGDocument for file upload.""" if isinstance(doc.content, URL): diff --git a/llama_stack/providers/registry/inference.py b/llama_stack/providers/registry/inference.py index bf6a09b6c..f89565892 100644 --- a/llama_stack/providers/registry/inference.py +++ b/llama_stack/providers/registry/inference.py @@ -268,7 +268,7 @@ Available Models: api=Api.inference, adapter_type="watsonx", provider_type="remote::watsonx", - pip_packages=["ibm_watsonx_ai"], + pip_packages=["litellm"], module="llama_stack.providers.remote.inference.watsonx", config_class="llama_stack.providers.remote.inference.watsonx.WatsonXConfig", provider_data_validator="llama_stack.providers.remote.inference.watsonx.WatsonXProviderDataValidator", diff --git a/llama_stack/providers/remote/inference/nvidia/openai_utils.py b/llama_stack/providers/remote/inference/nvidia/openai_utils.py deleted file mode 100644 index 0b0d7fcf3..000000000 --- a/llama_stack/providers/remote/inference/nvidia/openai_utils.py +++ /dev/null @@ -1,217 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import warnings -from collections.abc import AsyncGenerator -from typing import Any - -from openai import AsyncStream -from openai.types.chat.chat_completion import ( - Choice as OpenAIChoice, -) -from openai.types.completion import Completion as OpenAICompletion -from openai.types.completion_choice import Logprobs as OpenAICompletionLogprobs - -from llama_stack.apis.inference import ( - ChatCompletionRequest, - CompletionRequest, - CompletionResponse, - CompletionResponseStreamChunk, - GreedySamplingStrategy, - JsonSchemaResponseFormat, - TokenLogProbs, - TopKSamplingStrategy, - TopPSamplingStrategy, -) -from llama_stack.providers.utils.inference.openai_compat import ( - _convert_openai_finish_reason, - convert_message_to_openai_dict_new, - convert_tooldef_to_openai_tool, -) - - -async def convert_chat_completion_request( - request: ChatCompletionRequest, - n: int = 1, -) -> dict: - """ - Convert a ChatCompletionRequest to an OpenAI API-compatible dictionary. - """ - # model -> model - # messages -> messages - # sampling_params TODO(mattf): review strategy - # strategy=greedy -> nvext.top_k = -1, temperature = temperature - # strategy=top_p -> nvext.top_k = -1, top_p = top_p - # strategy=top_k -> nvext.top_k = top_k - # temperature -> temperature - # top_p -> top_p - # top_k -> nvext.top_k - # max_tokens -> max_tokens - # repetition_penalty -> nvext.repetition_penalty - # response_format -> GrammarResponseFormat TODO(mf) - # response_format -> JsonSchemaResponseFormat: response_format = "json_object" & nvext["guided_json"] = json_schema - # tools -> tools - # tool_choice ("auto", "required") -> tool_choice - # tool_prompt_format -> TBD - # stream -> stream - # logprobs -> logprobs - - if request.response_format and not isinstance(request.response_format, JsonSchemaResponseFormat): - raise ValueError( - f"Unsupported response format: {request.response_format}. Only JsonSchemaResponseFormat is supported." - ) - - nvext = {} - payload: dict[str, Any] = dict( - model=request.model, - messages=[await convert_message_to_openai_dict_new(message) for message in request.messages], - stream=request.stream, - n=n, - extra_body=dict(nvext=nvext), - extra_headers={ - b"User-Agent": b"llama-stack: nvidia-inference-adapter", - }, - ) - - if request.response_format: - # server bug - setting guided_json changes the behavior of response_format resulting in an error - # payload.update(response_format="json_object") - nvext.update(guided_json=request.response_format.json_schema) - - if request.tools: - payload.update(tools=[convert_tooldef_to_openai_tool(tool) for tool in request.tools]) - if request.tool_config.tool_choice: - payload.update( - tool_choice=request.tool_config.tool_choice.value - ) # we cannot include tool_choice w/o tools, server will complain - - if request.logprobs: - payload.update(logprobs=True) - payload.update(top_logprobs=request.logprobs.top_k) - - if request.sampling_params: - nvext.update(repetition_penalty=request.sampling_params.repetition_penalty) - - if request.sampling_params.max_tokens: - payload.update(max_tokens=request.sampling_params.max_tokens) - - strategy = request.sampling_params.strategy - if isinstance(strategy, TopPSamplingStrategy): - nvext.update(top_k=-1) - payload.update(top_p=strategy.top_p) - payload.update(temperature=strategy.temperature) - elif isinstance(strategy, TopKSamplingStrategy): - if strategy.top_k != -1 and strategy.top_k < 1: - warnings.warn("top_k must be -1 or >= 1", stacklevel=2) - nvext.update(top_k=strategy.top_k) - elif isinstance(strategy, GreedySamplingStrategy): - nvext.update(top_k=-1) - else: - raise ValueError(f"Unsupported sampling strategy: {strategy}") - - return payload - - -def convert_completion_request( - request: CompletionRequest, - n: int = 1, -) -> dict: - """ - Convert a ChatCompletionRequest to an OpenAI API-compatible dictionary. - """ - # model -> model - # prompt -> prompt - # sampling_params TODO(mattf): review strategy - # strategy=greedy -> nvext.top_k = -1, temperature = temperature - # strategy=top_p -> nvext.top_k = -1, top_p = top_p - # strategy=top_k -> nvext.top_k = top_k - # temperature -> temperature - # top_p -> top_p - # top_k -> nvext.top_k - # max_tokens -> max_tokens - # repetition_penalty -> nvext.repetition_penalty - # response_format -> nvext.guided_json - # stream -> stream - # logprobs.top_k -> logprobs - - nvext = {} - payload: dict[str, Any] = dict( - model=request.model, - prompt=request.content, - stream=request.stream, - extra_body=dict(nvext=nvext), - extra_headers={ - b"User-Agent": b"llama-stack: nvidia-inference-adapter", - }, - n=n, - ) - - if request.response_format: - # this is not openai compliant, it is a nim extension - nvext.update(guided_json=request.response_format.json_schema) - - if request.logprobs: - payload.update(logprobs=request.logprobs.top_k) - - if request.sampling_params: - nvext.update(repetition_penalty=request.sampling_params.repetition_penalty) - - if request.sampling_params.max_tokens: - payload.update(max_tokens=request.sampling_params.max_tokens) - - if request.sampling_params.strategy == "top_p": - nvext.update(top_k=-1) - payload.update(top_p=request.sampling_params.top_p) - elif request.sampling_params.strategy == "top_k": - if request.sampling_params.top_k != -1 and request.sampling_params.top_k < 1: - warnings.warn("top_k must be -1 or >= 1", stacklevel=2) - nvext.update(top_k=request.sampling_params.top_k) - elif request.sampling_params.strategy == "greedy": - nvext.update(top_k=-1) - payload.update(temperature=request.sampling_params.temperature) - - return payload - - -def _convert_openai_completion_logprobs( - logprobs: OpenAICompletionLogprobs | None, -) -> list[TokenLogProbs] | None: - """ - Convert an OpenAI CompletionLogprobs into a list of TokenLogProbs. - """ - if not logprobs: - return None - - return [TokenLogProbs(logprobs_by_token=logprobs) for logprobs in logprobs.top_logprobs] - - -def convert_openai_completion_choice( - choice: OpenAIChoice, -) -> CompletionResponse: - """ - Convert an OpenAI Completion Choice into a CompletionResponse. - """ - return CompletionResponse( - content=choice.text, - stop_reason=_convert_openai_finish_reason(choice.finish_reason), - logprobs=_convert_openai_completion_logprobs(choice.logprobs), - ) - - -async def convert_openai_completion_stream( - stream: AsyncStream[OpenAICompletion], -) -> AsyncGenerator[CompletionResponse, None]: - """ - Convert a stream of OpenAI Completions into a stream - of ChatCompletionResponseStreamChunks. - """ - async for chunk in stream: - choice = chunk.choices[0] - yield CompletionResponseStreamChunk( - delta=choice.text, - stop_reason=_convert_openai_finish_reason(choice.finish_reason), - logprobs=_convert_openai_completion_logprobs(choice.logprobs), - ) diff --git a/llama_stack/providers/remote/inference/nvidia/utils.py b/llama_stack/providers/remote/inference/nvidia/utils.py index b8431e859..46ee939d9 100644 --- a/llama_stack/providers/remote/inference/nvidia/utils.py +++ b/llama_stack/providers/remote/inference/nvidia/utils.py @@ -4,53 +4,8 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import httpx - -from llama_stack.log import get_logger - from . import NVIDIAConfig -logger = get_logger(name=__name__, category="inference::nvidia") - def _is_nvidia_hosted(config: NVIDIAConfig) -> bool: return "integrate.api.nvidia.com" in config.url - - -async def _get_health(url: str) -> tuple[bool, bool]: - """ - Query {url}/v1/health/{live,ready} to check if the server is running and ready - - Args: - url (str): URL of the server - - Returns: - Tuple[bool, bool]: (is_live, is_ready) - """ - async with httpx.AsyncClient() as client: - live = await client.get(f"{url}/v1/health/live") - ready = await client.get(f"{url}/v1/health/ready") - return live.status_code == 200, ready.status_code == 200 - - -async def check_health(config: NVIDIAConfig) -> None: - """ - Check if the server is running and ready - - Args: - url (str): URL of the server - - Raises: - RuntimeError: If the server is not running or ready - """ - if not _is_nvidia_hosted(config): - logger.info("Checking NVIDIA NIM health...") - try: - is_live, is_ready = await _get_health(config.url) - if not is_live: - raise ConnectionError("NVIDIA NIM is not running") - if not is_ready: - raise ConnectionError("NVIDIA NIM is not ready") - # TODO(mf): should we wait for the server to be ready? - except httpx.ConnectError as e: - raise ConnectionError(f"Failed to connect to NVIDIA NIM: {e}") from e diff --git a/llama_stack/providers/remote/inference/watsonx/__init__.py b/llama_stack/providers/remote/inference/watsonx/__init__.py index e59e873b6..35e74a720 100644 --- a/llama_stack/providers/remote/inference/watsonx/__init__.py +++ b/llama_stack/providers/remote/inference/watsonx/__init__.py @@ -4,19 +4,12 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.inference import Inference - from .config import WatsonXConfig -async def get_adapter_impl(config: WatsonXConfig, _deps) -> Inference: - # import dynamically so `llama stack build` does not fail due to missing dependencies +async def get_adapter_impl(config: WatsonXConfig, _deps): + # import dynamically so the import is used only when it is needed from .watsonx import WatsonXInferenceAdapter - if not isinstance(config, WatsonXConfig): - raise RuntimeError(f"Unexpected config type: {type(config)}") adapter = WatsonXInferenceAdapter(config) return adapter - - -__all__ = ["get_adapter_impl", "WatsonXConfig"] diff --git a/llama_stack/providers/remote/inference/watsonx/config.py b/llama_stack/providers/remote/inference/watsonx/config.py index 4bc0173c4..9e98d4003 100644 --- a/llama_stack/providers/remote/inference/watsonx/config.py +++ b/llama_stack/providers/remote/inference/watsonx/config.py @@ -7,16 +7,18 @@ import os from typing import Any -from pydantic import BaseModel, Field, SecretStr +from pydantic import BaseModel, ConfigDict, Field, SecretStr from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.schema_utils import json_schema_type class WatsonXProviderDataValidator(BaseModel): - url: str - api_key: str - project_id: str + model_config = ConfigDict( + from_attributes=True, + extra="forbid", + ) + watsonx_api_key: str | None @json_schema_type @@ -25,13 +27,17 @@ class WatsonXConfig(RemoteInferenceProviderConfig): default_factory=lambda: os.getenv("WATSONX_BASE_URL", "https://us-south.ml.cloud.ibm.com"), description="A base url for accessing the watsonx.ai", ) + # This seems like it should be required, but none of the other remote inference + # providers require it, so this is optional here too for consistency. + # The OpenAIConfig uses default=None instead, so this is following that precedent. api_key: SecretStr | None = Field( - default_factory=lambda: os.getenv("WATSONX_API_KEY"), - description="The watsonx API key", + default=None, + description="The watsonx.ai API key", ) + # As above, this is optional here too for consistency. project_id: str | None = Field( - default_factory=lambda: os.getenv("WATSONX_PROJECT_ID"), - description="The Project ID key", + default=None, + description="The watsonx.ai project ID", ) timeout: int = Field( default=60, diff --git a/llama_stack/providers/remote/inference/watsonx/models.py b/llama_stack/providers/remote/inference/watsonx/models.py deleted file mode 100644 index d98f0510a..000000000 --- a/llama_stack/providers/remote/inference/watsonx/models.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from llama_stack.models.llama.sku_types import CoreModelId -from llama_stack.providers.utils.inference.model_registry import build_hf_repo_model_entry - -MODEL_ENTRIES = [ - build_hf_repo_model_entry( - "meta-llama/llama-3-3-70b-instruct", - CoreModelId.llama3_3_70b_instruct.value, - ), - build_hf_repo_model_entry( - "meta-llama/llama-2-13b-chat", - CoreModelId.llama2_13b.value, - ), - build_hf_repo_model_entry( - "meta-llama/llama-3-1-70b-instruct", - CoreModelId.llama3_1_70b_instruct.value, - ), - build_hf_repo_model_entry( - "meta-llama/llama-3-1-8b-instruct", - CoreModelId.llama3_1_8b_instruct.value, - ), - build_hf_repo_model_entry( - "meta-llama/llama-3-2-11b-vision-instruct", - CoreModelId.llama3_2_11b_vision_instruct.value, - ), - build_hf_repo_model_entry( - "meta-llama/llama-3-2-1b-instruct", - CoreModelId.llama3_2_1b_instruct.value, - ), - build_hf_repo_model_entry( - "meta-llama/llama-3-2-3b-instruct", - CoreModelId.llama3_2_3b_instruct.value, - ), - build_hf_repo_model_entry( - "meta-llama/llama-3-2-90b-vision-instruct", - CoreModelId.llama3_2_90b_vision_instruct.value, - ), - build_hf_repo_model_entry( - "meta-llama/llama-guard-3-11b-vision", - CoreModelId.llama_guard_3_11b_vision.value, - ), -] diff --git a/llama_stack/providers/remote/inference/watsonx/watsonx.py b/llama_stack/providers/remote/inference/watsonx/watsonx.py index fc58691e2..d04472936 100644 --- a/llama_stack/providers/remote/inference/watsonx/watsonx.py +++ b/llama_stack/providers/remote/inference/watsonx/watsonx.py @@ -4,240 +4,120 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from collections.abc import AsyncGenerator, AsyncIterator from typing import Any -from ibm_watsonx_ai.foundation_models import Model -from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams -from openai import AsyncOpenAI +import requests -from llama_stack.apis.inference import ( - ChatCompletionRequest, - CompletionRequest, - GreedySamplingStrategy, - Inference, - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIEmbeddingsResponse, - OpenAIMessageParam, - OpenAIResponseFormatParam, - TopKSamplingStrategy, - TopPSamplingStrategy, -) -from llama_stack.log import get_logger -from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper -from llama_stack.providers.utils.inference.openai_compat import ( - prepare_openai_completion_params, -) -from llama_stack.providers.utils.inference.prompt_adapter import ( - chat_completion_request_to_prompt, - completion_request_to_prompt, - request_has_media, -) - -from . import WatsonXConfig -from .models import MODEL_ENTRIES - -logger = get_logger(name=__name__, category="inference::watsonx") +from llama_stack.apis.inference import ChatCompletionRequest +from llama_stack.apis.models import Model +from llama_stack.apis.models.models import ModelType +from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig +from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin -# Note on structured output -# WatsonX returns responses with a json embedded into a string. -# Examples: +class WatsonXInferenceAdapter(LiteLLMOpenAIMixin): + _model_cache: dict[str, Model] = {} -# ChatCompletionResponse(completion_message=CompletionMessage(content='```json\n{\n -# "first_name": "Michael",\n "last_name": "Jordan",\n'...) -# Not even a valid JSON, but we can still extract the JSON from the content + def __init__(self, config: WatsonXConfig): + LiteLLMOpenAIMixin.__init__( + self, + litellm_provider_name="watsonx", + api_key_from_config=config.api_key.get_secret_value() if config.api_key else None, + provider_data_api_key_field="watsonx_api_key", + ) + self.available_models = None + self.config = config -# CompletionResponse(content=' \nThe best answer is $\\boxed{\\{"name": "Michael Jordan", -# "year_born": "1963", "year_retired": "2003"\\}}$') -# Find the start of the boxed content + def get_base_url(self) -> str: + return self.config.url + async def _get_params(self, request: ChatCompletionRequest) -> dict[str, Any]: + # Get base parameters from parent + params = await super()._get_params(request) -class WatsonXInferenceAdapter(Inference, ModelRegistryHelper): - def __init__(self, config: WatsonXConfig) -> None: - ModelRegistryHelper.__init__(self, model_entries=MODEL_ENTRIES) - - logger.info(f"Initializing watsonx InferenceAdapter({config.url})...") - self._config = config - self._openai_client: AsyncOpenAI | None = None - - self._project_id = self._config.project_id - - def _get_client(self, model_id) -> Model: - config_api_key = self._config.api_key.get_secret_value() if self._config.api_key else None - config_url = self._config.url - project_id = self._config.project_id - credentials = {"url": config_url, "apikey": config_api_key} - - return Model(model_id=model_id, credentials=credentials, project_id=project_id) - - def _get_openai_client(self) -> AsyncOpenAI: - if not self._openai_client: - self._openai_client = AsyncOpenAI( - base_url=f"{self._config.url}/openai/v1", - api_key=self._config.api_key, - ) - return self._openai_client - - async def _get_params(self, request: ChatCompletionRequest | CompletionRequest) -> dict: - input_dict = {"params": {}} - media_present = request_has_media(request) - llama_model = self.get_llama_model(request.model) - if isinstance(request, ChatCompletionRequest): - input_dict["prompt"] = await chat_completion_request_to_prompt(request, llama_model) - else: - assert not media_present, "Together does not support media for Completion requests" - input_dict["prompt"] = await completion_request_to_prompt(request) - if request.sampling_params: - if request.sampling_params.strategy: - input_dict["params"][GenParams.DECODING_METHOD] = request.sampling_params.strategy.type - if request.sampling_params.max_tokens: - input_dict["params"][GenParams.MAX_NEW_TOKENS] = request.sampling_params.max_tokens - if request.sampling_params.repetition_penalty: - input_dict["params"][GenParams.REPETITION_PENALTY] = request.sampling_params.repetition_penalty - - if isinstance(request.sampling_params.strategy, TopPSamplingStrategy): - input_dict["params"][GenParams.TOP_P] = request.sampling_params.strategy.top_p - input_dict["params"][GenParams.TEMPERATURE] = request.sampling_params.strategy.temperature - if isinstance(request.sampling_params.strategy, TopKSamplingStrategy): - input_dict["params"][GenParams.TOP_K] = request.sampling_params.strategy.top_k - if isinstance(request.sampling_params.strategy, GreedySamplingStrategy): - input_dict["params"][GenParams.TEMPERATURE] = 0.0 - - input_dict["params"][GenParams.STOP_SEQUENCES] = ["<|endoftext|>"] - - params = { - **input_dict, - } + # Add watsonx.ai specific parameters + params["project_id"] = self.config.project_id + params["time_limit"] = self.config.timeout return params - async def openai_embeddings( - self, - model: str, - input: str | list[str], - encoding_format: str | None = "float", - dimensions: int | None = None, - user: str | None = None, - ) -> OpenAIEmbeddingsResponse: - raise NotImplementedError() + # Copied from OpenAIMixin + async def check_model_availability(self, model: str) -> bool: + """ + Check if a specific model is available from the provider's /v1/models. - async def openai_completion( - self, - model: str, - prompt: str | list[str] | list[int] | list[list[int]], - best_of: int | None = None, - echo: bool | None = None, - frequency_penalty: float | None = None, - logit_bias: dict[str, float] | None = None, - logprobs: bool | None = None, - max_tokens: int | None = None, - n: int | None = None, - presence_penalty: float | None = None, - seed: int | None = None, - stop: str | list[str] | None = None, - stream: bool | None = None, - stream_options: dict[str, Any] | None = None, - temperature: float | None = None, - top_p: float | None = None, - user: str | None = None, - guided_choice: list[str] | None = None, - prompt_logprobs: int | None = None, - suffix: str | None = None, - ) -> OpenAICompletion: - model_obj = await self.model_store.get_model(model) - params = await prepare_openai_completion_params( - model=model_obj.provider_resource_id, - prompt=prompt, - best_of=best_of, - echo=echo, - frequency_penalty=frequency_penalty, - logit_bias=logit_bias, - logprobs=logprobs, - max_tokens=max_tokens, - n=n, - presence_penalty=presence_penalty, - seed=seed, - stop=stop, - stream=stream, - stream_options=stream_options, - temperature=temperature, - top_p=top_p, - user=user, - ) - return await self._get_openai_client().completions.create(**params) # type: ignore + :param model: The model identifier to check. + :return: True if the model is available dynamically, False otherwise. + """ + if not self._model_cache: + await self.list_models() + return model in self._model_cache - async def openai_chat_completion( - self, - model: str, - messages: list[OpenAIMessageParam], - frequency_penalty: float | None = None, - function_call: str | dict[str, Any] | None = None, - functions: list[dict[str, Any]] | None = None, - logit_bias: dict[str, float] | None = None, - logprobs: bool | None = None, - max_completion_tokens: int | None = None, - max_tokens: int | None = None, - n: int | None = None, - parallel_tool_calls: bool | None = None, - presence_penalty: float | None = None, - response_format: OpenAIResponseFormatParam | None = None, - seed: int | None = None, - stop: str | list[str] | None = None, - stream: bool | None = None, - stream_options: dict[str, Any] | None = None, - temperature: float | None = None, - tool_choice: str | dict[str, Any] | None = None, - tools: list[dict[str, Any]] | None = None, - top_logprobs: int | None = None, - top_p: float | None = None, - user: str | None = None, - ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: - model_obj = await self.model_store.get_model(model) - params = await prepare_openai_completion_params( - model=model_obj.provider_resource_id, - messages=messages, - frequency_penalty=frequency_penalty, - function_call=function_call, - functions=functions, - logit_bias=logit_bias, - logprobs=logprobs, - max_completion_tokens=max_completion_tokens, - max_tokens=max_tokens, - n=n, - parallel_tool_calls=parallel_tool_calls, - presence_penalty=presence_penalty, - response_format=response_format, - seed=seed, - stop=stop, - stream=stream, - stream_options=stream_options, - temperature=temperature, - tool_choice=tool_choice, - tools=tools, - top_logprobs=top_logprobs, - top_p=top_p, - user=user, - ) - if params.get("stream", False): - return self._stream_openai_chat_completion(params) - return await self._get_openai_client().chat.completions.create(**params) # type: ignore + async def list_models(self) -> list[Model] | None: + self._model_cache = {} + models = [] + for model_spec in self._get_model_specs(): + functions = [f["id"] for f in model_spec.get("functions", [])] + # Format: {"embedding_dimension": 1536, "context_length": 8192} - async def _stream_openai_chat_completion(self, params: dict) -> AsyncGenerator: - # watsonx.ai sometimes adds usage data to the stream - include_usage = False - if params.get("stream_options", None): - include_usage = params["stream_options"].get("include_usage", False) - stream = await self._get_openai_client().chat.completions.create(**params) + # Example of an embedding model: + # {'model_id': 'ibm/granite-embedding-278m-multilingual', + # 'label': 'granite-embedding-278m-multilingual', + # 'model_limits': {'max_sequence_length': 512, 'embedding_dimension': 768}, + # ... + provider_resource_id = f"{self.__provider_id__}/{model_spec['model_id']}" + if "embedding" in functions: + embedding_dimension = model_spec["model_limits"]["embedding_dimension"] + context_length = model_spec["model_limits"]["max_sequence_length"] + embedding_metadata = { + "embedding_dimension": embedding_dimension, + "context_length": context_length, + } + model = Model( + identifier=model_spec["model_id"], + provider_resource_id=provider_resource_id, + provider_id=self.__provider_id__, + metadata=embedding_metadata, + model_type=ModelType.embedding, + ) + self._model_cache[provider_resource_id] = model + models.append(model) + if "text_chat" in functions: + model = Model( + identifier=model_spec["model_id"], + provider_resource_id=provider_resource_id, + provider_id=self.__provider_id__, + metadata={}, + model_type=ModelType.llm, + ) + # In theory, I guess it is possible that a model could be both an embedding model and a text chat model. + # In that case, the cache will record the generator Model object, and the list which we return will have + # both the generator Model object and the text chat Model object. That's fine because the cache is + # only used for check_model_availability() anyway. + self._model_cache[provider_resource_id] = model + models.append(model) + return models - seen_finish_reason = False - async for chunk in stream: - # Final usage chunk with no choices that the user didn't request, so discard - if not include_usage and seen_finish_reason and len(chunk.choices) == 0: - break - yield chunk - for choice in chunk.choices: - if choice.finish_reason: - seen_finish_reason = True - break + # LiteLLM provides methods to list models for many providers, but not for watsonx.ai. + # So we need to implement our own method to list models by calling the watsonx.ai API. + def _get_model_specs(self) -> list[dict[str, Any]]: + """ + Retrieves foundation model specifications from the watsonx.ai API. + """ + url = f"{self.config.url}/ml/v1/foundation_model_specs?version=2023-10-25" + headers = { + # Note that there is no authorization header. Listing models does not require authentication. + "Content-Type": "application/json", + } + + response = requests.get(url, headers=headers) + + # --- Process the Response --- + # Raise an exception for bad status codes (4xx or 5xx) + response.raise_for_status() + + # If the request is successful, parse and return the JSON response. + # The response should contain a list of model specifications + response_data = response.json() + if "resources" not in response_data: + raise ValueError("Resources not found in response") + return response_data["resources"] diff --git a/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/llama_stack/providers/utils/inference/litellm_openai_mixin.py index 6c8f61c3b..6bef97dd5 100644 --- a/llama_stack/providers/utils/inference/litellm_openai_mixin.py +++ b/llama_stack/providers/utils/inference/litellm_openai_mixin.py @@ -4,6 +4,8 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +import base64 +import struct from collections.abc import AsyncIterator from typing import Any @@ -16,6 +18,7 @@ from llama_stack.apis.inference import ( OpenAIChatCompletion, OpenAIChatCompletionChunk, OpenAICompletion, + OpenAIEmbeddingData, OpenAIEmbeddingsResponse, OpenAIEmbeddingUsage, OpenAIMessageParam, @@ -26,7 +29,6 @@ from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry from llama_stack.providers.utils.inference.openai_compat import ( - b64_encode_openai_embeddings_response, convert_message_to_openai_dict_new, convert_tooldef_to_openai_tool, get_sampling_options, @@ -349,3 +351,28 @@ class LiteLLMOpenAIMixin( return False return model in litellm.models_by_provider[self.litellm_provider_name] + + +def b64_encode_openai_embeddings_response( + response_data: list[dict], encoding_format: str | None = "float" +) -> list[OpenAIEmbeddingData]: + """ + Process the OpenAI embeddings response to encode the embeddings in base64 format if specified. + """ + data = [] + for i, embedding_data in enumerate(response_data): + if encoding_format == "base64": + byte_array = bytearray() + for embedding_value in embedding_data["embedding"]: + byte_array.extend(struct.pack("f", float(embedding_value))) + + response_embedding = base64.b64encode(byte_array).decode("utf-8") + else: + response_embedding = embedding_data["embedding"] + data.append( + OpenAIEmbeddingData( + embedding=response_embedding, + index=i, + ) + ) + return data diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py index d863eb53a..7e465a14c 100644 --- a/llama_stack/providers/utils/inference/openai_compat.py +++ b/llama_stack/providers/utils/inference/openai_compat.py @@ -3,9 +3,7 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import base64 import json -import struct import time import uuid import warnings @@ -103,7 +101,6 @@ from llama_stack.apis.inference import ( JsonSchemaResponseFormat, Message, OpenAIChatCompletion, - OpenAIEmbeddingData, OpenAIMessageParam, OpenAIResponseFormatParam, SamplingParams, @@ -1402,28 +1399,3 @@ def prepare_openai_embeddings_params( params["user"] = user return params - - -def b64_encode_openai_embeddings_response( - response_data: dict, encoding_format: str | None = "float" -) -> list[OpenAIEmbeddingData]: - """ - Process the OpenAI embeddings response to encode the embeddings in base64 format if specified. - """ - data = [] - for i, embedding_data in enumerate(response_data): - if encoding_format == "base64": - byte_array = bytearray() - for embedding_value in embedding_data.embedding: - byte_array.extend(struct.pack("f", float(embedding_value))) - - response_embedding = base64.b64encode(byte_array).decode("utf-8") - else: - response_embedding = embedding_data.embedding - data.append( - OpenAIEmbeddingData( - embedding=response_embedding, - index=i, - ) - ) - return data diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 2a5177f93..c179eba6c 100644 --- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -296,15 +296,14 @@ class OpenAIVectorStoreMixin(ABC): async def shutdown(self) -> None: """Clean up mixin resources including background tasks.""" # Cancel any running file batch tasks gracefully - if hasattr(self, "_file_batch_tasks"): - tasks_to_cancel = list(self._file_batch_tasks.items()) - for _, task in tasks_to_cancel: - if not task.done(): - task.cancel() - try: - await task - except asyncio.CancelledError: - pass + tasks_to_cancel = list(self._file_batch_tasks.items()) + for _, task in tasks_to_cancel: + if not task.done(): + task.cancel() + try: + await task + except asyncio.CancelledError: + pass @abstractmethod async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None: diff --git a/llama_stack/providers/utils/memory/vector_store.py b/llama_stack/providers/utils/memory/vector_store.py index 857fbe910..c0534a875 100644 --- a/llama_stack/providers/utils/memory/vector_store.py +++ b/llama_stack/providers/utils/memory/vector_store.py @@ -20,7 +20,6 @@ from pydantic import BaseModel from llama_stack.apis.common.content_types import ( URL, InterleavedContent, - TextContentItem, ) from llama_stack.apis.tools import RAGDocument from llama_stack.apis.vector_dbs import VectorDB @@ -129,26 +128,6 @@ def content_from_data_and_mime_type(data: bytes | str, mime_type: str | None, en return "" -def concat_interleaved_content(content: list[InterleavedContent]) -> InterleavedContent: - """concatenate interleaved content into a single list. ensure that 'str's are converted to TextContentItem when in a list""" - - ret = [] - - def _process(c): - if isinstance(c, str): - ret.append(TextContentItem(text=c)) - elif isinstance(c, list): - for item in c: - _process(item) - else: - ret.append(c) - - for c in content: - _process(c) - - return ret - - async def content_from_doc(doc: RAGDocument) -> str: if isinstance(doc.content, URL): if doc.content.uri.startswith("data:"): diff --git a/tests/unit/providers/inference/test_inference_client_caching.py b/tests/unit/providers/inference/test_inference_client_caching.py index d30b5b12a..55a6793c2 100644 --- a/tests/unit/providers/inference/test_inference_client_caching.py +++ b/tests/unit/providers/inference/test_inference_client_caching.py @@ -18,6 +18,8 @@ from llama_stack.providers.remote.inference.openai.config import OpenAIConfig from llama_stack.providers.remote.inference.openai.openai import OpenAIInferenceAdapter from llama_stack.providers.remote.inference.together.config import TogetherImplConfig from llama_stack.providers.remote.inference.together.together import TogetherInferenceAdapter +from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig +from llama_stack.providers.remote.inference.watsonx.watsonx import WatsonXInferenceAdapter @pytest.mark.parametrize( @@ -58,3 +60,29 @@ def test_openai_provider_data_used(config_cls, adapter_cls, provider_data_valida {"x-llamastack-provider-data": json.dumps({inference_adapter.provider_data_api_key_field: api_key})} ): assert inference_adapter.client.api_key == api_key + + +@pytest.mark.parametrize( + "config_cls,adapter_cls,provider_data_validator", + [ + ( + WatsonXConfig, + WatsonXInferenceAdapter, + "llama_stack.providers.remote.inference.watsonx.config.WatsonXProviderDataValidator", + ), + ], +) +def test_litellm_provider_data_used(config_cls, adapter_cls, provider_data_validator: str): + """Validate data for LiteLLM-based providers. Similar to test_openai_provider_data_used, but without the + assumption that there is an OpenAI-compatible client object.""" + + inference_adapter = adapter_cls(config=config_cls()) + + inference_adapter.__provider_spec__ = MagicMock() + inference_adapter.__provider_spec__.provider_data_validator = provider_data_validator + + for api_key in ["test1", "test2"]: + with request_provider_data_context( + {"x-llamastack-provider-data": json.dumps({inference_adapter.provider_data_api_key_field: api_key})} + ): + assert inference_adapter.get_api_key() == api_key diff --git a/tests/unit/registry/test_registry.py b/tests/unit/registry/test_registry.py index 4ea4a20b9..c1f834d5d 100644 --- a/tests/unit/registry/test_registry.py +++ b/tests/unit/registry/test_registry.py @@ -125,8 +125,15 @@ async def test_duplicate_provider_registration(cached_disk_dist_registry): provider_resource_id="test_vector_db_2", provider_id="baz", # Same provider_id ) - await cached_disk_dist_registry.register(duplicate_vector_db) + # Now we expect a ValueError to be raised for duplicate registration + with pytest.raises( + ValueError, + match=r"Provider 'baz' is already registered.*Unregister the existing provider first before registering it again.", + ): + await cached_disk_dist_registry.register(duplicate_vector_db) + + # Verify the original registration is still intact result = await cached_disk_dist_registry.get("vector_db", "test_vector_db_2") assert result is not None assert result.embedding_model == original_vector_db.embedding_model # Original values preserved