Build AI Applications with Llama Stack
- Unified APIs for Inference, RAG, Agents, Tools, Safety, and Telemetry
+ Unified APIs for Inference, RAG, Agents, Tools, and Safety
+ description="The open-source framework for building generative AI applications with unified APIs for Inference, RAG, Agents, Tools, Safety, and Evals.">
diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml
index cf9bd14c4..2d0ce6e08 100644
--- a/docs/static/deprecated-llama-stack-spec.yaml
+++ b/docs/static/deprecated-llama-stack-spec.yaml
@@ -378,6 +378,91 @@ paths:
type: string
description: 'Path parameter: identifier'
deprecated: true
+ /v1/tool-runtime/invoke:
+ post:
+ responses:
+ '200':
+ description: A ToolInvocationResult.
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/ToolInvocationResult'
+ '400':
+ description: Bad Request
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ description: Too Many Requests
+ $ref: '#/components/responses/TooManyRequests429'
+ '500':
+ description: Internal Server Error
+ $ref: '#/components/responses/InternalServerError500'
+ default:
+ description: Default Response
+ $ref: '#/components/responses/DefaultError'
+ tags:
+ - Tool Runtime
+ summary: Invoke Tool
+ description: Run a tool with the given arguments.
+ operationId: invoke_tool_v1_tool_runtime_invoke_post
+ requestBody:
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/InvokeToolRequest'
+ required: true
+ deprecated: true
+ /v1/tool-runtime/list-tools:
+ get:
+ responses:
+ '200':
+ description: A ListToolDefsResponse.
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/ListToolDefsResponse'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ description: Bad Request
+ '429':
+ $ref: '#/components/responses/TooManyRequests429'
+ description: Too Many Requests
+ '500':
+ $ref: '#/components/responses/InternalServerError500'
+ description: Internal Server Error
+ default:
+ $ref: '#/components/responses/DefaultError'
+ description: Default Response
+ tags:
+ - Tool Runtime
+ summary: List Runtime Tools
+ description: List all tools in the runtime.
+ operationId: list_runtime_tools_v1_tool_runtime_list_tools_get
+ parameters:
+ - name: authorization
+ in: query
+ required: false
+ schema:
+ anyOf:
+ - type: string
+ - type: 'null'
+ title: Authorization
+ - name: tool_group_id
+ in: query
+ required: false
+ schema:
+ anyOf:
+ - type: string
+ - type: 'null'
+ title: Tool Group Id
+ - name: mcp_endpoint
+ in: query
+ required: false
+ schema:
+ anyOf:
+ - $ref: '#/components/schemas/URL'
+ - type: 'null'
+ title: Mcp Endpoint
+ deprecated: true
/v1/toolgroups:
get:
responses:
@@ -404,6 +489,7 @@ paths:
summary: List Tool Groups
description: List tool groups with optional provider.
operationId: list_tool_groups_v1_toolgroups_get
+ deprecated: true
post:
responses:
'400':
@@ -465,6 +551,7 @@ paths:
schema:
type: string
description: 'Path parameter: toolgroup_id'
+ deprecated: true
delete:
responses:
'400':
@@ -494,6 +581,76 @@ paths:
type: string
description: 'Path parameter: toolgroup_id'
deprecated: true
+ /v1/tools:
+ get:
+ responses:
+ '200':
+ description: A ListToolDefsResponse.
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/ListToolDefsResponse'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ description: Bad Request
+ '429':
+ $ref: '#/components/responses/TooManyRequests429'
+ description: Too Many Requests
+ '500':
+ $ref: '#/components/responses/InternalServerError500'
+ description: Internal Server Error
+ default:
+ $ref: '#/components/responses/DefaultError'
+ description: Default Response
+ tags:
+ - Tool Groups
+ summary: List Tools
+ description: List tools with optional tool group.
+ operationId: list_tools_v1_tools_get
+ parameters:
+ - name: toolgroup_id
+ in: query
+ required: false
+ schema:
+ anyOf:
+ - type: string
+ - type: 'null'
+ title: Toolgroup Id
+ deprecated: true
+ /v1/tools/{tool_name}:
+ get:
+ responses:
+ '200':
+ description: A ToolDef.
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/ToolDef'
+ '400':
+ description: Bad Request
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ description: Too Many Requests
+ $ref: '#/components/responses/TooManyRequests429'
+ '500':
+ description: Internal Server Error
+ $ref: '#/components/responses/InternalServerError500'
+ default:
+ description: Default Response
+ $ref: '#/components/responses/DefaultError'
+ tags:
+ - Tool Groups
+ summary: Get Tool
+ description: Get a tool by its name.
+ operationId: get_tool_v1_tools__tool_name__get
+ parameters:
+ - name: tool_name
+ in: path
+ required: true
+ schema:
+ type: string
+ description: 'Path parameter: tool_name'
+ deprecated: true
/v1beta/datasets:
get:
responses:
@@ -3639,6 +3796,12 @@ components:
anyOf:
- type: integer
- type: 'null'
+ metadata:
+ anyOf:
+ - additionalProperties:
+ type: string
+ type: object
+ - type: 'null'
input:
items:
anyOf:
@@ -4042,6 +4205,12 @@ components:
anyOf:
- type: integer
- type: 'null'
+ metadata:
+ anyOf:
+ - additionalProperties:
+ type: string
+ type: object
+ - type: 'null'
type: object
required:
- input
@@ -4173,6 +4342,12 @@ components:
anyOf:
- type: integer
- type: 'null'
+ metadata:
+ anyOf:
+ - additionalProperties:
+ type: string
+ type: object
+ - type: 'null'
type: object
required:
- created_at
@@ -9023,227 +9198,6 @@ components:
- $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
title: OpenAIResponseContentPartReasoningText
title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
- SpanEndPayload:
- description: Payload for a span end event.
- properties:
- type:
- const: span_end
- default: span_end
- title: Type
- type: string
- status:
- $ref: '#/components/schemas/SpanStatus'
- required:
- - status
- title: SpanEndPayload
- type: object
- SpanStartPayload:
- description: Payload for a span start event.
- properties:
- type:
- const: span_start
- default: span_start
- title: Type
- type: string
- name:
- title: Name
- type: string
- parent_span_id:
- anyOf:
- - type: string
- - type: 'null'
- nullable: true
- required:
- - name
- title: SpanStartPayload
- type: object
- SpanStatus:
- description: The status of a span indicating whether it completed successfully or with an error.
- enum:
- - ok
- - error
- title: SpanStatus
- type: string
- StructuredLogPayload:
- discriminator:
- mapping:
- span_end: '#/components/schemas/SpanEndPayload'
- span_start: '#/components/schemas/SpanStartPayload'
- propertyName: type
- oneOf:
- - $ref: '#/components/schemas/SpanStartPayload'
- title: SpanStartPayload
- - $ref: '#/components/schemas/SpanEndPayload'
- title: SpanEndPayload
- title: SpanStartPayload | SpanEndPayload
- LogSeverity:
- description: The severity level of a log message.
- enum:
- - verbose
- - debug
- - info
- - warn
- - error
- - critical
- title: LogSeverity
- type: string
- MetricEvent:
- description: A metric event containing a measured value.
- properties:
- trace_id:
- title: Trace Id
- type: string
- span_id:
- title: Span Id
- type: string
- timestamp:
- format: date-time
- title: Timestamp
- type: string
- attributes:
- anyOf:
- - additionalProperties:
- anyOf:
- - type: string
- - type: integer
- - type: number
- - type: boolean
- - type: 'null'
- title: string | ... (4 variants)
- type: object
- - type: 'null'
- type:
- const: metric
- default: metric
- title: Type
- type: string
- metric:
- title: Metric
- type: string
- value:
- anyOf:
- - type: integer
- - type: number
- title: integer | number
- unit:
- title: Unit
- type: string
- required:
- - trace_id
- - span_id
- - timestamp
- - metric
- - value
- - unit
- title: MetricEvent
- type: object
- StructuredLogEvent:
- description: A structured log event containing typed payload data.
- properties:
- trace_id:
- title: Trace Id
- type: string
- span_id:
- title: Span Id
- type: string
- timestamp:
- format: date-time
- title: Timestamp
- type: string
- attributes:
- anyOf:
- - additionalProperties:
- anyOf:
- - type: string
- - type: integer
- - type: number
- - type: boolean
- - type: 'null'
- title: string | ... (4 variants)
- type: object
- - type: 'null'
- type:
- const: structured_log
- default: structured_log
- title: Type
- type: string
- payload:
- discriminator:
- mapping:
- span_end: '#/components/schemas/SpanEndPayload'
- span_start: '#/components/schemas/SpanStartPayload'
- propertyName: type
- oneOf:
- - $ref: '#/components/schemas/SpanStartPayload'
- title: SpanStartPayload
- - $ref: '#/components/schemas/SpanEndPayload'
- title: SpanEndPayload
- title: SpanStartPayload | SpanEndPayload
- required:
- - trace_id
- - span_id
- - timestamp
- - payload
- title: StructuredLogEvent
- type: object
- UnstructuredLogEvent:
- description: An unstructured log event containing a simple text message.
- properties:
- trace_id:
- title: Trace Id
- type: string
- span_id:
- title: Span Id
- type: string
- timestamp:
- format: date-time
- title: Timestamp
- type: string
- attributes:
- anyOf:
- - additionalProperties:
- anyOf:
- - type: string
- - type: integer
- - type: number
- - type: boolean
- - type: 'null'
- title: string | ... (4 variants)
- type: object
- - type: 'null'
- type:
- const: unstructured_log
- default: unstructured_log
- title: Type
- type: string
- message:
- title: Message
- type: string
- severity:
- $ref: '#/components/schemas/LogSeverity'
- required:
- - trace_id
- - span_id
- - timestamp
- - message
- - severity
- title: UnstructuredLogEvent
- type: object
- Event:
- discriminator:
- mapping:
- metric: '#/components/schemas/MetricEvent'
- structured_log: '#/components/schemas/StructuredLogEvent'
- unstructured_log: '#/components/schemas/UnstructuredLogEvent'
- propertyName: type
- oneOf:
- - $ref: '#/components/schemas/UnstructuredLogEvent'
- title: UnstructuredLogEvent
- - $ref: '#/components/schemas/MetricEvent'
- title: MetricEvent
- - $ref: '#/components/schemas/StructuredLogEvent'
- title: StructuredLogEvent
- title: UnstructuredLogEvent | MetricEvent | StructuredLogEvent
MetricInResponse:
description: A metric value included in API responses.
properties:
@@ -10068,236 +10022,6 @@ components:
- logger_config
title: PostTrainingRLHFRequest
type: object
- Span:
- description: A span representing a single operation within a trace.
- properties:
- span_id:
- title: Span Id
- type: string
- trace_id:
- title: Trace Id
- type: string
- parent_span_id:
- anyOf:
- - type: string
- - type: 'null'
- nullable: true
- name:
- title: Name
- type: string
- start_time:
- format: date-time
- title: Start Time
- type: string
- end_time:
- anyOf:
- - format: date-time
- type: string
- - type: 'null'
- nullable: true
- attributes:
- anyOf:
- - additionalProperties: true
- type: object
- - type: 'null'
- required:
- - span_id
- - trace_id
- - name
- - start_time
- title: Span
- type: object
- Trace:
- description: A trace representing the complete execution path of a request across multiple operations.
- properties:
- trace_id:
- title: Trace Id
- type: string
- root_span_id:
- title: Root Span Id
- type: string
- start_time:
- format: date-time
- title: Start Time
- type: string
- end_time:
- anyOf:
- - format: date-time
- type: string
- - type: 'null'
- nullable: true
- required:
- - trace_id
- - root_span_id
- - start_time
- title: Trace
- type: object
- EventType:
- description: The type of telemetry event being logged.
- enum:
- - unstructured_log
- - structured_log
- - metric
- title: EventType
- type: string
- StructuredLogType:
- description: The type of structured log event payload.
- enum:
- - span_start
- - span_end
- title: StructuredLogType
- type: string
- EvalTrace:
- description: A trace record for evaluation purposes.
- properties:
- session_id:
- title: Session Id
- type: string
- step:
- title: Step
- type: string
- input:
- title: Input
- type: string
- output:
- title: Output
- type: string
- expected_output:
- title: Expected Output
- type: string
- required:
- - session_id
- - step
- - input
- - output
- - expected_output
- title: EvalTrace
- type: object
- SpanWithStatus:
- description: A span that includes status information.
- properties:
- span_id:
- title: Span Id
- type: string
- trace_id:
- title: Trace Id
- type: string
- parent_span_id:
- anyOf:
- - type: string
- - type: 'null'
- nullable: true
- name:
- title: Name
- type: string
- start_time:
- format: date-time
- title: Start Time
- type: string
- end_time:
- anyOf:
- - format: date-time
- type: string
- - type: 'null'
- nullable: true
- attributes:
- anyOf:
- - additionalProperties: true
- type: object
- - type: 'null'
- status:
- anyOf:
- - $ref: '#/components/schemas/SpanStatus'
- title: SpanStatus
- - type: 'null'
- nullable: true
- title: SpanStatus
- required:
- - span_id
- - trace_id
- - name
- - start_time
- title: SpanWithStatus
- type: object
- QueryConditionOp:
- description: Comparison operators for query conditions.
- enum:
- - eq
- - ne
- - gt
- - lt
- title: QueryConditionOp
- type: string
- QueryCondition:
- description: A condition for filtering query results.
- properties:
- key:
- title: Key
- type: string
- op:
- $ref: '#/components/schemas/QueryConditionOp'
- value:
- title: Value
- required:
- - key
- - op
- - value
- title: QueryCondition
- type: object
- MetricLabel:
- description: A label associated with a metric.
- properties:
- name:
- title: Name
- type: string
- value:
- title: Value
- type: string
- required:
- - name
- - value
- title: MetricLabel
- type: object
- MetricDataPoint:
- description: A single data point in a metric time series.
- properties:
- timestamp:
- title: Timestamp
- type: integer
- value:
- title: Value
- type: number
- unit:
- title: Unit
- type: string
- required:
- - timestamp
- - value
- - unit
- title: MetricDataPoint
- type: object
- MetricSeries:
- description: A time series of metric data points.
- properties:
- metric:
- title: Metric
- type: string
- labels:
- items:
- $ref: '#/components/schemas/MetricLabel'
- title: Labels
- type: array
- values:
- items:
- $ref: '#/components/schemas/MetricDataPoint'
- title: Values
- type: array
- required:
- - metric
- - labels
- - values
- title: MetricSeries
- type: object
responses:
BadRequest400:
description: The request was invalid or malformed
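Across the spec files in this patch, several conversation-related schemas gain an optional `metadata` map of string keys to string values. A minimal Pydantic sketch (the class name is hypothetical; it only assumes the upstream models declare the field as an optional `dict[str, str]`) reproduces the `anyOf` shape added in the hunks above:

```python
from pydantic import BaseModel


class ConversationItem(BaseModel):
    # An optional string-to-string map; Pydantic v2 renders this as
    # anyOf: [{type: object, additionalProperties: {type: string}}, {type: 'null'}]
    metadata: dict[str, str] | None = None


# Prints the anyOf schema matching the additions above.
print(ConversationItem.model_json_schema()["properties"]["metadata"])
```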
diff --git a/docs/static/experimental-llama-stack-spec.yaml b/docs/static/experimental-llama-stack-spec.yaml
index 18ce75562..4d5a43693 100644
--- a/docs/static/experimental-llama-stack-spec.yaml
+++ b/docs/static/experimental-llama-stack-spec.yaml
@@ -3336,6 +3336,12 @@ components:
anyOf:
- type: integer
- type: 'null'
+ metadata:
+ anyOf:
+ - additionalProperties:
+ type: string
+ type: object
+ - type: 'null'
input:
items:
anyOf:
@@ -3736,6 +3742,12 @@ components:
anyOf:
- type: integer
- type: 'null'
+ metadata:
+ anyOf:
+ - additionalProperties:
+ type: string
+ type: object
+ - type: 'null'
type: object
required:
- created_at
@@ -7952,227 +7964,6 @@ components:
- $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
title: OpenAIResponseContentPartReasoningText
title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
- SpanEndPayload:
- description: Payload for a span end event.
- properties:
- type:
- const: span_end
- default: span_end
- title: Type
- type: string
- status:
- $ref: '#/components/schemas/SpanStatus'
- required:
- - status
- title: SpanEndPayload
- type: object
- SpanStartPayload:
- description: Payload for a span start event.
- properties:
- type:
- const: span_start
- default: span_start
- title: Type
- type: string
- name:
- title: Name
- type: string
- parent_span_id:
- anyOf:
- - type: string
- - type: 'null'
- nullable: true
- required:
- - name
- title: SpanStartPayload
- type: object
- SpanStatus:
- description: The status of a span indicating whether it completed successfully or with an error.
- enum:
- - ok
- - error
- title: SpanStatus
- type: string
- StructuredLogPayload:
- discriminator:
- mapping:
- span_end: '#/components/schemas/SpanEndPayload'
- span_start: '#/components/schemas/SpanStartPayload'
- propertyName: type
- oneOf:
- - $ref: '#/components/schemas/SpanStartPayload'
- title: SpanStartPayload
- - $ref: '#/components/schemas/SpanEndPayload'
- title: SpanEndPayload
- title: SpanStartPayload | SpanEndPayload
- LogSeverity:
- description: The severity level of a log message.
- enum:
- - verbose
- - debug
- - info
- - warn
- - error
- - critical
- title: LogSeverity
- type: string
- MetricEvent:
- description: A metric event containing a measured value.
- properties:
- trace_id:
- title: Trace Id
- type: string
- span_id:
- title: Span Id
- type: string
- timestamp:
- format: date-time
- title: Timestamp
- type: string
- attributes:
- anyOf:
- - additionalProperties:
- anyOf:
- - type: string
- - type: integer
- - type: number
- - type: boolean
- - type: 'null'
- title: string | ... (4 variants)
- type: object
- - type: 'null'
- type:
- const: metric
- default: metric
- title: Type
- type: string
- metric:
- title: Metric
- type: string
- value:
- anyOf:
- - type: integer
- - type: number
- title: integer | number
- unit:
- title: Unit
- type: string
- required:
- - trace_id
- - span_id
- - timestamp
- - metric
- - value
- - unit
- title: MetricEvent
- type: object
- StructuredLogEvent:
- description: A structured log event containing typed payload data.
- properties:
- trace_id:
- title: Trace Id
- type: string
- span_id:
- title: Span Id
- type: string
- timestamp:
- format: date-time
- title: Timestamp
- type: string
- attributes:
- anyOf:
- - additionalProperties:
- anyOf:
- - type: string
- - type: integer
- - type: number
- - type: boolean
- - type: 'null'
- title: string | ... (4 variants)
- type: object
- - type: 'null'
- type:
- const: structured_log
- default: structured_log
- title: Type
- type: string
- payload:
- discriminator:
- mapping:
- span_end: '#/components/schemas/SpanEndPayload'
- span_start: '#/components/schemas/SpanStartPayload'
- propertyName: type
- oneOf:
- - $ref: '#/components/schemas/SpanStartPayload'
- title: SpanStartPayload
- - $ref: '#/components/schemas/SpanEndPayload'
- title: SpanEndPayload
- title: SpanStartPayload | SpanEndPayload
- required:
- - trace_id
- - span_id
- - timestamp
- - payload
- title: StructuredLogEvent
- type: object
- UnstructuredLogEvent:
- description: An unstructured log event containing a simple text message.
- properties:
- trace_id:
- title: Trace Id
- type: string
- span_id:
- title: Span Id
- type: string
- timestamp:
- format: date-time
- title: Timestamp
- type: string
- attributes:
- anyOf:
- - additionalProperties:
- anyOf:
- - type: string
- - type: integer
- - type: number
- - type: boolean
- - type: 'null'
- title: string | ... (4 variants)
- type: object
- - type: 'null'
- type:
- const: unstructured_log
- default: unstructured_log
- title: Type
- type: string
- message:
- title: Message
- type: string
- severity:
- $ref: '#/components/schemas/LogSeverity'
- required:
- - trace_id
- - span_id
- - timestamp
- - message
- - severity
- title: UnstructuredLogEvent
- type: object
- Event:
- discriminator:
- mapping:
- metric: '#/components/schemas/MetricEvent'
- structured_log: '#/components/schemas/StructuredLogEvent'
- unstructured_log: '#/components/schemas/UnstructuredLogEvent'
- propertyName: type
- oneOf:
- - $ref: '#/components/schemas/UnstructuredLogEvent'
- title: UnstructuredLogEvent
- - $ref: '#/components/schemas/MetricEvent'
- title: MetricEvent
- - $ref: '#/components/schemas/StructuredLogEvent'
- title: StructuredLogEvent
- title: UnstructuredLogEvent | MetricEvent | StructuredLogEvent
MetricInResponse:
description: A metric value included in API responses.
properties:
@@ -8997,236 +8788,6 @@ components:
- logger_config
title: PostTrainingRLHFRequest
type: object
- Span:
- description: A span representing a single operation within a trace.
- properties:
- span_id:
- title: Span Id
- type: string
- trace_id:
- title: Trace Id
- type: string
- parent_span_id:
- anyOf:
- - type: string
- - type: 'null'
- nullable: true
- name:
- title: Name
- type: string
- start_time:
- format: date-time
- title: Start Time
- type: string
- end_time:
- anyOf:
- - format: date-time
- type: string
- - type: 'null'
- nullable: true
- attributes:
- anyOf:
- - additionalProperties: true
- type: object
- - type: 'null'
- required:
- - span_id
- - trace_id
- - name
- - start_time
- title: Span
- type: object
- Trace:
- description: A trace representing the complete execution path of a request across multiple operations.
- properties:
- trace_id:
- title: Trace Id
- type: string
- root_span_id:
- title: Root Span Id
- type: string
- start_time:
- format: date-time
- title: Start Time
- type: string
- end_time:
- anyOf:
- - format: date-time
- type: string
- - type: 'null'
- nullable: true
- required:
- - trace_id
- - root_span_id
- - start_time
- title: Trace
- type: object
- EventType:
- description: The type of telemetry event being logged.
- enum:
- - unstructured_log
- - structured_log
- - metric
- title: EventType
- type: string
- StructuredLogType:
- description: The type of structured log event payload.
- enum:
- - span_start
- - span_end
- title: StructuredLogType
- type: string
- EvalTrace:
- description: A trace record for evaluation purposes.
- properties:
- session_id:
- title: Session Id
- type: string
- step:
- title: Step
- type: string
- input:
- title: Input
- type: string
- output:
- title: Output
- type: string
- expected_output:
- title: Expected Output
- type: string
- required:
- - session_id
- - step
- - input
- - output
- - expected_output
- title: EvalTrace
- type: object
- SpanWithStatus:
- description: A span that includes status information.
- properties:
- span_id:
- title: Span Id
- type: string
- trace_id:
- title: Trace Id
- type: string
- parent_span_id:
- anyOf:
- - type: string
- - type: 'null'
- nullable: true
- name:
- title: Name
- type: string
- start_time:
- format: date-time
- title: Start Time
- type: string
- end_time:
- anyOf:
- - format: date-time
- type: string
- - type: 'null'
- nullable: true
- attributes:
- anyOf:
- - additionalProperties: true
- type: object
- - type: 'null'
- status:
- anyOf:
- - $ref: '#/components/schemas/SpanStatus'
- title: SpanStatus
- - type: 'null'
- nullable: true
- title: SpanStatus
- required:
- - span_id
- - trace_id
- - name
- - start_time
- title: SpanWithStatus
- type: object
- QueryConditionOp:
- description: Comparison operators for query conditions.
- enum:
- - eq
- - ne
- - gt
- - lt
- title: QueryConditionOp
- type: string
- QueryCondition:
- description: A condition for filtering query results.
- properties:
- key:
- title: Key
- type: string
- op:
- $ref: '#/components/schemas/QueryConditionOp'
- value:
- title: Value
- required:
- - key
- - op
- - value
- title: QueryCondition
- type: object
- MetricLabel:
- description: A label associated with a metric.
- properties:
- name:
- title: Name
- type: string
- value:
- title: Value
- type: string
- required:
- - name
- - value
- title: MetricLabel
- type: object
- MetricDataPoint:
- description: A single data point in a metric time series.
- properties:
- timestamp:
- title: Timestamp
- type: integer
- value:
- title: Value
- type: number
- unit:
- title: Unit
- type: string
- required:
- - timestamp
- - value
- - unit
- title: MetricDataPoint
- type: object
- MetricSeries:
- description: A time series of metric data points.
- properties:
- metric:
- title: Metric
- type: string
- labels:
- items:
- $ref: '#/components/schemas/MetricLabel'
- title: Labels
- type: array
- values:
- items:
- $ref: '#/components/schemas/MetricDataPoint'
- title: Values
- type: array
- required:
- - metric
- - labels
- - values
- title: MetricSeries
- type: object
responses:
BadRequest400:
description: The request was invalid or malformed
diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml
index 9f7b2ed64..a593fef85 100644
--- a/docs/static/llama-stack-spec.yaml
+++ b/docs/static/llama-stack-spec.yaml
@@ -1872,216 +1872,6 @@ paths:
schema:
type: string
description: 'Path parameter: identifier'
- /v1/tool-runtime/invoke:
- post:
- responses:
- '200':
- description: A ToolInvocationResult.
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/ToolInvocationResult'
- '400':
- description: Bad Request
- $ref: '#/components/responses/BadRequest400'
- '429':
- description: Too Many Requests
- $ref: '#/components/responses/TooManyRequests429'
- '500':
- description: Internal Server Error
- $ref: '#/components/responses/InternalServerError500'
- default:
- description: Default Response
- $ref: '#/components/responses/DefaultError'
- tags:
- - Tool Runtime
- summary: Invoke Tool
- description: Run a tool with the given arguments.
- operationId: invoke_tool_v1_tool_runtime_invoke_post
- requestBody:
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/InvokeToolRequest'
- required: true
- /v1/tool-runtime/list-tools:
- get:
- responses:
- '200':
- description: A ListToolDefsResponse.
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/ListToolDefsResponse'
- '400':
- $ref: '#/components/responses/BadRequest400'
- description: Bad Request
- '429':
- $ref: '#/components/responses/TooManyRequests429'
- description: Too Many Requests
- '500':
- $ref: '#/components/responses/InternalServerError500'
- description: Internal Server Error
- default:
- $ref: '#/components/responses/DefaultError'
- description: Default Response
- tags:
- - Tool Runtime
- summary: List Runtime Tools
- description: List all tools in the runtime.
- operationId: list_runtime_tools_v1_tool_runtime_list_tools_get
- parameters:
- - name: authorization
- in: query
- required: false
- schema:
- anyOf:
- - type: string
- - type: 'null'
- title: Authorization
- - name: tool_group_id
- in: query
- required: false
- schema:
- anyOf:
- - type: string
- - type: 'null'
- title: Tool Group Id
- - name: mcp_endpoint
- in: query
- required: false
- schema:
- anyOf:
- - $ref: '#/components/schemas/URL'
- - type: 'null'
- title: Mcp Endpoint
- /v1/toolgroups:
- get:
- responses:
- '200':
- description: A ListToolGroupsResponse.
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/ListToolGroupsResponse'
- '400':
- description: Bad Request
- $ref: '#/components/responses/BadRequest400'
- '429':
- description: Too Many Requests
- $ref: '#/components/responses/TooManyRequests429'
- '500':
- description: Internal Server Error
- $ref: '#/components/responses/InternalServerError500'
- default:
- description: Default Response
- $ref: '#/components/responses/DefaultError'
- tags:
- - Tool Groups
- summary: List Tool Groups
- description: List tool groups with optional provider.
- operationId: list_tool_groups_v1_toolgroups_get
- /v1/toolgroups/{toolgroup_id}:
- get:
- responses:
- '200':
- description: A ToolGroup.
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/ToolGroup'
- '400':
- description: Bad Request
- $ref: '#/components/responses/BadRequest400'
- '429':
- description: Too Many Requests
- $ref: '#/components/responses/TooManyRequests429'
- '500':
- description: Internal Server Error
- $ref: '#/components/responses/InternalServerError500'
- default:
- description: Default Response
- $ref: '#/components/responses/DefaultError'
- tags:
- - Tool Groups
- summary: Get Tool Group
- description: Get a tool group by its ID.
- operationId: get_tool_group_v1_toolgroups__toolgroup_id__get
- parameters:
- - name: toolgroup_id
- in: path
- required: true
- schema:
- type: string
- description: 'Path parameter: toolgroup_id'
- /v1/tools:
- get:
- responses:
- '200':
- description: A ListToolDefsResponse.
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/ListToolDefsResponse'
- '400':
- $ref: '#/components/responses/BadRequest400'
- description: Bad Request
- '429':
- $ref: '#/components/responses/TooManyRequests429'
- description: Too Many Requests
- '500':
- $ref: '#/components/responses/InternalServerError500'
- description: Internal Server Error
- default:
- $ref: '#/components/responses/DefaultError'
- description: Default Response
- tags:
- - Tool Groups
- summary: List Tools
- description: List tools with optional tool group.
- operationId: list_tools_v1_tools_get
- parameters:
- - name: toolgroup_id
- in: query
- required: false
- schema:
- anyOf:
- - type: string
- - type: 'null'
- title: Toolgroup Id
- /v1/tools/{tool_name}:
- get:
- responses:
- '200':
- description: A ToolDef.
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/ToolDef'
- '400':
- description: Bad Request
- $ref: '#/components/responses/BadRequest400'
- '429':
- description: Too Many Requests
- $ref: '#/components/responses/TooManyRequests429'
- '500':
- description: Internal Server Error
- $ref: '#/components/responses/InternalServerError500'
- default:
- description: Default Response
- $ref: '#/components/responses/DefaultError'
- tags:
- - Tool Groups
- summary: Get Tool
- description: Get a tool by its name.
- operationId: get_tool_v1_tools__tool_name__get
- parameters:
- - name: tool_name
- in: path
- required: true
- schema:
- type: string
- description: 'Path parameter: tool_name'
/v1/vector-io/insert:
post:
responses:
@@ -5817,6 +5607,12 @@ components:
anyOf:
- type: integer
- type: 'null'
+ metadata:
+ anyOf:
+ - additionalProperties:
+ type: string
+ type: object
+ - type: 'null'
input:
items:
anyOf:
@@ -6220,6 +6016,12 @@ components:
anyOf:
- type: integer
- type: 'null'
+ metadata:
+ anyOf:
+ - additionalProperties:
+ type: string
+ type: object
+ - type: 'null'
type: object
required:
- input
@@ -6351,6 +6153,12 @@ components:
anyOf:
- type: integer
- type: 'null'
+ metadata:
+ anyOf:
+ - additionalProperties:
+ type: string
+ type: object
+ - type: 'null'
type: object
required:
- created_at
@@ -8115,24 +7923,6 @@ components:
required:
- data
title: ListShieldsResponse
- InvokeToolRequest:
- properties:
- tool_name:
- type: string
- title: Tool Name
- kwargs:
- additionalProperties: true
- type: object
- title: Kwargs
- authorization:
- anyOf:
- - type: string
- - type: 'null'
- type: object
- required:
- - tool_name
- - kwargs
- title: InvokeToolRequest
ImageContentItem:
description: A image content item
properties:
@@ -10850,227 +10640,6 @@ components:
- $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
title: OpenAIResponseContentPartReasoningText
title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
- SpanEndPayload:
- description: Payload for a span end event.
- properties:
- type:
- const: span_end
- default: span_end
- title: Type
- type: string
- status:
- $ref: '#/components/schemas/SpanStatus'
- required:
- - status
- title: SpanEndPayload
- type: object
- SpanStartPayload:
- description: Payload for a span start event.
- properties:
- type:
- const: span_start
- default: span_start
- title: Type
- type: string
- name:
- title: Name
- type: string
- parent_span_id:
- anyOf:
- - type: string
- - type: 'null'
- nullable: true
- required:
- - name
- title: SpanStartPayload
- type: object
- SpanStatus:
- description: The status of a span indicating whether it completed successfully or with an error.
- enum:
- - ok
- - error
- title: SpanStatus
- type: string
- StructuredLogPayload:
- discriminator:
- mapping:
- span_end: '#/components/schemas/SpanEndPayload'
- span_start: '#/components/schemas/SpanStartPayload'
- propertyName: type
- oneOf:
- - $ref: '#/components/schemas/SpanStartPayload'
- title: SpanStartPayload
- - $ref: '#/components/schemas/SpanEndPayload'
- title: SpanEndPayload
- title: SpanStartPayload | SpanEndPayload
- LogSeverity:
- description: The severity level of a log message.
- enum:
- - verbose
- - debug
- - info
- - warn
- - error
- - critical
- title: LogSeverity
- type: string
- MetricEvent:
- description: A metric event containing a measured value.
- properties:
- trace_id:
- title: Trace Id
- type: string
- span_id:
- title: Span Id
- type: string
- timestamp:
- format: date-time
- title: Timestamp
- type: string
- attributes:
- anyOf:
- - additionalProperties:
- anyOf:
- - type: string
- - type: integer
- - type: number
- - type: boolean
- - type: 'null'
- title: string | ... (4 variants)
- type: object
- - type: 'null'
- type:
- const: metric
- default: metric
- title: Type
- type: string
- metric:
- title: Metric
- type: string
- value:
- anyOf:
- - type: integer
- - type: number
- title: integer | number
- unit:
- title: Unit
- type: string
- required:
- - trace_id
- - span_id
- - timestamp
- - metric
- - value
- - unit
- title: MetricEvent
- type: object
- StructuredLogEvent:
- description: A structured log event containing typed payload data.
- properties:
- trace_id:
- title: Trace Id
- type: string
- span_id:
- title: Span Id
- type: string
- timestamp:
- format: date-time
- title: Timestamp
- type: string
- attributes:
- anyOf:
- - additionalProperties:
- anyOf:
- - type: string
- - type: integer
- - type: number
- - type: boolean
- - type: 'null'
- title: string | ... (4 variants)
- type: object
- - type: 'null'
- type:
- const: structured_log
- default: structured_log
- title: Type
- type: string
- payload:
- discriminator:
- mapping:
- span_end: '#/components/schemas/SpanEndPayload'
- span_start: '#/components/schemas/SpanStartPayload'
- propertyName: type
- oneOf:
- - $ref: '#/components/schemas/SpanStartPayload'
- title: SpanStartPayload
- - $ref: '#/components/schemas/SpanEndPayload'
- title: SpanEndPayload
- title: SpanStartPayload | SpanEndPayload
- required:
- - trace_id
- - span_id
- - timestamp
- - payload
- title: StructuredLogEvent
- type: object
- UnstructuredLogEvent:
- description: An unstructured log event containing a simple text message.
- properties:
- trace_id:
- title: Trace Id
- type: string
- span_id:
- title: Span Id
- type: string
- timestamp:
- format: date-time
- title: Timestamp
- type: string
- attributes:
- anyOf:
- - additionalProperties:
- anyOf:
- - type: string
- - type: integer
- - type: number
- - type: boolean
- - type: 'null'
- title: string | ... (4 variants)
- type: object
- - type: 'null'
- type:
- const: unstructured_log
- default: unstructured_log
- title: Type
- type: string
- message:
- title: Message
- type: string
- severity:
- $ref: '#/components/schemas/LogSeverity'
- required:
- - trace_id
- - span_id
- - timestamp
- - message
- - severity
- title: UnstructuredLogEvent
- type: object
- Event:
- discriminator:
- mapping:
- metric: '#/components/schemas/MetricEvent'
- structured_log: '#/components/schemas/StructuredLogEvent'
- unstructured_log: '#/components/schemas/UnstructuredLogEvent'
- propertyName: type
- oneOf:
- - $ref: '#/components/schemas/UnstructuredLogEvent'
- title: UnstructuredLogEvent
- - $ref: '#/components/schemas/MetricEvent'
- title: MetricEvent
- - $ref: '#/components/schemas/StructuredLogEvent'
- title: StructuredLogEvent
- title: UnstructuredLogEvent | MetricEvent | StructuredLogEvent
MetricInResponse:
description: A metric value included in API responses.
properties:
@@ -11892,236 +11461,6 @@ components:
- logger_config
title: PostTrainingRLHFRequest
type: object
- Span:
- description: A span representing a single operation within a trace.
- properties:
- span_id:
- title: Span Id
- type: string
- trace_id:
- title: Trace Id
- type: string
- parent_span_id:
- anyOf:
- - type: string
- - type: 'null'
- nullable: true
- name:
- title: Name
- type: string
- start_time:
- format: date-time
- title: Start Time
- type: string
- end_time:
- anyOf:
- - format: date-time
- type: string
- - type: 'null'
- nullable: true
- attributes:
- anyOf:
- - additionalProperties: true
- type: object
- - type: 'null'
- required:
- - span_id
- - trace_id
- - name
- - start_time
- title: Span
- type: object
- Trace:
- description: A trace representing the complete execution path of a request across multiple operations.
- properties:
- trace_id:
- title: Trace Id
- type: string
- root_span_id:
- title: Root Span Id
- type: string
- start_time:
- format: date-time
- title: Start Time
- type: string
- end_time:
- anyOf:
- - format: date-time
- type: string
- - type: 'null'
- nullable: true
- required:
- - trace_id
- - root_span_id
- - start_time
- title: Trace
- type: object
- EventType:
- description: The type of telemetry event being logged.
- enum:
- - unstructured_log
- - structured_log
- - metric
- title: EventType
- type: string
- StructuredLogType:
- description: The type of structured log event payload.
- enum:
- - span_start
- - span_end
- title: StructuredLogType
- type: string
- EvalTrace:
- description: A trace record for evaluation purposes.
- properties:
- session_id:
- title: Session Id
- type: string
- step:
- title: Step
- type: string
- input:
- title: Input
- type: string
- output:
- title: Output
- type: string
- expected_output:
- title: Expected Output
- type: string
- required:
- - session_id
- - step
- - input
- - output
- - expected_output
- title: EvalTrace
- type: object
- SpanWithStatus:
- description: A span that includes status information.
- properties:
- span_id:
- title: Span Id
- type: string
- trace_id:
- title: Trace Id
- type: string
- parent_span_id:
- anyOf:
- - type: string
- - type: 'null'
- nullable: true
- name:
- title: Name
- type: string
- start_time:
- format: date-time
- title: Start Time
- type: string
- end_time:
- anyOf:
- - format: date-time
- type: string
- - type: 'null'
- nullable: true
- attributes:
- anyOf:
- - additionalProperties: true
- type: object
- - type: 'null'
- status:
- anyOf:
- - $ref: '#/components/schemas/SpanStatus'
- title: SpanStatus
- - type: 'null'
- nullable: true
- title: SpanStatus
- required:
- - span_id
- - trace_id
- - name
- - start_time
- title: SpanWithStatus
- type: object
- QueryConditionOp:
- description: Comparison operators for query conditions.
- enum:
- - eq
- - ne
- - gt
- - lt
- title: QueryConditionOp
- type: string
- QueryCondition:
- description: A condition for filtering query results.
- properties:
- key:
- title: Key
- type: string
- op:
- $ref: '#/components/schemas/QueryConditionOp'
- value:
- title: Value
- required:
- - key
- - op
- - value
- title: QueryCondition
- type: object
- MetricLabel:
- description: A label associated with a metric.
- properties:
- name:
- title: Name
- type: string
- value:
- title: Value
- type: string
- required:
- - name
- - value
- title: MetricLabel
- type: object
- MetricDataPoint:
- description: A single data point in a metric time series.
- properties:
- timestamp:
- title: Timestamp
- type: integer
- value:
- title: Value
- type: number
- unit:
- title: Unit
- type: string
- required:
- - timestamp
- - value
- - unit
- title: MetricDataPoint
- type: object
- MetricSeries:
- description: A time series of metric data points.
- properties:
- metric:
- title: Metric
- type: string
- labels:
- items:
- $ref: '#/components/schemas/MetricLabel'
- title: Labels
- type: array
- values:
- items:
- $ref: '#/components/schemas/MetricDataPoint'
- title: Values
- type: array
- required:
- - metric
- - labels
- - values
- title: MetricSeries
- type: object
responses:
BadRequest400:
description: The request was invalid or malformed
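The blocks removed from the v1 spec above (Span, Trace, SpanStartPayload, MetricEvent, and the rest) modeled standard distributed-tracing concepts as REST schemas. If the replacement path is plain OpenTelemetry instrumentation, which is an assumption this diff does not state outright, the same span lifecycle looks like this sketch:

```python
from opentelemetry import trace

# Assumes opentelemetry-api/sdk are installed and an exporter is configured
# elsewhere; the span and attribute names are illustrative.
tracer = trace.get_tracer("llama-stack-example")

# Entering the context manager plays the role of the removed
# SpanStartPayload; exiting it, with an implicit OK status, plays the
# role of SpanEndPayload.
with tracer.start_as_current_span("tool-invocation") as span:
    span.set_attribute("tool_name", "web_search")
```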
diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml
index 9269b7e39..51607d92d 100644
--- a/docs/static/stainless-llama-stack-spec.yaml
+++ b/docs/static/stainless-llama-stack-spec.yaml
@@ -2091,6 +2091,7 @@ paths:
schema:
$ref: '#/components/schemas/InvokeToolRequest'
required: true
+ deprecated: true
/v1/tool-runtime/list-tools:
get:
responses:
@@ -2142,6 +2143,7 @@ paths:
- $ref: '#/components/schemas/URL'
- type: 'null'
title: Mcp Endpoint
+ deprecated: true
/v1/toolgroups:
get:
responses:
@@ -2168,6 +2170,7 @@ paths:
summary: List Tool Groups
description: List tool groups with optional provider.
operationId: list_tool_groups_v1_toolgroups_get
+ deprecated: true
post:
responses:
'400':
@@ -2229,6 +2232,7 @@ paths:
schema:
type: string
description: 'Path parameter: toolgroup_id'
+ deprecated: true
delete:
responses:
'400':
@@ -2293,6 +2297,7 @@ paths:
- type: string
- type: 'null'
title: Toolgroup Id
+ deprecated: true
/v1/tools/{tool_name}:
get:
responses:
@@ -2326,6 +2331,7 @@ paths:
schema:
type: string
description: 'Path parameter: tool_name'
+ deprecated: true
/v1/vector-io/insert:
post:
responses:
@@ -6796,6 +6802,12 @@ components:
anyOf:
- type: integer
- type: 'null'
+ metadata:
+ anyOf:
+ - additionalProperties:
+ type: string
+ type: object
+ - type: 'null'
input:
items:
anyOf:
@@ -7199,6 +7211,12 @@ components:
anyOf:
- type: integer
- type: 'null'
+ metadata:
+ anyOf:
+ - additionalProperties:
+ type: string
+ type: object
+ - type: 'null'
type: object
required:
- input
@@ -7330,6 +7348,12 @@ components:
anyOf:
- type: integer
- type: 'null'
+ metadata:
+ anyOf:
+ - additionalProperties:
+ type: string
+ type: object
+ - type: 'null'
type: object
required:
- created_at
@@ -12180,227 +12204,6 @@ components:
- $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
title: OpenAIResponseContentPartReasoningText
title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
- SpanEndPayload:
- description: Payload for a span end event.
- properties:
- type:
- const: span_end
- default: span_end
- title: Type
- type: string
- status:
- $ref: '#/components/schemas/SpanStatus'
- required:
- - status
- title: SpanEndPayload
- type: object
- SpanStartPayload:
- description: Payload for a span start event.
- properties:
- type:
- const: span_start
- default: span_start
- title: Type
- type: string
- name:
- title: Name
- type: string
- parent_span_id:
- anyOf:
- - type: string
- - type: 'null'
- nullable: true
- required:
- - name
- title: SpanStartPayload
- type: object
- SpanStatus:
- description: The status of a span indicating whether it completed successfully or with an error.
- enum:
- - ok
- - error
- title: SpanStatus
- type: string
- StructuredLogPayload:
- discriminator:
- mapping:
- span_end: '#/components/schemas/SpanEndPayload'
- span_start: '#/components/schemas/SpanStartPayload'
- propertyName: type
- oneOf:
- - $ref: '#/components/schemas/SpanStartPayload'
- title: SpanStartPayload
- - $ref: '#/components/schemas/SpanEndPayload'
- title: SpanEndPayload
- title: SpanStartPayload | SpanEndPayload
- LogSeverity:
- description: The severity level of a log message.
- enum:
- - verbose
- - debug
- - info
- - warn
- - error
- - critical
- title: LogSeverity
- type: string
- MetricEvent:
- description: A metric event containing a measured value.
- properties:
- trace_id:
- title: Trace Id
- type: string
- span_id:
- title: Span Id
- type: string
- timestamp:
- format: date-time
- title: Timestamp
- type: string
- attributes:
- anyOf:
- - additionalProperties:
- anyOf:
- - type: string
- - type: integer
- - type: number
- - type: boolean
- - type: 'null'
- title: string | ... (4 variants)
- type: object
- - type: 'null'
- type:
- const: metric
- default: metric
- title: Type
- type: string
- metric:
- title: Metric
- type: string
- value:
- anyOf:
- - type: integer
- - type: number
- title: integer | number
- unit:
- title: Unit
- type: string
- required:
- - trace_id
- - span_id
- - timestamp
- - metric
- - value
- - unit
- title: MetricEvent
- type: object
- StructuredLogEvent:
- description: A structured log event containing typed payload data.
- properties:
- trace_id:
- title: Trace Id
- type: string
- span_id:
- title: Span Id
- type: string
- timestamp:
- format: date-time
- title: Timestamp
- type: string
- attributes:
- anyOf:
- - additionalProperties:
- anyOf:
- - type: string
- - type: integer
- - type: number
- - type: boolean
- - type: 'null'
- title: string | ... (4 variants)
- type: object
- - type: 'null'
- type:
- const: structured_log
- default: structured_log
- title: Type
- type: string
- payload:
- discriminator:
- mapping:
- span_end: '#/components/schemas/SpanEndPayload'
- span_start: '#/components/schemas/SpanStartPayload'
- propertyName: type
- oneOf:
- - $ref: '#/components/schemas/SpanStartPayload'
- title: SpanStartPayload
- - $ref: '#/components/schemas/SpanEndPayload'
- title: SpanEndPayload
- title: SpanStartPayload | SpanEndPayload
- required:
- - trace_id
- - span_id
- - timestamp
- - payload
- title: StructuredLogEvent
- type: object
- UnstructuredLogEvent:
- description: An unstructured log event containing a simple text message.
- properties:
- trace_id:
- title: Trace Id
- type: string
- span_id:
- title: Span Id
- type: string
- timestamp:
- format: date-time
- title: Timestamp
- type: string
- attributes:
- anyOf:
- - additionalProperties:
- anyOf:
- - type: string
- - type: integer
- - type: number
- - type: boolean
- - type: 'null'
- title: string | ... (4 variants)
- type: object
- - type: 'null'
- type:
- const: unstructured_log
- default: unstructured_log
- title: Type
- type: string
- message:
- title: Message
- type: string
- severity:
- $ref: '#/components/schemas/LogSeverity'
- required:
- - trace_id
- - span_id
- - timestamp
- - message
- - severity
- title: UnstructuredLogEvent
- type: object
- Event:
- discriminator:
- mapping:
- metric: '#/components/schemas/MetricEvent'
- structured_log: '#/components/schemas/StructuredLogEvent'
- unstructured_log: '#/components/schemas/UnstructuredLogEvent'
- propertyName: type
- oneOf:
- - $ref: '#/components/schemas/UnstructuredLogEvent'
- title: UnstructuredLogEvent
- - $ref: '#/components/schemas/MetricEvent'
- title: MetricEvent
- - $ref: '#/components/schemas/StructuredLogEvent'
- title: StructuredLogEvent
- title: UnstructuredLogEvent | MetricEvent | StructuredLogEvent
MetricInResponse:
description: A metric value included in API responses.
properties:
@@ -13225,236 +13028,6 @@ components:
- logger_config
title: PostTrainingRLHFRequest
type: object
- Span:
- description: A span representing a single operation within a trace.
- properties:
- span_id:
- title: Span Id
- type: string
- trace_id:
- title: Trace Id
- type: string
- parent_span_id:
- anyOf:
- - type: string
- - type: 'null'
- nullable: true
- name:
- title: Name
- type: string
- start_time:
- format: date-time
- title: Start Time
- type: string
- end_time:
- anyOf:
- - format: date-time
- type: string
- - type: 'null'
- nullable: true
- attributes:
- anyOf:
- - additionalProperties: true
- type: object
- - type: 'null'
- required:
- - span_id
- - trace_id
- - name
- - start_time
- title: Span
- type: object
- Trace:
- description: A trace representing the complete execution path of a request across multiple operations.
- properties:
- trace_id:
- title: Trace Id
- type: string
- root_span_id:
- title: Root Span Id
- type: string
- start_time:
- format: date-time
- title: Start Time
- type: string
- end_time:
- anyOf:
- - format: date-time
- type: string
- - type: 'null'
- nullable: true
- required:
- - trace_id
- - root_span_id
- - start_time
- title: Trace
- type: object
- EventType:
- description: The type of telemetry event being logged.
- enum:
- - unstructured_log
- - structured_log
- - metric
- title: EventType
- type: string
- StructuredLogType:
- description: The type of structured log event payload.
- enum:
- - span_start
- - span_end
- title: StructuredLogType
- type: string
- EvalTrace:
- description: A trace record for evaluation purposes.
- properties:
- session_id:
- title: Session Id
- type: string
- step:
- title: Step
- type: string
- input:
- title: Input
- type: string
- output:
- title: Output
- type: string
- expected_output:
- title: Expected Output
- type: string
- required:
- - session_id
- - step
- - input
- - output
- - expected_output
- title: EvalTrace
- type: object
- SpanWithStatus:
- description: A span that includes status information.
- properties:
- span_id:
- title: Span Id
- type: string
- trace_id:
- title: Trace Id
- type: string
- parent_span_id:
- anyOf:
- - type: string
- - type: 'null'
- nullable: true
- name:
- title: Name
- type: string
- start_time:
- format: date-time
- title: Start Time
- type: string
- end_time:
- anyOf:
- - format: date-time
- type: string
- - type: 'null'
- nullable: true
- attributes:
- anyOf:
- - additionalProperties: true
- type: object
- - type: 'null'
- status:
- anyOf:
- - $ref: '#/components/schemas/SpanStatus'
- title: SpanStatus
- - type: 'null'
- nullable: true
- title: SpanStatus
- required:
- - span_id
- - trace_id
- - name
- - start_time
- title: SpanWithStatus
- type: object
- QueryConditionOp:
- description: Comparison operators for query conditions.
- enum:
- - eq
- - ne
- - gt
- - lt
- title: QueryConditionOp
- type: string
- QueryCondition:
- description: A condition for filtering query results.
- properties:
- key:
- title: Key
- type: string
- op:
- $ref: '#/components/schemas/QueryConditionOp'
- value:
- title: Value
- required:
- - key
- - op
- - value
- title: QueryCondition
- type: object
- MetricLabel:
- description: A label associated with a metric.
- properties:
- name:
- title: Name
- type: string
- value:
- title: Value
- type: string
- required:
- - name
- - value
- title: MetricLabel
- type: object
- MetricDataPoint:
- description: A single data point in a metric time series.
- properties:
- timestamp:
- title: Timestamp
- type: integer
- value:
- title: Value
- type: number
- unit:
- title: Unit
- type: string
- required:
- - timestamp
- - value
- - unit
- title: MetricDataPoint
- type: object
- MetricSeries:
- description: A time series of metric data points.
- properties:
- metric:
- title: Metric
- type: string
- labels:
- items:
- $ref: '#/components/schemas/MetricLabel'
- title: Labels
- type: array
- values:
- items:
- $ref: '#/components/schemas/MetricDataPoint'
- title: Values
- type: array
- required:
- - metric
- - labels
- - values
- title: MetricSeries
- type: object
responses:
BadRequest400:
description: The request was invalid or malformed
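The stainless spec takes the softer route: the tool-runtime and toolgroups operations stay in place but each gains `deprecated: true`, mirroring the deprecated spec at the top of this patch. While deprecated, the invoke route still accepts the `InvokeToolRequest` body removed from the v1 spec earlier (required `tool_name` and `kwargs`, optional `authorization`). A minimal client sketch, assuming a stack on the default port 8321 and a placeholder tool name:

```python
import requests

resp = requests.post(
    "http://localhost:8321/v1/tool-runtime/invoke",
    # InvokeToolRequest: tool_name and kwargs are required.
    json={"tool_name": "web_search", "kwargs": {"query": "llama stack"}},
    timeout=30,
)
resp.raise_for_status()
print(resp.json())  # a ToolInvocationResult payload
```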
diff --git a/scripts/gen-changelog.py b/scripts/gen-changelog.py
deleted file mode 100755
index 3df2af06b..000000000
--- a/scripts/gen-changelog.py
+++ /dev/null
@@ -1,74 +0,0 @@
-#!/usr/bin/env python
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import os
-
-import requests
-
-
-def get_all_releases(token):
- url = "https://api.github.com/repos/meta-llama/llama-stack/releases"
- headers = {"Accept": "application/vnd.github.v3+json"}
-
- if token:
- headers["Authorization"] = f"token {token}"
-
- response = requests.get(url, headers=headers)
-
- if response.status_code == 200:
- return response.json()
- else:
- raise Exception(f"Error fetching releases: {response.status_code}, {response.text}")
-
-
-def clean_release_body(body):
- """Remove '## All changes' sections from release notes."""
- lines = body.split("\n")
- cleaned_lines = []
- skip_mode = False
-
- for line in lines:
- if line.strip() in [
- "## All changes",
- "### What's Changed",
- "## What's Changed",
- "## New Contributors",
- ]:
- skip_mode = True
- elif skip_mode and line.startswith("##"):
- # Found a new section, stop skipping
- skip_mode = False
- cleaned_lines.append(line)
- elif not skip_mode:
- cleaned_lines.append(line)
-
- return "\n".join(cleaned_lines)
-
-
-def merge_release_notes(output_file, token=None):
- releases = get_all_releases(token)
-
- with open(output_file, "w", encoding="utf-8") as md_file:
- md_file.write("# Changelog\n\n")
-
- for release in releases:
- md_file.write(f"# {release['tag_name']}\n")
- md_file.write(f"Published on: {release['published_at']}\n\n")
-
- # Clean the release body to remove "## All changes" sections
- cleaned_body = clean_release_body(release["body"])
- md_file.write(f"{cleaned_body}\n\n")
-
- md_file.write("---\n\n")
-
- print(f"Merged release notes saved to {output_file}")
-
-
-if __name__ == "__main__":
- OUTPUT_FILE = "CHANGELOG.md"
- TOKEN = os.getenv("GITHUB_TOKEN")
- merge_release_notes(OUTPUT_FILE, TOKEN)
diff --git a/scripts/integration-tests.sh b/scripts/integration-tests.sh
index 2adef892d..9907cd0bb 100755
--- a/scripts/integration-tests.sh
+++ b/scripts/integration-tests.sh
@@ -171,10 +171,18 @@ if [[ "$COLLECT_ONLY" == false ]]; then
# Set MCP host for in-process MCP server tests
# - For library client and server mode: localhost (both on same host)
- # - For docker mode: host.docker.internal (container needs to reach host)
+ # - For docker mode on Linux: localhost (container uses host network, shares network namespace)
+ # - For docker mode on macOS/Windows: host.docker.internal (container uses bridge network)
if [[ "$STACK_CONFIG" == docker:* ]]; then
- export LLAMA_STACK_TEST_MCP_HOST="host.docker.internal"
- echo "Setting MCP host: host.docker.internal (docker mode)"
+ if [[ "$(uname)" != "Darwin" ]] && [[ "$(uname)" != *"MINGW"* ]]; then
+ # On Linux with host network mode, container shares host network namespace
+ export LLAMA_STACK_TEST_MCP_HOST="localhost"
+ echo "Setting MCP host: localhost (docker mode with host network)"
+ else
+ # On macOS/Windows with bridge network, need special host access
+ export LLAMA_STACK_TEST_MCP_HOST="host.docker.internal"
+ echo "Setting MCP host: host.docker.internal (docker mode with bridge network)"
+ fi
else
export LLAMA_STACK_TEST_MCP_HOST="localhost"
echo "Setting MCP host: localhost (library/server mode)"
diff --git a/scripts/openapi_generator/schema_collection.py b/scripts/openapi_generator/schema_collection.py
index 51a70c62a..127f6da9c 100644
--- a/scripts/openapi_generator/schema_collection.py
+++ b/scripts/openapi_generator/schema_collection.py
@@ -8,7 +8,6 @@
Schema discovery and collection for OpenAPI generation.
"""
-import importlib
from typing import Any
@@ -20,23 +19,6 @@ def _ensure_components_schemas(openapi_schema: dict[str, Any]) -> None:
openapi_schema["components"]["schemas"] = {}
-def _load_extra_schema_modules() -> None:
- """
- Import modules outside llama_stack_api that use schema_utils to register schemas.
-
- The API package already imports its submodules via __init__, but server-side modules
- like telemetry need to be imported explicitly so their decorator side effects run.
- """
- extra_modules = [
- "llama_stack.core.telemetry.telemetry",
- ]
- for module_name in extra_modules:
- try:
- importlib.import_module(module_name)
- except ImportError:
- continue
-
-
def _extract_and_fix_defs(schema: dict[str, Any], openapi_schema: dict[str, Any]) -> None:
"""
Extract $defs from a schema, move them to components/schemas, and fix references.
@@ -79,9 +61,6 @@ def _ensure_json_schema_types_included(openapi_schema: dict[str, Any]) -> dict[s
iter_registered_schema_types,
)
- # Import extra modules (e.g., telemetry) whose schema registrations live outside llama_stack_api
- _load_extra_schema_modules()
-
# Handle explicitly registered schemas first (union types, Annotated structs, etc.)
for registration_info in iter_registered_schema_types():
schema_type = registration_info.type
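The removed helper existed only because schema registration happens as an import-time decorator side effect: a module that is never imported never registers anything. With the telemetry schemas gone, nothing outside the API package needs that treatment. A toy sketch of the pattern (the names are illustrative, not the real schema_utils API):

```python
SCHEMA_REGISTRY: dict[str, type] = {}


def register_schema(cls: type) -> type:
    # Runs when the defining module is imported, so modules living outside
    # the package that auto-imports its submodules must be imported
    # explicitly, which is what _load_extra_schema_modules did.
    SCHEMA_REGISTRY[cls.__name__] = cls
    return cls


@register_schema
class ExampleEvent:
    pass


print(sorted(SCHEMA_REGISTRY))  # ['ExampleEvent']
```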
diff --git a/scripts/telemetry/llama-stack-dashboard.json b/scripts/telemetry/llama-stack-dashboard.json
index a9f8ac7a2..a8db9713c 100644
--- a/scripts/telemetry/llama-stack-dashboard.json
+++ b/scripts/telemetry/llama-stack-dashboard.json
@@ -1,11 +1,24 @@
{
"annotations": {
- "list": []
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": {
+ "type": "grafana",
+ "uid": "-- Grafana --"
+ },
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
- "id": null,
+ "id": 1,
"links": [],
"liveNow": false,
"panels": [
@@ -16,11 +29,40 @@
},
"fieldConfig": {
"defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
"custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
"drawStyle": "line",
+ "fillOpacity": 10,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
"lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
"showPoints": "auto",
- "fillOpacity": 10
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
},
"mappings": [],
"thresholds": {
@@ -32,7 +74,8 @@
}
]
}
- }
+ },
+ "overrides": []
},
"gridPos": {
"h": 8,
@@ -40,15 +83,16 @@
"x": 0,
"y": 0
},
- "id": 1,
+ "id": 2,
"options": {
"legend": {
"calcs": [],
- "displayMode": "table",
+ "displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
+ "maxHeight": 600,
"mode": "multi",
"sort": "none"
}
@@ -59,9 +103,112 @@
"type": "prometheus",
"uid": "prometheus"
},
- "expr": "llama_stack_completion_tokens_total",
- "legendFormat": "{{model_id}} ({{provider_id}})",
- "refId": "A"
+ "disableTextWrap": false,
+ "editorMode": "builder",
+ "expr": "sum by(gen_ai_request_model) (llama_stack_gen_ai_client_token_usage_sum{gen_ai_token_type=\"input\"})",
+ "fullMetaSearch": false,
+ "includeNullMetadata": true,
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "A",
+ "useBackend": false
+ }
+ ],
+ "title": "Prompt Tokens",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 0
+ },
+ "id": 1,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "maxHeight": 600,
+ "mode": "multi",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "disableTextWrap": false,
+ "editorMode": "builder",
+ "exemplar": false,
+ "expr": "sum by(gen_ai_request_model) (llama_stack_gen_ai_client_token_usage_sum{gen_ai_token_type=\"output\"})",
+ "fullMetaSearch": false,
+ "includeNullMetadata": true,
+ "interval": "",
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "A",
+ "useBackend": false
}
],
"title": "Completion Tokens",
@@ -74,78 +221,40 @@
},
"fieldConfig": {
"defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
"custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
"drawStyle": "line",
+ "fillOpacity": 10,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
"lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
"showPoints": "auto",
- "fillOpacity": 10
- },
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- }
- ]
- }
- }
- },
- "gridPos": {
- "h": 8,
- "w": 12,
- "x": 12,
- "y": 0
- },
- "id": 2,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "table",
- "placement": "bottom",
- "showLegend": true
- },
- "tooltip": {
- "mode": "multi",
- "sort": "none"
- }
- },
- "targets": [
- {
- "datasource": {
- "type": "prometheus",
- "uid": "prometheus"
- },
- "expr": "llama_stack_prompt_tokens_total",
- "legendFormat": "Prompt - {{model_id}}",
- "refId": "A"
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "prometheus"
- },
- "expr": "llama_stack_tokens_total",
- "legendFormat": "Total - {{model_id}}",
- "refId": "B"
- }
- ],
- "title": "Prompt & Total Tokens",
- "type": "timeseries"
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "prometheus"
- },
- "fieldConfig": {
- "defaults": {
- "custom": {
- "drawStyle": "line",
- "lineInterpolation": "linear",
- "showPoints": "auto",
- "fillOpacity": 10
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
},
"mappings": [],
"thresholds": {
@@ -158,7 +267,8 @@
]
},
"unit": "ms"
- }
+ },
+ "overrides": []
},
"gridPos": {
"h": 8,
@@ -175,6 +285,7 @@
"showLegend": true
},
"tooltip": {
+ "maxHeight": 600,
"mode": "multi",
"sort": "none"
}
@@ -219,7 +330,8 @@
}
]
}
- }
+ },
+ "overrides": []
},
"gridPos": {
"h": 8,
@@ -240,8 +352,11 @@
"fields": "",
"values": false
},
- "textMode": "auto"
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
},
+ "pluginVersion": "11.0.0",
"targets": [
{
"datasource": {
@@ -272,7 +387,8 @@
}
]
}
- }
+ },
+ "overrides": []
},
"gridPos": {
"h": 8,
@@ -293,8 +409,11 @@
"fields": "",
"values": false
},
- "textMode": "auto"
+ "showPercentChange": false,
+ "textMode": "auto",
+ "wideLayout": true
},
+ "pluginVersion": "11.0.0",
"targets": [
{
"datasource": {
@@ -315,11 +434,40 @@
},
"fieldConfig": {
"defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
"custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
"drawStyle": "line",
+ "fillOpacity": 10,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
"lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
"showPoints": "auto",
- "fillOpacity": 10
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
},
"mappings": [],
"thresholds": {
@@ -332,7 +480,8 @@
]
},
"unit": "reqps"
- }
+ },
+ "overrides": []
},
"gridPos": {
"h": 8,
@@ -349,6 +498,7 @@
"showLegend": true
},
"tooltip": {
+ "maxHeight": 600,
"mode": "multi",
"sort": "none"
}
@@ -374,11 +524,40 @@
},
"fieldConfig": {
"defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
"custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
"drawStyle": "line",
+ "fillOpacity": 10,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
"lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
"showPoints": "auto",
- "fillOpacity": 10
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
},
"mappings": [],
"thresholds": {
@@ -391,7 +570,8 @@
]
},
"unit": "Bps"
- }
+ },
+ "overrides": []
},
"gridPos": {
"h": 8,
@@ -408,6 +588,7 @@
"showLegend": true
},
"tooltip": {
+ "maxHeight": 600,
"mode": "multi",
"sort": "none"
}
@@ -437,7 +618,7 @@
}
],
"refresh": "5s",
- "schemaVersion": 38,
+ "schemaVersion": 39,
"tags": [
"llama-stack"
],
@@ -445,13 +626,14 @@
"list": []
},
"time": {
- "from": "now-15m",
+ "from": "now-3h",
"to": "now"
},
+ "timeRangeUpdatedDuringEditOrView": false,
"timepicker": {},
"timezone": "browser",
"title": "Llama Stack Metrics",
"uid": "llama-stack-metrics",
- "version": 0,
+ "version": 17,
"weekStart": ""
}
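Note: the reworked panels read the OTel-exported histogram (`llama_stack_gen_ai_client_token_usage_sum`, split by the `gen_ai_token_type` label) instead of the old `llama_stack_*_tokens_total` counters. A small sketch of running the same PromQL outside Grafana, assuming a Prometheus server on localhost:9090 (the expression is copied from the dashboard above):

```python
import json
import urllib.parse
import urllib.request

PROM_URL = "http://localhost:9090/api/v1/query"  # assumed local Prometheus
QUERY = (
    'sum by(gen_ai_request_model) '
    '(llama_stack_gen_ai_client_token_usage_sum{gen_ai_token_type="input"})'
)


def query_prometheus(expr: str) -> list[dict]:
    # Prometheus instant-query endpoint: /api/v1/query?query=<expr>
    url = f"{PROM_URL}?{urllib.parse.urlencode({'query': expr})}"
    with urllib.request.urlopen(url) as resp:
        payload = json.load(resp)
    return payload["data"]["result"]


if __name__ == "__main__":
    for series in query_prometheus(QUERY):
        print(series["metric"].get("gen_ai_request_model"), series["value"][1])
```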
diff --git a/src/llama_stack/cli/stack/run.py b/src/llama_stack/cli/stack/run.py
index 73d8d13d5..bc4ef70fd 100644
--- a/src/llama_stack/cli/stack/run.py
+++ b/src/llama_stack/cli/stack/run.py
@@ -197,7 +197,7 @@ class StackRun(Subcommand):
config = StackRunConfig(**cast_image_name_to_string(replace_env_vars(config_contents)))
port = args.port or config.server.port
- host = config.server.host or "0.0.0.0"
+ host = config.server.host or ["::", "0.0.0.0"]
# Set the config file in environment so create_app can find it
os.environ["LLAMA_STACK_CONFIG"] = str(config_file)
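Note: the new default asks the server to listen on both the IPv6 and IPv4 wildcard addresses. A rough stdlib illustration of one-listener-per-address dual-stack binding (this shows the concept only, not the actual server wiring):

```python
import socket


def bind_all(addresses: list[str], port: int) -> list[socket.socket]:
    socks = []
    for addr in addresses:  # e.g. ["::", "0.0.0.0"]
        family = socket.AF_INET6 if ":" in addr else socket.AF_INET
        s = socket.socket(family, socket.SOCK_STREAM)
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        if family == socket.AF_INET6:
            # Keep the IPv6 socket v6-only so the separate IPv4 socket can
            # bind the same port without clashing.
            s.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 1)
        s.bind((addr, port))
        s.listen()
        socks.append(s)
    return socks
```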
diff --git a/src/llama_stack/core/datatypes.py b/src/llama_stack/core/datatypes.py
index 1e29690ff..f64286ef5 100644
--- a/src/llama_stack/core/datatypes.py
+++ b/src/llama_stack/core/datatypes.py
@@ -191,22 +191,6 @@ class DistributionSpec(BaseModel):
)
-class TelemetryConfig(BaseModel):
- """
- Configuration for telemetry.
-
- Llama Stack uses OpenTelemetry for telemetry. Please refer to https://opentelemetry.io/docs/languages/sdk-configuration/
- for env variables to configure the OpenTelemetry SDK.
-
- Example:
- ```bash
- OTEL_SERVICE_NAME=llama-stack OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 uv run llama stack run starter
- ```
- """
-
- enabled: bool = Field(default=False, description="enable or disable telemetry")
-
-
class OAuth2JWKSConfig(BaseModel):
# The JWKS URI for collecting public keys
uri: str
@@ -527,8 +511,6 @@ can be instantiated multiple times (with different configs) if necessary.
logging: LoggingConfig | None = Field(default=None, description="Configuration for Llama Stack Logging")
- telemetry: TelemetryConfig = Field(default_factory=TelemetryConfig, description="Configuration for telemetry")
-
server: ServerConfig = Field(
default_factory=ServerConfig,
description="Configuration for the HTTP(S) server",
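Note: with the `telemetry` block removed from `StackRunConfig`, enabling telemetry becomes purely an OpenTelemetry SDK concern driven by standard environment variables (`OTEL_SERVICE_NAME`, `OTEL_EXPORTER_OTLP_ENDPOINT`, as the removed docstring described). A minimal sketch of env-driven setup using the same SDK pieces the old module used:

```python
import os

from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor

# The OTLP exporter builds its endpoint from OTEL_EXPORTER_OTLP_ENDPOINT on
# its own; we only check the variable to decide whether to install a provider.
if os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT"):
    provider = TracerProvider()
    provider.add_span_processor(BatchSpanProcessor(OTLPSpanExporter()))
    trace.set_tracer_provider(provider)
```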
diff --git a/src/llama_stack/core/library_client.py b/src/llama_stack/core/library_client.py
index d6be7aeca..7ae29ad0d 100644
--- a/src/llama_stack/core/library_client.py
+++ b/src/llama_stack/core/library_client.py
@@ -46,8 +46,6 @@ from llama_stack.core.request_headers import PROVIDER_DATA_VAR, request_provider
from llama_stack.core.resolver import ProviderRegistry
from llama_stack.core.server.routes import RouteImpls, find_matching_route, initialize_route_impls
from llama_stack.core.stack import Stack, get_stack_run_config_from_distro, replace_env_vars
-from llama_stack.core.telemetry import Telemetry
-from llama_stack.core.telemetry.tracing import CURRENT_TRACE_CONTEXT, end_trace, setup_logger, start_trace
from llama_stack.core.utils.config import redact_sensitive_fields
from llama_stack.core.utils.context import preserve_contexts_async_generator
from llama_stack.core.utils.exec import in_notebook
@@ -204,13 +202,6 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
super().__init__()
# Initialize logging from environment variables first
setup_logging()
-
- # when using the library client, we should not log to console since many
- # of our logs are intended for server-side usage
- if sinks_from_env := os.environ.get("TELEMETRY_SINKS", None):
- current_sinks = sinks_from_env.strip().lower().split(",")
- os.environ["TELEMETRY_SINKS"] = ",".join(sink for sink in current_sinks if sink != "console")
-
if in_notebook():
import nest_asyncio
@@ -295,8 +286,6 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
raise _e
assert self.impls is not None
- if self.config.telemetry.enabled:
- setup_logger(Telemetry())
if not os.environ.get("PYTEST_CURRENT_TEST"):
console = Console()
@@ -392,13 +381,7 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
body, field_names = self._handle_file_uploads(options, body)
body = self._convert_body(matched_func, body, exclude_params=set(field_names))
-
- trace_path = webmethod.descriptive_name or route_path
- await start_trace(trace_path, {"__location__": "library_client"})
- try:
- result = await matched_func(**body)
- finally:
- await end_trace()
+ result = await matched_func(**body)
# Handle FastAPI Response objects (e.g., from file content retrieval)
if isinstance(result, FastAPIResponse):
@@ -457,19 +440,13 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
# Prepare body for the function call (handles both Pydantic and traditional params)
body = self._convert_body(func, body)
- trace_path = webmethod.descriptive_name or route_path
- await start_trace(trace_path, {"__location__": "library_client"})
-
async def gen():
- try:
- async for chunk in await func(**body):
- data = json.dumps(convert_pydantic_to_json_value(chunk))
- sse_event = f"data: {data}\n\n"
- yield sse_event.encode("utf-8")
- finally:
- await end_trace()
+ async for chunk in await func(**body):
+ data = json.dumps(convert_pydantic_to_json_value(chunk))
+ sse_event = f"data: {data}\n\n"
+ yield sse_event.encode("utf-8")
- wrapped_gen = preserve_contexts_async_generator(gen(), [CURRENT_TRACE_CONTEXT, PROVIDER_DATA_VAR])
+ wrapped_gen = preserve_contexts_async_generator(gen(), [PROVIDER_DATA_VAR])
mock_response = httpx.Response(
status_code=httpx.codes.OK,
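Note: the streaming path still frames chunks as server-sent events (`data: <json>\n\n`); it simply no longer opens a trace around the generator. A small sketch of parsing that framing back into objects, given an async byte stream like the one `gen()` yields (illustrative helper, not part of the client API):

```python
import json
from collections.abc import AsyncIterator
from typing import Any


async def parse_sse(stream: AsyncIterator[bytes]) -> AsyncIterator[Any]:
    # Accumulate bytes and split on the blank-line delimiter between events.
    buffer = b""
    async for chunk in stream:
        buffer += chunk
        while b"\n\n" in buffer:
            event, buffer = buffer.split(b"\n\n", 1)
            line = event.decode("utf-8")
            if line.startswith("data: "):
                yield json.loads(line[len("data: "):])
```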
diff --git a/src/llama_stack/core/resolver.py b/src/llama_stack/core/resolver.py
index 6bc32c2d0..15720df95 100644
--- a/src/llama_stack/core/resolver.py
+++ b/src/llama_stack/core/resolver.py
@@ -392,8 +392,6 @@ async def instantiate_provider(
args = [config, deps]
if "policy" in inspect.signature(getattr(module, method)).parameters:
args.append(policy)
- if "telemetry_enabled" in inspect.signature(getattr(module, method)).parameters and run_config.telemetry:
- args.append(run_config.telemetry.enabled)
fn = getattr(module, method)
impl = await fn(*args)
@@ -401,18 +399,6 @@ async def instantiate_provider(
impl.__provider_spec__ = provider_spec
impl.__provider_config__ = config
- # Apply tracing if telemetry is enabled and any base class has __marked_for_tracing__ marker
- if run_config.telemetry.enabled:
- traced_classes = [
- base for base in reversed(impl.__class__.__mro__) if getattr(base, "__marked_for_tracing__", False)
- ]
-
- if traced_classes:
- from llama_stack.core.telemetry.trace_protocol import trace_protocol
-
- for cls in traced_classes:
- trace_protocol(cls)
-
protocols = api_protocol_map_for_compliance_check(run_config)
additional_protocols = additional_protocols_map()
# TODO: check compliance for special tool groups
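Note: the deleted block discovered traceable classes by walking the MRO for an opt-in marker. A condensed sketch of that discovery pattern (the marker name is from the removed code; `trace_protocol` itself is deleted later in this diff):

```python
def classes_marked_for_tracing(impl: object) -> list[type]:
    # Walk bases from most generic to most specific, keeping those that
    # opted in via the __marked_for_tracing__ class attribute.
    return [
        base
        for base in reversed(type(impl).__mro__)
        if getattr(base, "__marked_for_tracing__", False)
    ]
```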
diff --git a/src/llama_stack/core/routers/__init__.py b/src/llama_stack/core/routers/__init__.py
index 289755bcb..c6f8a7ac2 100644
--- a/src/llama_stack/core/routers/__init__.py
+++ b/src/llama_stack/core/routers/__init__.py
@@ -85,8 +85,6 @@ async def get_auto_router_impl(
)
await inference_store.initialize()
api_to_dep_impl["store"] = inference_store
- api_to_dep_impl["telemetry_enabled"] = run_config.telemetry.enabled
-
elif api == Api.vector_io:
api_to_dep_impl["vector_stores_config"] = run_config.vector_stores
elif api == Api.safety:
diff --git a/src/llama_stack/core/routers/inference.py b/src/llama_stack/core/routers/inference.py
index 719624e86..8a7ffaa5f 100644
--- a/src/llama_stack/core/routers/inference.py
+++ b/src/llama_stack/core/routers/inference.py
@@ -7,7 +7,6 @@
import asyncio
import time
from collections.abc import AsyncIterator
-from datetime import UTC, datetime
from typing import Annotated, Any
from fastapi import Body
@@ -15,11 +14,7 @@ from openai.types.chat import ChatCompletionToolChoiceOptionParam as OpenAIChatC
from openai.types.chat import ChatCompletionToolParam as OpenAIChatCompletionToolParam
from pydantic import TypeAdapter
-from llama_stack.core.telemetry.telemetry import MetricEvent
-from llama_stack.core.telemetry.tracing import enqueue_event, get_current_span
from llama_stack.log import get_logger
-from llama_stack.models.llama.llama3.chat_format import ChatFormat
-from llama_stack.models.llama.llama3.tokenizer import Tokenizer
from llama_stack.providers.utils.inference.inference_store import InferenceStore
from llama_stack_api import (
HealthResponse,
@@ -60,15 +55,10 @@ class InferenceRouter(Inference):
self,
routing_table: RoutingTable,
store: InferenceStore | None = None,
- telemetry_enabled: bool = False,
) -> None:
logger.debug("Initializing InferenceRouter")
self.routing_table = routing_table
- self.telemetry_enabled = telemetry_enabled
self.store = store
- if self.telemetry_enabled:
- self.tokenizer = Tokenizer.get_instance()
- self.formatter = ChatFormat(self.tokenizer)
async def initialize(self) -> None:
logger.debug("InferenceRouter.initialize")
@@ -94,54 +84,6 @@ class InferenceRouter(Inference):
)
await self.routing_table.register_model(model_id, provider_model_id, provider_id, metadata, model_type)
- def _construct_metrics(
- self,
- prompt_tokens: int,
- completion_tokens: int,
- total_tokens: int,
- fully_qualified_model_id: str,
- provider_id: str,
- ) -> list[MetricEvent]:
- """Constructs a list of MetricEvent objects containing token usage metrics.
-
- Args:
- prompt_tokens: Number of tokens in the prompt
- completion_tokens: Number of tokens in the completion
- total_tokens: Total number of tokens used
-            fully_qualified_model_id: The fully qualified model identifier
- provider_id: The provider identifier
-
- Returns:
- List of MetricEvent objects with token usage metrics
- """
- span = get_current_span()
- if span is None:
- logger.warning("No span found for token usage metrics")
- return []
-
- metrics = [
- ("prompt_tokens", prompt_tokens),
- ("completion_tokens", completion_tokens),
- ("total_tokens", total_tokens),
- ]
- metric_events = []
- for metric_name, value in metrics:
- metric_events.append(
- MetricEvent(
- trace_id=span.trace_id,
- span_id=span.span_id,
- metric=metric_name,
- value=value,
- timestamp=datetime.now(UTC),
- unit="tokens",
- attributes={
- "model_id": fully_qualified_model_id,
- "provider_id": provider_id,
- },
- )
- )
- return metric_events
-
async def _get_model_provider(self, model_id: str, expected_model_type: str) -> tuple[Inference, str]:
model = await self.routing_table.get_object_by_identifier("model", model_id)
if model:
@@ -186,26 +128,9 @@ class InferenceRouter(Inference):
if params.stream:
return await provider.openai_completion(params)
-        # TODO: Metrics do NOT work with openai_completion when stream=True because we do not
-        # return an AsyncIterator; our tests expect a stream of chunks that we cannot currently intercept.
response = await provider.openai_completion(params)
response.model = request_model_id
- if self.telemetry_enabled and response.usage is not None:
- metrics = self._construct_metrics(
- prompt_tokens=response.usage.prompt_tokens,
- completion_tokens=response.usage.completion_tokens,
- total_tokens=response.usage.total_tokens,
- fully_qualified_model_id=request_model_id,
- provider_id=provider.__provider_id__,
- )
- for metric in metrics:
- enqueue_event(metric)
-
- # these metrics will show up in the client response.
- response.metrics = (
- metrics if not hasattr(response, "metrics") or response.metrics is None else response.metrics + metrics
- )
return response
async def openai_chat_completion(
@@ -254,20 +179,6 @@ class InferenceRouter(Inference):
if self.store:
asyncio.create_task(self.store.store_chat_completion(response, params.messages))
- if self.telemetry_enabled and response.usage is not None:
- metrics = self._construct_metrics(
- prompt_tokens=response.usage.prompt_tokens,
- completion_tokens=response.usage.completion_tokens,
- total_tokens=response.usage.total_tokens,
- fully_qualified_model_id=request_model_id,
- provider_id=provider.__provider_id__,
- )
- for metric in metrics:
- enqueue_event(metric)
- # these metrics will show up in the client response.
- response.metrics = (
- metrics if not hasattr(response, "metrics") or response.metrics is None else response.metrics + metrics
- )
return response
async def openai_embeddings(
@@ -411,18 +322,6 @@ class InferenceRouter(Inference):
for choice_data in choices_data.values():
completion_text += "".join(choice_data["content_parts"])
- # Add metrics to the chunk
- if self.telemetry_enabled and hasattr(chunk, "usage") and chunk.usage:
- metrics = self._construct_metrics(
- prompt_tokens=chunk.usage.prompt_tokens,
- completion_tokens=chunk.usage.completion_tokens,
- total_tokens=chunk.usage.total_tokens,
- fully_qualified_model_id=fully_qualified_model_id,
- provider_id=provider_id,
- )
- for metric in metrics:
- enqueue_event(metric)
-
yield chunk
finally:
# Store the final assembled completion
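Note: token accounting now flows through the OpenTelemetry metrics pipeline rather than hand-built `MetricEvent`s. A hedged sketch of recording usage with an OTel histogram; the metric and attribute names are inferred from the dashboard queries earlier in this diff (the Prometheus exporter flattens dots to the underscored labels seen there), and the exact recording site in llama stack is not shown here:

```python
from opentelemetry import metrics

meter = metrics.get_meter(__name__)
token_usage = meter.create_histogram(
    "llama_stack.gen_ai.client.token.usage",  # assumed name, per the dashboard
    unit="tokens",
    description="Per-request token usage",
)


def record_usage(model: str, prompt_tokens: int, completion_tokens: int) -> None:
    token_usage.record(
        prompt_tokens,
        attributes={"gen_ai.request.model": model, "gen_ai.token.type": "input"},
    )
    token_usage.record(
        completion_tokens,
        attributes={"gen_ai.request.model": model, "gen_ai.token.type": "output"},
    )
```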
diff --git a/src/llama_stack/core/routers/safety.py b/src/llama_stack/core/routers/safety.py
index 2bc99f14f..10c21ea88 100644
--- a/src/llama_stack/core/routers/safety.py
+++ b/src/llama_stack/core/routers/safety.py
@@ -6,11 +6,15 @@
from typing import Any
+from opentelemetry import trace
+
from llama_stack.core.datatypes import SafetyConfig
from llama_stack.log import get_logger
+from llama_stack.telemetry.helpers import safety_request_span_attributes, safety_span_name
from llama_stack_api import ModerationObject, OpenAIMessageParam, RoutingTable, RunShieldResponse, Safety, Shield
logger = get_logger(name=__name__, category="core::routers")
+tracer = trace.get_tracer(__name__)
class SafetyRouter(Safety):
@@ -51,13 +55,17 @@ class SafetyRouter(Safety):
messages: list[OpenAIMessageParam],
params: dict[str, Any] = None,
) -> RunShieldResponse:
- logger.debug(f"SafetyRouter.run_shield: {shield_id}")
- provider = await self.routing_table.get_provider_impl(shield_id)
- return await provider.run_shield(
- shield_id=shield_id,
- messages=messages,
- params=params,
- )
+ with tracer.start_as_current_span(name=safety_span_name(shield_id)):
+ logger.debug(f"SafetyRouter.run_shield: {shield_id}")
+ provider = await self.routing_table.get_provider_impl(shield_id)
+ response = await provider.run_shield(
+ shield_id=shield_id,
+ messages=messages,
+ params=params,
+ )
+
+ safety_request_span_attributes(shield_id, messages, response)
+ return response
async def run_moderation(self, input: str | list[str], model: str | None = None) -> ModerationObject:
list_shields_response = await self.routing_table.list_shields()
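Note: the two helpers imported from `llama_stack.telemetry.helpers` are not part of this hunk. A plausible minimal sketch of their shape (only the names and call sites come from the diff; the bodies below are assumptions):

```python
from opentelemetry import trace


def safety_span_name(shield_id: str) -> str:
    # Assumed convention: one span per shield invocation.
    return f"safety.run_shield {shield_id}"


def safety_request_span_attributes(shield_id: str, messages: list, response) -> None:
    # Annotate the span opened by SafetyRouter.run_shield; attribute keys
    # are illustrative, not a documented schema.
    span = trace.get_current_span()
    span.set_attribute("llama_stack.safety.shield_id", shield_id)
    span.set_attribute("llama_stack.safety.num_messages", len(messages))
    span.set_attribute(
        "llama_stack.safety.has_violation",
        getattr(response, "violation", None) is not None,
    )
```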
diff --git a/src/llama_stack/core/server/server.py b/src/llama_stack/core/server/server.py
index 0d3513980..9a01eb75e 100644
--- a/src/llama_stack/core/server/server.py
+++ b/src/llama_stack/core/server/server.py
@@ -50,8 +50,6 @@ from llama_stack.core.stack import (
cast_image_name_to_string,
replace_env_vars,
)
-from llama_stack.core.telemetry import Telemetry
-from llama_stack.core.telemetry.tracing import CURRENT_TRACE_CONTEXT, setup_logger
from llama_stack.core.utils.config import redact_sensitive_fields
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
from llama_stack.core.utils.context import preserve_contexts_async_generator
@@ -60,7 +58,6 @@ from llama_stack_api import Api, ConflictError, PaginatedResponse, ResourceNotFo
from .auth import AuthenticationMiddleware
from .quota import QuotaMiddleware
-from .tracing import TracingMiddleware
REPO_ROOT = Path(__file__).parent.parent.parent.parent
@@ -263,7 +260,7 @@ def create_dynamic_typed_route(func: Any, method: str, route: str) -> Callable:
try:
if is_streaming:
- context_vars = [CURRENT_TRACE_CONTEXT, PROVIDER_DATA_VAR]
+ context_vars = [PROVIDER_DATA_VAR]
if test_context_var is not None:
context_vars.append(test_context_var)
gen = preserve_contexts_async_generator(sse_generator(func(**kwargs)), context_vars)
@@ -441,9 +438,6 @@ def create_app() -> StackApp:
if cors_config:
app.add_middleware(CORSMiddleware, **cors_config.model_dump())
- if config.telemetry.enabled:
- setup_logger(Telemetry())
-
# Load external APIs if configured
external_apis = load_external_apis(config)
all_routes = get_all_api_routes(external_apis)
@@ -500,9 +494,6 @@ def create_app() -> StackApp:
app.exception_handler(RequestValidationError)(global_exception_handler)
app.exception_handler(Exception)(global_exception_handler)
- if config.telemetry.enabled:
- app.add_middleware(TracingMiddleware, impls=impls, external_apis=external_apis)
-
return app
diff --git a/src/llama_stack/core/server/tracing.py b/src/llama_stack/core/server/tracing.py
deleted file mode 100644
index c4901d9b1..000000000
--- a/src/llama_stack/core/server/tracing.py
+++ /dev/null
@@ -1,80 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-from aiohttp import hdrs
-
-from llama_stack.core.external import ExternalApiSpec
-from llama_stack.core.server.routes import find_matching_route, initialize_route_impls
-from llama_stack.core.telemetry.tracing import end_trace, start_trace
-from llama_stack.log import get_logger
-
-logger = get_logger(name=__name__, category="core::server")
-
-
-class TracingMiddleware:
- def __init__(self, app, impls, external_apis: dict[str, ExternalApiSpec]):
- self.app = app
- self.impls = impls
- self.external_apis = external_apis
- # FastAPI built-in paths that should bypass custom routing
- self.fastapi_paths = ("/docs", "/redoc", "/openapi.json", "/favicon.ico", "/static")
-
- async def __call__(self, scope, receive, send):
- if scope.get("type") == "lifespan":
- return await self.app(scope, receive, send)
-
- path = scope.get("path", "")
-
- # Check if the path is a FastAPI built-in path
- if path.startswith(self.fastapi_paths):
- # Pass through to FastAPI's built-in handlers
- logger.debug(f"Bypassing custom routing for FastAPI built-in path: {path}")
- return await self.app(scope, receive, send)
-
- if not hasattr(self, "route_impls"):
- self.route_impls = initialize_route_impls(self.impls, self.external_apis)
-
- try:
- _, _, route_path, webmethod = find_matching_route(
- scope.get("method", hdrs.METH_GET), path, self.route_impls
- )
- except ValueError:
- # If no matching endpoint is found, pass through to FastAPI
- logger.debug(f"No matching route found for path: {path}, falling back to FastAPI")
- return await self.app(scope, receive, send)
-
- # Log deprecation warning if route is deprecated
- if getattr(webmethod, "deprecated", False):
- logger.warning(
- f"DEPRECATED ROUTE USED: {scope.get('method', 'GET')} {path} - "
- f"This route is deprecated and may be removed in a future version. "
- f"Please check the docs for the supported version."
- )
-
- trace_attributes = {"__location__": "server", "raw_path": path}
-
- # Extract W3C trace context headers and store as trace attributes
- headers = dict(scope.get("headers", []))
- traceparent = headers.get(b"traceparent", b"").decode()
- if traceparent:
- trace_attributes["traceparent"] = traceparent
- tracestate = headers.get(b"tracestate", b"").decode()
- if tracestate:
- trace_attributes["tracestate"] = tracestate
-
- trace_path = webmethod.descriptive_name or route_path
- trace_context = await start_trace(trace_path, trace_attributes)
-
- async def send_with_trace_id(message):
- if message["type"] == "http.response.start":
- headers = message.get("headers", [])
- headers.append([b"x-trace-id", str(trace_context.trace_id).encode()])
- message["headers"] = headers
- await send(message)
-
- try:
- return await self.app(scope, receive, send_with_trace_id)
- finally:
- await end_trace()
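Note: the deleted middleware both propagated W3C `traceparent`/`tracestate` headers and stamped `x-trace-id` on responses; under the pure-OTel setup that job falls to the SDK propagator. A minimal sketch using the standard OTel API (not llama-stack-specific code):

```python
from opentelemetry import trace
from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator


def start_span_from_headers(headers: dict[str, str]):
    # Extract traceparent/tracestate into a Context, then parent a new
    # server-side span on whatever the caller sent.
    ctx = TraceContextTextMapPropagator().extract(carrier=headers)
    tracer = trace.get_tracer(__name__)
    return tracer.start_as_current_span("http.request", context=ctx)
```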
diff --git a/src/llama_stack/core/telemetry/__init__.py b/src/llama_stack/core/telemetry/__init__.py
deleted file mode 100644
index bab612c0d..000000000
--- a/src/llama_stack/core/telemetry/__init__.py
+++ /dev/null
@@ -1,32 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .telemetry import Telemetry
-from .trace_protocol import serialize_value, trace_protocol
-from .tracing import (
- CURRENT_TRACE_CONTEXT,
- ROOT_SPAN_MARKERS,
- end_trace,
- enqueue_event,
- get_current_span,
- setup_logger,
- span,
- start_trace,
-)
-
-__all__ = [
- "Telemetry",
- "trace_protocol",
- "serialize_value",
- "CURRENT_TRACE_CONTEXT",
- "ROOT_SPAN_MARKERS",
- "end_trace",
- "enqueue_event",
- "get_current_span",
- "setup_logger",
- "span",
- "start_trace",
-]
diff --git a/src/llama_stack/core/telemetry/telemetry.py b/src/llama_stack/core/telemetry/telemetry.py
deleted file mode 100644
index 5268fa641..000000000
--- a/src/llama_stack/core/telemetry/telemetry.py
+++ /dev/null
@@ -1,629 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import os
-import threading
-from collections.abc import Mapping, Sequence
-from datetime import datetime
-from enum import Enum
-from typing import (
- Annotated,
- Any,
- Literal,
- cast,
-)
-
-from opentelemetry import metrics, trace
-from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
-from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
-from opentelemetry.sdk.metrics import MeterProvider
-from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
-from opentelemetry.sdk.trace import TracerProvider
-from opentelemetry.sdk.trace.export import BatchSpanProcessor
-from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator
-from pydantic import BaseModel, Field
-
-from llama_stack.log import get_logger
-from llama_stack.models.llama.datatypes import Primitive
-from llama_stack_api import json_schema_type, register_schema
-
-ROOT_SPAN_MARKERS = ["__root__", "__root_span__"]
-
-# Type alias for OpenTelemetry attribute values (excludes None)
-AttributeValue = str | bool | int | float | Sequence[str] | Sequence[bool] | Sequence[int] | Sequence[float]
-Attributes = Mapping[str, AttributeValue]
-
-
-@json_schema_type
-class SpanStatus(Enum):
- """The status of a span indicating whether it completed successfully or with an error.
- :cvar OK: Span completed successfully without errors
- :cvar ERROR: Span completed with an error or failure
- """
-
- OK = "ok"
- ERROR = "error"
-
-
-@json_schema_type
-class Span(BaseModel):
- """A span representing a single operation within a trace.
- :param span_id: Unique identifier for the span
- :param trace_id: Unique identifier for the trace this span belongs to
- :param parent_span_id: (Optional) Unique identifier for the parent span, if this is a child span
- :param name: Human-readable name describing the operation this span represents
- :param start_time: Timestamp when the operation began
- :param end_time: (Optional) Timestamp when the operation finished, if completed
- :param attributes: (Optional) Key-value pairs containing additional metadata about the span
- """
-
- span_id: str
- trace_id: str
- parent_span_id: str | None = None
- name: str
- start_time: datetime
- end_time: datetime | None = None
- attributes: dict[str, Any] | None = Field(default_factory=lambda: {})
-
- def set_attribute(self, key: str, value: Any):
- if self.attributes is None:
- self.attributes = {}
- self.attributes[key] = value
-
-
-@json_schema_type
-class Trace(BaseModel):
- """A trace representing the complete execution path of a request across multiple operations.
- :param trace_id: Unique identifier for the trace
- :param root_span_id: Unique identifier for the root span that started this trace
- :param start_time: Timestamp when the trace began
- :param end_time: (Optional) Timestamp when the trace finished, if completed
- """
-
- trace_id: str
- root_span_id: str
- start_time: datetime
- end_time: datetime | None = None
-
-
-@json_schema_type
-class EventType(Enum):
- """The type of telemetry event being logged.
- :cvar UNSTRUCTURED_LOG: A simple log message with severity level
- :cvar STRUCTURED_LOG: A structured log event with typed payload data
- :cvar METRIC: A metric measurement with value and unit
- """
-
- UNSTRUCTURED_LOG = "unstructured_log"
- STRUCTURED_LOG = "structured_log"
- METRIC = "metric"
-
-
-@json_schema_type
-class LogSeverity(Enum):
- """The severity level of a log message.
- :cvar VERBOSE: Detailed diagnostic information for troubleshooting
- :cvar DEBUG: Debug information useful during development
- :cvar INFO: General informational messages about normal operation
- :cvar WARN: Warning messages about potentially problematic situations
- :cvar ERROR: Error messages indicating failures that don't stop execution
- :cvar CRITICAL: Critical error messages indicating severe failures
- """
-
- VERBOSE = "verbose"
- DEBUG = "debug"
- INFO = "info"
- WARN = "warn"
- ERROR = "error"
- CRITICAL = "critical"
-
-
-class EventCommon(BaseModel):
- """Common fields shared by all telemetry events.
- :param trace_id: Unique identifier for the trace this event belongs to
- :param span_id: Unique identifier for the span this event belongs to
- :param timestamp: Timestamp when the event occurred
- :param attributes: (Optional) Key-value pairs containing additional metadata about the event
- """
-
- trace_id: str
- span_id: str
- timestamp: datetime
- attributes: dict[str, Primitive] | None = Field(default_factory=lambda: {})
-
-
-@json_schema_type
-class UnstructuredLogEvent(EventCommon):
- """An unstructured log event containing a simple text message.
- :param type: Event type identifier set to UNSTRUCTURED_LOG
- :param message: The log message text
- :param severity: The severity level of the log message
- """
-
- type: Literal[EventType.UNSTRUCTURED_LOG] = EventType.UNSTRUCTURED_LOG
- message: str
- severity: LogSeverity
-
-
-@json_schema_type
-class MetricEvent(EventCommon):
- """A metric event containing a measured value.
- :param type: Event type identifier set to METRIC
- :param metric: The name of the metric being measured
- :param value: The numeric value of the metric measurement
- :param unit: The unit of measurement for the metric value
- """
-
- type: Literal[EventType.METRIC] = EventType.METRIC
- metric: str # this would be an enum
- value: int | float
- unit: str
-
-
-@json_schema_type
-class StructuredLogType(Enum):
- """The type of structured log event payload.
- :cvar SPAN_START: Event indicating the start of a new span
- :cvar SPAN_END: Event indicating the completion of a span
- """
-
- SPAN_START = "span_start"
- SPAN_END = "span_end"
-
-
-@json_schema_type
-class SpanStartPayload(BaseModel):
- """Payload for a span start event.
- :param type: Payload type identifier set to SPAN_START
- :param name: Human-readable name describing the operation this span represents
- :param parent_span_id: (Optional) Unique identifier for the parent span, if this is a child span
- """
-
- type: Literal[StructuredLogType.SPAN_START] = StructuredLogType.SPAN_START
- name: str
- parent_span_id: str | None = None
-
-
-@json_schema_type
-class SpanEndPayload(BaseModel):
- """Payload for a span end event.
- :param type: Payload type identifier set to SPAN_END
- :param status: The final status of the span indicating success or failure
- """
-
- type: Literal[StructuredLogType.SPAN_END] = StructuredLogType.SPAN_END
- status: SpanStatus
-
-
-StructuredLogPayload = Annotated[
- SpanStartPayload | SpanEndPayload,
- Field(discriminator="type"),
-]
-register_schema(StructuredLogPayload, name="StructuredLogPayload")
-
-
-@json_schema_type
-class StructuredLogEvent(EventCommon):
- """A structured log event containing typed payload data.
- :param type: Event type identifier set to STRUCTURED_LOG
- :param payload: The structured payload data for the log event
- """
-
- type: Literal[EventType.STRUCTURED_LOG] = EventType.STRUCTURED_LOG
- payload: StructuredLogPayload
-
-
-Event = Annotated[
- UnstructuredLogEvent | MetricEvent | StructuredLogEvent,
- Field(discriminator="type"),
-]
-register_schema(Event, name="Event")
-
-
-@json_schema_type
-class EvalTrace(BaseModel):
- """A trace record for evaluation purposes.
- :param session_id: Unique identifier for the evaluation session
- :param step: The evaluation step or phase identifier
- :param input: The input data for the evaluation
- :param output: The actual output produced during evaluation
- :param expected_output: The expected output for comparison during evaluation
- """
-
- session_id: str
- step: str
- input: str
- output: str
- expected_output: str
-
-
-@json_schema_type
-class SpanWithStatus(Span):
- """A span that includes status information.
- :param status: (Optional) The current status of the span
- """
-
- status: SpanStatus | None = None
-
-
-@json_schema_type
-class QueryConditionOp(Enum):
- """Comparison operators for query conditions.
- :cvar EQ: Equal to comparison
- :cvar NE: Not equal to comparison
- :cvar GT: Greater than comparison
- :cvar LT: Less than comparison
- """
-
- EQ = "eq"
- NE = "ne"
- GT = "gt"
- LT = "lt"
-
-
-@json_schema_type
-class QueryCondition(BaseModel):
- """A condition for filtering query results.
- :param key: The attribute key to filter on
- :param op: The comparison operator to apply
- :param value: The value to compare against
- """
-
- key: str
- op: QueryConditionOp
- value: Any
-
-
-class QueryTracesResponse(BaseModel):
- """Response containing a list of traces.
- :param data: List of traces matching the query criteria
- """
-
- data: list[Trace]
-
-
-class QuerySpansResponse(BaseModel):
- """Response containing a list of spans.
- :param data: List of spans matching the query criteria
- """
-
- data: list[Span]
-
-
-class QuerySpanTreeResponse(BaseModel):
- """Response containing a tree structure of spans.
- :param data: Dictionary mapping span IDs to spans with status information
- """
-
- data: dict[str, SpanWithStatus]
-
-
-class MetricQueryType(Enum):
- """The type of metric query to perform.
- :cvar RANGE: Query metrics over a time range
- :cvar INSTANT: Query metrics at a specific point in time
- """
-
- RANGE = "range"
- INSTANT = "instant"
-
-
-class MetricLabelOperator(Enum):
- """Operators for matching metric labels.
- :cvar EQUALS: Label value must equal the specified value
- :cvar NOT_EQUALS: Label value must not equal the specified value
- :cvar REGEX_MATCH: Label value must match the specified regular expression
- :cvar REGEX_NOT_MATCH: Label value must not match the specified regular expression
- """
-
- EQUALS = "="
- NOT_EQUALS = "!="
- REGEX_MATCH = "=~"
- REGEX_NOT_MATCH = "!~"
-
-
-class MetricLabelMatcher(BaseModel):
- """A matcher for filtering metrics by label values.
- :param name: The name of the label to match
- :param value: The value to match against
- :param operator: The comparison operator to use for matching
- """
-
- name: str
- value: str
- operator: MetricLabelOperator = MetricLabelOperator.EQUALS
-
-
-@json_schema_type
-class MetricLabel(BaseModel):
- """A label associated with a metric.
- :param name: The name of the label
- :param value: The value of the label
- """
-
- name: str
- value: str
-
-
-@json_schema_type
-class MetricDataPoint(BaseModel):
- """A single data point in a metric time series.
- :param timestamp: Unix timestamp when the metric value was recorded
-    :param value: The numeric value of the metric at this timestamp
-    :param unit: The unit of measurement for the metric value
- """
-
- timestamp: int
- value: float
- unit: str
-
-
-@json_schema_type
-class MetricSeries(BaseModel):
- """A time series of metric data points.
- :param metric: The name of the metric
- :param labels: List of labels associated with this metric series
- :param values: List of data points in chronological order
- """
-
- metric: str
- labels: list[MetricLabel]
- values: list[MetricDataPoint]
-
-
-class QueryMetricsResponse(BaseModel):
- """Response containing metric time series data.
- :param data: List of metric series matching the query criteria
- """
-
- data: list[MetricSeries]
-
-
-_GLOBAL_STORAGE: dict[str, dict[str | int, Any]] = {
- "active_spans": {},
- "counters": {},
- "gauges": {},
- "up_down_counters": {},
- "histograms": {},
-}
-_global_lock = threading.Lock()
-_TRACER_PROVIDER = None
-
-logger = get_logger(name=__name__, category="telemetry")
-
-
-def _clean_attributes(attrs: dict[str, Any] | None) -> Attributes | None:
- """Remove None values from attributes dict to match OpenTelemetry's expected type."""
- if attrs is None:
- return None
- return {k: v for k, v in attrs.items() if v is not None}
-
-
-def is_tracing_enabled(tracer):
- with tracer.start_as_current_span("check_tracing") as span:
- return span.is_recording()
-
-
-class Telemetry:
- def __init__(self) -> None:
- self.meter = None
-
- global _TRACER_PROVIDER
- # Initialize the correct span processor based on the provider state.
- # This is needed since once the span processor is set, it cannot be unset.
- # Recreating the telemetry adapter multiple times will result in duplicate span processors.
- # Since the library client can be recreated multiple times in a notebook,
- # the kernel will hold on to the span processor and cause duplicate spans to be written.
- if os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT"):
- if _TRACER_PROVIDER is None:
- provider = TracerProvider()
- trace.set_tracer_provider(provider)
- _TRACER_PROVIDER = provider
-
- # Use single OTLP endpoint for all telemetry signals
-
- # Let OpenTelemetry SDK handle endpoint construction automatically
- # The SDK will read OTEL_EXPORTER_OTLP_ENDPOINT and construct appropriate URLs
- # https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter
- span_exporter = OTLPSpanExporter()
- span_processor = BatchSpanProcessor(span_exporter)
- cast(TracerProvider, trace.get_tracer_provider()).add_span_processor(span_processor)
-
- metric_reader = PeriodicExportingMetricReader(OTLPMetricExporter())
- metric_provider = MeterProvider(metric_readers=[metric_reader])
- metrics.set_meter_provider(metric_provider)
- self.is_otel_endpoint_set = True
- else:
- logger.warning("OTEL_EXPORTER_OTLP_ENDPOINT is not set, skipping telemetry")
- self.is_otel_endpoint_set = False
-
- self.meter = metrics.get_meter(__name__)
- self._lock = _global_lock
-
- async def initialize(self) -> None:
- pass
-
- async def shutdown(self) -> None:
- if self.is_otel_endpoint_set:
- cast(TracerProvider, trace.get_tracer_provider()).force_flush()
-
- async def log_event(self, event: Event, ttl_seconds: int = 604800) -> None:
- if isinstance(event, UnstructuredLogEvent):
- self._log_unstructured(event, ttl_seconds)
- elif isinstance(event, MetricEvent):
- self._log_metric(event)
- elif isinstance(event, StructuredLogEvent):
- self._log_structured(event, ttl_seconds)
- else:
- raise ValueError(f"Unknown event type: {event}")
-
- def _log_unstructured(self, event: UnstructuredLogEvent, ttl_seconds: int) -> None:
- with self._lock:
- # Use global storage instead of instance storage
- span_id = int(event.span_id, 16)
- span = _GLOBAL_STORAGE["active_spans"].get(span_id)
-
- if span:
- timestamp_ns = int(event.timestamp.timestamp() * 1e9)
- span.add_event(
- name=event.type.value,
- attributes={
- "message": event.message,
- "severity": event.severity.value,
- "__ttl__": ttl_seconds,
- **(event.attributes or {}),
- },
- timestamp=timestamp_ns,
- )
- else:
- print(f"Warning: No active span found for span_id {span_id}. Dropping event: {event}")
-
- def _get_or_create_counter(self, name: str, unit: str) -> metrics.Counter:
- assert self.meter is not None
- if name not in _GLOBAL_STORAGE["counters"]:
- _GLOBAL_STORAGE["counters"][name] = self.meter.create_counter(
- name=name,
- unit=unit,
- description=f"Counter for {name}",
- )
- return cast(metrics.Counter, _GLOBAL_STORAGE["counters"][name])
-
- def _get_or_create_gauge(self, name: str, unit: str) -> metrics.ObservableGauge:
- assert self.meter is not None
- if name not in _GLOBAL_STORAGE["gauges"]:
- _GLOBAL_STORAGE["gauges"][name] = self.meter.create_gauge(
- name=name,
- unit=unit,
- description=f"Gauge for {name}",
- )
- return cast(metrics.ObservableGauge, _GLOBAL_STORAGE["gauges"][name])
-
- def _get_or_create_histogram(self, name: str, unit: str) -> metrics.Histogram:
- assert self.meter is not None
- if name not in _GLOBAL_STORAGE["histograms"]:
- _GLOBAL_STORAGE["histograms"][name] = self.meter.create_histogram(
- name=name,
- unit=unit,
- description=f"Histogram for {name}",
- )
- return cast(metrics.Histogram, _GLOBAL_STORAGE["histograms"][name])
-
- def _log_metric(self, event: MetricEvent) -> None:
- # Add metric as an event to the current span
- try:
- with self._lock:
- # Only try to add to span if we have a valid span_id
- if event.span_id:
- try:
- span_id = int(event.span_id, 16)
- span = _GLOBAL_STORAGE["active_spans"].get(span_id)
-
- if span:
- timestamp_ns = int(event.timestamp.timestamp() * 1e9)
- span.add_event(
- name=f"metric.{event.metric}",
- attributes={
- "value": event.value,
- "unit": event.unit,
- **(event.attributes or {}),
- },
- timestamp=timestamp_ns,
- )
- except (ValueError, KeyError):
-                        # Invalid span_id or span not found; nothing more to do here
- pass
- except Exception:
- # Lock acquisition failed
- logger.debug("Failed to acquire lock to add metric to span")
-
- # Log to OpenTelemetry meter if available
- if self.meter is None:
- return
-
- # Use histograms for token-related metrics (per-request measurements)
- # Use counters for other cumulative metrics
- token_metrics = {"prompt_tokens", "completion_tokens", "total_tokens"}
-
- if event.metric in token_metrics:
- # Token metrics are per-request measurements, use histogram
- histogram = self._get_or_create_histogram(event.metric, event.unit)
- histogram.record(event.value, attributes=_clean_attributes(event.attributes))
- elif isinstance(event.value, int):
- counter = self._get_or_create_counter(event.metric, event.unit)
- counter.add(event.value, attributes=_clean_attributes(event.attributes))
- elif isinstance(event.value, float):
- up_down_counter = self._get_or_create_up_down_counter(event.metric, event.unit)
- up_down_counter.add(event.value, attributes=_clean_attributes(event.attributes))
-
- def _get_or_create_up_down_counter(self, name: str, unit: str) -> metrics.UpDownCounter:
- assert self.meter is not None
- if name not in _GLOBAL_STORAGE["up_down_counters"]:
- _GLOBAL_STORAGE["up_down_counters"][name] = self.meter.create_up_down_counter(
- name=name,
- unit=unit,
- description=f"UpDownCounter for {name}",
- )
- return cast(metrics.UpDownCounter, _GLOBAL_STORAGE["up_down_counters"][name])
-
- def _log_structured(self, event: StructuredLogEvent, ttl_seconds: int) -> None:
- with self._lock:
- span_id = int(event.span_id, 16)
- tracer = trace.get_tracer(__name__)
- if event.attributes is None:
- event.attributes = {}
- event.attributes["__ttl__"] = ttl_seconds
-
- # Extract these W3C trace context attributes so they are not written to
- # underlying storage, as we just need them to propagate the trace context.
- traceparent = event.attributes.pop("traceparent", None)
- tracestate = event.attributes.pop("tracestate", None)
- if traceparent:
- # If we have a traceparent header value, we're not the root span.
- for root_attribute in ROOT_SPAN_MARKERS:
- event.attributes.pop(root_attribute, None)
-
- if isinstance(event.payload, SpanStartPayload):
- # Check if span already exists to prevent duplicates
- if span_id in _GLOBAL_STORAGE["active_spans"]:
- return
-
- context = None
- if event.payload.parent_span_id:
- parent_span_id = int(event.payload.parent_span_id, 16)
- parent_span = _GLOBAL_STORAGE["active_spans"].get(parent_span_id)
- if parent_span:
- context = trace.set_span_in_context(parent_span)
- elif traceparent:
- carrier = {
- "traceparent": traceparent,
- "tracestate": tracestate,
- }
- context = TraceContextTextMapPropagator().extract(carrier=carrier)
-
- span = tracer.start_span(
- name=event.payload.name,
- context=context,
- attributes=_clean_attributes(event.attributes),
- )
- _GLOBAL_STORAGE["active_spans"][span_id] = span
-
- elif isinstance(event.payload, SpanEndPayload):
- span = _GLOBAL_STORAGE["active_spans"].get(span_id) # type: ignore[assignment]
- if span:
- if event.attributes:
- cleaned_attrs = _clean_attributes(event.attributes)
- if cleaned_attrs:
- span.set_attributes(cleaned_attrs)
-
- status = (
- trace.Status(status_code=trace.StatusCode.OK)
- if event.payload.status == SpanStatus.OK
- else trace.Status(status_code=trace.StatusCode.ERROR)
- )
- span.set_status(status)
- span.end()
- _GLOBAL_STORAGE["active_spans"].pop(span_id, None)
- else:
- raise ValueError(f"Unknown structured log event: {event}")
diff --git a/src/llama_stack/core/telemetry/trace_protocol.py b/src/llama_stack/core/telemetry/trace_protocol.py
deleted file mode 100644
index 95b33a4bc..000000000
--- a/src/llama_stack/core/telemetry/trace_protocol.py
+++ /dev/null
@@ -1,154 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import asyncio
-import inspect
-import json
-from collections.abc import AsyncGenerator, Callable
-from functools import wraps
-from typing import Any, cast
-
-from pydantic import BaseModel
-
-from llama_stack.models.llama.datatypes import Primitive
-
-type JSONValue = Primitive | list["JSONValue"] | dict[str, "JSONValue"]
-
-
-def serialize_value(value: Any) -> str:
- return str(_prepare_for_json(value))
-
-
-def _prepare_for_json(value: Any) -> JSONValue:
- """Serialize a single value into JSON-compatible format."""
- if value is None:
- return ""
- elif isinstance(value, str | int | float | bool):
- return value
- elif hasattr(value, "_name_"):
- return cast(str, value._name_)
- elif isinstance(value, BaseModel):
- return cast(JSONValue, json.loads(value.model_dump_json()))
- elif isinstance(value, list | tuple | set):
- return [_prepare_for_json(item) for item in value]
- elif isinstance(value, dict):
- return {str(k): _prepare_for_json(v) for k, v in value.items()}
- else:
- try:
- json.dumps(value)
- return cast(JSONValue, value)
- except Exception:
- return str(value)
-
-
-def trace_protocol[T: type[Any]](cls: T) -> T:
- """
- A class decorator that automatically traces all methods in a protocol/base class
- and its inheriting classes.
- """
-
- def trace_method(method: Callable[..., Any]) -> Callable[..., Any]:
- is_async = asyncio.iscoroutinefunction(method)
- is_async_gen = inspect.isasyncgenfunction(method)
-
- def create_span_context(self: Any, *args: Any, **kwargs: Any) -> tuple[str, str, dict[str, Primitive]]:
- class_name = self.__class__.__name__
- method_name = method.__name__
- span_type = "async_generator" if is_async_gen else "async" if is_async else "sync"
- sig = inspect.signature(method)
- param_names = list(sig.parameters.keys())[1:] # Skip 'self'
- combined_args: dict[str, str] = {}
- for i, arg in enumerate(args):
- param_name = param_names[i] if i < len(param_names) else f"position_{i + 1}"
- combined_args[param_name] = serialize_value(arg)
- for k, v in kwargs.items():
- combined_args[str(k)] = serialize_value(v)
-
- span_attributes: dict[str, Primitive] = {
- "__autotraced__": True,
- "__class__": class_name,
- "__method__": method_name,
- "__type__": span_type,
- "__args__": json.dumps(combined_args),
- }
-
- return class_name, method_name, span_attributes
-
- @wraps(method)
- async def async_gen_wrapper(self: Any, *args: Any, **kwargs: Any) -> AsyncGenerator[Any, None]:
- from llama_stack.core.telemetry import tracing
-
- class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs)
-
- with tracing.span(f"{class_name}.{method_name}", span_attributes) as span:
- count = 0
- try:
- async for item in method(self, *args, **kwargs):
- yield item
- count += 1
- finally:
- span.set_attribute("chunk_count", count)
-
- @wraps(method)
- async def async_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any:
- from llama_stack.core.telemetry import tracing
-
- class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs)
-
- with tracing.span(f"{class_name}.{method_name}", span_attributes) as span:
- try:
- result = await method(self, *args, **kwargs)
- span.set_attribute("output", serialize_value(result))
- return result
- except Exception as e:
- span.set_attribute("error", str(e))
- raise
-
- @wraps(method)
- def sync_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any:
- from llama_stack.core.telemetry import tracing
-
- class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs)
-
- with tracing.span(f"{class_name}.{method_name}", span_attributes) as span:
- try:
- result = method(self, *args, **kwargs)
- span.set_attribute("output", serialize_value(result))
- return result
- except Exception as e:
- span.set_attribute("error", str(e))
- raise
-
- if is_async_gen:
- return async_gen_wrapper
- elif is_async:
- return async_wrapper
- else:
- return sync_wrapper
-
- # Wrap methods on the class itself (for classes applied at runtime)
- # Skip if already wrapped (indicated by __wrapped__ attribute)
- for name, method in vars(cls).items():
- if inspect.isfunction(method) and not name.startswith("_"):
- if not hasattr(method, "__wrapped__"):
- wrapped = trace_method(method)
- setattr(cls, name, wrapped) # noqa: B010
-
- # Also set up __init_subclass__ for future subclasses
- original_init_subclass = cast(Callable[..., Any] | None, getattr(cls, "__init_subclass__", None))
-
- def __init_subclass__(cls_child: type[Any], **kwargs: Any) -> None: # noqa: N807
- if original_init_subclass:
- cast(Callable[..., None], original_init_subclass)(**kwargs)
-
- for name, method in vars(cls_child).items():
- if inspect.isfunction(method) and not name.startswith("_"):
- setattr(cls_child, name, trace_method(method)) # noqa: B010
-
- cls_any = cast(Any, cls)
- cls_any.__init_subclass__ = classmethod(__init_subclass__)
-
- return cls
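Note: the heart of the deleted decorator is wrapping public methods at class-creation time via `__init_subclass__`, so subclasses defined after decoration get traced automatically. A toy illustration of that mechanism, independent of the deleted code:

```python
import functools


class AutoWrapped:
    def __init_subclass__(cls, **kwargs):
        super().__init_subclass__(**kwargs)
        for name, attr in list(vars(cls).items()):
            if callable(attr) and not name.startswith("_"):
                cls._wrap(name, attr)

    @classmethod
    def _wrap(cls, name, fn):
        @functools.wraps(fn)
        def wrapper(self, *args, **kw):
            print(f"enter {cls.__name__}.{name}")  # stand-in for span start
            return fn(self, *args, **kw)

        setattr(cls, name, wrapper)


class MyImpl(AutoWrapped):
    def run(self) -> str:
        return "ok"


MyImpl().run()  # prints "enter MyImpl.run", returns "ok"
```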
diff --git a/src/llama_stack/core/telemetry/tracing.py b/src/llama_stack/core/telemetry/tracing.py
deleted file mode 100644
index a67cbe784..000000000
--- a/src/llama_stack/core/telemetry/tracing.py
+++ /dev/null
@@ -1,388 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import asyncio
-import contextvars
-import logging # allow-direct-logging
-import queue
-import secrets
-import sys
-import threading
-import time
-from collections.abc import Callable
-from datetime import UTC, datetime
-from functools import wraps
-from typing import Any, Self
-
-from llama_stack.core.telemetry.telemetry import (
- ROOT_SPAN_MARKERS,
- Event,
- LogSeverity,
- Span,
- SpanEndPayload,
- SpanStartPayload,
- SpanStatus,
- StructuredLogEvent,
- Telemetry,
- UnstructuredLogEvent,
-)
-from llama_stack.core.telemetry.trace_protocol import serialize_value
-from llama_stack.log import get_logger
-
-logger = get_logger(__name__, category="core")
-
-# Fallback logger that does NOT propagate to TelemetryHandler to avoid recursion
-_fallback_logger = logging.getLogger("llama_stack.telemetry.background")
-if not _fallback_logger.handlers:
- _fallback_logger.propagate = False
- _fallback_logger.setLevel(logging.ERROR)
- _fallback_handler = logging.StreamHandler(sys.stderr)
- _fallback_handler.setLevel(logging.ERROR)
- _fallback_handler.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] %(name)s: %(message)s"))
- _fallback_logger.addHandler(_fallback_handler)
-
-
-INVALID_SPAN_ID = 0x0000000000000000
-INVALID_TRACE_ID = 0x00000000000000000000000000000000
-
-# The logical root span may not be visible to this process if a parent context
-# is passed in. The local root span is the first local span in a trace.
-LOCAL_ROOT_SPAN_MARKER = "__local_root_span__"
-
-
-def trace_id_to_str(trace_id: int) -> str:
- """Convenience trace ID formatting method
- Args:
- trace_id: Trace ID int
-
- Returns:
-        The trace ID as a 32-character hexadecimal string
- """
- return format(trace_id, "032x")
-
-
-def span_id_to_str(span_id: int) -> str:
- """Convenience span ID formatting method
- Args:
- span_id: Span ID int
-
- Returns:
-        The span ID as a 16-character hexadecimal string
- """
- return format(span_id, "016x")
-
-
-def generate_span_id() -> str:
- span_id = secrets.randbits(64)
- while span_id == INVALID_SPAN_ID:
- span_id = secrets.randbits(64)
- return span_id_to_str(span_id)
-
-
-def generate_trace_id() -> str:
- trace_id = secrets.randbits(128)
- while trace_id == INVALID_TRACE_ID:
- trace_id = secrets.randbits(128)
- return trace_id_to_str(trace_id)
-
-
-LOG_QUEUE_FULL_LOG_INTERVAL_SECONDS = 60.0
-
-
-class BackgroundLogger:
- def __init__(self, api: Telemetry, capacity: int = 100000):
- self.api = api
- self.log_queue: queue.Queue[Any] = queue.Queue(maxsize=capacity)
- self.worker_thread = threading.Thread(target=self._worker, daemon=True)
- self.worker_thread.start()
- self._last_queue_full_log_time: float = 0.0
- self._dropped_since_last_notice: int = 0
-
- def log_event(self, event: Event) -> None:
- try:
- self.log_queue.put_nowait(event)
- except queue.Full:
- # Aggregate drops and emit at most once per interval via fallback logger
- self._dropped_since_last_notice += 1
- current_time = time.time()
- if current_time - self._last_queue_full_log_time >= LOG_QUEUE_FULL_LOG_INTERVAL_SECONDS:
- _fallback_logger.error(
- "Log queue is full; dropped %d events since last notice",
- self._dropped_since_last_notice,
- )
- self._last_queue_full_log_time = current_time
- self._dropped_since_last_notice = 0
-
- def _worker(self):
- loop = asyncio.new_event_loop()
- asyncio.set_event_loop(loop)
- loop.run_until_complete(self._process_logs())
-
- async def _process_logs(self):
- while True:
- try:
- event = self.log_queue.get()
- await self.api.log_event(event)
- except Exception:
- import traceback
-
- traceback.print_exc()
- print("Error processing log event")
- finally:
- self.log_queue.task_done()
-
- def __del__(self) -> None:
- self.log_queue.join()
-
-
-BACKGROUND_LOGGER: BackgroundLogger | None = None
-
-
-def enqueue_event(event: Event) -> None:
- """Enqueue a telemetry event to the background logger if available.
-
- This provides a non-blocking path for routers and other hot paths to
- submit telemetry without awaiting the Telemetry API, reducing contention
- with the main event loop.
- """
- global BACKGROUND_LOGGER
- if BACKGROUND_LOGGER is None:
- raise RuntimeError("Telemetry API not initialized")
- BACKGROUND_LOGGER.log_event(event)
-
-
-class TraceContext:
- def __init__(self, logger: BackgroundLogger, trace_id: str):
- self.logger = logger
- self.trace_id = trace_id
- self.spans: list[Span] = []
-
- def push_span(self, name: str, attributes: dict[str, Any] | None = None) -> Span:
- current_span = self.get_current_span()
- span = Span(
- span_id=generate_span_id(),
- trace_id=self.trace_id,
- name=name,
- start_time=datetime.now(UTC),
- parent_span_id=current_span.span_id if current_span else None,
- attributes=attributes,
- )
-
- self.logger.log_event(
- StructuredLogEvent(
- trace_id=span.trace_id,
- span_id=span.span_id,
- timestamp=span.start_time,
- attributes=span.attributes,
- payload=SpanStartPayload(
- name=span.name,
- parent_span_id=span.parent_span_id,
- ),
- )
- )
-
- self.spans.append(span)
- return span
-
- def pop_span(self, status: SpanStatus = SpanStatus.OK) -> None:
- span = self.spans.pop()
- if span is not None:
- self.logger.log_event(
- StructuredLogEvent(
- trace_id=span.trace_id,
- span_id=span.span_id,
- timestamp=span.start_time,
- attributes=span.attributes,
- payload=SpanEndPayload(
- status=status,
- ),
- )
- )
-
- def get_current_span(self) -> Span | None:
- return self.spans[-1] if self.spans else None
-
-
-CURRENT_TRACE_CONTEXT: contextvars.ContextVar[TraceContext | None] = contextvars.ContextVar(
- "trace_context", default=None
-)
-
-
-def setup_logger(api: Telemetry, level: int = logging.INFO):
- global BACKGROUND_LOGGER
-
- if BACKGROUND_LOGGER is None:
- BACKGROUND_LOGGER = BackgroundLogger(api)
- root_logger = logging.getLogger()
- root_logger.setLevel(level)
- root_logger.addHandler(TelemetryHandler())
-
-
-async def start_trace(name: str, attributes: dict[str, Any] | None = None) -> TraceContext | None:
- global CURRENT_TRACE_CONTEXT, BACKGROUND_LOGGER
-
- if BACKGROUND_LOGGER is None:
- logger.debug("No Telemetry implementation set. Skipping trace initialization...")
- return None
-
- trace_id = generate_trace_id()
- context = TraceContext(BACKGROUND_LOGGER, trace_id)
- # Mark this span as the root for the trace for now. The processing of
- # traceparent context if supplied comes later and will result in the
- # ROOT_SPAN_MARKERS being removed. Also mark this as the 'local' root,
- # i.e. the root of the spans originating in this process as this is
- # needed to ensure that we insert this 'local' root span's id into
- # the trace record in sqlite store.
- attributes = dict.fromkeys(ROOT_SPAN_MARKERS, True) | {LOCAL_ROOT_SPAN_MARKER: True} | (attributes or {})
- context.push_span(name, attributes)
-
- CURRENT_TRACE_CONTEXT.set(context)
- return context
-
-
-async def end_trace(status: SpanStatus = SpanStatus.OK):
- global CURRENT_TRACE_CONTEXT
-
- context = CURRENT_TRACE_CONTEXT.get()
- if context is None:
- logger.debug("No trace context to end")
- return
-
- context.pop_span(status)
- CURRENT_TRACE_CONTEXT.set(None)
-
-
-def severity(levelname: str) -> LogSeverity:
- if levelname == "DEBUG":
- return LogSeverity.DEBUG
- elif levelname == "INFO":
- return LogSeverity.INFO
- elif levelname == "WARNING":
- return LogSeverity.WARN
- elif levelname == "ERROR":
- return LogSeverity.ERROR
- elif levelname == "CRITICAL":
- return LogSeverity.CRITICAL
- else:
- raise ValueError(f"Unknown log level: {levelname}")
-
-
-# TODO: ideally, the actual emitting should be done inside a separate daemon
-# process completely isolated from the server
-class TelemetryHandler(logging.Handler):
- def emit(self, record: logging.LogRecord) -> None:
- # horrendous hack to avoid logging from asyncio and getting into an infinite loop
- if record.module in ("asyncio", "selector_events"):
- return
-
- global CURRENT_TRACE_CONTEXT
- context = CURRENT_TRACE_CONTEXT.get()
- if context is None:
- return
-
- span = context.get_current_span()
- if span is None:
- return
-
- enqueue_event(
- UnstructuredLogEvent(
- trace_id=span.trace_id,
- span_id=span.span_id,
- timestamp=datetime.now(UTC),
- message=self.format(record),
- severity=severity(record.levelname),
- )
- )
-
- def close(self) -> None:
- pass
-
-
-class SpanContextManager:
- def __init__(self, name: str, attributes: dict[str, Any] | None = None):
- self.name = name
- self.attributes = attributes
- self.span: Span | None = None
-
- def __enter__(self) -> Self:
- global CURRENT_TRACE_CONTEXT
- context = CURRENT_TRACE_CONTEXT.get()
- if not context:
- logger.debug("No trace context to push span")
- return self
-
- self.span = context.push_span(self.name, self.attributes)
- return self
-
- def __exit__(self, exc_type, exc_value, traceback) -> None:
- global CURRENT_TRACE_CONTEXT
- context = CURRENT_TRACE_CONTEXT.get()
- if not context:
- logger.debug("No trace context to pop span")
- return
-
- context.pop_span()
-
- def set_attribute(self, key: str, value: Any) -> None:
- if self.span:
- if self.span.attributes is None:
- self.span.attributes = {}
- self.span.attributes[key] = serialize_value(value)
-
- async def __aenter__(self) -> Self:
- global CURRENT_TRACE_CONTEXT
- context = CURRENT_TRACE_CONTEXT.get()
- if not context:
- logger.debug("No trace context to push span")
- return self
-
- self.span = context.push_span(self.name, self.attributes)
- return self
-
- async def __aexit__(self, exc_type, exc_value, traceback) -> None:
- global CURRENT_TRACE_CONTEXT
- context = CURRENT_TRACE_CONTEXT.get()
- if not context:
- logger.debug("No trace context to pop span")
- return
-
- context.pop_span()
-
- def __call__(self, func: Callable[..., Any]) -> Callable[..., Any]:
- @wraps(func)
- def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
- with self:
- return func(*args, **kwargs)
-
- @wraps(func)
- async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
- async with self:
- return await func(*args, **kwargs)
-
- @wraps(func)
- def wrapper(*args: Any, **kwargs: Any) -> Any:
- if asyncio.iscoroutinefunction(func):
- return async_wrapper(*args, **kwargs)
- else:
- return sync_wrapper(*args, **kwargs)
-
- return wrapper
-
-
-def span(name: str, attributes: dict[str, Any] | None = None) -> SpanContextManager:
- return SpanContextManager(name, attributes)
-
-
-def get_current_span() -> Span | None:
- global CURRENT_TRACE_CONTEXT
- if CURRENT_TRACE_CONTEXT is None:
- logger.debug("No trace context to get current span")
- return None
-
- context = CURRENT_TRACE_CONTEXT.get()
- if context:
- return context.get_current_span()
- return None
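
The module deleted above implemented a hand-rolled span stack (TraceContext, SpanContextManager) on top of a background logging queue. The rest of this patch replaces it with the OpenTelemetry SDK, whose tracer already manages the current-span stack via context vars. A minimal sketch of the replacement pattern, with an illustrative span name and attribute keys:

from opentelemetry import trace

tracer = trace.get_tracer(__name__)

def do_work() -> None:
    # start_as_current_span pushes/pops the span on OpenTelemetry's own
    # context-var stack, replacing the deleted TraceContext bookkeeping
    with tracer.start_as_current_span("do_work", attributes={"example.key": "value"}) as span:
        span.set_attribute("example.result", "ok")

With only opentelemetry-api installed this runs against a no-op tracer, so instrumented code stays safe to call whether or not an SDK exporter is configured.
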
diff --git a/src/llama_stack/core/utils/context.py b/src/llama_stack/core/utils/context.py
index e7c61a8ed..0c3e41f00 100644
--- a/src/llama_stack/core/utils/context.py
+++ b/src/llama_stack/core/utils/context.py
@@ -7,8 +7,6 @@
from collections.abc import AsyncGenerator
from contextvars import ContextVar
-from llama_stack.core.telemetry.tracing import CURRENT_TRACE_CONTEXT
-
_MISSING = object()
@@ -69,16 +67,12 @@ def preserve_contexts_async_generator[T](
try:
yield item
# Update our tracked values with any changes made during this iteration
- # Only for non-trace context vars - trace context must persist across yields
- # to allow nested span tracking for telemetry
+ # This allows context changes to persist across generator iterations
for context_var in context_vars:
- if context_var is not CURRENT_TRACE_CONTEXT:
- initial_context_values[context_var.name] = context_var.get()
+ initial_context_values[context_var.name] = context_var.get()
finally:
- # Restore non-trace context vars after each yield to prevent leaks between requests
- # CURRENT_TRACE_CONTEXT is NOT restored here to preserve telemetry span stack
+ # Restore context vars after each yield to prevent leaks between requests
for context_var in context_vars:
- if context_var is not CURRENT_TRACE_CONTEXT:
- _restore_context_var(context_var)
+ _restore_context_var(context_var)
return wrapper()
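
With spans now propagated by OpenTelemetry, the generator wrapper no longer needs to special-case CURRENT_TRACE_CONTEXT and can snapshot and restore every tracked variable uniformly. A self-contained sketch of that snapshot-and-restore shape (the helper name is illustrative, not the actual function):

from collections.abc import AsyncGenerator
from contextvars import ContextVar

async def restore_after_yield[T](
    gen: AsyncGenerator[T, None], context_vars: list[ContextVar]
) -> AsyncGenerator[T, None]:
    # Values captured when the wrapper is created (vars must have defaults set)
    initial = {cv: cv.get() for cv in context_vars}
    async for item in gen:
        try:
            yield item
            # Pick up changes made while the consumer held control
            for cv in context_vars:
                initial[cv] = cv.get()
        finally:
            # Restore uniformly after every yield to avoid cross-request leaks
            for cv, value in initial.items():
                cv.set(value)
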
diff --git a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml
index d942c23a4..8414dcae5 100644
--- a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml
+++ b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml
@@ -47,7 +47,7 @@ providers:
- provider_id: bedrock
provider_type: remote::bedrock
config:
- api_key: ${env.AWS_BEDROCK_API_KEY:=}
+ api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
- provider_id: ${env.NVIDIA_API_KEY:+nvidia}
provider_type: remote::nvidia
@@ -281,8 +281,6 @@ registered_resources:
provider_id: rag-runtime
server:
port: 8321
-telemetry:
- enabled: true
vector_stores:
default_provider_id: faiss
default_embedding_model:
diff --git a/src/llama_stack/distributions/ci-tests/run.yaml b/src/llama_stack/distributions/ci-tests/run.yaml
index 8b1cd2bb2..e83fc7fb5 100644
--- a/src/llama_stack/distributions/ci-tests/run.yaml
+++ b/src/llama_stack/distributions/ci-tests/run.yaml
@@ -47,7 +47,7 @@ providers:
- provider_id: bedrock
provider_type: remote::bedrock
config:
- api_key: ${env.AWS_BEDROCK_API_KEY:=}
+ api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
- provider_id: ${env.NVIDIA_API_KEY:+nvidia}
provider_type: remote::nvidia
@@ -272,8 +272,6 @@ registered_resources:
provider_id: rag-runtime
server:
port: 8321
-telemetry:
- enabled: true
vector_stores:
default_provider_id: faiss
default_embedding_model:
diff --git a/src/llama_stack/distributions/dell/run-with-safety.yaml b/src/llama_stack/distributions/dell/run-with-safety.yaml
index e0da8060d..63bd95168 100644
--- a/src/llama_stack/distributions/dell/run-with-safety.yaml
+++ b/src/llama_stack/distributions/dell/run-with-safety.yaml
@@ -140,5 +140,3 @@ registered_resources:
provider_id: rag-runtime
server:
port: 8321
-telemetry:
- enabled: true
diff --git a/src/llama_stack/distributions/dell/run.yaml b/src/llama_stack/distributions/dell/run.yaml
index bc3117d88..93f0c35bc 100644
--- a/src/llama_stack/distributions/dell/run.yaml
+++ b/src/llama_stack/distributions/dell/run.yaml
@@ -131,5 +131,3 @@ registered_resources:
provider_id: rag-runtime
server:
port: 8321
-telemetry:
- enabled: true
diff --git a/src/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml b/src/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml
index 2fa9d198b..63fc3b1d2 100644
--- a/src/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml
+++ b/src/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml
@@ -153,5 +153,3 @@ registered_resources:
provider_id: rag-runtime
server:
port: 8321
-telemetry:
- enabled: true
diff --git a/src/llama_stack/distributions/meta-reference-gpu/run.yaml b/src/llama_stack/distributions/meta-reference-gpu/run.yaml
index 5c7f75ca8..ba8235398 100644
--- a/src/llama_stack/distributions/meta-reference-gpu/run.yaml
+++ b/src/llama_stack/distributions/meta-reference-gpu/run.yaml
@@ -138,5 +138,3 @@ registered_resources:
provider_id: rag-runtime
server:
port: 8321
-telemetry:
- enabled: true
diff --git a/src/llama_stack/distributions/nvidia/run-with-safety.yaml b/src/llama_stack/distributions/nvidia/run-with-safety.yaml
index d2c7dd090..7d95565e5 100644
--- a/src/llama_stack/distributions/nvidia/run-with-safety.yaml
+++ b/src/llama_stack/distributions/nvidia/run-with-safety.yaml
@@ -135,5 +135,3 @@ registered_resources:
provider_id: rag-runtime
server:
port: 8321
-telemetry:
- enabled: true
diff --git a/src/llama_stack/distributions/nvidia/run.yaml b/src/llama_stack/distributions/nvidia/run.yaml
index c267587c7..8c80b8303 100644
--- a/src/llama_stack/distributions/nvidia/run.yaml
+++ b/src/llama_stack/distributions/nvidia/run.yaml
@@ -114,5 +114,3 @@ registered_resources:
provider_id: rag-runtime
server:
port: 8321
-telemetry:
- enabled: true
diff --git a/src/llama_stack/distributions/oci/run.yaml b/src/llama_stack/distributions/oci/run.yaml
index e385ec606..ff0c818be 100644
--- a/src/llama_stack/distributions/oci/run.yaml
+++ b/src/llama_stack/distributions/oci/run.yaml
@@ -132,5 +132,3 @@ registered_resources:
provider_id: tavily-search
server:
port: 8321
-telemetry:
- enabled: true
diff --git a/src/llama_stack/distributions/open-benchmark/run.yaml b/src/llama_stack/distributions/open-benchmark/run.yaml
index 7ebc58841..43aa45b51 100644
--- a/src/llama_stack/distributions/open-benchmark/run.yaml
+++ b/src/llama_stack/distributions/open-benchmark/run.yaml
@@ -251,5 +251,3 @@ registered_resources:
provider_id: rag-runtime
server:
port: 8321
-telemetry:
- enabled: true
diff --git a/src/llama_stack/distributions/postgres-demo/run.yaml b/src/llama_stack/distributions/postgres-demo/run.yaml
index 049f519cd..c9316f923 100644
--- a/src/llama_stack/distributions/postgres-demo/run.yaml
+++ b/src/llama_stack/distributions/postgres-demo/run.yaml
@@ -114,5 +114,3 @@ registered_resources:
provider_id: rag-runtime
server:
port: 8321
-telemetry:
- enabled: true
diff --git a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml
index 75cc9d188..0662986f1 100644
--- a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml
+++ b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml
@@ -47,7 +47,7 @@ providers:
- provider_id: bedrock
provider_type: remote::bedrock
config:
- api_key: ${env.AWS_BEDROCK_API_KEY:=}
+ api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
- provider_id: ${env.NVIDIA_API_KEY:+nvidia}
provider_type: remote::nvidia
@@ -284,8 +284,6 @@ registered_resources:
provider_id: rag-runtime
server:
port: 8321
-telemetry:
- enabled: true
vector_stores:
default_provider_id: faiss
default_embedding_model:
diff --git a/src/llama_stack/distributions/starter-gpu/run.yaml b/src/llama_stack/distributions/starter-gpu/run.yaml
index 09c7be5a1..9ef5b3f6d 100644
--- a/src/llama_stack/distributions/starter-gpu/run.yaml
+++ b/src/llama_stack/distributions/starter-gpu/run.yaml
@@ -47,7 +47,7 @@ providers:
- provider_id: bedrock
provider_type: remote::bedrock
config:
- api_key: ${env.AWS_BEDROCK_API_KEY:=}
+ api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
- provider_id: ${env.NVIDIA_API_KEY:+nvidia}
provider_type: remote::nvidia
@@ -275,8 +275,6 @@ registered_resources:
provider_id: rag-runtime
server:
port: 8321
-telemetry:
- enabled: true
vector_stores:
default_provider_id: faiss
default_embedding_model:
diff --git a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml
index f59c809d2..1da4f0da7 100644
--- a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml
+++ b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml
@@ -47,7 +47,7 @@ providers:
- provider_id: bedrock
provider_type: remote::bedrock
config:
- api_key: ${env.AWS_BEDROCK_API_KEY:=}
+ api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
- provider_id: ${env.NVIDIA_API_KEY:+nvidia}
provider_type: remote::nvidia
@@ -281,8 +281,6 @@ registered_resources:
provider_id: rag-runtime
server:
port: 8321
-telemetry:
- enabled: true
vector_stores:
default_provider_id: faiss
default_embedding_model:
diff --git a/src/llama_stack/distributions/starter/run.yaml b/src/llama_stack/distributions/starter/run.yaml
index 435bb22a7..3e6cde13a 100644
--- a/src/llama_stack/distributions/starter/run.yaml
+++ b/src/llama_stack/distributions/starter/run.yaml
@@ -47,7 +47,7 @@ providers:
- provider_id: bedrock
provider_type: remote::bedrock
config:
- api_key: ${env.AWS_BEDROCK_API_KEY:=}
+ api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
- provider_id: ${env.NVIDIA_API_KEY:+nvidia}
provider_type: remote::nvidia
@@ -272,8 +272,6 @@ registered_resources:
provider_id: rag-runtime
server:
port: 8321
-telemetry:
- enabled: true
vector_stores:
default_provider_id: faiss
default_embedding_model:
diff --git a/src/llama_stack/distributions/template.py b/src/llama_stack/distributions/template.py
index 90b458805..bab3211e9 100644
--- a/src/llama_stack/distributions/template.py
+++ b/src/llama_stack/distributions/template.py
@@ -24,7 +24,6 @@ from llama_stack.core.datatypes import (
Provider,
SafetyConfig,
ShieldInput,
- TelemetryConfig,
ToolGroupInput,
VectorStoresConfig,
)
@@ -189,7 +188,6 @@ class RunConfigSettings(BaseModel):
default_benchmarks: list[BenchmarkInput] | None = None
vector_stores_config: VectorStoresConfig | None = None
safety_config: SafetyConfig | None = None
- telemetry: TelemetryConfig = Field(default_factory=lambda: TelemetryConfig(enabled=True))
storage_backends: dict[str, Any] | None = None
storage_stores: dict[str, Any] | None = None
@@ -289,7 +287,6 @@ class RunConfigSettings(BaseModel):
"server": {
"port": 8321,
},
- "telemetry": self.telemetry.model_dump(exclude_none=True) if self.telemetry else None,
}
if self.vector_stores_config:
diff --git a/src/llama_stack/distributions/watsonx/run.yaml b/src/llama_stack/distributions/watsonx/run.yaml
index f8c489fe3..55ea34cb6 100644
--- a/src/llama_stack/distributions/watsonx/run.yaml
+++ b/src/llama_stack/distributions/watsonx/run.yaml
@@ -132,5 +132,3 @@ registered_resources:
provider_id: rag-runtime
server:
port: 8321
-telemetry:
- enabled: true
diff --git a/src/llama_stack/log.py b/src/llama_stack/log.py
index c11c2c06f..a44a0ac26 100644
--- a/src/llama_stack/log.py
+++ b/src/llama_stack/log.py
@@ -37,7 +37,6 @@ CATEGORIES = [
"eval",
"tools",
"client",
- "telemetry",
"openai",
"openai_responses",
"openai_conversations",
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/__init__.py b/src/llama_stack/providers/inline/agents/meta_reference/__init__.py
index 9683baf00..c9c7d348a 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/__init__.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/__init__.py
@@ -15,7 +15,6 @@ async def get_provider_impl(
config: MetaReferenceAgentsImplConfig,
deps: dict[Api, Any],
policy: list[AccessRule],
- telemetry_enabled: bool = False,
):
from .agents import MetaReferenceAgentsImpl
@@ -29,7 +28,6 @@ async def get_provider_impl(
deps[Api.conversations],
deps[Api.prompts],
deps[Api.files],
- telemetry_enabled,
policy,
)
await impl.initialize()
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/agents.py b/src/llama_stack/providers/inline/agents/meta_reference/agents.py
index ca419a51a..39cc22be7 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/agents.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/agents.py
@@ -50,7 +50,6 @@ class MetaReferenceAgentsImpl(Agents):
prompts_api: Prompts,
files_api: Files,
policy: list[AccessRule],
- telemetry_enabled: bool = False,
):
self.config = config
self.inference_api = inference_api
@@ -59,7 +58,6 @@ class MetaReferenceAgentsImpl(Agents):
self.tool_runtime_api = tool_runtime_api
self.tool_groups_api = tool_groups_api
self.conversations_api = conversations_api
- self.telemetry_enabled = telemetry_enabled
self.prompts_api = prompts_api
self.files_api = files_api
self.in_memory_store = InmemoryKVStoreImpl()
@@ -111,6 +109,7 @@ class MetaReferenceAgentsImpl(Agents):
max_infer_iters: int | None = 10,
guardrails: list[ResponseGuardrail] | None = None,
max_tool_calls: int | None = None,
+ metadata: dict[str, str] | None = None,
) -> OpenAIResponseObject:
assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
result = await self.openai_responses_impl.create_openai_response(
@@ -130,6 +129,7 @@ class MetaReferenceAgentsImpl(Agents):
guardrails,
parallel_tool_calls,
max_tool_calls,
+ metadata,
)
return result # type: ignore[no-any-return]
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
index c8282df69..9cf30908c 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
@@ -336,6 +336,7 @@ class OpenAIResponsesImpl:
guardrails: list[str | ResponseGuardrailSpec] | None = None,
parallel_tool_calls: bool | None = None,
max_tool_calls: int | None = None,
+ metadata: dict[str, str] | None = None,
):
stream = bool(stream)
text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) if text is None else text
@@ -390,6 +391,7 @@ class OpenAIResponsesImpl:
guardrail_ids=guardrail_ids,
parallel_tool_calls=parallel_tool_calls,
max_tool_calls=max_tool_calls,
+ metadata=metadata,
)
if stream:
@@ -442,6 +444,7 @@ class OpenAIResponsesImpl:
guardrail_ids: list[str] | None = None,
parallel_tool_calls: bool | None = True,
max_tool_calls: int | None = None,
+ metadata: dict[str, str] | None = None,
) -> AsyncIterator[OpenAIResponseObjectStream]:
# These should never be None when called from create_openai_response (which sets defaults)
# but we assert here to help mypy understand the types
@@ -490,6 +493,7 @@ class OpenAIResponsesImpl:
guardrail_ids=guardrail_ids,
instructions=instructions,
max_tool_calls=max_tool_calls,
+ metadata=metadata,
)
# Stream the response
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
index 9e901d88b..c778d65e7 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
@@ -8,7 +8,8 @@ import uuid
from collections.abc import AsyncIterator
from typing import Any
-from llama_stack.core.telemetry import tracing
+from opentelemetry import trace
+
from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
from llama_stack_api import (
@@ -79,6 +80,7 @@ from .utils import (
)
logger = get_logger(name=__name__, category="agents::meta_reference")
+tracer = trace.get_tracer(__name__)
def convert_tooldef_to_chat_tool(tool_def):
@@ -118,6 +120,7 @@ class StreamingResponseOrchestrator:
prompt: OpenAIResponsePrompt | None = None,
parallel_tool_calls: bool | None = None,
max_tool_calls: int | None = None,
+ metadata: dict[str, str] | None = None,
):
self.inference_api = inference_api
self.ctx = ctx
@@ -135,6 +138,7 @@ class StreamingResponseOrchestrator:
self.parallel_tool_calls = parallel_tool_calls
# Max number of total calls to built-in tools that can be processed in a response
self.max_tool_calls = max_tool_calls
+ self.metadata = metadata
self.sequence_number = 0
# Store MCP tool mapping that gets built during tool processing
self.mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] = (
@@ -162,6 +166,7 @@ class StreamingResponseOrchestrator:
model=self.ctx.model,
status="completed",
output=[OpenAIResponseMessage(role="assistant", content=[refusal_content], type="message")],
+ metadata=self.metadata,
)
return OpenAIResponseObjectStreamResponseCompleted(response=refusal_response)
@@ -197,6 +202,7 @@ class StreamingResponseOrchestrator:
prompt=self.prompt,
parallel_tool_calls=self.parallel_tool_calls,
max_tool_calls=self.max_tool_calls,
+ metadata=self.metadata,
)
async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:
@@ -1106,8 +1112,10 @@ class StreamingResponseOrchestrator:
"server_url": mcp_tool.server_url,
"mcp_list_tools_id": list_id,
}
- # List MCP tools with authorization from tool config
- async with tracing.span("list_mcp_tools", attributes):
+
+ # TODO: follow semantic conventions for OpenTelemetry tool spans
+ # https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#execute-tool-span
+ with tracer.start_as_current_span("list_mcp_tools", attributes=attributes):
tool_defs = await list_mcp_tools(
endpoint=mcp_tool.server_url,
headers=mcp_tool.headers,
@@ -1183,9 +1191,9 @@ class StreamingResponseOrchestrator:
if mcp_server.require_approval == "never":
return False
if isinstance(mcp_server, ApprovalFilter):
- if tool_name in mcp_server.always:
+ if mcp_server.always and tool_name in mcp_server.always:
return True
- if tool_name in mcp_server.never:
+ if mcp_server.never and tool_name in mcp_server.never:
return False
return True
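
The orchestrator now opens tool spans through the OpenTelemetry tracer directly instead of the deleted tracing.span helper. A minimal sketch of the pattern; the span name and attribute key mirror the ones above but are otherwise illustrative:

from opentelemetry import trace

tracer = trace.get_tracer(__name__)

async def list_tools_traced(server_url: str) -> list:
    attributes = {"server_url": server_url}
    # The context manager is synchronous to enter/exit, but the body may await;
    # the span remains current across the await because it rides on context vars
    with tracer.start_as_current_span("list_mcp_tools", attributes=attributes):
        return []  # stand-in for the actual MCP listing call
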
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
index 4f294a979..d27a0f8ad 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
@@ -9,7 +9,8 @@ import json
from collections.abc import AsyncIterator
from typing import Any
-from llama_stack.core.telemetry import tracing
+from opentelemetry import trace
+
from llama_stack.log import get_logger
from llama_stack_api import (
ImageContentItem,
@@ -42,6 +43,7 @@ from llama_stack_api import (
from .types import ChatCompletionContext, ToolExecutionResult
logger = get_logger(name=__name__, category="agents::meta_reference")
+tracer = trace.get_tracer(__name__)
class ToolExecutor:
@@ -296,8 +298,9 @@ class ToolExecutor:
"server_url": mcp_tool.server_url,
"tool_name": function_name,
}
- # Invoke MCP tool with authorization from tool config
- async with tracing.span("invoke_mcp_tool", attributes):
+ # TODO: follow semantic conventions for OpenTelemetry tool spans
+ # https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#execute-tool-span
+ with tracer.start_as_current_span("invoke_mcp_tool", attributes=attributes):
result = await invoke_mcp_tool(
endpoint=mcp_tool.server_url,
tool_name=function_name,
@@ -318,7 +321,7 @@ class ToolExecutor:
# Use vector_stores.search API instead of knowledge_search tool
# to support filters and ranking_options
query = tool_kwargs.get("query", "")
- async with tracing.span("knowledge_search", {}):
+ with tracer.start_as_current_span("knowledge_search"):
result = await self._execute_knowledge_search_via_vector_store(
query=query,
response_file_search_tool=response_file_search_tool,
@@ -327,7 +330,9 @@ class ToolExecutor:
attributes = {
"tool_name": function_name,
}
- async with tracing.span("invoke_tool", attributes):
+ # TODO: follow semantic conventions for OpenTelemetry tool spans
+ # https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#execute-tool-span
+ with tracer.start_as_current_span("invoke_tool", attributes=attributes):
result = await self.tool_runtime_api.invoke_tool(
tool_name=function_name,
kwargs=tool_kwargs,
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/safety.py b/src/llama_stack/providers/inline/agents/meta_reference/safety.py
index bfb557a99..123a2e283 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/safety.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/safety.py
@@ -6,7 +6,6 @@
import asyncio
-from llama_stack.core.telemetry import tracing
from llama_stack.log import get_logger
from llama_stack_api import OpenAIMessageParam, Safety, SafetyViolation, ViolationLevel
@@ -31,15 +30,12 @@ class ShieldRunnerMixin:
self.output_shields = output_shields
async def run_multiple_shields(self, messages: list[OpenAIMessageParam], identifiers: list[str]) -> None:
- async def run_shield_with_span(identifier: str):
- async with tracing.span(f"run_shield_{identifier}"):
- return await self.safety_api.run_shield(
- shield_id=identifier,
- messages=messages,
- params={},
- )
-
- responses = await asyncio.gather(*[run_shield_with_span(identifier) for identifier in identifiers])
+ responses = await asyncio.gather(
+ *[
+ self.safety_api.run_shield(shield_id=identifier, messages=messages, params={})
+ for identifier in identifiers
+ ]
+ )
for identifier, response in zip(identifiers, responses, strict=False):
if not response.violation:
continue
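
run_multiple_shields now fans the shields out concurrently without wrapping each call in a custom span. A runnable sketch of the asyncio.gather shape, with illustrative shield IDs standing in for the Safety API call:

import asyncio

async def run_shield(shield_id: str) -> str:
    await asyncio.sleep(0)  # stand-in for the Safety API call
    return f"{shield_id}: no violation"

async def run_all(shield_ids: list[str]) -> list[str]:
    # One task per shield; results come back in the same order as the input
    return await asyncio.gather(*(run_shield(s) for s in shield_ids))

print(asyncio.run(run_all(["llama-guard", "prompt-guard"])))
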
diff --git a/src/llama_stack/providers/remote/inference/bedrock/bedrock.py b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py
index 70ee95916..a890a568e 100644
--- a/src/llama_stack/providers/remote/inference/bedrock/bedrock.py
+++ b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py
@@ -8,7 +8,6 @@ from collections.abc import AsyncIterator, Iterable
from openai import AuthenticationError
-from llama_stack.core.telemetry.tracing import get_current_span
from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
from llama_stack_api import (
@@ -37,7 +36,7 @@ class BedrockInferenceAdapter(OpenAIMixin):
"""
config: BedrockConfig
- provider_data_api_key_field: str = "aws_bedrock_api_key"
+ provider_data_api_key_field: str = "aws_bearer_token_bedrock"
def get_base_url(self) -> str:
"""Get base URL for OpenAI client."""
@@ -84,7 +83,7 @@ class BedrockInferenceAdapter(OpenAIMixin):
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
"""Override to enable streaming usage metrics and handle authentication errors."""
# Enable streaming usage metrics when telemetry is active
- if params.stream and get_current_span() is not None:
+ if params.stream:
if params.stream_options is None:
params.stream_options = {"include_usage": True}
elif "include_usage" not in params.stream_options:
@@ -111,7 +110,7 @@ class BedrockInferenceAdapter(OpenAIMixin):
logger.error(f"AWS Bedrock authentication token expired: {error_msg}")
raise ValueError(
"AWS Bedrock authentication failed: Bearer token has expired. "
- "The AWS_BEDROCK_API_KEY environment variable contains an expired pre-signed URL. "
+ "The AWS_BEARER_TOKEN_BEDROCK environment variable contains an expired pre-signed URL. "
"Please refresh your token by generating a new pre-signed URL with AWS credentials. "
"Refer to AWS Bedrock documentation for details on OpenAI-compatible endpoints."
) from e
diff --git a/src/llama_stack/providers/remote/inference/bedrock/config.py b/src/llama_stack/providers/remote/inference/bedrock/config.py
index 631a6e7ef..f31db63aa 100644
--- a/src/llama_stack/providers/remote/inference/bedrock/config.py
+++ b/src/llama_stack/providers/remote/inference/bedrock/config.py
@@ -12,9 +12,9 @@ from llama_stack.providers.utils.inference.model_registry import RemoteInference
class BedrockProviderDataValidator(BaseModel):
- aws_bedrock_api_key: str | None = Field(
+ aws_bearer_token_bedrock: str | None = Field(
default=None,
- description="API key for Amazon Bedrock",
+ description="API Key (Bearer token) for Amazon Bedrock",
)
@@ -27,6 +27,6 @@ class BedrockConfig(RemoteInferenceProviderConfig):
@classmethod
def sample_run_config(cls, **kwargs):
return {
- "api_key": "${env.AWS_BEDROCK_API_KEY:=}",
+ "api_key": "${env.AWS_BEARER_TOKEN_BEDROCK:=}",
"region_name": "${env.AWS_DEFAULT_REGION:=us-east-2}",
}
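
The rename aligns the provider with the environment variable AWS uses for Bedrock API keys (bearer tokens) rather than a project-specific name. A short sketch of exporting it before the stack resolves the ${env.AWS_BEARER_TOKEN_BEDROCK:=} interpolation; the values are placeholders:

import os

# Placeholder values; in practice the token comes from your AWS tooling
os.environ["AWS_BEARER_TOKEN_BEDROCK"] = "<bearer-token>"
os.environ.setdefault("AWS_DEFAULT_REGION", "us-east-2")
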
diff --git a/src/llama_stack/providers/remote/inference/vertexai/vertexai.py b/src/llama_stack/providers/remote/inference/vertexai/vertexai.py
index b91430fd0..7941f8c89 100644
--- a/src/llama_stack/providers/remote/inference/vertexai/vertexai.py
+++ b/src/llama_stack/providers/remote/inference/vertexai/vertexai.py
@@ -51,4 +51,4 @@ class VertexAIInferenceAdapter(OpenAIMixin):
:return: An iterable of model IDs
"""
- return ["vertexai/gemini-2.0-flash", "vertexai/gemini-2.5-flash", "vertexai/gemini-2.5-pro"]
+ return ["google/gemini-2.0-flash", "google/gemini-2.5-flash", "google/gemini-2.5-pro"]
diff --git a/src/llama_stack/providers/remote/inference/watsonx/watsonx.py b/src/llama_stack/providers/remote/inference/watsonx/watsonx.py
index 5684f6c17..2fcda370a 100644
--- a/src/llama_stack/providers/remote/inference/watsonx/watsonx.py
+++ b/src/llama_stack/providers/remote/inference/watsonx/watsonx.py
@@ -10,7 +10,6 @@ from typing import Any
import litellm
import requests
-from llama_stack.core.telemetry.tracing import get_current_span
from llama_stack.log import get_logger
from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig
from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
@@ -59,7 +58,7 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
# Add usage tracking for streaming when telemetry is active
stream_options = params.stream_options
- if params.stream and get_current_span() is not None:
+ if params.stream:
if stream_options is None:
stream_options = {"include_usage": True}
elif "include_usage" not in stream_options:
diff --git a/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py
index c462d1aad..47c68ff0a 100644
--- a/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py
+++ b/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py
@@ -217,10 +217,9 @@ class LiteLLMOpenAIMixin(
params: OpenAIChatCompletionRequestWithExtraBody,
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
# Add usage tracking for streaming when telemetry is active
- from llama_stack.core.telemetry.tracing import get_current_span
stream_options = params.stream_options
- if params.stream and get_current_span() is not None:
+ if params.stream:
if stream_options is None:
stream_options = {"include_usage": True}
elif "include_usage" not in stream_options:
diff --git a/src/llama_stack/providers/utils/tools/mcp.py b/src/llama_stack/providers/utils/tools/mcp.py
index 9c5e9cd96..05cdfa73b 100644
--- a/src/llama_stack/providers/utils/tools/mcp.py
+++ b/src/llama_stack/providers/utils/tools/mcp.py
@@ -89,6 +89,7 @@ async def client_wrapper(endpoint: str, headers: dict[str, str]) -> AsyncGenerat
# sse_client and streamablehttp_client have different signatures, but both
# are called the same way here, so we cast to Any to avoid type errors
client = cast(Any, sse_client)
+
async with client(endpoint, headers=headers) as client_streams:
async with ClientSession(read_stream=client_streams[0], write_stream=client_streams[1]) as session:
await session.initialize()
diff --git a/src/llama_stack/telemetry/__init__.py b/src/llama_stack/telemetry/__init__.py
new file mode 100644
index 000000000..756f351d8
--- /dev/null
+++ b/src/llama_stack/telemetry/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
diff --git a/src/llama_stack/telemetry/constants.py b/src/llama_stack/telemetry/constants.py
new file mode 100644
index 000000000..1d3db0742
--- /dev/null
+++ b/src/llama_stack/telemetry/constants.py
@@ -0,0 +1,27 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""
+This file contains constants used for naming data captured for telemetry.
+
+This is used to ensure that the data captured for telemetry is consistent and can be used to
+identify and correlate data. If custom telemetry data is added to Llama Stack, please add
+constants for it here.
+"""
+
+llama_stack_prefix = "llama_stack"
+
+# Safety Attributes
+RUN_SHIELD_OPERATION_NAME = "run_shield"
+
+SAFETY_REQUEST_PREFIX = f"{llama_stack_prefix}.safety.request"
+SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE = f"{SAFETY_REQUEST_PREFIX}.shield_id"
+SAFETY_REQUEST_MESSAGES_ATTRIBUTE = f"{SAFETY_REQUEST_PREFIX}.messages"
+
+SAFETY_RESPONSE_PREFIX = f"{llama_stack_prefix}.safety.response"
+SAFETY_RESPONSE_METADATA_ATTRIBUTE = f"{SAFETY_RESPONSE_PREFIX}.metadata"
+SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE = f"{SAFETY_RESPONSE_PREFIX}.violation.level"
+SAFETY_RESPONSE_USER_MESSAGE_ATTRIBUTE = f"{SAFETY_RESPONSE_PREFIX}.violation.user_message"
diff --git a/src/llama_stack/telemetry/helpers.py b/src/llama_stack/telemetry/helpers.py
new file mode 100644
index 000000000..2ae13c9c5
--- /dev/null
+++ b/src/llama_stack/telemetry/helpers.py
@@ -0,0 +1,43 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import json
+
+from opentelemetry import trace
+
+from llama_stack_api import OpenAIMessageParam, RunShieldResponse
+
+from .constants import (
+ RUN_SHIELD_OPERATION_NAME,
+ SAFETY_REQUEST_MESSAGES_ATTRIBUTE,
+ SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE,
+ SAFETY_RESPONSE_METADATA_ATTRIBUTE,
+ SAFETY_RESPONSE_USER_MESSAGE_ATTRIBUTE,
+ SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE,
+)
+
+
+def safety_span_name(shield_id: str) -> str:
+ return f"{RUN_SHIELD_OPERATION_NAME} {shield_id}"
+
+
+# TODO: Consider using wrapt to automatically instrument code.
+# This is the industry-standard way to package automatic instrumentation in Python.
+def safety_request_span_attributes(
+ shield_id: str, messages: list[OpenAIMessageParam], response: RunShieldResponse
+) -> None:
+ span = trace.get_current_span()
+ span.set_attribute(SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE, shield_id)
+ messages_json = json.dumps([msg.model_dump() for msg in messages])
+ span.set_attribute(SAFETY_REQUEST_MESSAGES_ATTRIBUTE, messages_json)
+
+ if response.violation:
+ if response.violation.metadata:
+ metadata_json = json.dumps(response.violation.metadata)
+ span.set_attribute(SAFETY_RESPONSE_METADATA_ATTRIBUTE, metadata_json)
+ if response.violation.user_message:
+ span.set_attribute(SAFETY_RESPONSE_USER_MESSAGE_ATTRIBUTE, response.violation.user_message)
+ span.set_attribute(SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE, response.violation.violation_level.value)
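
A hedged sketch of how a safety provider might combine these helpers: open a span named by safety_span_name, run the shield, then record the request/response attributes on the current span. The safety_api wiring is illustrative:

from opentelemetry import trace

from llama_stack.telemetry.helpers import safety_request_span_attributes, safety_span_name

tracer = trace.get_tracer(__name__)

async def traced_run_shield(safety_api, shield_id, messages):
    # Span name follows the "run_shield <shield_id>" convention above
    with tracer.start_as_current_span(safety_span_name(shield_id)):
        response = await safety_api.run_shield(shield_id=shield_id, messages=messages, params={})
        # Attach request and violation details to the span just opened
        safety_request_span_attributes(shield_id, messages, response)
        return response
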
diff --git a/src/llama_stack_api/agents.py b/src/llama_stack_api/agents.py
index 9b767608a..8d3b489e1 100644
--- a/src/llama_stack_api/agents.py
+++ b/src/llama_stack_api/agents.py
@@ -89,6 +89,7 @@ class Agents(Protocol):
),
] = None,
max_tool_calls: int | None = None,
+ metadata: dict[str, str] | None = None,
) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
"""Create a model response.
@@ -100,6 +101,7 @@ class Agents(Protocol):
:param include: (Optional) Additional fields to include in the response.
:param guardrails: (Optional) List of guardrails to apply during response generation. Can be guardrail IDs (strings) or guardrail specifications.
:param max_tool_calls: (Optional) Max number of total calls to built-in tools that can be processed in a response.
+ :param metadata: (Optional) Dictionary of metadata key-value pairs to attach to the response.
:returns: An OpenAIResponseObject.
"""
...
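
Since the Responses API is OpenAI-compatible, the new metadata parameter can be exercised with a stock OpenAI client pointed at a running server. A hedged sketch; the base URL, model ID, and metadata keys are illustrative:

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")
response = client.responses.create(
    model="llama3.2:3b",
    input="Hello!",
    metadata={"session": "demo"},  # echoed back on the response object
)
print(response.metadata)
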
diff --git a/src/llama_stack_api/common/tracing.py b/src/llama_stack_api/common/tracing.py
deleted file mode 100644
index 830c2945a..000000000
--- a/src/llama_stack_api/common/tracing.py
+++ /dev/null
@@ -1,22 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-
-def telemetry_traceable(cls):
- """
- Mark a protocol for automatic tracing when telemetry is enabled.
-
- This is a metadata-only decorator with no dependencies on core.
- Actual tracing is applied by core routers at runtime if telemetry is enabled.
-
- Usage:
- @runtime_checkable
- @telemetry_traceable
- class MyProtocol(Protocol):
- ...
- """
- cls.__marked_for_tracing__ = True
- return cls
diff --git a/src/llama_stack_api/conversations.py b/src/llama_stack_api/conversations.py
index 4854181d1..81b5ab2c4 100644
--- a/src/llama_stack_api/conversations.py
+++ b/src/llama_stack_api/conversations.py
@@ -9,7 +9,6 @@ from typing import Annotated, Literal, Protocol, runtime_checkable
from pydantic import BaseModel, Field
-from llama_stack_api.common.tracing import telemetry_traceable
from llama_stack_api.openai_responses import (
OpenAIResponseInputFunctionToolCallOutput,
OpenAIResponseMCPApprovalRequest,
@@ -157,7 +156,6 @@ class ConversationItemDeletedResource(BaseModel):
@runtime_checkable
-@telemetry_traceable
class Conversations(Protocol):
"""Conversations
diff --git a/src/llama_stack_api/files.py b/src/llama_stack_api/files.py
index 8a75a1c39..e515fe0ae 100644
--- a/src/llama_stack_api/files.py
+++ b/src/llama_stack_api/files.py
@@ -11,7 +11,6 @@ from fastapi import File, Form, Response, UploadFile
from pydantic import BaseModel, Field
from llama_stack_api.common.responses import Order
-from llama_stack_api.common.tracing import telemetry_traceable
from llama_stack_api.schema_utils import json_schema_type, webmethod
from llama_stack_api.version import LLAMA_STACK_API_V1
@@ -102,7 +101,6 @@ class OpenAIFileDeleteResponse(BaseModel):
@runtime_checkable
-@telemetry_traceable
class Files(Protocol):
"""Files
diff --git a/src/llama_stack_api/inference.py b/src/llama_stack_api/inference.py
index b42de95be..4a169486a 100644
--- a/src/llama_stack_api/inference.py
+++ b/src/llama_stack_api/inference.py
@@ -22,7 +22,6 @@ from llama_stack_api.common.content_types import InterleavedContent
from llama_stack_api.common.responses import (
Order,
)
-from llama_stack_api.common.tracing import telemetry_traceable
from llama_stack_api.models import Model
from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
from llama_stack_api.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
@@ -989,7 +988,6 @@ class OpenAIEmbeddingsRequestWithExtraBody(BaseModel, extra="allow"):
@runtime_checkable
-@telemetry_traceable
class InferenceProvider(Protocol):
"""
This protocol defines the interface that should be implemented by all inference providers.
diff --git a/src/llama_stack_api/models.py b/src/llama_stack_api/models.py
index 98c16b6c2..3efdfe66b 100644
--- a/src/llama_stack_api/models.py
+++ b/src/llama_stack_api/models.py
@@ -9,7 +9,6 @@ from typing import Any, Literal, Protocol, runtime_checkable
from pydantic import BaseModel, ConfigDict, Field, field_validator
-from llama_stack_api.common.tracing import telemetry_traceable
from llama_stack_api.resource import Resource, ResourceType
from llama_stack_api.schema_utils import json_schema_type, webmethod
from llama_stack_api.version import LLAMA_STACK_API_V1
@@ -106,7 +105,6 @@ class OpenAIListModelsResponse(BaseModel):
@runtime_checkable
-@telemetry_traceable
class Models(Protocol):
async def list_models(self) -> ListModelsResponse:
"""List all models.
diff --git a/src/llama_stack_api/openai_responses.py b/src/llama_stack_api/openai_responses.py
index e20004487..177d2314a 100644
--- a/src/llama_stack_api/openai_responses.py
+++ b/src/llama_stack_api/openai_responses.py
@@ -597,6 +597,7 @@ class OpenAIResponseObject(BaseModel):
:param usage: (Optional) Token usage information for the response
:param instructions: (Optional) System message inserted into the model's context
:param max_tool_calls: (Optional) Max number of total calls to built-in tools that can be processed in a response
+ :param metadata: (Optional) Dictionary of metadata key-value pairs
"""
created_at: int
@@ -619,6 +620,7 @@ class OpenAIResponseObject(BaseModel):
usage: OpenAIResponseUsage | None = None
instructions: str | None = None
max_tool_calls: int | None = None
+ metadata: dict[str, str] | None = None
@json_schema_type
diff --git a/src/llama_stack_api/prompts.py b/src/llama_stack_api/prompts.py
index 8562e4704..2054ccd30 100644
--- a/src/llama_stack_api/prompts.py
+++ b/src/llama_stack_api/prompts.py
@@ -10,7 +10,6 @@ from typing import Protocol, runtime_checkable
from pydantic import BaseModel, Field, field_validator, model_validator
-from llama_stack_api.common.tracing import telemetry_traceable
from llama_stack_api.schema_utils import json_schema_type, webmethod
from llama_stack_api.version import LLAMA_STACK_API_V1
@@ -93,7 +92,6 @@ class ListPromptsResponse(BaseModel):
@runtime_checkable
-@telemetry_traceable
class Prompts(Protocol):
"""Prompts
diff --git a/src/llama_stack_api/safety.py b/src/llama_stack_api/safety.py
index ef84be2ea..7b4f2af5c 100644
--- a/src/llama_stack_api/safety.py
+++ b/src/llama_stack_api/safety.py
@@ -9,7 +9,6 @@ from typing import Any, Protocol, runtime_checkable
from pydantic import BaseModel, Field
-from llama_stack_api.common.tracing import telemetry_traceable
from llama_stack_api.inference import OpenAIMessageParam
from llama_stack_api.schema_utils import json_schema_type, webmethod
from llama_stack_api.shields import Shield
@@ -94,7 +93,6 @@ class ShieldStore(Protocol):
@runtime_checkable
-@telemetry_traceable
class Safety(Protocol):
"""Safety
diff --git a/src/llama_stack_api/shields.py b/src/llama_stack_api/shields.py
index 19e412a5a..36ad2351b 100644
--- a/src/llama_stack_api/shields.py
+++ b/src/llama_stack_api/shields.py
@@ -8,7 +8,6 @@ from typing import Any, Literal, Protocol, runtime_checkable
from pydantic import BaseModel
-from llama_stack_api.common.tracing import telemetry_traceable
from llama_stack_api.resource import Resource, ResourceType
from llama_stack_api.schema_utils import json_schema_type, webmethod
from llama_stack_api.version import LLAMA_STACK_API_V1
@@ -49,7 +48,6 @@ class ListShieldsResponse(BaseModel):
@runtime_checkable
-@telemetry_traceable
class Shields(Protocol):
@webmethod(route="/shields", method="GET", level=LLAMA_STACK_API_V1)
async def list_shields(self) -> ListShieldsResponse:
diff --git a/src/llama_stack_api/tools.py b/src/llama_stack_api/tools.py
index 4dd5d55d2..2a2a4304c 100644
--- a/src/llama_stack_api/tools.py
+++ b/src/llama_stack_api/tools.py
@@ -11,7 +11,6 @@ from pydantic import BaseModel
from typing_extensions import runtime_checkable
from llama_stack_api.common.content_types import URL, InterleavedContent
-from llama_stack_api.common.tracing import telemetry_traceable
from llama_stack_api.resource import Resource, ResourceType
from llama_stack_api.schema_utils import json_schema_type, webmethod
from llama_stack_api.version import LLAMA_STACK_API_V1
@@ -109,7 +108,6 @@ class ListToolDefsResponse(BaseModel):
@runtime_checkable
-@telemetry_traceable
class ToolGroups(Protocol):
@webmethod(route="/toolgroups", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
async def register_tool_group(
@@ -128,7 +126,7 @@ class ToolGroups(Protocol):
"""
...
- @webmethod(route="/toolgroups/{toolgroup_id:path}", method="GET", level=LLAMA_STACK_API_V1)
+ @webmethod(route="/toolgroups/{toolgroup_id:path}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
async def get_tool_group(
self,
toolgroup_id: str,
@@ -140,7 +138,7 @@ class ToolGroups(Protocol):
"""
...
- @webmethod(route="/toolgroups", method="GET", level=LLAMA_STACK_API_V1)
+ @webmethod(route="/toolgroups", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
async def list_tool_groups(self) -> ListToolGroupsResponse:
"""List tool groups with optional provider.
@@ -148,7 +146,7 @@ class ToolGroups(Protocol):
"""
...
- @webmethod(route="/tools", method="GET", level=LLAMA_STACK_API_V1)
+ @webmethod(route="/tools", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
async def list_tools(self, toolgroup_id: str | None = None) -> ListToolDefsResponse:
"""List tools with optional tool group.
@@ -157,7 +155,7 @@ class ToolGroups(Protocol):
"""
...
- @webmethod(route="/tools/{tool_name:path}", method="GET", level=LLAMA_STACK_API_V1)
+ @webmethod(route="/tools/{tool_name:path}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
async def get_tool(
self,
tool_name: str,
@@ -191,12 +189,11 @@ class SpecialToolGroup(Enum):
@runtime_checkable
-@telemetry_traceable
class ToolRuntime(Protocol):
tool_store: ToolStore | None = None
# TODO: This needs to be renamed once the OpenAPI generator name conflict issue is fixed.
- @webmethod(route="/tool-runtime/list-tools", method="GET", level=LLAMA_STACK_API_V1)
+ @webmethod(route="/tool-runtime/list-tools", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
async def list_runtime_tools(
self,
tool_group_id: str | None = None,
@@ -212,7 +209,7 @@ class ToolRuntime(Protocol):
"""
...
- @webmethod(route="/tool-runtime/invoke", method="POST", level=LLAMA_STACK_API_V1)
+ @webmethod(route="/tool-runtime/invoke", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
async def invoke_tool(
self,
tool_name: str,
diff --git a/src/llama_stack_api/vector_io.py b/src/llama_stack_api/vector_io.py
index 135468d19..188ea3307 100644
--- a/src/llama_stack_api/vector_io.py
+++ b/src/llama_stack_api/vector_io.py
@@ -13,7 +13,6 @@ from typing import Annotated, Any, Literal, Protocol, runtime_checkable
from fastapi import Body, Query
from pydantic import BaseModel, Field, field_validator
-from llama_stack_api.common.tracing import telemetry_traceable
from llama_stack_api.inference import InterleavedContent
from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
from llama_stack_api.vector_stores import VectorStore
@@ -572,7 +571,6 @@ class VectorStoreTable(Protocol):
@runtime_checkable
-@telemetry_traceable
class VectorIO(Protocol):
vector_store_table: VectorStoreTable | None = None
diff --git a/tests/integration/inference/test_provider_data_routing.py b/tests/integration/inference/test_provider_data_routing.py
index e4a0a24b5..cf79c9f8f 100644
--- a/tests/integration/inference/test_provider_data_routing.py
+++ b/tests/integration/inference/test_provider_data_routing.py
@@ -17,7 +17,6 @@ from unittest.mock import AsyncMock, patch
import pytest
from llama_stack.core.library_client import LlamaStackAsLibraryClient
-from llama_stack.core.telemetry.telemetry import MetricEvent
from llama_stack_api import (
Api,
OpenAIAssistantMessageParam,
@@ -27,10 +26,6 @@ from llama_stack_api import (
)
-class OpenAIChatCompletionWithMetrics(OpenAIChatCompletion):
- metrics: list[MetricEvent] | None = None
-
-
def test_unregistered_model_routing_with_provider_data(client_with_models):
"""
Test that a model can be routed using provider_id/model_id format
@@ -72,7 +67,7 @@ def test_unregistered_model_routing_with_provider_data(client_with_models):
# The inference router's routing_table.impls_by_provider_id should have anthropic
# Let's patch the anthropic provider's openai_chat_completion method
# to avoid making real API calls
- mock_response = OpenAIChatCompletionWithMetrics(
+ mock_response = OpenAIChatCompletion(
id="chatcmpl-test-123",
created=1234567890,
model="claude-3-5-sonnet-20241022",
diff --git a/tests/integration/telemetry/collectors/in_memory.py b/tests/integration/telemetry/collectors/in_memory.py
index 7127b3816..9ff8de6f5 100644
--- a/tests/integration/telemetry/collectors/in_memory.py
+++ b/tests/integration/telemetry/collectors/in_memory.py
@@ -15,11 +15,10 @@ from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
-import llama_stack.core.telemetry.telemetry as telemetry_module
-
from .base import BaseTelemetryCollector, MetricStub, SpanStub
+# TODO: Fix this to work with Automatic Instrumentation
class InMemoryTelemetryCollector(BaseTelemetryCollector):
"""In-memory telemetry collector for library-client tests.
@@ -75,13 +74,10 @@ class InMemoryTelemetryManager:
meter_provider = MeterProvider(metric_readers=[metric_reader])
metrics.set_meter_provider(meter_provider)
- telemetry_module._TRACER_PROVIDER = tracer_provider
-
self.collector = InMemoryTelemetryCollector(span_exporter, metric_reader)
self._tracer_provider = tracer_provider
self._meter_provider = meter_provider
def shutdown(self) -> None:
- telemetry_module._TRACER_PROVIDER = None
self._tracer_provider.shutdown()
self._meter_provider.shutdown()
diff --git a/tests/integration/telemetry/conftest.py b/tests/integration/telemetry/conftest.py
index fd9224ae4..9448e40a0 100644
--- a/tests/integration/telemetry/conftest.py
+++ b/tests/integration/telemetry/conftest.py
@@ -15,6 +15,7 @@ from tests.integration.fixtures.common import instantiate_llama_stack_client
from tests.integration.telemetry.collectors import InMemoryTelemetryManager, OtlpHttpTestCollector
+# TODO: Fix this to work with Automatic Instrumentation
@pytest.fixture(scope="session")
def telemetry_test_collector():
stack_mode = os.environ.get("LLAMA_STACK_TEST_STACK_CONFIG_TYPE", "library_client")
@@ -48,6 +49,7 @@ def telemetry_test_collector():
manager.shutdown()
+# TODO: Fix this to work with Automatic Instrumentation
@pytest.fixture(scope="session")
def llama_stack_client(telemetry_test_collector, request):
"""Ensure telemetry collector is ready before initializing the stack client."""
diff --git a/tests/unit/cli/test_stack_config.py b/tests/unit/cli/test_stack_config.py
index 6aefac003..41570194c 100644
--- a/tests/unit/cli/test_stack_config.py
+++ b/tests/unit/cli/test_stack_config.py
@@ -155,9 +155,6 @@ def old_config():
provider_type: inline::meta-reference
config: {{}}
api_providers:
- telemetry:
- provider_type: noop
- config: {{}}
"""
)
@@ -181,7 +178,7 @@ def test_parse_and_maybe_upgrade_config_up_to_date(up_to_date_config):
def test_parse_and_maybe_upgrade_config_old_format(old_config):
result = parse_and_maybe_upgrade_config(old_config)
assert result.version == LLAMA_STACK_RUN_CONFIG_VERSION
- assert all(api in result.providers for api in ["inference", "safety", "memory", "telemetry"])
+ assert all(api in result.providers for api in ["inference", "safety", "memory"])
safety_provider = result.providers["safety"][0]
assert safety_provider.provider_type == "inline::meta-reference"
assert "llama_guard_shield" in safety_provider.config
diff --git a/tests/unit/providers/agents/meta_reference/test_safety_optional.py b/tests/unit/providers/agents/meta_reference/test_safety_optional.py
index c2311b68f..10b15b26d 100644
--- a/tests/unit/providers/agents/meta_reference/test_safety_optional.py
+++ b/tests/unit/providers/agents/meta_reference/test_safety_optional.py
@@ -83,7 +83,7 @@ class TestProviderInitialization:
new_callable=AsyncMock,
):
# Should not raise any exception
- provider = await get_provider_impl(config, mock_deps, policy=[], telemetry_enabled=False)
+ provider = await get_provider_impl(config, mock_deps, policy=[])
assert provider is not None
async def test_initialization_without_safety_api(self, mock_persistence_config, mock_deps):
@@ -97,7 +97,7 @@ class TestProviderInitialization:
new_callable=AsyncMock,
):
# Should not raise any exception
- provider = await get_provider_impl(config, mock_deps, policy=[], telemetry_enabled=False)
+ provider = await get_provider_impl(config, mock_deps, policy=[])
assert provider is not None
assert provider.safety_api is None
diff --git a/tests/unit/providers/inference/test_bedrock_adapter.py b/tests/unit/providers/inference/test_bedrock_adapter.py
index a20f2860a..2a1ca769b 100644
--- a/tests/unit/providers/inference/test_bedrock_adapter.py
+++ b/tests/unit/providers/inference/test_bedrock_adapter.py
@@ -40,8 +40,8 @@ def test_api_key_from_header_overrides_config():
"""Test API key from request header overrides config via client property"""
config = BedrockConfig(api_key="config-key", region_name="us-east-1")
adapter = BedrockInferenceAdapter(config=config)
- adapter.provider_data_api_key_field = "aws_bedrock_api_key"
- adapter.get_request_provider_data = MagicMock(return_value=SimpleNamespace(aws_bedrock_api_key="header-key"))
+ adapter.provider_data_api_key_field = "aws_bearer_token_bedrock"
+ adapter.get_request_provider_data = MagicMock(return_value=SimpleNamespace(aws_bearer_token_bedrock="header-key"))
# The client property is where header override happens (in OpenAIMixin)
assert adapter.client.api_key == "header-key"
diff --git a/tests/unit/providers/inference/test_bedrock_config.py b/tests/unit/providers/inference/test_bedrock_config.py
index 4c1fd56a2..622080426 100644
--- a/tests/unit/providers/inference/test_bedrock_config.py
+++ b/tests/unit/providers/inference/test_bedrock_config.py
@@ -9,7 +9,7 @@ from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
def test_bedrock_config_defaults_no_env(monkeypatch):
"""Test BedrockConfig defaults when env vars are not set"""
- monkeypatch.delenv("AWS_BEDROCK_API_KEY", raising=False)
+ monkeypatch.delenv("AWS_BEARER_TOKEN_BEDROCK", raising=False)
monkeypatch.delenv("AWS_DEFAULT_REGION", raising=False)
config = BedrockConfig()
assert config.auth_credential is None
@@ -35,5 +35,5 @@ def test_bedrock_config_sample():
sample = BedrockConfig.sample_run_config()
assert "api_key" in sample
assert "region_name" in sample
- assert sample["api_key"] == "${env.AWS_BEDROCK_API_KEY:=}"
+ assert sample["api_key"] == "${env.AWS_BEARER_TOKEN_BEDROCK:=}"
assert sample["region_name"] == "${env.AWS_DEFAULT_REGION:=us-east-2}"
diff --git a/tests/unit/server/test_auth.py b/tests/unit/server/test_auth.py
index 57a552514..2db60c91c 100644
--- a/tests/unit/server/test_auth.py
+++ b/tests/unit/server/test_auth.py
@@ -364,23 +364,6 @@ def test_invalid_auth_header_format_oauth2(oauth2_client):
assert "Invalid Authorization header format" in response.json()["error"]["message"]
-async def mock_jwks_response(*args, **kwargs):
- return MockResponse(
- 200,
- {
- "keys": [
- {
- "kid": "1234567890",
- "kty": "oct",
- "alg": "HS256",
- "use": "sig",
- "k": base64.b64encode(b"foobarbaz").decode(),
- }
- ]
- },
- )
-
-
@pytest.fixture
def jwt_token_valid():
import jwt
@@ -421,28 +404,60 @@ def mock_jwks_urlopen():
yield mock_urlopen
+@pytest.fixture
+def mock_jwks_urlopen_with_auth_required():
+ """Mock urllib.request.urlopen that requires Bearer token for JWKS requests."""
+ with patch("urllib.request.urlopen") as mock_urlopen:
+
+ def side_effect(request, **kwargs):
+ # Check if Authorization header is present
+ auth_header = request.headers.get("Authorization") if hasattr(request, "headers") else None
+
+ if not auth_header or not auth_header.startswith("Bearer "):
+ # Simulate 401 Unauthorized
+ import urllib.error
+
+ raise urllib.error.HTTPError(
+ url=request.full_url if hasattr(request, "full_url") else "",
+ code=401,
+ msg="Unauthorized",
+ hdrs={},
+ fp=None,
+ )
+
+ # Mock the JWKS response for PyJWKClient
+ mock_response = Mock()
+ mock_response.read.return_value = json.dumps(
+ {
+ "keys": [
+ {
+ "kid": "1234567890",
+ "kty": "oct",
+ "alg": "HS256",
+ "use": "sig",
+ "k": base64.b64encode(b"foobarbaz").decode(),
+ }
+ ]
+ }
+ ).encode()
+ return mock_response
+
+ mock_urlopen.side_effect = side_effect
+ yield mock_urlopen
+
+
def test_valid_oauth2_authentication(oauth2_client, jwt_token_valid, mock_jwks_urlopen):
response = oauth2_client.get("/test", headers={"Authorization": f"Bearer {jwt_token_valid}"})
assert response.status_code == 200
assert response.json() == {"message": "Authentication successful"}
-@patch("httpx.AsyncClient.get", new=mock_jwks_response)
-def test_invalid_oauth2_authentication(oauth2_client, invalid_token, suppress_auth_errors):
+def test_invalid_oauth2_authentication(oauth2_client, invalid_token, mock_jwks_urlopen, suppress_auth_errors):
response = oauth2_client.get("/test", headers={"Authorization": f"Bearer {invalid_token}"})
assert response.status_code == 401
assert "Invalid JWT token" in response.json()["error"]["message"]
-async def mock_auth_jwks_response(*args, **kwargs):
- if "headers" not in kwargs or "Authorization" not in kwargs["headers"]:
- return MockResponse(401, {})
- authz = kwargs["headers"]["Authorization"]
- if authz != "Bearer my-jwks-token":
- return MockResponse(401, {})
- return await mock_jwks_response(args, kwargs)
-
-
@pytest.fixture
def oauth2_app_with_jwks_token():
app = FastAPI()
@@ -472,8 +487,9 @@ def oauth2_client_with_jwks_token(oauth2_app_with_jwks_token):
return TestClient(oauth2_app_with_jwks_token)
-@patch("httpx.AsyncClient.get", new=mock_auth_jwks_response)
-def test_oauth2_with_jwks_token_expected(oauth2_client, jwt_token_valid, suppress_auth_errors):
+def test_oauth2_with_jwks_token_expected(
+ oauth2_client, jwt_token_valid, mock_jwks_urlopen_with_auth_required, suppress_auth_errors
+):
response = oauth2_client.get("/test", headers={"Authorization": f"Bearer {jwt_token_valid}"})
assert response.status_code == 401
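
For reference, the fixture introduced above follows a reusable pattern: patch `urllib.request.urlopen` with a `side_effect` that inspects the outgoing `Request` and raises `urllib.error.HTTPError(401)` unless a Bearer header is attached. A self-contained sketch of the same gate, runnable outside the test suite (the JWKS URL and payload are placeholders):

import urllib.error
import urllib.request
from unittest.mock import MagicMock, patch

def guarded_urlopen(request, **kwargs):
    # Reject any JWKS fetch that does not carry a Bearer token.
    auth = request.headers.get("Authorization", "")
    if not auth.startswith("Bearer "):
        raise urllib.error.HTTPError(
            url=request.full_url, code=401, msg="Unauthorized", hdrs={}, fp=None
        )
    response = MagicMock()
    response.read.return_value = b'{"keys": []}'
    return response

with patch("urllib.request.urlopen", side_effect=guarded_urlopen):
    req = urllib.request.Request("https://idp.example/jwks")
    try:
        urllib.request.urlopen(req)
        raise AssertionError("expected HTTPError")
    except urllib.error.HTTPError as err:
        assert err.code == 401  # no Authorization header -> rejected

    req.add_header("Authorization", "Bearer my-jwks-token")
    assert urllib.request.urlopen(req).read() == b'{"keys": []}'
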