mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-03 01:48:05 +00:00
Merge branch 'llamastack:main' into auto_gen_llama_stack
This commit is contained in:
commit
4391bd3004
85 changed files with 67523 additions and 4475 deletions
4
.github/workflows/backward-compat.yml
vendored
4
.github/workflows/backward-compat.yml
vendored
|
|
@ -32,7 +32,7 @@ jobs:
|
|||
fetch-depth: 0 # Need full history to access main branch
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
|
||||
uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
|
||||
with:
|
||||
python-version: '3.12'
|
||||
|
||||
|
|
@ -410,7 +410,7 @@ jobs:
|
|||
fetch-depth: 0
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
|
||||
uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
|
||||
with:
|
||||
python-version: '3.12'
|
||||
|
||||
|
|
|
|||
18
.github/workflows/conformance.yml
vendored
18
.github/workflows/conformance.yml
vendored
|
|
@ -64,6 +64,7 @@ jobs:
|
|||
ref: ${{ github.event.pull_request.base.ref }}
|
||||
path: 'base'
|
||||
|
||||
|
||||
# Cache oasdiff to avoid checksum failures and speed up builds
|
||||
- name: Cache oasdiff
|
||||
if: steps.skip-check.outputs.skip != 'true'
|
||||
|
|
@ -136,6 +137,23 @@ jobs:
|
|||
run: |
|
||||
oasdiff breaking --fail-on ERR $BASE_SPEC $CURRENT_SPEC --match-path '^/v1/'
|
||||
|
||||
# Run oasdiff to detect breaking changes in the API specification when compared to the OpenAI openAPI spec
|
||||
- name: Run OpenAPI Breaking Change Diff Against OpenAI API
|
||||
if: steps.skip-check.outputs.skip != 'true'
|
||||
continue-on-error: true
|
||||
shell: bash
|
||||
run: |
|
||||
OPENAI_SPEC=docs/static/openai-spec-2.3.0.yml
|
||||
LLAMA_STACK_SPEC=docs/static/llama-stack-spec.yaml
|
||||
|
||||
# Compare Llama Stack spec against OpenAI spec.
|
||||
# This finds breaking changes in our implementation of common endpoints.
|
||||
# By using our spec as the base, we avoid errors for endpoints we don't implement.
|
||||
oasdiff breaking --fail-on ERR \
|
||||
"$LLAMA_STACK_SPEC" \
|
||||
"$OPENAI_SPEC" \
|
||||
--strip-prefix-base "/v1"
|
||||
|
||||
# Report when test is skipped
|
||||
- name: Report skip reason
|
||||
if: steps.skip-check.outputs.skip == 'true'
|
||||
|
|
|
|||
2
.github/workflows/pre-commit.yml
vendored
2
.github/workflows/pre-commit.yml
vendored
|
|
@ -30,7 +30,7 @@ jobs:
|
|||
fetch-depth: ${{ github.actor == 'dependabot[bot]' && 0 || 1 }}
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
|
||||
uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
|
||||
with:
|
||||
python-version: '3.12'
|
||||
cache: pip
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ repos:
|
|||
- id: no-commit-to-branch
|
||||
- id: check-yaml
|
||||
args: ["--unsafe"]
|
||||
exclude: 'docs/static/openai-spec-2.3.0.yml'
|
||||
- id: detect-private-key
|
||||
- id: mixed-line-ending
|
||||
args: [--fix=lf] # Forces to replace line ending by LF (line feed)
|
||||
|
|
|
|||
|
|
@ -9,7 +9,6 @@ data:
|
|||
- inference
|
||||
- files
|
||||
- safety
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
providers:
|
||||
|
|
@ -67,12 +66,6 @@ data:
|
|||
db: ${env.POSTGRES_DB:=llamastack}
|
||||
user: ${env.POSTGRES_USER:=llamastack}
|
||||
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
||||
telemetry:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:=console}
|
||||
tool_runtime:
|
||||
- provider_id: brave-search
|
||||
provider_type: remote::brave-search
|
||||
|
|
|
|||
|
|
@ -126,8 +126,6 @@ registered_resources:
|
|||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8323
|
||||
telemetry:
|
||||
enabled: true
|
||||
vector_stores:
|
||||
default_provider_id: chromadb
|
||||
default_embedding_model:
|
||||
|
|
|
|||
|
|
@ -2091,6 +2091,7 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/InvokeToolRequest'
|
||||
required: true
|
||||
deprecated: true
|
||||
/v1/tool-runtime/list-tools:
|
||||
get:
|
||||
responses:
|
||||
|
|
@ -2142,6 +2143,7 @@ paths:
|
|||
- $ref: '#/components/schemas/URL'
|
||||
- type: 'null'
|
||||
title: Mcp Endpoint
|
||||
deprecated: true
|
||||
/v1/toolgroups:
|
||||
get:
|
||||
responses:
|
||||
|
|
@ -2168,6 +2170,7 @@ paths:
|
|||
summary: List Tool Groups
|
||||
description: List tool groups with optional provider.
|
||||
operationId: list_tool_groups_v1_toolgroups_get
|
||||
deprecated: true
|
||||
post:
|
||||
responses:
|
||||
'400':
|
||||
|
|
@ -2229,6 +2232,7 @@ paths:
|
|||
schema:
|
||||
type: string
|
||||
description: 'Path parameter: toolgroup_id'
|
||||
deprecated: true
|
||||
delete:
|
||||
responses:
|
||||
'400':
|
||||
|
|
@ -2293,6 +2297,7 @@ paths:
|
|||
- type: string
|
||||
- type: 'null'
|
||||
title: Toolgroup Id
|
||||
deprecated: true
|
||||
/v1/tools/{tool_name}:
|
||||
get:
|
||||
responses:
|
||||
|
|
@ -2326,6 +2331,7 @@ paths:
|
|||
schema:
|
||||
type: string
|
||||
description: 'Path parameter: tool_name'
|
||||
deprecated: true
|
||||
/v1/vector-io/insert:
|
||||
post:
|
||||
responses:
|
||||
|
|
@ -6796,6 +6802,12 @@ components:
|
|||
anyOf:
|
||||
- type: integer
|
||||
- type: 'null'
|
||||
metadata:
|
||||
anyOf:
|
||||
- additionalProperties:
|
||||
type: string
|
||||
type: object
|
||||
- type: 'null'
|
||||
input:
|
||||
items:
|
||||
anyOf:
|
||||
|
|
@ -7199,6 +7211,12 @@ components:
|
|||
anyOf:
|
||||
- type: integer
|
||||
- type: 'null'
|
||||
metadata:
|
||||
anyOf:
|
||||
- additionalProperties:
|
||||
type: string
|
||||
type: object
|
||||
- type: 'null'
|
||||
type: object
|
||||
required:
|
||||
- input
|
||||
|
|
@ -7330,6 +7348,12 @@ components:
|
|||
anyOf:
|
||||
- type: integer
|
||||
- type: 'null'
|
||||
metadata:
|
||||
anyOf:
|
||||
- additionalProperties:
|
||||
type: string
|
||||
type: object
|
||||
- type: 'null'
|
||||
type: object
|
||||
required:
|
||||
- created_at
|
||||
|
|
@ -12180,227 +12204,6 @@ components:
|
|||
- $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
|
||||
title: OpenAIResponseContentPartReasoningText
|
||||
title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
|
||||
SpanEndPayload:
|
||||
description: Payload for a span end event.
|
||||
properties:
|
||||
type:
|
||||
const: span_end
|
||||
default: span_end
|
||||
title: Type
|
||||
type: string
|
||||
status:
|
||||
$ref: '#/components/schemas/SpanStatus'
|
||||
required:
|
||||
- status
|
||||
title: SpanEndPayload
|
||||
type: object
|
||||
SpanStartPayload:
|
||||
description: Payload for a span start event.
|
||||
properties:
|
||||
type:
|
||||
const: span_start
|
||||
default: span_start
|
||||
title: Type
|
||||
type: string
|
||||
name:
|
||||
title: Name
|
||||
type: string
|
||||
parent_span_id:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
nullable: true
|
||||
required:
|
||||
- name
|
||||
title: SpanStartPayload
|
||||
type: object
|
||||
SpanStatus:
|
||||
description: The status of a span indicating whether it completed successfully or with an error.
|
||||
enum:
|
||||
- ok
|
||||
- error
|
||||
title: SpanStatus
|
||||
type: string
|
||||
StructuredLogPayload:
|
||||
discriminator:
|
||||
mapping:
|
||||
span_end: '#/components/schemas/SpanEndPayload'
|
||||
span_start: '#/components/schemas/SpanStartPayload'
|
||||
propertyName: type
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/SpanStartPayload'
|
||||
title: SpanStartPayload
|
||||
- $ref: '#/components/schemas/SpanEndPayload'
|
||||
title: SpanEndPayload
|
||||
title: SpanStartPayload | SpanEndPayload
|
||||
LogSeverity:
|
||||
description: The severity level of a log message.
|
||||
enum:
|
||||
- verbose
|
||||
- debug
|
||||
- info
|
||||
- warn
|
||||
- error
|
||||
- critical
|
||||
title: LogSeverity
|
||||
type: string
|
||||
MetricEvent:
|
||||
description: A metric event containing a measured value.
|
||||
properties:
|
||||
trace_id:
|
||||
title: Trace Id
|
||||
type: string
|
||||
span_id:
|
||||
title: Span Id
|
||||
type: string
|
||||
timestamp:
|
||||
format: date-time
|
||||
title: Timestamp
|
||||
type: string
|
||||
attributes:
|
||||
anyOf:
|
||||
- additionalProperties:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: integer
|
||||
- type: number
|
||||
- type: boolean
|
||||
- type: 'null'
|
||||
title: string | ... (4 variants)
|
||||
type: object
|
||||
- type: 'null'
|
||||
type:
|
||||
const: metric
|
||||
default: metric
|
||||
title: Type
|
||||
type: string
|
||||
metric:
|
||||
title: Metric
|
||||
type: string
|
||||
value:
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: number
|
||||
title: integer | number
|
||||
unit:
|
||||
title: Unit
|
||||
type: string
|
||||
required:
|
||||
- trace_id
|
||||
- span_id
|
||||
- timestamp
|
||||
- metric
|
||||
- value
|
||||
- unit
|
||||
title: MetricEvent
|
||||
type: object
|
||||
StructuredLogEvent:
|
||||
description: A structured log event containing typed payload data.
|
||||
properties:
|
||||
trace_id:
|
||||
title: Trace Id
|
||||
type: string
|
||||
span_id:
|
||||
title: Span Id
|
||||
type: string
|
||||
timestamp:
|
||||
format: date-time
|
||||
title: Timestamp
|
||||
type: string
|
||||
attributes:
|
||||
anyOf:
|
||||
- additionalProperties:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: integer
|
||||
- type: number
|
||||
- type: boolean
|
||||
- type: 'null'
|
||||
title: string | ... (4 variants)
|
||||
type: object
|
||||
- type: 'null'
|
||||
type:
|
||||
const: structured_log
|
||||
default: structured_log
|
||||
title: Type
|
||||
type: string
|
||||
payload:
|
||||
discriminator:
|
||||
mapping:
|
||||
span_end: '#/components/schemas/SpanEndPayload'
|
||||
span_start: '#/components/schemas/SpanStartPayload'
|
||||
propertyName: type
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/SpanStartPayload'
|
||||
title: SpanStartPayload
|
||||
- $ref: '#/components/schemas/SpanEndPayload'
|
||||
title: SpanEndPayload
|
||||
title: SpanStartPayload | SpanEndPayload
|
||||
required:
|
||||
- trace_id
|
||||
- span_id
|
||||
- timestamp
|
||||
- payload
|
||||
title: StructuredLogEvent
|
||||
type: object
|
||||
UnstructuredLogEvent:
|
||||
description: An unstructured log event containing a simple text message.
|
||||
properties:
|
||||
trace_id:
|
||||
title: Trace Id
|
||||
type: string
|
||||
span_id:
|
||||
title: Span Id
|
||||
type: string
|
||||
timestamp:
|
||||
format: date-time
|
||||
title: Timestamp
|
||||
type: string
|
||||
attributes:
|
||||
anyOf:
|
||||
- additionalProperties:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: integer
|
||||
- type: number
|
||||
- type: boolean
|
||||
- type: 'null'
|
||||
title: string | ... (4 variants)
|
||||
type: object
|
||||
- type: 'null'
|
||||
type:
|
||||
const: unstructured_log
|
||||
default: unstructured_log
|
||||
title: Type
|
||||
type: string
|
||||
message:
|
||||
title: Message
|
||||
type: string
|
||||
severity:
|
||||
$ref: '#/components/schemas/LogSeverity'
|
||||
required:
|
||||
- trace_id
|
||||
- span_id
|
||||
- timestamp
|
||||
- message
|
||||
- severity
|
||||
title: UnstructuredLogEvent
|
||||
type: object
|
||||
Event:
|
||||
discriminator:
|
||||
mapping:
|
||||
metric: '#/components/schemas/MetricEvent'
|
||||
structured_log: '#/components/schemas/StructuredLogEvent'
|
||||
unstructured_log: '#/components/schemas/UnstructuredLogEvent'
|
||||
propertyName: type
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/UnstructuredLogEvent'
|
||||
title: UnstructuredLogEvent
|
||||
- $ref: '#/components/schemas/MetricEvent'
|
||||
title: MetricEvent
|
||||
- $ref: '#/components/schemas/StructuredLogEvent'
|
||||
title: StructuredLogEvent
|
||||
title: UnstructuredLogEvent | MetricEvent | StructuredLogEvent
|
||||
MetricInResponse:
|
||||
description: A metric value included in API responses.
|
||||
properties:
|
||||
|
|
@ -13225,236 +13028,6 @@ components:
|
|||
- logger_config
|
||||
title: PostTrainingRLHFRequest
|
||||
type: object
|
||||
Span:
|
||||
description: A span representing a single operation within a trace.
|
||||
properties:
|
||||
span_id:
|
||||
title: Span Id
|
||||
type: string
|
||||
trace_id:
|
||||
title: Trace Id
|
||||
type: string
|
||||
parent_span_id:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
nullable: true
|
||||
name:
|
||||
title: Name
|
||||
type: string
|
||||
start_time:
|
||||
format: date-time
|
||||
title: Start Time
|
||||
type: string
|
||||
end_time:
|
||||
anyOf:
|
||||
- format: date-time
|
||||
type: string
|
||||
- type: 'null'
|
||||
nullable: true
|
||||
attributes:
|
||||
anyOf:
|
||||
- additionalProperties: true
|
||||
type: object
|
||||
- type: 'null'
|
||||
required:
|
||||
- span_id
|
||||
- trace_id
|
||||
- name
|
||||
- start_time
|
||||
title: Span
|
||||
type: object
|
||||
Trace:
|
||||
description: A trace representing the complete execution path of a request across multiple operations.
|
||||
properties:
|
||||
trace_id:
|
||||
title: Trace Id
|
||||
type: string
|
||||
root_span_id:
|
||||
title: Root Span Id
|
||||
type: string
|
||||
start_time:
|
||||
format: date-time
|
||||
title: Start Time
|
||||
type: string
|
||||
end_time:
|
||||
anyOf:
|
||||
- format: date-time
|
||||
type: string
|
||||
- type: 'null'
|
||||
nullable: true
|
||||
required:
|
||||
- trace_id
|
||||
- root_span_id
|
||||
- start_time
|
||||
title: Trace
|
||||
type: object
|
||||
EventType:
|
||||
description: The type of telemetry event being logged.
|
||||
enum:
|
||||
- unstructured_log
|
||||
- structured_log
|
||||
- metric
|
||||
title: EventType
|
||||
type: string
|
||||
StructuredLogType:
|
||||
description: The type of structured log event payload.
|
||||
enum:
|
||||
- span_start
|
||||
- span_end
|
||||
title: StructuredLogType
|
||||
type: string
|
||||
EvalTrace:
|
||||
description: A trace record for evaluation purposes.
|
||||
properties:
|
||||
session_id:
|
||||
title: Session Id
|
||||
type: string
|
||||
step:
|
||||
title: Step
|
||||
type: string
|
||||
input:
|
||||
title: Input
|
||||
type: string
|
||||
output:
|
||||
title: Output
|
||||
type: string
|
||||
expected_output:
|
||||
title: Expected Output
|
||||
type: string
|
||||
required:
|
||||
- session_id
|
||||
- step
|
||||
- input
|
||||
- output
|
||||
- expected_output
|
||||
title: EvalTrace
|
||||
type: object
|
||||
SpanWithStatus:
|
||||
description: A span that includes status information.
|
||||
properties:
|
||||
span_id:
|
||||
title: Span Id
|
||||
type: string
|
||||
trace_id:
|
||||
title: Trace Id
|
||||
type: string
|
||||
parent_span_id:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
nullable: true
|
||||
name:
|
||||
title: Name
|
||||
type: string
|
||||
start_time:
|
||||
format: date-time
|
||||
title: Start Time
|
||||
type: string
|
||||
end_time:
|
||||
anyOf:
|
||||
- format: date-time
|
||||
type: string
|
||||
- type: 'null'
|
||||
nullable: true
|
||||
attributes:
|
||||
anyOf:
|
||||
- additionalProperties: true
|
||||
type: object
|
||||
- type: 'null'
|
||||
status:
|
||||
anyOf:
|
||||
- $ref: '#/components/schemas/SpanStatus'
|
||||
title: SpanStatus
|
||||
- type: 'null'
|
||||
nullable: true
|
||||
title: SpanStatus
|
||||
required:
|
||||
- span_id
|
||||
- trace_id
|
||||
- name
|
||||
- start_time
|
||||
title: SpanWithStatus
|
||||
type: object
|
||||
QueryConditionOp:
|
||||
description: Comparison operators for query conditions.
|
||||
enum:
|
||||
- eq
|
||||
- ne
|
||||
- gt
|
||||
- lt
|
||||
title: QueryConditionOp
|
||||
type: string
|
||||
QueryCondition:
|
||||
description: A condition for filtering query results.
|
||||
properties:
|
||||
key:
|
||||
title: Key
|
||||
type: string
|
||||
op:
|
||||
$ref: '#/components/schemas/QueryConditionOp'
|
||||
value:
|
||||
title: Value
|
||||
required:
|
||||
- key
|
||||
- op
|
||||
- value
|
||||
title: QueryCondition
|
||||
type: object
|
||||
MetricLabel:
|
||||
description: A label associated with a metric.
|
||||
properties:
|
||||
name:
|
||||
title: Name
|
||||
type: string
|
||||
value:
|
||||
title: Value
|
||||
type: string
|
||||
required:
|
||||
- name
|
||||
- value
|
||||
title: MetricLabel
|
||||
type: object
|
||||
MetricDataPoint:
|
||||
description: A single data point in a metric time series.
|
||||
properties:
|
||||
timestamp:
|
||||
title: Timestamp
|
||||
type: integer
|
||||
value:
|
||||
title: Value
|
||||
type: number
|
||||
unit:
|
||||
title: Unit
|
||||
type: string
|
||||
required:
|
||||
- timestamp
|
||||
- value
|
||||
- unit
|
||||
title: MetricDataPoint
|
||||
type: object
|
||||
MetricSeries:
|
||||
description: A time series of metric data points.
|
||||
properties:
|
||||
metric:
|
||||
title: Metric
|
||||
type: string
|
||||
labels:
|
||||
items:
|
||||
$ref: '#/components/schemas/MetricLabel'
|
||||
title: Labels
|
||||
type: array
|
||||
values:
|
||||
items:
|
||||
$ref: '#/components/schemas/MetricDataPoint'
|
||||
title: Values
|
||||
type: array
|
||||
required:
|
||||
- metric
|
||||
- labels
|
||||
- values
|
||||
title: MetricSeries
|
||||
type: object
|
||||
responses:
|
||||
BadRequest400:
|
||||
description: The request was invalid or malformed
|
||||
|
|
|
|||
|
|
@ -10,203 +10,34 @@ import TabItem from '@theme/TabItem';
|
|||
|
||||
# Telemetry
|
||||
|
||||
The Llama Stack uses OpenTelemetry to provide comprehensive tracing, metrics, and logging capabilities.
|
||||
The preferred way to instrument Llama Stack is with OpenTelemetry. Llama Stack enriches the data
|
||||
collected by OpenTelemetry to capture helpful information about the performance and behavior of your
|
||||
application. Here is an example of how to forward your telemetry to an OTLP collector from Llama Stack:
|
||||
|
||||
```sh
|
||||
export OTEL_EXPORTER_OTLP_ENDPOINT="http://127.0.0.1:4318"
|
||||
export OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf
|
||||
export OTEL_SERVICE_NAME="llama-stack-server"
|
||||
|
||||
## Automatic Metrics Generation
|
||||
uv pip install opentelemetry-distro opentelemetry-exporter-otlp
|
||||
uv run opentelemetry-bootstrap -a requirements | uv pip install --requirement -
|
||||
|
||||
Llama Stack automatically generates metrics during inference operations. These metrics are aggregated at the **inference request level** and provide insights into token usage and model performance.
|
||||
|
||||
### Available Metrics
|
||||
|
||||
The following metrics are automatically generated for each inference request:
|
||||
|
||||
| Metric Name | Type | Unit | Description | Labels |
|
||||
|-------------|------|------|-------------|--------|
|
||||
| `llama_stack_prompt_tokens_total` | Counter | `tokens` | Number of tokens in the input prompt | `model_id`, `provider_id` |
|
||||
| `llama_stack_completion_tokens_total` | Counter | `tokens` | Number of tokens in the generated response | `model_id`, `provider_id` |
|
||||
| `llama_stack_tokens_total` | Counter | `tokens` | Total tokens used (prompt + completion) | `model_id`, `provider_id` |
|
||||
|
||||
### Metric Generation Flow
|
||||
|
||||
1. **Token Counting**: During inference operations (chat completion, completion, etc.), the system counts tokens in both input prompts and generated responses
|
||||
2. **Metric Construction**: For each request, `MetricEvent` objects are created with the token counts
|
||||
3. **Telemetry Logging**: Metrics are sent to the configured telemetry sinks
|
||||
4. **OpenTelemetry Export**: When OpenTelemetry is enabled, metrics are exposed as standard OpenTelemetry counters
|
||||
|
||||
### Metric Aggregation Level
|
||||
|
||||
All metrics are generated and aggregated at the **inference request level**. This means:
|
||||
|
||||
- Each individual inference request generates its own set of metrics
|
||||
- Metrics are not pre-aggregated across multiple requests
|
||||
- Aggregation (sums, averages, etc.) can be performed by your observability tools (Prometheus, Grafana, etc.)
|
||||
- Each metric includes labels for `model_id` and `provider_id` to enable filtering and grouping
|
||||
|
||||
### Example Metric Event
|
||||
|
||||
```python
|
||||
MetricEvent(
|
||||
trace_id="1234567890abcdef",
|
||||
span_id="abcdef1234567890",
|
||||
metric="total_tokens",
|
||||
value=150,
|
||||
timestamp=1703123456.789,
|
||||
unit="tokens",
|
||||
attributes={
|
||||
"model_id": "meta-llama/Llama-3.2-3B-Instruct",
|
||||
"provider_id": "tgi"
|
||||
},
|
||||
)
|
||||
uv run opentelemetry-instrument llama stack run run.yaml
|
||||
```
|
||||
|
||||
## Telemetry Sinks
|
||||
|
||||
Choose from multiple sink types based on your observability needs:
|
||||
### Known issues
|
||||
|
||||
<Tabs>
|
||||
<TabItem value="opentelemetry" label="OpenTelemetry">
|
||||
Some database instrumentation libraries have a known bug where spans get wrapped twice, or do not get connected to a trace.
|
||||
To prevent this, you can disable database specific tracing, and rely just on the SQLAlchemy tracing. If you are using
|
||||
`sqlite3` as your database, for example, you can disable the additional tracing like this:
|
||||
|
||||
Send events to an OpenTelemetry Collector for integration with observability platforms:
|
||||
|
||||
**Use Cases:**
|
||||
- Visualizing traces in tools like Jaeger
|
||||
- Collecting metrics for Prometheus
|
||||
- Integration with enterprise observability stacks
|
||||
|
||||
**Features:**
|
||||
- Standard OpenTelemetry format
|
||||
- Compatible with all OpenTelemetry collectors
|
||||
- Supports both traces and metrics
|
||||
|
||||
</TabItem>
|
||||
<TabItem value="console" label="Console">
|
||||
|
||||
Print events to the console for immediate debugging:
|
||||
|
||||
**Use Cases:**
|
||||
- Development and testing
|
||||
- Quick debugging sessions
|
||||
- Simple logging without external tools
|
||||
|
||||
**Features:**
|
||||
- Immediate output visibility
|
||||
- No setup required
|
||||
- Human-readable format
|
||||
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
## Configuration
|
||||
|
||||
### Meta-Reference Provider
|
||||
|
||||
Currently, only the meta-reference provider is implemented. It can be configured to send events to multiple sink types:
|
||||
|
||||
```yaml
|
||||
telemetry:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: "llama-stack-service"
|
||||
sinks: ['console', 'otel_trace', 'otel_metric']
|
||||
otel_exporter_otlp_endpoint: "http://localhost:4318"
|
||||
```sh
|
||||
export OTEL_PYTHON_DISABLED_INSTRUMENTATIONS="sqlite3"
|
||||
```
|
||||
|
||||
### Environment Variables
|
||||
|
||||
Configure telemetry behavior using environment variables:
|
||||
|
||||
- **`OTEL_EXPORTER_OTLP_ENDPOINT`**: OpenTelemetry Collector endpoint (default: `http://localhost:4318`)
|
||||
- **`OTEL_SERVICE_NAME`**: Service name for telemetry (default: empty string)
|
||||
- **`TELEMETRY_SINKS`**: Comma-separated list of sinks (default: `[]`)
|
||||
|
||||
### Quick Setup: Complete Telemetry Stack
|
||||
|
||||
Use the automated setup script to launch the complete telemetry stack (Jaeger, OpenTelemetry Collector, Prometheus, and Grafana):
|
||||
|
||||
```bash
|
||||
./scripts/telemetry/setup_telemetry.sh
|
||||
```
|
||||
|
||||
This sets up:
|
||||
- **Jaeger UI**: http://localhost:16686 (traces visualization)
|
||||
- **Prometheus**: http://localhost:9090 (metrics)
|
||||
- **Grafana**: http://localhost:3000 (dashboards with auto-configured data sources)
|
||||
- **OTEL Collector**: http://localhost:4318 (OTLP endpoint)
|
||||
|
||||
Once running, you can visualize traces by navigating to [Grafana](http://localhost:3000/) and login with login `admin` and password `admin`.
|
||||
|
||||
## Querying Metrics
|
||||
|
||||
When using the OpenTelemetry sink, metrics are exposed in standard format and can be queried through various tools:
|
||||
|
||||
<Tabs>
|
||||
<TabItem value="prometheus" label="Prometheus Queries">
|
||||
|
||||
Example Prometheus queries for analyzing token usage:
|
||||
|
||||
```promql
|
||||
# Total tokens used across all models
|
||||
sum(llama_stack_tokens_total)
|
||||
|
||||
# Tokens per model
|
||||
sum by (model_id) (llama_stack_tokens_total)
|
||||
|
||||
# Average tokens per request over 5 minutes
|
||||
rate(llama_stack_tokens_total[5m])
|
||||
|
||||
# Token usage by provider
|
||||
sum by (provider_id) (llama_stack_tokens_total)
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
<TabItem value="grafana" label="Grafana Dashboards">
|
||||
|
||||
Create dashboards using Prometheus as a data source:
|
||||
|
||||
- **Token Usage Over Time**: Line charts showing token consumption trends
|
||||
- **Model Performance**: Comparison of different models by token efficiency
|
||||
- **Provider Analysis**: Breakdown of usage across different providers
|
||||
- **Request Patterns**: Understanding peak usage times and patterns
|
||||
|
||||
</TabItem>
|
||||
<TabItem value="otlp" label="OpenTelemetry Collector">
|
||||
|
||||
Forward metrics to other observability systems:
|
||||
|
||||
- Export to multiple backends simultaneously
|
||||
- Apply transformations and filtering
|
||||
- Integrate with existing monitoring infrastructure
|
||||
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
## Best Practices
|
||||
|
||||
### 🔍 **Monitoring Strategy**
|
||||
- Use OpenTelemetry for production environments
|
||||
- Set up alerts on key metrics like token usage and error rates
|
||||
|
||||
### 📊 **Metrics Analysis**
|
||||
- Track token usage trends to optimize costs
|
||||
- Monitor response times across different models
|
||||
- Analyze usage patterns to improve resource allocation
|
||||
|
||||
### 🚨 **Alerting & Debugging**
|
||||
- Set up alerts for unusual token consumption spikes
|
||||
- Use trace data to debug performance issues
|
||||
- Monitor error rates and failure patterns
|
||||
|
||||
### 🔧 **Configuration Management**
|
||||
- Use environment variables for flexible deployment
|
||||
- Ensure proper network access to OpenTelemetry collectors
|
||||
|
||||
|
||||
## Related Resources
|
||||
|
||||
- **[Agents](./agent)** - Monitoring agent execution with telemetry
|
||||
- **[Evaluations](./evals)** - Using telemetry data for performance evaluation
|
||||
- **[Getting Started Notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb)** - Telemetry examples and queries
|
||||
- **[OpenTelemetry Documentation](https://opentelemetry.io/)** - Comprehensive observability framework
|
||||
- **[Jaeger Documentation](https://www.jaegertracing.io/)** - Distributed tracing visualization
|
||||
|
|
|
|||
|
|
@ -17,7 +17,6 @@ A Llama Stack API is described as a collection of REST endpoints following OpenA
|
|||
- **Eval**: generate outputs (via Inference or Agents) and perform scoring
|
||||
- **VectorIO**: perform operations on vector stores, such as adding documents, searching, and deleting documents
|
||||
- **Files**: manage file uploads, storage, and retrieval
|
||||
- **Telemetry**: collect telemetry data from the system
|
||||
- **Post Training**: fine-tune a model
|
||||
- **Tool Runtime**: interact with various tools and protocols
|
||||
- **Responses**: generate responses from an LLM
|
||||
|
|
|
|||
|
|
@ -8,7 +8,6 @@ data:
|
|||
- inference
|
||||
- files
|
||||
- safety
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
providers:
|
||||
|
|
@ -73,12 +72,6 @@ data:
|
|||
db: ${env.POSTGRES_DB:=llamastack}
|
||||
user: ${env.POSTGRES_USER:=llamastack}
|
||||
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
||||
telemetry:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:=console}
|
||||
tool_runtime:
|
||||
- provider_id: brave-search
|
||||
provider_type: remote::brave-search
|
||||
|
|
|
|||
|
|
@ -140,8 +140,6 @@ server:
|
|||
auth:
|
||||
provider_config:
|
||||
type: github_token
|
||||
telemetry:
|
||||
enabled: true
|
||||
vector_stores:
|
||||
default_provider_id: chromadb
|
||||
default_embedding_model:
|
||||
|
|
|
|||
|
|
@ -116,10 +116,6 @@ The following environment variables can be configured:
|
|||
- `BRAVE_SEARCH_API_KEY`: Brave Search API key
|
||||
- `TAVILY_SEARCH_API_KEY`: Tavily Search API key
|
||||
|
||||
### Telemetry Configuration
|
||||
- `OTEL_SERVICE_NAME`: OpenTelemetry service name
|
||||
- `OTEL_EXPORTER_OTLP_ENDPOINT`: OpenTelemetry collector endpoint URL
|
||||
|
||||
## Enabling Providers
|
||||
|
||||
You can enable specific providers by setting appropriate environment variables. For example,
|
||||
|
|
@ -265,7 +261,7 @@ The starter distribution uses SQLite for local storage of various components:
|
|||
2. **Flexible Configuration**: Easy to enable/disable providers based on your needs
|
||||
3. **No Local GPU Required**: Most providers are cloud-based, making it accessible to developers without high-end hardware
|
||||
4. **Easy Migration**: Start with hosted providers and gradually move to local ones as needed
|
||||
5. **Production Ready**: Includes safety, evaluation, and telemetry components
|
||||
5. **Production Ready**: Includes safety and evaluation
|
||||
6. **Tool Integration**: Comes with web search, RAG, and model context protocol tools
|
||||
|
||||
The starter distribution is ideal for developers who want to experiment with different AI providers, build prototypes quickly, or create applications that can work with multiple AI backends.
|
||||
|
|
|
|||
|
|
@ -360,32 +360,6 @@ Methods:
|
|||
|
||||
- <code title="post /v1/synthetic-data-generation/generate">client.synthetic_data_generation.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/synthetic_data_generation.py">generate</a>(\*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/synthetic_data_generation_generate_params.py">params</a>) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/synthetic_data_generation_response.py">SyntheticDataGenerationResponse</a></code>
|
||||
|
||||
## Telemetry
|
||||
|
||||
Types:
|
||||
|
||||
```python
|
||||
from llama_stack_client.types import (
|
||||
QuerySpansResponse,
|
||||
SpanWithStatus,
|
||||
Trace,
|
||||
TelemetryGetSpanResponse,
|
||||
TelemetryGetSpanTreeResponse,
|
||||
TelemetryQuerySpansResponse,
|
||||
TelemetryQueryTracesResponse,
|
||||
)
|
||||
```
|
||||
|
||||
Methods:
|
||||
|
||||
- <code title="get /v1/telemetry/traces/{trace_id}/spans/{span_id}">client.telemetry.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/telemetry.py">get_span</a>(span_id, \*, trace_id) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/telemetry_get_span_response.py">TelemetryGetSpanResponse</a></code>
|
||||
- <code title="get /v1/telemetry/spans/{span_id}/tree">client.telemetry.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/telemetry.py">get_span_tree</a>(span_id, \*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/telemetry_get_span_tree_params.py">params</a>) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/telemetry_get_span_tree_response.py">TelemetryGetSpanTreeResponse</a></code>
|
||||
- <code title="get /v1/telemetry/traces/{trace_id}">client.telemetry.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/telemetry.py">get_trace</a>(trace_id) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/trace.py">Trace</a></code>
|
||||
- <code title="post /v1/telemetry/events">client.telemetry.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/telemetry.py">log_event</a>(\*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/telemetry_log_event_params.py">params</a>) -> None</code>
|
||||
- <code title="get /v1/telemetry/spans">client.telemetry.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/telemetry.py">query_spans</a>(\*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/telemetry_query_spans_params.py">params</a>) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/telemetry_query_spans_response.py">TelemetryQuerySpansResponse</a></code>
|
||||
- <code title="get /v1/telemetry/traces">client.telemetry.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/telemetry.py">query_traces</a>(\*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/telemetry_query_traces_params.py">params</a>) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/telemetry_query_traces_response.py">TelemetryQueryTracesResponse</a></code>
|
||||
- <code title="post /v1/telemetry/spans/export">client.telemetry.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/telemetry.py">save_spans_to_dataset</a>(\*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/telemetry_save_spans_to_dataset_params.py">params</a>) -> None</code>
|
||||
|
||||
## Datasetio
|
||||
|
||||
Types:
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ function HomepageHeader() {
|
|||
<div className={styles.heroContent}>
|
||||
<h1 className={styles.heroTitle}>Build AI Applications with Llama Stack</h1>
|
||||
<p className={styles.heroSubtitle}>
|
||||
Unified APIs for Inference, RAG, Agents, Tools, Safety, and Telemetry
|
||||
Unified APIs for Inference, RAG, Agents, Tools, and Safety
|
||||
</p>
|
||||
<div className={styles.buttons}>
|
||||
<Link
|
||||
|
|
@ -206,7 +206,7 @@ export default function Home() {
|
|||
return (
|
||||
<Layout
|
||||
title="Build AI Applications"
|
||||
description="The open-source framework for building generative AI applications with unified APIs for Inference, RAG, Agents, Tools, Safety, and Telemetry.">
|
||||
description="The open-source framework for building generative AI applications with unified APIs for Inference, RAG, Agents, Tools, Safety, and Evals.">
|
||||
<HomepageHeader />
|
||||
<main>
|
||||
<QuickStart />
|
||||
|
|
|
|||
626
docs/static/deprecated-llama-stack-spec.yaml
vendored
626
docs/static/deprecated-llama-stack-spec.yaml
vendored
|
|
@ -378,6 +378,91 @@ paths:
|
|||
type: string
|
||||
description: 'Path parameter: identifier'
|
||||
deprecated: true
|
||||
/v1/tool-runtime/invoke:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: A ToolInvocationResult.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ToolInvocationResult'
|
||||
'400':
|
||||
description: Bad Request
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
description: Too Many Requests
|
||||
$ref: '#/components/responses/TooManyRequests429'
|
||||
'500':
|
||||
description: Internal Server Error
|
||||
$ref: '#/components/responses/InternalServerError500'
|
||||
default:
|
||||
description: Default Response
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Tool Runtime
|
||||
summary: Invoke Tool
|
||||
description: Run a tool with the given arguments.
|
||||
operationId: invoke_tool_v1_tool_runtime_invoke_post
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/InvokeToolRequest'
|
||||
required: true
|
||||
deprecated: true
|
||||
/v1/tool-runtime/list-tools:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: A ListToolDefsResponse.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ListToolDefsResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
description: Bad Request
|
||||
'429':
|
||||
$ref: '#/components/responses/TooManyRequests429'
|
||||
description: Too Many Requests
|
||||
'500':
|
||||
$ref: '#/components/responses/InternalServerError500'
|
||||
description: Internal Server Error
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
description: Default Response
|
||||
tags:
|
||||
- Tool Runtime
|
||||
summary: List Runtime Tools
|
||||
description: List all tools in the runtime.
|
||||
operationId: list_runtime_tools_v1_tool_runtime_list_tools_get
|
||||
parameters:
|
||||
- name: authorization
|
||||
in: query
|
||||
required: false
|
||||
schema:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
title: Authorization
|
||||
- name: tool_group_id
|
||||
in: query
|
||||
required: false
|
||||
schema:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
title: Tool Group Id
|
||||
- name: mcp_endpoint
|
||||
in: query
|
||||
required: false
|
||||
schema:
|
||||
anyOf:
|
||||
- $ref: '#/components/schemas/URL'
|
||||
- type: 'null'
|
||||
title: Mcp Endpoint
|
||||
deprecated: true
|
||||
/v1/toolgroups:
|
||||
get:
|
||||
responses:
|
||||
|
|
@ -404,6 +489,7 @@ paths:
|
|||
summary: List Tool Groups
|
||||
description: List tool groups with optional provider.
|
||||
operationId: list_tool_groups_v1_toolgroups_get
|
||||
deprecated: true
|
||||
post:
|
||||
responses:
|
||||
'400':
|
||||
|
|
@ -465,6 +551,7 @@ paths:
|
|||
schema:
|
||||
type: string
|
||||
description: 'Path parameter: toolgroup_id'
|
||||
deprecated: true
|
||||
delete:
|
||||
responses:
|
||||
'400':
|
||||
|
|
@ -494,6 +581,76 @@ paths:
|
|||
type: string
|
||||
description: 'Path parameter: toolgroup_id'
|
||||
deprecated: true
|
||||
/v1/tools:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: A ListToolDefsResponse.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ListToolDefsResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
description: Bad Request
|
||||
'429':
|
||||
$ref: '#/components/responses/TooManyRequests429'
|
||||
description: Too Many Requests
|
||||
'500':
|
||||
$ref: '#/components/responses/InternalServerError500'
|
||||
description: Internal Server Error
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
description: Default Response
|
||||
tags:
|
||||
- Tool Groups
|
||||
summary: List Tools
|
||||
description: List tools with optional tool group.
|
||||
operationId: list_tools_v1_tools_get
|
||||
parameters:
|
||||
- name: toolgroup_id
|
||||
in: query
|
||||
required: false
|
||||
schema:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
title: Toolgroup Id
|
||||
deprecated: true
|
||||
/v1/tools/{tool_name}:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: A ToolDef.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ToolDef'
|
||||
'400':
|
||||
description: Bad Request
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
description: Too Many Requests
|
||||
$ref: '#/components/responses/TooManyRequests429'
|
||||
'500':
|
||||
description: Internal Server Error
|
||||
$ref: '#/components/responses/InternalServerError500'
|
||||
default:
|
||||
description: Default Response
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Tool Groups
|
||||
summary: Get Tool
|
||||
description: Get a tool by its name.
|
||||
operationId: get_tool_v1_tools__tool_name__get
|
||||
parameters:
|
||||
- name: tool_name
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
description: 'Path parameter: tool_name'
|
||||
deprecated: true
|
||||
/v1beta/datasets:
|
||||
get:
|
||||
responses:
|
||||
|
|
@ -3639,6 +3796,12 @@ components:
|
|||
anyOf:
|
||||
- type: integer
|
||||
- type: 'null'
|
||||
metadata:
|
||||
anyOf:
|
||||
- additionalProperties:
|
||||
type: string
|
||||
type: object
|
||||
- type: 'null'
|
||||
input:
|
||||
items:
|
||||
anyOf:
|
||||
|
|
@ -4042,6 +4205,12 @@ components:
|
|||
anyOf:
|
||||
- type: integer
|
||||
- type: 'null'
|
||||
metadata:
|
||||
anyOf:
|
||||
- additionalProperties:
|
||||
type: string
|
||||
type: object
|
||||
- type: 'null'
|
||||
type: object
|
||||
required:
|
||||
- input
|
||||
|
|
@ -4173,6 +4342,12 @@ components:
|
|||
anyOf:
|
||||
- type: integer
|
||||
- type: 'null'
|
||||
metadata:
|
||||
anyOf:
|
||||
- additionalProperties:
|
||||
type: string
|
||||
type: object
|
||||
- type: 'null'
|
||||
type: object
|
||||
required:
|
||||
- created_at
|
||||
|
|
@ -9023,227 +9198,6 @@ components:
|
|||
- $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
|
||||
title: OpenAIResponseContentPartReasoningText
|
||||
title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
|
||||
SpanEndPayload:
|
||||
description: Payload for a span end event.
|
||||
properties:
|
||||
type:
|
||||
const: span_end
|
||||
default: span_end
|
||||
title: Type
|
||||
type: string
|
||||
status:
|
||||
$ref: '#/components/schemas/SpanStatus'
|
||||
required:
|
||||
- status
|
||||
title: SpanEndPayload
|
||||
type: object
|
||||
SpanStartPayload:
|
||||
description: Payload for a span start event.
|
||||
properties:
|
||||
type:
|
||||
const: span_start
|
||||
default: span_start
|
||||
title: Type
|
||||
type: string
|
||||
name:
|
||||
title: Name
|
||||
type: string
|
||||
parent_span_id:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
nullable: true
|
||||
required:
|
||||
- name
|
||||
title: SpanStartPayload
|
||||
type: object
|
||||
SpanStatus:
|
||||
description: The status of a span indicating whether it completed successfully or with an error.
|
||||
enum:
|
||||
- ok
|
||||
- error
|
||||
title: SpanStatus
|
||||
type: string
|
||||
StructuredLogPayload:
|
||||
discriminator:
|
||||
mapping:
|
||||
span_end: '#/components/schemas/SpanEndPayload'
|
||||
span_start: '#/components/schemas/SpanStartPayload'
|
||||
propertyName: type
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/SpanStartPayload'
|
||||
title: SpanStartPayload
|
||||
- $ref: '#/components/schemas/SpanEndPayload'
|
||||
title: SpanEndPayload
|
||||
title: SpanStartPayload | SpanEndPayload
|
||||
LogSeverity:
|
||||
description: The severity level of a log message.
|
||||
enum:
|
||||
- verbose
|
||||
- debug
|
||||
- info
|
||||
- warn
|
||||
- error
|
||||
- critical
|
||||
title: LogSeverity
|
||||
type: string
|
||||
MetricEvent:
|
||||
description: A metric event containing a measured value.
|
||||
properties:
|
||||
trace_id:
|
||||
title: Trace Id
|
||||
type: string
|
||||
span_id:
|
||||
title: Span Id
|
||||
type: string
|
||||
timestamp:
|
||||
format: date-time
|
||||
title: Timestamp
|
||||
type: string
|
||||
attributes:
|
||||
anyOf:
|
||||
- additionalProperties:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: integer
|
||||
- type: number
|
||||
- type: boolean
|
||||
- type: 'null'
|
||||
title: string | ... (4 variants)
|
||||
type: object
|
||||
- type: 'null'
|
||||
type:
|
||||
const: metric
|
||||
default: metric
|
||||
title: Type
|
||||
type: string
|
||||
metric:
|
||||
title: Metric
|
||||
type: string
|
||||
value:
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: number
|
||||
title: integer | number
|
||||
unit:
|
||||
title: Unit
|
||||
type: string
|
||||
required:
|
||||
- trace_id
|
||||
- span_id
|
||||
- timestamp
|
||||
- metric
|
||||
- value
|
||||
- unit
|
||||
title: MetricEvent
|
||||
type: object
|
||||
StructuredLogEvent:
|
||||
description: A structured log event containing typed payload data.
|
||||
properties:
|
||||
trace_id:
|
||||
title: Trace Id
|
||||
type: string
|
||||
span_id:
|
||||
title: Span Id
|
||||
type: string
|
||||
timestamp:
|
||||
format: date-time
|
||||
title: Timestamp
|
||||
type: string
|
||||
attributes:
|
||||
anyOf:
|
||||
- additionalProperties:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: integer
|
||||
- type: number
|
||||
- type: boolean
|
||||
- type: 'null'
|
||||
title: string | ... (4 variants)
|
||||
type: object
|
||||
- type: 'null'
|
||||
type:
|
||||
const: structured_log
|
||||
default: structured_log
|
||||
title: Type
|
||||
type: string
|
||||
payload:
|
||||
discriminator:
|
||||
mapping:
|
||||
span_end: '#/components/schemas/SpanEndPayload'
|
||||
span_start: '#/components/schemas/SpanStartPayload'
|
||||
propertyName: type
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/SpanStartPayload'
|
||||
title: SpanStartPayload
|
||||
- $ref: '#/components/schemas/SpanEndPayload'
|
||||
title: SpanEndPayload
|
||||
title: SpanStartPayload | SpanEndPayload
|
||||
required:
|
||||
- trace_id
|
||||
- span_id
|
||||
- timestamp
|
||||
- payload
|
||||
title: StructuredLogEvent
|
||||
type: object
|
||||
UnstructuredLogEvent:
|
||||
description: An unstructured log event containing a simple text message.
|
||||
properties:
|
||||
trace_id:
|
||||
title: Trace Id
|
||||
type: string
|
||||
span_id:
|
||||
title: Span Id
|
||||
type: string
|
||||
timestamp:
|
||||
format: date-time
|
||||
title: Timestamp
|
||||
type: string
|
||||
attributes:
|
||||
anyOf:
|
||||
- additionalProperties:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: integer
|
||||
- type: number
|
||||
- type: boolean
|
||||
- type: 'null'
|
||||
title: string | ... (4 variants)
|
||||
type: object
|
||||
- type: 'null'
|
||||
type:
|
||||
const: unstructured_log
|
||||
default: unstructured_log
|
||||
title: Type
|
||||
type: string
|
||||
message:
|
||||
title: Message
|
||||
type: string
|
||||
severity:
|
||||
$ref: '#/components/schemas/LogSeverity'
|
||||
required:
|
||||
- trace_id
|
||||
- span_id
|
||||
- timestamp
|
||||
- message
|
||||
- severity
|
||||
title: UnstructuredLogEvent
|
||||
type: object
|
||||
Event:
|
||||
discriminator:
|
||||
mapping:
|
||||
metric: '#/components/schemas/MetricEvent'
|
||||
structured_log: '#/components/schemas/StructuredLogEvent'
|
||||
unstructured_log: '#/components/schemas/UnstructuredLogEvent'
|
||||
propertyName: type
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/UnstructuredLogEvent'
|
||||
title: UnstructuredLogEvent
|
||||
- $ref: '#/components/schemas/MetricEvent'
|
||||
title: MetricEvent
|
||||
- $ref: '#/components/schemas/StructuredLogEvent'
|
||||
title: StructuredLogEvent
|
||||
title: UnstructuredLogEvent | MetricEvent | StructuredLogEvent
|
||||
MetricInResponse:
|
||||
description: A metric value included in API responses.
|
||||
properties:
|
||||
|
|
@ -10068,236 +10022,6 @@ components:
|
|||
- logger_config
|
||||
title: PostTrainingRLHFRequest
|
||||
type: object
|
||||
Span:
|
||||
description: A span representing a single operation within a trace.
|
||||
properties:
|
||||
span_id:
|
||||
title: Span Id
|
||||
type: string
|
||||
trace_id:
|
||||
title: Trace Id
|
||||
type: string
|
||||
parent_span_id:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
nullable: true
|
||||
name:
|
||||
title: Name
|
||||
type: string
|
||||
start_time:
|
||||
format: date-time
|
||||
title: Start Time
|
||||
type: string
|
||||
end_time:
|
||||
anyOf:
|
||||
- format: date-time
|
||||
type: string
|
||||
- type: 'null'
|
||||
nullable: true
|
||||
attributes:
|
||||
anyOf:
|
||||
- additionalProperties: true
|
||||
type: object
|
||||
- type: 'null'
|
||||
required:
|
||||
- span_id
|
||||
- trace_id
|
||||
- name
|
||||
- start_time
|
||||
title: Span
|
||||
type: object
|
||||
Trace:
|
||||
description: A trace representing the complete execution path of a request across multiple operations.
|
||||
properties:
|
||||
trace_id:
|
||||
title: Trace Id
|
||||
type: string
|
||||
root_span_id:
|
||||
title: Root Span Id
|
||||
type: string
|
||||
start_time:
|
||||
format: date-time
|
||||
title: Start Time
|
||||
type: string
|
||||
end_time:
|
||||
anyOf:
|
||||
- format: date-time
|
||||
type: string
|
||||
- type: 'null'
|
||||
nullable: true
|
||||
required:
|
||||
- trace_id
|
||||
- root_span_id
|
||||
- start_time
|
||||
title: Trace
|
||||
type: object
|
||||
EventType:
|
||||
description: The type of telemetry event being logged.
|
||||
enum:
|
||||
- unstructured_log
|
||||
- structured_log
|
||||
- metric
|
||||
title: EventType
|
||||
type: string
|
||||
StructuredLogType:
|
||||
description: The type of structured log event payload.
|
||||
enum:
|
||||
- span_start
|
||||
- span_end
|
||||
title: StructuredLogType
|
||||
type: string
|
||||
EvalTrace:
|
||||
description: A trace record for evaluation purposes.
|
||||
properties:
|
||||
session_id:
|
||||
title: Session Id
|
||||
type: string
|
||||
step:
|
||||
title: Step
|
||||
type: string
|
||||
input:
|
||||
title: Input
|
||||
type: string
|
||||
output:
|
||||
title: Output
|
||||
type: string
|
||||
expected_output:
|
||||
title: Expected Output
|
||||
type: string
|
||||
required:
|
||||
- session_id
|
||||
- step
|
||||
- input
|
||||
- output
|
||||
- expected_output
|
||||
title: EvalTrace
|
||||
type: object
|
||||
SpanWithStatus:
|
||||
description: A span that includes status information.
|
||||
properties:
|
||||
span_id:
|
||||
title: Span Id
|
||||
type: string
|
||||
trace_id:
|
||||
title: Trace Id
|
||||
type: string
|
||||
parent_span_id:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
nullable: true
|
||||
name:
|
||||
title: Name
|
||||
type: string
|
||||
start_time:
|
||||
format: date-time
|
||||
title: Start Time
|
||||
type: string
|
||||
end_time:
|
||||
anyOf:
|
||||
- format: date-time
|
||||
type: string
|
||||
- type: 'null'
|
||||
nullable: true
|
||||
attributes:
|
||||
anyOf:
|
||||
- additionalProperties: true
|
||||
type: object
|
||||
- type: 'null'
|
||||
status:
|
||||
anyOf:
|
||||
- $ref: '#/components/schemas/SpanStatus'
|
||||
title: SpanStatus
|
||||
- type: 'null'
|
||||
nullable: true
|
||||
title: SpanStatus
|
||||
required:
|
||||
- span_id
|
||||
- trace_id
|
||||
- name
|
||||
- start_time
|
||||
title: SpanWithStatus
|
||||
type: object
|
||||
QueryConditionOp:
|
||||
description: Comparison operators for query conditions.
|
||||
enum:
|
||||
- eq
|
||||
- ne
|
||||
- gt
|
||||
- lt
|
||||
title: QueryConditionOp
|
||||
type: string
|
||||
QueryCondition:
|
||||
description: A condition for filtering query results.
|
||||
properties:
|
||||
key:
|
||||
title: Key
|
||||
type: string
|
||||
op:
|
||||
$ref: '#/components/schemas/QueryConditionOp'
|
||||
value:
|
||||
title: Value
|
||||
required:
|
||||
- key
|
||||
- op
|
||||
- value
|
||||
title: QueryCondition
|
||||
type: object
|
||||
MetricLabel:
|
||||
description: A label associated with a metric.
|
||||
properties:
|
||||
name:
|
||||
title: Name
|
||||
type: string
|
||||
value:
|
||||
title: Value
|
||||
type: string
|
||||
required:
|
||||
- name
|
||||
- value
|
||||
title: MetricLabel
|
||||
type: object
|
||||
MetricDataPoint:
|
||||
description: A single data point in a metric time series.
|
||||
properties:
|
||||
timestamp:
|
||||
title: Timestamp
|
||||
type: integer
|
||||
value:
|
||||
title: Value
|
||||
type: number
|
||||
unit:
|
||||
title: Unit
|
||||
type: string
|
||||
required:
|
||||
- timestamp
|
||||
- value
|
||||
- unit
|
||||
title: MetricDataPoint
|
||||
type: object
|
||||
MetricSeries:
|
||||
description: A time series of metric data points.
|
||||
properties:
|
||||
metric:
|
||||
title: Metric
|
||||
type: string
|
||||
labels:
|
||||
items:
|
||||
$ref: '#/components/schemas/MetricLabel'
|
||||
title: Labels
|
||||
type: array
|
||||
values:
|
||||
items:
|
||||
$ref: '#/components/schemas/MetricDataPoint'
|
||||
title: Values
|
||||
type: array
|
||||
required:
|
||||
- metric
|
||||
- labels
|
||||
- values
|
||||
title: MetricSeries
|
||||
type: object
|
||||
responses:
|
||||
BadRequest400:
|
||||
description: The request was invalid or malformed
|
||||
|
|
|
|||
463
docs/static/experimental-llama-stack-spec.yaml
vendored
463
docs/static/experimental-llama-stack-spec.yaml
vendored
|
|
@ -3336,6 +3336,12 @@ components:
|
|||
anyOf:
|
||||
- type: integer
|
||||
- type: 'null'
|
||||
metadata:
|
||||
anyOf:
|
||||
- additionalProperties:
|
||||
type: string
|
||||
type: object
|
||||
- type: 'null'
|
||||
input:
|
||||
items:
|
||||
anyOf:
|
||||
|
|
@ -3736,6 +3742,12 @@ components:
|
|||
anyOf:
|
||||
- type: integer
|
||||
- type: 'null'
|
||||
metadata:
|
||||
anyOf:
|
||||
- additionalProperties:
|
||||
type: string
|
||||
type: object
|
||||
- type: 'null'
|
||||
type: object
|
||||
required:
|
||||
- created_at
|
||||
|
|
@ -7952,227 +7964,6 @@ components:
|
|||
- $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
|
||||
title: OpenAIResponseContentPartReasoningText
|
||||
title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
|
||||
SpanEndPayload:
|
||||
description: Payload for a span end event.
|
||||
properties:
|
||||
type:
|
||||
const: span_end
|
||||
default: span_end
|
||||
title: Type
|
||||
type: string
|
||||
status:
|
||||
$ref: '#/components/schemas/SpanStatus'
|
||||
required:
|
||||
- status
|
||||
title: SpanEndPayload
|
||||
type: object
|
||||
SpanStartPayload:
|
||||
description: Payload for a span start event.
|
||||
properties:
|
||||
type:
|
||||
const: span_start
|
||||
default: span_start
|
||||
title: Type
|
||||
type: string
|
||||
name:
|
||||
title: Name
|
||||
type: string
|
||||
parent_span_id:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
nullable: true
|
||||
required:
|
||||
- name
|
||||
title: SpanStartPayload
|
||||
type: object
|
||||
SpanStatus:
|
||||
description: The status of a span indicating whether it completed successfully or with an error.
|
||||
enum:
|
||||
- ok
|
||||
- error
|
||||
title: SpanStatus
|
||||
type: string
|
||||
StructuredLogPayload:
|
||||
discriminator:
|
||||
mapping:
|
||||
span_end: '#/components/schemas/SpanEndPayload'
|
||||
span_start: '#/components/schemas/SpanStartPayload'
|
||||
propertyName: type
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/SpanStartPayload'
|
||||
title: SpanStartPayload
|
||||
- $ref: '#/components/schemas/SpanEndPayload'
|
||||
title: SpanEndPayload
|
||||
title: SpanStartPayload | SpanEndPayload
|
||||
LogSeverity:
|
||||
description: The severity level of a log message.
|
||||
enum:
|
||||
- verbose
|
||||
- debug
|
||||
- info
|
||||
- warn
|
||||
- error
|
||||
- critical
|
||||
title: LogSeverity
|
||||
type: string
|
||||
MetricEvent:
|
||||
description: A metric event containing a measured value.
|
||||
properties:
|
||||
trace_id:
|
||||
title: Trace Id
|
||||
type: string
|
||||
span_id:
|
||||
title: Span Id
|
||||
type: string
|
||||
timestamp:
|
||||
format: date-time
|
||||
title: Timestamp
|
||||
type: string
|
||||
attributes:
|
||||
anyOf:
|
||||
- additionalProperties:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: integer
|
||||
- type: number
|
||||
- type: boolean
|
||||
- type: 'null'
|
||||
title: string | ... (4 variants)
|
||||
type: object
|
||||
- type: 'null'
|
||||
type:
|
||||
const: metric
|
||||
default: metric
|
||||
title: Type
|
||||
type: string
|
||||
metric:
|
||||
title: Metric
|
||||
type: string
|
||||
value:
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: number
|
||||
title: integer | number
|
||||
unit:
|
||||
title: Unit
|
||||
type: string
|
||||
required:
|
||||
- trace_id
|
||||
- span_id
|
||||
- timestamp
|
||||
- metric
|
||||
- value
|
||||
- unit
|
||||
title: MetricEvent
|
||||
type: object
|
||||
StructuredLogEvent:
|
||||
description: A structured log event containing typed payload data.
|
||||
properties:
|
||||
trace_id:
|
||||
title: Trace Id
|
||||
type: string
|
||||
span_id:
|
||||
title: Span Id
|
||||
type: string
|
||||
timestamp:
|
||||
format: date-time
|
||||
title: Timestamp
|
||||
type: string
|
||||
attributes:
|
||||
anyOf:
|
||||
- additionalProperties:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: integer
|
||||
- type: number
|
||||
- type: boolean
|
||||
- type: 'null'
|
||||
title: string | ... (4 variants)
|
||||
type: object
|
||||
- type: 'null'
|
||||
type:
|
||||
const: structured_log
|
||||
default: structured_log
|
||||
title: Type
|
||||
type: string
|
||||
payload:
|
||||
discriminator:
|
||||
mapping:
|
||||
span_end: '#/components/schemas/SpanEndPayload'
|
||||
span_start: '#/components/schemas/SpanStartPayload'
|
||||
propertyName: type
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/SpanStartPayload'
|
||||
title: SpanStartPayload
|
||||
- $ref: '#/components/schemas/SpanEndPayload'
|
||||
title: SpanEndPayload
|
||||
title: SpanStartPayload | SpanEndPayload
|
||||
required:
|
||||
- trace_id
|
||||
- span_id
|
||||
- timestamp
|
||||
- payload
|
||||
title: StructuredLogEvent
|
||||
type: object
|
||||
UnstructuredLogEvent:
|
||||
description: An unstructured log event containing a simple text message.
|
||||
properties:
|
||||
trace_id:
|
||||
title: Trace Id
|
||||
type: string
|
||||
span_id:
|
||||
title: Span Id
|
||||
type: string
|
||||
timestamp:
|
||||
format: date-time
|
||||
title: Timestamp
|
||||
type: string
|
||||
attributes:
|
||||
anyOf:
|
||||
- additionalProperties:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: integer
|
||||
- type: number
|
||||
- type: boolean
|
||||
- type: 'null'
|
||||
title: string | ... (4 variants)
|
||||
type: object
|
||||
- type: 'null'
|
||||
type:
|
||||
const: unstructured_log
|
||||
default: unstructured_log
|
||||
title: Type
|
||||
type: string
|
||||
message:
|
||||
title: Message
|
||||
type: string
|
||||
severity:
|
||||
$ref: '#/components/schemas/LogSeverity'
|
||||
required:
|
||||
- trace_id
|
||||
- span_id
|
||||
- timestamp
|
||||
- message
|
||||
- severity
|
||||
title: UnstructuredLogEvent
|
||||
type: object
|
||||
Event:
|
||||
discriminator:
|
||||
mapping:
|
||||
metric: '#/components/schemas/MetricEvent'
|
||||
structured_log: '#/components/schemas/StructuredLogEvent'
|
||||
unstructured_log: '#/components/schemas/UnstructuredLogEvent'
|
||||
propertyName: type
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/UnstructuredLogEvent'
|
||||
title: UnstructuredLogEvent
|
||||
- $ref: '#/components/schemas/MetricEvent'
|
||||
title: MetricEvent
|
||||
- $ref: '#/components/schemas/StructuredLogEvent'
|
||||
title: StructuredLogEvent
|
||||
title: UnstructuredLogEvent | MetricEvent | StructuredLogEvent
|
||||
MetricInResponse:
|
||||
description: A metric value included in API responses.
|
||||
properties:
|
||||
|
|
@ -8997,236 +8788,6 @@ components:
|
|||
- logger_config
|
||||
title: PostTrainingRLHFRequest
|
||||
type: object
|
||||
Span:
|
||||
description: A span representing a single operation within a trace.
|
||||
properties:
|
||||
span_id:
|
||||
title: Span Id
|
||||
type: string
|
||||
trace_id:
|
||||
title: Trace Id
|
||||
type: string
|
||||
parent_span_id:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
nullable: true
|
||||
name:
|
||||
title: Name
|
||||
type: string
|
||||
start_time:
|
||||
format: date-time
|
||||
title: Start Time
|
||||
type: string
|
||||
end_time:
|
||||
anyOf:
|
||||
- format: date-time
|
||||
type: string
|
||||
- type: 'null'
|
||||
nullable: true
|
||||
attributes:
|
||||
anyOf:
|
||||
- additionalProperties: true
|
||||
type: object
|
||||
- type: 'null'
|
||||
required:
|
||||
- span_id
|
||||
- trace_id
|
||||
- name
|
||||
- start_time
|
||||
title: Span
|
||||
type: object
|
||||
Trace:
|
||||
description: A trace representing the complete execution path of a request across multiple operations.
|
||||
properties:
|
||||
trace_id:
|
||||
title: Trace Id
|
||||
type: string
|
||||
root_span_id:
|
||||
title: Root Span Id
|
||||
type: string
|
||||
start_time:
|
||||
format: date-time
|
||||
title: Start Time
|
||||
type: string
|
||||
end_time:
|
||||
anyOf:
|
||||
- format: date-time
|
||||
type: string
|
||||
- type: 'null'
|
||||
nullable: true
|
||||
required:
|
||||
- trace_id
|
||||
- root_span_id
|
||||
- start_time
|
||||
title: Trace
|
||||
type: object
|
||||
EventType:
|
||||
description: The type of telemetry event being logged.
|
||||
enum:
|
||||
- unstructured_log
|
||||
- structured_log
|
||||
- metric
|
||||
title: EventType
|
||||
type: string
|
||||
StructuredLogType:
|
||||
description: The type of structured log event payload.
|
||||
enum:
|
||||
- span_start
|
||||
- span_end
|
||||
title: StructuredLogType
|
||||
type: string
|
||||
EvalTrace:
|
||||
description: A trace record for evaluation purposes.
|
||||
properties:
|
||||
session_id:
|
||||
title: Session Id
|
||||
type: string
|
||||
step:
|
||||
title: Step
|
||||
type: string
|
||||
input:
|
||||
title: Input
|
||||
type: string
|
||||
output:
|
||||
title: Output
|
||||
type: string
|
||||
expected_output:
|
||||
title: Expected Output
|
||||
type: string
|
||||
required:
|
||||
- session_id
|
||||
- step
|
||||
- input
|
||||
- output
|
||||
- expected_output
|
||||
title: EvalTrace
|
||||
type: object
|
||||
SpanWithStatus:
|
||||
description: A span that includes status information.
|
||||
properties:
|
||||
span_id:
|
||||
title: Span Id
|
||||
type: string
|
||||
trace_id:
|
||||
title: Trace Id
|
||||
type: string
|
||||
parent_span_id:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
nullable: true
|
||||
name:
|
||||
title: Name
|
||||
type: string
|
||||
start_time:
|
||||
format: date-time
|
||||
title: Start Time
|
||||
type: string
|
||||
end_time:
|
||||
anyOf:
|
||||
- format: date-time
|
||||
type: string
|
||||
- type: 'null'
|
||||
nullable: true
|
||||
attributes:
|
||||
anyOf:
|
||||
- additionalProperties: true
|
||||
type: object
|
||||
- type: 'null'
|
||||
status:
|
||||
anyOf:
|
||||
- $ref: '#/components/schemas/SpanStatus'
|
||||
title: SpanStatus
|
||||
- type: 'null'
|
||||
nullable: true
|
||||
title: SpanStatus
|
||||
required:
|
||||
- span_id
|
||||
- trace_id
|
||||
- name
|
||||
- start_time
|
||||
title: SpanWithStatus
|
||||
type: object
|
||||
QueryConditionOp:
|
||||
description: Comparison operators for query conditions.
|
||||
enum:
|
||||
- eq
|
||||
- ne
|
||||
- gt
|
||||
- lt
|
||||
title: QueryConditionOp
|
||||
type: string
|
||||
QueryCondition:
|
||||
description: A condition for filtering query results.
|
||||
properties:
|
||||
key:
|
||||
title: Key
|
||||
type: string
|
||||
op:
|
||||
$ref: '#/components/schemas/QueryConditionOp'
|
||||
value:
|
||||
title: Value
|
||||
required:
|
||||
- key
|
||||
- op
|
||||
- value
|
||||
title: QueryCondition
|
||||
type: object
|
||||
MetricLabel:
|
||||
description: A label associated with a metric.
|
||||
properties:
|
||||
name:
|
||||
title: Name
|
||||
type: string
|
||||
value:
|
||||
title: Value
|
||||
type: string
|
||||
required:
|
||||
- name
|
||||
- value
|
||||
title: MetricLabel
|
||||
type: object
|
||||
MetricDataPoint:
|
||||
description: A single data point in a metric time series.
|
||||
properties:
|
||||
timestamp:
|
||||
title: Timestamp
|
||||
type: integer
|
||||
value:
|
||||
title: Value
|
||||
type: number
|
||||
unit:
|
||||
title: Unit
|
||||
type: string
|
||||
required:
|
||||
- timestamp
|
||||
- value
|
||||
- unit
|
||||
title: MetricDataPoint
|
||||
type: object
|
||||
MetricSeries:
|
||||
description: A time series of metric data points.
|
||||
properties:
|
||||
metric:
|
||||
title: Metric
|
||||
type: string
|
||||
labels:
|
||||
items:
|
||||
$ref: '#/components/schemas/MetricLabel'
|
||||
title: Labels
|
||||
type: array
|
||||
values:
|
||||
items:
|
||||
$ref: '#/components/schemas/MetricDataPoint'
|
||||
title: Values
|
||||
type: array
|
||||
required:
|
||||
- metric
|
||||
- labels
|
||||
- values
|
||||
title: MetricSeries
|
||||
type: object
|
||||
responses:
|
||||
BadRequest400:
|
||||
description: The request was invalid or malformed
|
||||
|
|
|
|||
697
docs/static/llama-stack-spec.yaml
vendored
697
docs/static/llama-stack-spec.yaml
vendored
|
|
@ -1872,216 +1872,6 @@ paths:
|
|||
schema:
|
||||
type: string
|
||||
description: 'Path parameter: identifier'
|
||||
/v1/tool-runtime/invoke:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: A ToolInvocationResult.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ToolInvocationResult'
|
||||
'400':
|
||||
description: Bad Request
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
description: Too Many Requests
|
||||
$ref: '#/components/responses/TooManyRequests429'
|
||||
'500':
|
||||
description: Internal Server Error
|
||||
$ref: '#/components/responses/InternalServerError500'
|
||||
default:
|
||||
description: Default Response
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Tool Runtime
|
||||
summary: Invoke Tool
|
||||
description: Run a tool with the given arguments.
|
||||
operationId: invoke_tool_v1_tool_runtime_invoke_post
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/InvokeToolRequest'
|
||||
required: true
|
||||
/v1/tool-runtime/list-tools:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: A ListToolDefsResponse.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ListToolDefsResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
description: Bad Request
|
||||
'429':
|
||||
$ref: '#/components/responses/TooManyRequests429'
|
||||
description: Too Many Requests
|
||||
'500':
|
||||
$ref: '#/components/responses/InternalServerError500'
|
||||
description: Internal Server Error
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
description: Default Response
|
||||
tags:
|
||||
- Tool Runtime
|
||||
summary: List Runtime Tools
|
||||
description: List all tools in the runtime.
|
||||
operationId: list_runtime_tools_v1_tool_runtime_list_tools_get
|
||||
parameters:
|
||||
- name: authorization
|
||||
in: query
|
||||
required: false
|
||||
schema:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
title: Authorization
|
||||
- name: tool_group_id
|
||||
in: query
|
||||
required: false
|
||||
schema:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
title: Tool Group Id
|
||||
- name: mcp_endpoint
|
||||
in: query
|
||||
required: false
|
||||
schema:
|
||||
anyOf:
|
||||
- $ref: '#/components/schemas/URL'
|
||||
- type: 'null'
|
||||
title: Mcp Endpoint
|
||||
/v1/toolgroups:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: A ListToolGroupsResponse.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ListToolGroupsResponse'
|
||||
'400':
|
||||
description: Bad Request
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
description: Too Many Requests
|
||||
$ref: '#/components/responses/TooManyRequests429'
|
||||
'500':
|
||||
description: Internal Server Error
|
||||
$ref: '#/components/responses/InternalServerError500'
|
||||
default:
|
||||
description: Default Response
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Tool Groups
|
||||
summary: List Tool Groups
|
||||
description: List tool groups with optional provider.
|
||||
operationId: list_tool_groups_v1_toolgroups_get
|
||||
/v1/toolgroups/{toolgroup_id}:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: A ToolGroup.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ToolGroup'
|
||||
'400':
|
||||
description: Bad Request
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
description: Too Many Requests
|
||||
$ref: '#/components/responses/TooManyRequests429'
|
||||
'500':
|
||||
description: Internal Server Error
|
||||
$ref: '#/components/responses/InternalServerError500'
|
||||
default:
|
||||
description: Default Response
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Tool Groups
|
||||
summary: Get Tool Group
|
||||
description: Get a tool group by its ID.
|
||||
operationId: get_tool_group_v1_toolgroups__toolgroup_id__get
|
||||
parameters:
|
||||
- name: toolgroup_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
description: 'Path parameter: toolgroup_id'
|
||||
/v1/tools:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: A ListToolDefsResponse.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ListToolDefsResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
description: Bad Request
|
||||
'429':
|
||||
$ref: '#/components/responses/TooManyRequests429'
|
||||
description: Too Many Requests
|
||||
'500':
|
||||
$ref: '#/components/responses/InternalServerError500'
|
||||
description: Internal Server Error
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
description: Default Response
|
||||
tags:
|
||||
- Tool Groups
|
||||
summary: List Tools
|
||||
description: List tools with optional tool group.
|
||||
operationId: list_tools_v1_tools_get
|
||||
parameters:
|
||||
- name: toolgroup_id
|
||||
in: query
|
||||
required: false
|
||||
schema:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
title: Toolgroup Id
|
||||
/v1/tools/{tool_name}:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: A ToolDef.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ToolDef'
|
||||
'400':
|
||||
description: Bad Request
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
description: Too Many Requests
|
||||
$ref: '#/components/responses/TooManyRequests429'
|
||||
'500':
|
||||
description: Internal Server Error
|
||||
$ref: '#/components/responses/InternalServerError500'
|
||||
default:
|
||||
description: Default Response
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Tool Groups
|
||||
summary: Get Tool
|
||||
description: Get a tool by its name.
|
||||
operationId: get_tool_v1_tools__tool_name__get
|
||||
parameters:
|
||||
- name: tool_name
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
description: 'Path parameter: tool_name'
|
||||
/v1/vector-io/insert:
|
||||
post:
|
||||
responses:
|
||||
|
|
@ -5817,6 +5607,12 @@ components:
|
|||
anyOf:
|
||||
- type: integer
|
||||
- type: 'null'
|
||||
metadata:
|
||||
anyOf:
|
||||
- additionalProperties:
|
||||
type: string
|
||||
type: object
|
||||
- type: 'null'
|
||||
input:
|
||||
items:
|
||||
anyOf:
|
||||
|
|
@ -6220,6 +6016,12 @@ components:
|
|||
anyOf:
|
||||
- type: integer
|
||||
- type: 'null'
|
||||
metadata:
|
||||
anyOf:
|
||||
- additionalProperties:
|
||||
type: string
|
||||
type: object
|
||||
- type: 'null'
|
||||
type: object
|
||||
required:
|
||||
- input
|
||||
|
|
@ -6351,6 +6153,12 @@ components:
|
|||
anyOf:
|
||||
- type: integer
|
||||
- type: 'null'
|
||||
metadata:
|
||||
anyOf:
|
||||
- additionalProperties:
|
||||
type: string
|
||||
type: object
|
||||
- type: 'null'
|
||||
type: object
|
||||
required:
|
||||
- created_at
|
||||
|
|
@ -8115,24 +7923,6 @@ components:
|
|||
required:
|
||||
- data
|
||||
title: ListShieldsResponse
|
||||
InvokeToolRequest:
|
||||
properties:
|
||||
tool_name:
|
||||
type: string
|
||||
title: Tool Name
|
||||
kwargs:
|
||||
additionalProperties: true
|
||||
type: object
|
||||
title: Kwargs
|
||||
authorization:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
type: object
|
||||
required:
|
||||
- tool_name
|
||||
- kwargs
|
||||
title: InvokeToolRequest
|
||||
ImageContentItem:
|
||||
description: A image content item
|
||||
properties:
|
||||
|
|
@ -10850,227 +10640,6 @@ components:
|
|||
- $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
|
||||
title: OpenAIResponseContentPartReasoningText
|
||||
title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
|
||||
SpanEndPayload:
|
||||
description: Payload for a span end event.
|
||||
properties:
|
||||
type:
|
||||
const: span_end
|
||||
default: span_end
|
||||
title: Type
|
||||
type: string
|
||||
status:
|
||||
$ref: '#/components/schemas/SpanStatus'
|
||||
required:
|
||||
- status
|
||||
title: SpanEndPayload
|
||||
type: object
|
||||
SpanStartPayload:
|
||||
description: Payload for a span start event.
|
||||
properties:
|
||||
type:
|
||||
const: span_start
|
||||
default: span_start
|
||||
title: Type
|
||||
type: string
|
||||
name:
|
||||
title: Name
|
||||
type: string
|
||||
parent_span_id:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
nullable: true
|
||||
required:
|
||||
- name
|
||||
title: SpanStartPayload
|
||||
type: object
|
||||
SpanStatus:
|
||||
description: The status of a span indicating whether it completed successfully or with an error.
|
||||
enum:
|
||||
- ok
|
||||
- error
|
||||
title: SpanStatus
|
||||
type: string
|
||||
StructuredLogPayload:
|
||||
discriminator:
|
||||
mapping:
|
||||
span_end: '#/components/schemas/SpanEndPayload'
|
||||
span_start: '#/components/schemas/SpanStartPayload'
|
||||
propertyName: type
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/SpanStartPayload'
|
||||
title: SpanStartPayload
|
||||
- $ref: '#/components/schemas/SpanEndPayload'
|
||||
title: SpanEndPayload
|
||||
title: SpanStartPayload | SpanEndPayload
|
||||
LogSeverity:
|
||||
description: The severity level of a log message.
|
||||
enum:
|
||||
- verbose
|
||||
- debug
|
||||
- info
|
||||
- warn
|
||||
- error
|
||||
- critical
|
||||
title: LogSeverity
|
||||
type: string
|
||||
MetricEvent:
|
||||
description: A metric event containing a measured value.
|
||||
properties:
|
||||
trace_id:
|
||||
title: Trace Id
|
||||
type: string
|
||||
span_id:
|
||||
title: Span Id
|
||||
type: string
|
||||
timestamp:
|
||||
format: date-time
|
||||
title: Timestamp
|
||||
type: string
|
||||
attributes:
|
||||
anyOf:
|
||||
- additionalProperties:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: integer
|
||||
- type: number
|
||||
- type: boolean
|
||||
- type: 'null'
|
||||
title: string | ... (4 variants)
|
||||
type: object
|
||||
- type: 'null'
|
||||
type:
|
||||
const: metric
|
||||
default: metric
|
||||
title: Type
|
||||
type: string
|
||||
metric:
|
||||
title: Metric
|
||||
type: string
|
||||
value:
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: number
|
||||
title: integer | number
|
||||
unit:
|
||||
title: Unit
|
||||
type: string
|
||||
required:
|
||||
- trace_id
|
||||
- span_id
|
||||
- timestamp
|
||||
- metric
|
||||
- value
|
||||
- unit
|
||||
title: MetricEvent
|
||||
type: object
|
||||
StructuredLogEvent:
|
||||
description: A structured log event containing typed payload data.
|
||||
properties:
|
||||
trace_id:
|
||||
title: Trace Id
|
||||
type: string
|
||||
span_id:
|
||||
title: Span Id
|
||||
type: string
|
||||
timestamp:
|
||||
format: date-time
|
||||
title: Timestamp
|
||||
type: string
|
||||
attributes:
|
||||
anyOf:
|
||||
- additionalProperties:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: integer
|
||||
- type: number
|
||||
- type: boolean
|
||||
- type: 'null'
|
||||
title: string | ... (4 variants)
|
||||
type: object
|
||||
- type: 'null'
|
||||
type:
|
||||
const: structured_log
|
||||
default: structured_log
|
||||
title: Type
|
||||
type: string
|
||||
payload:
|
||||
discriminator:
|
||||
mapping:
|
||||
span_end: '#/components/schemas/SpanEndPayload'
|
||||
span_start: '#/components/schemas/SpanStartPayload'
|
||||
propertyName: type
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/SpanStartPayload'
|
||||
title: SpanStartPayload
|
||||
- $ref: '#/components/schemas/SpanEndPayload'
|
||||
title: SpanEndPayload
|
||||
title: SpanStartPayload | SpanEndPayload
|
||||
required:
|
||||
- trace_id
|
||||
- span_id
|
||||
- timestamp
|
||||
- payload
|
||||
title: StructuredLogEvent
|
||||
type: object
|
||||
UnstructuredLogEvent:
|
||||
description: An unstructured log event containing a simple text message.
|
||||
properties:
|
||||
trace_id:
|
||||
title: Trace Id
|
||||
type: string
|
||||
span_id:
|
||||
title: Span Id
|
||||
type: string
|
||||
timestamp:
|
||||
format: date-time
|
||||
title: Timestamp
|
||||
type: string
|
||||
attributes:
|
||||
anyOf:
|
||||
- additionalProperties:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: integer
|
||||
- type: number
|
||||
- type: boolean
|
||||
- type: 'null'
|
||||
title: string | ... (4 variants)
|
||||
type: object
|
||||
- type: 'null'
|
||||
type:
|
||||
const: unstructured_log
|
||||
default: unstructured_log
|
||||
title: Type
|
||||
type: string
|
||||
message:
|
||||
title: Message
|
||||
type: string
|
||||
severity:
|
||||
$ref: '#/components/schemas/LogSeverity'
|
||||
required:
|
||||
- trace_id
|
||||
- span_id
|
||||
- timestamp
|
||||
- message
|
||||
- severity
|
||||
title: UnstructuredLogEvent
|
||||
type: object
|
||||
Event:
|
||||
discriminator:
|
||||
mapping:
|
||||
metric: '#/components/schemas/MetricEvent'
|
||||
structured_log: '#/components/schemas/StructuredLogEvent'
|
||||
unstructured_log: '#/components/schemas/UnstructuredLogEvent'
|
||||
propertyName: type
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/UnstructuredLogEvent'
|
||||
title: UnstructuredLogEvent
|
||||
- $ref: '#/components/schemas/MetricEvent'
|
||||
title: MetricEvent
|
||||
- $ref: '#/components/schemas/StructuredLogEvent'
|
||||
title: StructuredLogEvent
|
||||
title: UnstructuredLogEvent | MetricEvent | StructuredLogEvent
|
||||
MetricInResponse:
|
||||
description: A metric value included in API responses.
|
||||
properties:
|
||||
|
|
@ -11892,236 +11461,6 @@ components:
|
|||
- logger_config
|
||||
title: PostTrainingRLHFRequest
|
||||
type: object
|
||||
Span:
|
||||
description: A span representing a single operation within a trace.
|
||||
properties:
|
||||
span_id:
|
||||
title: Span Id
|
||||
type: string
|
||||
trace_id:
|
||||
title: Trace Id
|
||||
type: string
|
||||
parent_span_id:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
nullable: true
|
||||
name:
|
||||
title: Name
|
||||
type: string
|
||||
start_time:
|
||||
format: date-time
|
||||
title: Start Time
|
||||
type: string
|
||||
end_time:
|
||||
anyOf:
|
||||
- format: date-time
|
||||
type: string
|
||||
- type: 'null'
|
||||
nullable: true
|
||||
attributes:
|
||||
anyOf:
|
||||
- additionalProperties: true
|
||||
type: object
|
||||
- type: 'null'
|
||||
required:
|
||||
- span_id
|
||||
- trace_id
|
||||
- name
|
||||
- start_time
|
||||
title: Span
|
||||
type: object
|
||||
Trace:
|
||||
description: A trace representing the complete execution path of a request across multiple operations.
|
||||
properties:
|
||||
trace_id:
|
||||
title: Trace Id
|
||||
type: string
|
||||
root_span_id:
|
||||
title: Root Span Id
|
||||
type: string
|
||||
start_time:
|
||||
format: date-time
|
||||
title: Start Time
|
||||
type: string
|
||||
end_time:
|
||||
anyOf:
|
||||
- format: date-time
|
||||
type: string
|
||||
- type: 'null'
|
||||
nullable: true
|
||||
required:
|
||||
- trace_id
|
||||
- root_span_id
|
||||
- start_time
|
||||
title: Trace
|
||||
type: object
|
||||
EventType:
|
||||
description: The type of telemetry event being logged.
|
||||
enum:
|
||||
- unstructured_log
|
||||
- structured_log
|
||||
- metric
|
||||
title: EventType
|
||||
type: string
|
||||
StructuredLogType:
|
||||
description: The type of structured log event payload.
|
||||
enum:
|
||||
- span_start
|
||||
- span_end
|
||||
title: StructuredLogType
|
||||
type: string
|
||||
EvalTrace:
|
||||
description: A trace record for evaluation purposes.
|
||||
properties:
|
||||
session_id:
|
||||
title: Session Id
|
||||
type: string
|
||||
step:
|
||||
title: Step
|
||||
type: string
|
||||
input:
|
||||
title: Input
|
||||
type: string
|
||||
output:
|
||||
title: Output
|
||||
type: string
|
||||
expected_output:
|
||||
title: Expected Output
|
||||
type: string
|
||||
required:
|
||||
- session_id
|
||||
- step
|
||||
- input
|
||||
- output
|
||||
- expected_output
|
||||
title: EvalTrace
|
||||
type: object
|
||||
SpanWithStatus:
|
||||
description: A span that includes status information.
|
||||
properties:
|
||||
span_id:
|
||||
title: Span Id
|
||||
type: string
|
||||
trace_id:
|
||||
title: Trace Id
|
||||
type: string
|
||||
parent_span_id:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
nullable: true
|
||||
name:
|
||||
title: Name
|
||||
type: string
|
||||
start_time:
|
||||
format: date-time
|
||||
title: Start Time
|
||||
type: string
|
||||
end_time:
|
||||
anyOf:
|
||||
- format: date-time
|
||||
type: string
|
||||
- type: 'null'
|
||||
nullable: true
|
||||
attributes:
|
||||
anyOf:
|
||||
- additionalProperties: true
|
||||
type: object
|
||||
- type: 'null'
|
||||
status:
|
||||
anyOf:
|
||||
- $ref: '#/components/schemas/SpanStatus'
|
||||
title: SpanStatus
|
||||
- type: 'null'
|
||||
nullable: true
|
||||
title: SpanStatus
|
||||
required:
|
||||
- span_id
|
||||
- trace_id
|
||||
- name
|
||||
- start_time
|
||||
title: SpanWithStatus
|
||||
type: object
|
||||
QueryConditionOp:
|
||||
description: Comparison operators for query conditions.
|
||||
enum:
|
||||
- eq
|
||||
- ne
|
||||
- gt
|
||||
- lt
|
||||
title: QueryConditionOp
|
||||
type: string
|
||||
QueryCondition:
|
||||
description: A condition for filtering query results.
|
||||
properties:
|
||||
key:
|
||||
title: Key
|
||||
type: string
|
||||
op:
|
||||
$ref: '#/components/schemas/QueryConditionOp'
|
||||
value:
|
||||
title: Value
|
||||
required:
|
||||
- key
|
||||
- op
|
||||
- value
|
||||
title: QueryCondition
|
||||
type: object
|
||||
MetricLabel:
|
||||
description: A label associated with a metric.
|
||||
properties:
|
||||
name:
|
||||
title: Name
|
||||
type: string
|
||||
value:
|
||||
title: Value
|
||||
type: string
|
||||
required:
|
||||
- name
|
||||
- value
|
||||
title: MetricLabel
|
||||
type: object
|
||||
MetricDataPoint:
|
||||
description: A single data point in a metric time series.
|
||||
properties:
|
||||
timestamp:
|
||||
title: Timestamp
|
||||
type: integer
|
||||
value:
|
||||
title: Value
|
||||
type: number
|
||||
unit:
|
||||
title: Unit
|
||||
type: string
|
||||
required:
|
||||
- timestamp
|
||||
- value
|
||||
- unit
|
||||
title: MetricDataPoint
|
||||
type: object
|
||||
MetricSeries:
|
||||
description: A time series of metric data points.
|
||||
properties:
|
||||
metric:
|
||||
title: Metric
|
||||
type: string
|
||||
labels:
|
||||
items:
|
||||
$ref: '#/components/schemas/MetricLabel'
|
||||
title: Labels
|
||||
type: array
|
||||
values:
|
||||
items:
|
||||
$ref: '#/components/schemas/MetricDataPoint'
|
||||
title: Values
|
||||
type: array
|
||||
required:
|
||||
- metric
|
||||
- labels
|
||||
- values
|
||||
title: MetricSeries
|
||||
type: object
|
||||
responses:
|
||||
BadRequest400:
|
||||
description: The request was invalid or malformed
|
||||
|
|
|
|||
66741
docs/static/openai-spec-2.3.0.yml
vendored
Normal file
66741
docs/static/openai-spec-2.3.0.yml
vendored
Normal file
File diff suppressed because it is too large
Load diff
475
docs/static/stainless-llama-stack-spec.yaml
vendored
475
docs/static/stainless-llama-stack-spec.yaml
vendored
|
|
@ -2091,6 +2091,7 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/InvokeToolRequest'
|
||||
required: true
|
||||
deprecated: true
|
||||
/v1/tool-runtime/list-tools:
|
||||
get:
|
||||
responses:
|
||||
|
|
@ -2142,6 +2143,7 @@ paths:
|
|||
- $ref: '#/components/schemas/URL'
|
||||
- type: 'null'
|
||||
title: Mcp Endpoint
|
||||
deprecated: true
|
||||
/v1/toolgroups:
|
||||
get:
|
||||
responses:
|
||||
|
|
@ -2168,6 +2170,7 @@ paths:
|
|||
summary: List Tool Groups
|
||||
description: List tool groups with optional provider.
|
||||
operationId: list_tool_groups_v1_toolgroups_get
|
||||
deprecated: true
|
||||
post:
|
||||
responses:
|
||||
'400':
|
||||
|
|
@ -2229,6 +2232,7 @@ paths:
|
|||
schema:
|
||||
type: string
|
||||
description: 'Path parameter: toolgroup_id'
|
||||
deprecated: true
|
||||
delete:
|
||||
responses:
|
||||
'400':
|
||||
|
|
@ -2293,6 +2297,7 @@ paths:
|
|||
- type: string
|
||||
- type: 'null'
|
||||
title: Toolgroup Id
|
||||
deprecated: true
|
||||
/v1/tools/{tool_name}:
|
||||
get:
|
||||
responses:
|
||||
|
|
@ -2326,6 +2331,7 @@ paths:
|
|||
schema:
|
||||
type: string
|
||||
description: 'Path parameter: tool_name'
|
||||
deprecated: true
|
||||
/v1/vector-io/insert:
|
||||
post:
|
||||
responses:
|
||||
|
|
@ -6796,6 +6802,12 @@ components:
|
|||
anyOf:
|
||||
- type: integer
|
||||
- type: 'null'
|
||||
metadata:
|
||||
anyOf:
|
||||
- additionalProperties:
|
||||
type: string
|
||||
type: object
|
||||
- type: 'null'
|
||||
input:
|
||||
items:
|
||||
anyOf:
|
||||
|
|
@ -7199,6 +7211,12 @@ components:
|
|||
anyOf:
|
||||
- type: integer
|
||||
- type: 'null'
|
||||
metadata:
|
||||
anyOf:
|
||||
- additionalProperties:
|
||||
type: string
|
||||
type: object
|
||||
- type: 'null'
|
||||
type: object
|
||||
required:
|
||||
- input
|
||||
|
|
@ -7330,6 +7348,12 @@ components:
|
|||
anyOf:
|
||||
- type: integer
|
||||
- type: 'null'
|
||||
metadata:
|
||||
anyOf:
|
||||
- additionalProperties:
|
||||
type: string
|
||||
type: object
|
||||
- type: 'null'
|
||||
type: object
|
||||
required:
|
||||
- created_at
|
||||
|
|
@ -12180,227 +12204,6 @@ components:
|
|||
- $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
|
||||
title: OpenAIResponseContentPartReasoningText
|
||||
title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
|
||||
SpanEndPayload:
|
||||
description: Payload for a span end event.
|
||||
properties:
|
||||
type:
|
||||
const: span_end
|
||||
default: span_end
|
||||
title: Type
|
||||
type: string
|
||||
status:
|
||||
$ref: '#/components/schemas/SpanStatus'
|
||||
required:
|
||||
- status
|
||||
title: SpanEndPayload
|
||||
type: object
|
||||
SpanStartPayload:
|
||||
description: Payload for a span start event.
|
||||
properties:
|
||||
type:
|
||||
const: span_start
|
||||
default: span_start
|
||||
title: Type
|
||||
type: string
|
||||
name:
|
||||
title: Name
|
||||
type: string
|
||||
parent_span_id:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
nullable: true
|
||||
required:
|
||||
- name
|
||||
title: SpanStartPayload
|
||||
type: object
|
||||
SpanStatus:
|
||||
description: The status of a span indicating whether it completed successfully or with an error.
|
||||
enum:
|
||||
- ok
|
||||
- error
|
||||
title: SpanStatus
|
||||
type: string
|
||||
StructuredLogPayload:
|
||||
discriminator:
|
||||
mapping:
|
||||
span_end: '#/components/schemas/SpanEndPayload'
|
||||
span_start: '#/components/schemas/SpanStartPayload'
|
||||
propertyName: type
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/SpanStartPayload'
|
||||
title: SpanStartPayload
|
||||
- $ref: '#/components/schemas/SpanEndPayload'
|
||||
title: SpanEndPayload
|
||||
title: SpanStartPayload | SpanEndPayload
|
||||
LogSeverity:
|
||||
description: The severity level of a log message.
|
||||
enum:
|
||||
- verbose
|
||||
- debug
|
||||
- info
|
||||
- warn
|
||||
- error
|
||||
- critical
|
||||
title: LogSeverity
|
||||
type: string
|
||||
MetricEvent:
|
||||
description: A metric event containing a measured value.
|
||||
properties:
|
||||
trace_id:
|
||||
title: Trace Id
|
||||
type: string
|
||||
span_id:
|
||||
title: Span Id
|
||||
type: string
|
||||
timestamp:
|
||||
format: date-time
|
||||
title: Timestamp
|
||||
type: string
|
||||
attributes:
|
||||
anyOf:
|
||||
- additionalProperties:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: integer
|
||||
- type: number
|
||||
- type: boolean
|
||||
- type: 'null'
|
||||
title: string | ... (4 variants)
|
||||
type: object
|
||||
- type: 'null'
|
||||
type:
|
||||
const: metric
|
||||
default: metric
|
||||
title: Type
|
||||
type: string
|
||||
metric:
|
||||
title: Metric
|
||||
type: string
|
||||
value:
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: number
|
||||
title: integer | number
|
||||
unit:
|
||||
title: Unit
|
||||
type: string
|
||||
required:
|
||||
- trace_id
|
||||
- span_id
|
||||
- timestamp
|
||||
- metric
|
||||
- value
|
||||
- unit
|
||||
title: MetricEvent
|
||||
type: object
|
||||
StructuredLogEvent:
|
||||
description: A structured log event containing typed payload data.
|
||||
properties:
|
||||
trace_id:
|
||||
title: Trace Id
|
||||
type: string
|
||||
span_id:
|
||||
title: Span Id
|
||||
type: string
|
||||
timestamp:
|
||||
format: date-time
|
||||
title: Timestamp
|
||||
type: string
|
||||
attributes:
|
||||
anyOf:
|
||||
- additionalProperties:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: integer
|
||||
- type: number
|
||||
- type: boolean
|
||||
- type: 'null'
|
||||
title: string | ... (4 variants)
|
||||
type: object
|
||||
- type: 'null'
|
||||
type:
|
||||
const: structured_log
|
||||
default: structured_log
|
||||
title: Type
|
||||
type: string
|
||||
payload:
|
||||
discriminator:
|
||||
mapping:
|
||||
span_end: '#/components/schemas/SpanEndPayload'
|
||||
span_start: '#/components/schemas/SpanStartPayload'
|
||||
propertyName: type
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/SpanStartPayload'
|
||||
title: SpanStartPayload
|
||||
- $ref: '#/components/schemas/SpanEndPayload'
|
||||
title: SpanEndPayload
|
||||
title: SpanStartPayload | SpanEndPayload
|
||||
required:
|
||||
- trace_id
|
||||
- span_id
|
||||
- timestamp
|
||||
- payload
|
||||
title: StructuredLogEvent
|
||||
type: object
|
||||
UnstructuredLogEvent:
|
||||
description: An unstructured log event containing a simple text message.
|
||||
properties:
|
||||
trace_id:
|
||||
title: Trace Id
|
||||
type: string
|
||||
span_id:
|
||||
title: Span Id
|
||||
type: string
|
||||
timestamp:
|
||||
format: date-time
|
||||
title: Timestamp
|
||||
type: string
|
||||
attributes:
|
||||
anyOf:
|
||||
- additionalProperties:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: integer
|
||||
- type: number
|
||||
- type: boolean
|
||||
- type: 'null'
|
||||
title: string | ... (4 variants)
|
||||
type: object
|
||||
- type: 'null'
|
||||
type:
|
||||
const: unstructured_log
|
||||
default: unstructured_log
|
||||
title: Type
|
||||
type: string
|
||||
message:
|
||||
title: Message
|
||||
type: string
|
||||
severity:
|
||||
$ref: '#/components/schemas/LogSeverity'
|
||||
required:
|
||||
- trace_id
|
||||
- span_id
|
||||
- timestamp
|
||||
- message
|
||||
- severity
|
||||
title: UnstructuredLogEvent
|
||||
type: object
|
||||
Event:
|
||||
discriminator:
|
||||
mapping:
|
||||
metric: '#/components/schemas/MetricEvent'
|
||||
structured_log: '#/components/schemas/StructuredLogEvent'
|
||||
unstructured_log: '#/components/schemas/UnstructuredLogEvent'
|
||||
propertyName: type
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/UnstructuredLogEvent'
|
||||
title: UnstructuredLogEvent
|
||||
- $ref: '#/components/schemas/MetricEvent'
|
||||
title: MetricEvent
|
||||
- $ref: '#/components/schemas/StructuredLogEvent'
|
||||
title: StructuredLogEvent
|
||||
title: UnstructuredLogEvent | MetricEvent | StructuredLogEvent
|
||||
MetricInResponse:
|
||||
description: A metric value included in API responses.
|
||||
properties:
|
||||
|
|
@ -13225,236 +13028,6 @@ components:
|
|||
- logger_config
|
||||
title: PostTrainingRLHFRequest
|
||||
type: object
|
||||
Span:
|
||||
description: A span representing a single operation within a trace.
|
||||
properties:
|
||||
span_id:
|
||||
title: Span Id
|
||||
type: string
|
||||
trace_id:
|
||||
title: Trace Id
|
||||
type: string
|
||||
parent_span_id:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
nullable: true
|
||||
name:
|
||||
title: Name
|
||||
type: string
|
||||
start_time:
|
||||
format: date-time
|
||||
title: Start Time
|
||||
type: string
|
||||
end_time:
|
||||
anyOf:
|
||||
- format: date-time
|
||||
type: string
|
||||
- type: 'null'
|
||||
nullable: true
|
||||
attributes:
|
||||
anyOf:
|
||||
- additionalProperties: true
|
||||
type: object
|
||||
- type: 'null'
|
||||
required:
|
||||
- span_id
|
||||
- trace_id
|
||||
- name
|
||||
- start_time
|
||||
title: Span
|
||||
type: object
|
||||
Trace:
|
||||
description: A trace representing the complete execution path of a request across multiple operations.
|
||||
properties:
|
||||
trace_id:
|
||||
title: Trace Id
|
||||
type: string
|
||||
root_span_id:
|
||||
title: Root Span Id
|
||||
type: string
|
||||
start_time:
|
||||
format: date-time
|
||||
title: Start Time
|
||||
type: string
|
||||
end_time:
|
||||
anyOf:
|
||||
- format: date-time
|
||||
type: string
|
||||
- type: 'null'
|
||||
nullable: true
|
||||
required:
|
||||
- trace_id
|
||||
- root_span_id
|
||||
- start_time
|
||||
title: Trace
|
||||
type: object
|
||||
EventType:
|
||||
description: The type of telemetry event being logged.
|
||||
enum:
|
||||
- unstructured_log
|
||||
- structured_log
|
||||
- metric
|
||||
title: EventType
|
||||
type: string
|
||||
StructuredLogType:
|
||||
description: The type of structured log event payload.
|
||||
enum:
|
||||
- span_start
|
||||
- span_end
|
||||
title: StructuredLogType
|
||||
type: string
|
||||
EvalTrace:
|
||||
description: A trace record for evaluation purposes.
|
||||
properties:
|
||||
session_id:
|
||||
title: Session Id
|
||||
type: string
|
||||
step:
|
||||
title: Step
|
||||
type: string
|
||||
input:
|
||||
title: Input
|
||||
type: string
|
||||
output:
|
||||
title: Output
|
||||
type: string
|
||||
expected_output:
|
||||
title: Expected Output
|
||||
type: string
|
||||
required:
|
||||
- session_id
|
||||
- step
|
||||
- input
|
||||
- output
|
||||
- expected_output
|
||||
title: EvalTrace
|
||||
type: object
|
||||
SpanWithStatus:
|
||||
description: A span that includes status information.
|
||||
properties:
|
||||
span_id:
|
||||
title: Span Id
|
||||
type: string
|
||||
trace_id:
|
||||
title: Trace Id
|
||||
type: string
|
||||
parent_span_id:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
nullable: true
|
||||
name:
|
||||
title: Name
|
||||
type: string
|
||||
start_time:
|
||||
format: date-time
|
||||
title: Start Time
|
||||
type: string
|
||||
end_time:
|
||||
anyOf:
|
||||
- format: date-time
|
||||
type: string
|
||||
- type: 'null'
|
||||
nullable: true
|
||||
attributes:
|
||||
anyOf:
|
||||
- additionalProperties: true
|
||||
type: object
|
||||
- type: 'null'
|
||||
status:
|
||||
anyOf:
|
||||
- $ref: '#/components/schemas/SpanStatus'
|
||||
title: SpanStatus
|
||||
- type: 'null'
|
||||
nullable: true
|
||||
title: SpanStatus
|
||||
required:
|
||||
- span_id
|
||||
- trace_id
|
||||
- name
|
||||
- start_time
|
||||
title: SpanWithStatus
|
||||
type: object
|
||||
QueryConditionOp:
|
||||
description: Comparison operators for query conditions.
|
||||
enum:
|
||||
- eq
|
||||
- ne
|
||||
- gt
|
||||
- lt
|
||||
title: QueryConditionOp
|
||||
type: string
|
||||
QueryCondition:
|
||||
description: A condition for filtering query results.
|
||||
properties:
|
||||
key:
|
||||
title: Key
|
||||
type: string
|
||||
op:
|
||||
$ref: '#/components/schemas/QueryConditionOp'
|
||||
value:
|
||||
title: Value
|
||||
required:
|
||||
- key
|
||||
- op
|
||||
- value
|
||||
title: QueryCondition
|
||||
type: object
|
||||
MetricLabel:
|
||||
description: A label associated with a metric.
|
||||
properties:
|
||||
name:
|
||||
title: Name
|
||||
type: string
|
||||
value:
|
||||
title: Value
|
||||
type: string
|
||||
required:
|
||||
- name
|
||||
- value
|
||||
title: MetricLabel
|
||||
type: object
|
||||
MetricDataPoint:
|
||||
description: A single data point in a metric time series.
|
||||
properties:
|
||||
timestamp:
|
||||
title: Timestamp
|
||||
type: integer
|
||||
value:
|
||||
title: Value
|
||||
type: number
|
||||
unit:
|
||||
title: Unit
|
||||
type: string
|
||||
required:
|
||||
- timestamp
|
||||
- value
|
||||
- unit
|
||||
title: MetricDataPoint
|
||||
type: object
|
||||
MetricSeries:
|
||||
description: A time series of metric data points.
|
||||
properties:
|
||||
metric:
|
||||
title: Metric
|
||||
type: string
|
||||
labels:
|
||||
items:
|
||||
$ref: '#/components/schemas/MetricLabel'
|
||||
title: Labels
|
||||
type: array
|
||||
values:
|
||||
items:
|
||||
$ref: '#/components/schemas/MetricDataPoint'
|
||||
title: Values
|
||||
type: array
|
||||
required:
|
||||
- metric
|
||||
- labels
|
||||
- values
|
||||
title: MetricSeries
|
||||
type: object
|
||||
responses:
|
||||
BadRequest400:
|
||||
description: The request was invalid or malformed
|
||||
|
|
|
|||
|
|
@ -171,10 +171,18 @@ if [[ "$COLLECT_ONLY" == false ]]; then
|
|||
|
||||
# Set MCP host for in-process MCP server tests
|
||||
# - For library client and server mode: localhost (both on same host)
|
||||
# - For docker mode: host.docker.internal (container needs to reach host)
|
||||
# - For docker mode on Linux: localhost (container uses host network, shares network namespace)
|
||||
# - For docker mode on macOS/Windows: host.docker.internal (container uses bridge network)
|
||||
if [[ "$STACK_CONFIG" == docker:* ]]; then
|
||||
export LLAMA_STACK_TEST_MCP_HOST="host.docker.internal"
|
||||
echo "Setting MCP host: host.docker.internal (docker mode)"
|
||||
if [[ "$(uname)" != "Darwin" ]] && [[ "$(uname)" != *"MINGW"* ]]; then
|
||||
# On Linux with host network mode, container shares host network namespace
|
||||
export LLAMA_STACK_TEST_MCP_HOST="localhost"
|
||||
echo "Setting MCP host: localhost (docker mode with host network)"
|
||||
else
|
||||
# On macOS/Windows with bridge network, need special host access
|
||||
export LLAMA_STACK_TEST_MCP_HOST="host.docker.internal"
|
||||
echo "Setting MCP host: host.docker.internal (docker mode with bridge network)"
|
||||
fi
|
||||
else
|
||||
export LLAMA_STACK_TEST_MCP_HOST="localhost"
|
||||
echo "Setting MCP host: localhost (library/server mode)"
|
||||
|
|
|
|||
|
|
@ -8,7 +8,6 @@
|
|||
Schema discovery and collection for OpenAPI generation.
|
||||
"""
|
||||
|
||||
import importlib
|
||||
from typing import Any
|
||||
|
||||
|
||||
|
|
@ -20,23 +19,6 @@ def _ensure_components_schemas(openapi_schema: dict[str, Any]) -> None:
|
|||
openapi_schema["components"]["schemas"] = {}
|
||||
|
||||
|
||||
def _load_extra_schema_modules() -> None:
|
||||
"""
|
||||
Import modules outside llama_stack_api that use schema_utils to register schemas.
|
||||
|
||||
The API package already imports its submodules via __init__, but server-side modules
|
||||
like telemetry need to be imported explicitly so their decorator side effects run.
|
||||
"""
|
||||
extra_modules = [
|
||||
"llama_stack.core.telemetry.telemetry",
|
||||
]
|
||||
for module_name in extra_modules:
|
||||
try:
|
||||
importlib.import_module(module_name)
|
||||
except ImportError:
|
||||
continue
|
||||
|
||||
|
||||
def _extract_and_fix_defs(schema: dict[str, Any], openapi_schema: dict[str, Any]) -> None:
|
||||
"""
|
||||
Extract $defs from a schema, move them to components/schemas, and fix references.
|
||||
|
|
@ -79,9 +61,6 @@ def _ensure_json_schema_types_included(openapi_schema: dict[str, Any]) -> dict[s
|
|||
iter_registered_schema_types,
|
||||
)
|
||||
|
||||
# Import extra modules (e.g., telemetry) whose schema registrations live outside llama_stack_api
|
||||
_load_extra_schema_modules()
|
||||
|
||||
# Handle explicitly registered schemas first (union types, Annotated structs, etc.)
|
||||
for registration_info in iter_registered_schema_types():
|
||||
schema_type = registration_info.type
|
||||
|
|
|
|||
|
|
@ -1,11 +1,24 @@
|
|||
{
|
||||
"annotations": {
|
||||
"list": []
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "grafana",
|
||||
"uid": "-- Grafana --"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"id": null,
|
||||
"id": 1,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
|
|
@ -16,11 +29,40 @@
|
|||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"fillOpacity": 10
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
|
|
@ -32,7 +74,8 @@
|
|||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
|
|
@ -40,15 +83,16 @@
|
|||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"id": 2,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "table",
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"maxHeight": 600,
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
|
|
@ -59,9 +103,112 @@
|
|||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"expr": "llama_stack_completion_tokens_total",
|
||||
"legendFormat": "{{model_id}} ({{provider_id}})",
|
||||
"refId": "A"
|
||||
"disableTextWrap": false,
|
||||
"editorMode": "builder",
|
||||
"expr": "sum by(gen_ai_request_model) (llama_stack_gen_ai_client_token_usage_sum{gen_ai_token_type=\"input\"})",
|
||||
"fullMetaSearch": false,
|
||||
"includeNullMetadata": true,
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A",
|
||||
"useBackend": false
|
||||
}
|
||||
],
|
||||
"title": "Prompt Tokens",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"maxHeight": 600,
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"disableTextWrap": false,
|
||||
"editorMode": "builder",
|
||||
"exemplar": false,
|
||||
"expr": "sum by(gen_ai_request_model) (llama_stack_gen_ai_client_token_usage_sum{gen_ai_token_type=\"output\"})",
|
||||
"fullMetaSearch": false,
|
||||
"includeNullMetadata": true,
|
||||
"interval": "",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A",
|
||||
"useBackend": false
|
||||
}
|
||||
],
|
||||
"title": "Completion Tokens",
|
||||
|
|
@ -74,78 +221,40 @@
|
|||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"fillOpacity": 10
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"expr": "llama_stack_prompt_tokens_total",
|
||||
"legendFormat": "Prompt - {{model_id}}",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"expr": "llama_stack_tokens_total",
|
||||
"legendFormat": "Total - {{model_id}}",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Prompt & Total Tokens",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"drawStyle": "line",
|
||||
"lineInterpolation": "linear",
|
||||
"showPoints": "auto",
|
||||
"fillOpacity": 10
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
|
|
@ -158,7 +267,8 @@
|
|||
]
|
||||
},
|
||||
"unit": "ms"
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
|
|
@ -175,6 +285,7 @@
|
|||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"maxHeight": 600,
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
|
|
@ -219,7 +330,8 @@
|
|||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
|
|
@ -240,8 +352,11 @@
|
|||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
"showPercentChange": false,
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "11.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
|
@ -272,7 +387,8 @@
|
|||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
|
|
@ -293,8 +409,11 @@
|
|||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
"showPercentChange": false,
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "11.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
|
@ -315,11 +434,40 @@
|
|||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"fillOpacity": 10
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
|
|
@ -332,7 +480,8 @@
|
|||
]
|
||||
},
|
||||
"unit": "reqps"
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
|
|
@ -349,6 +498,7 @@
|
|||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"maxHeight": 600,
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
|
|
@ -374,11 +524,40 @@
|
|||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"fillOpacity": 10
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
|
|
@ -391,7 +570,8 @@
|
|||
]
|
||||
},
|
||||
"unit": "Bps"
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
|
|
@ -408,6 +588,7 @@
|
|||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"maxHeight": 600,
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
|
|
@ -437,7 +618,7 @@
|
|||
}
|
||||
],
|
||||
"refresh": "5s",
|
||||
"schemaVersion": 38,
|
||||
"schemaVersion": 39,
|
||||
"tags": [
|
||||
"llama-stack"
|
||||
],
|
||||
|
|
@ -445,13 +626,14 @@
|
|||
"list": []
|
||||
},
|
||||
"time": {
|
||||
"from": "now-15m",
|
||||
"from": "now-3h",
|
||||
"to": "now"
|
||||
},
|
||||
"timeRangeUpdatedDuringEditOrView": false,
|
||||
"timepicker": {},
|
||||
"timezone": "browser",
|
||||
"title": "Llama Stack Metrics",
|
||||
"uid": "llama-stack-metrics",
|
||||
"version": 0,
|
||||
"version": 17,
|
||||
"weekStart": ""
|
||||
}
|
||||
|
|
|
|||
|
|
@ -191,22 +191,6 @@ class DistributionSpec(BaseModel):
|
|||
)
|
||||
|
||||
|
||||
class TelemetryConfig(BaseModel):
|
||||
"""
|
||||
Configuration for telemetry.
|
||||
|
||||
Llama Stack uses OpenTelemetry for telemetry. Please refer to https://opentelemetry.io/docs/languages/sdk-configuration/
|
||||
for env variables to configure the OpenTelemetry SDK.
|
||||
|
||||
Example:
|
||||
```bash
|
||||
OTEL_SERVICE_NAME=llama-stack OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 uv run llama stack run starter
|
||||
```
|
||||
"""
|
||||
|
||||
enabled: bool = Field(default=False, description="enable or disable telemetry")
|
||||
|
||||
|
||||
class OAuth2JWKSConfig(BaseModel):
|
||||
# The JWKS URI for collecting public keys
|
||||
uri: str
|
||||
|
|
@ -527,8 +511,6 @@ can be instantiated multiple times (with different configs) if necessary.
|
|||
|
||||
logging: LoggingConfig | None = Field(default=None, description="Configuration for Llama Stack Logging")
|
||||
|
||||
telemetry: TelemetryConfig = Field(default_factory=TelemetryConfig, description="Configuration for telemetry")
|
||||
|
||||
server: ServerConfig = Field(
|
||||
default_factory=ServerConfig,
|
||||
description="Configuration for the HTTP(S) server",
|
||||
|
|
|
|||
|
|
@ -46,8 +46,6 @@ from llama_stack.core.request_headers import PROVIDER_DATA_VAR, request_provider
|
|||
from llama_stack.core.resolver import ProviderRegistry
|
||||
from llama_stack.core.server.routes import RouteImpls, find_matching_route, initialize_route_impls
|
||||
from llama_stack.core.stack import Stack, get_stack_run_config_from_distro, replace_env_vars
|
||||
from llama_stack.core.telemetry import Telemetry
|
||||
from llama_stack.core.telemetry.tracing import CURRENT_TRACE_CONTEXT, end_trace, setup_logger, start_trace
|
||||
from llama_stack.core.utils.config import redact_sensitive_fields
|
||||
from llama_stack.core.utils.context import preserve_contexts_async_generator
|
||||
from llama_stack.core.utils.exec import in_notebook
|
||||
|
|
@ -204,13 +202,6 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
|
|||
super().__init__()
|
||||
# Initialize logging from environment variables first
|
||||
setup_logging()
|
||||
|
||||
# when using the library client, we should not log to console since many
|
||||
# of our logs are intended for server-side usage
|
||||
if sinks_from_env := os.environ.get("TELEMETRY_SINKS", None):
|
||||
current_sinks = sinks_from_env.strip().lower().split(",")
|
||||
os.environ["TELEMETRY_SINKS"] = ",".join(sink for sink in current_sinks if sink != "console")
|
||||
|
||||
if in_notebook():
|
||||
import nest_asyncio
|
||||
|
||||
|
|
@ -295,8 +286,6 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
|
|||
raise _e
|
||||
|
||||
assert self.impls is not None
|
||||
if self.config.telemetry.enabled:
|
||||
setup_logger(Telemetry())
|
||||
|
||||
if not os.environ.get("PYTEST_CURRENT_TEST"):
|
||||
console = Console()
|
||||
|
|
@ -392,13 +381,7 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
|
|||
body, field_names = self._handle_file_uploads(options, body)
|
||||
|
||||
body = self._convert_body(matched_func, body, exclude_params=set(field_names))
|
||||
|
||||
trace_path = webmethod.descriptive_name or route_path
|
||||
await start_trace(trace_path, {"__location__": "library_client"})
|
||||
try:
|
||||
result = await matched_func(**body)
|
||||
finally:
|
||||
await end_trace()
|
||||
result = await matched_func(**body)
|
||||
|
||||
# Handle FastAPI Response objects (e.g., from file content retrieval)
|
||||
if isinstance(result, FastAPIResponse):
|
||||
|
|
@ -457,19 +440,13 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
|
|||
# Prepare body for the function call (handles both Pydantic and traditional params)
|
||||
body = self._convert_body(func, body)
|
||||
|
||||
trace_path = webmethod.descriptive_name or route_path
|
||||
await start_trace(trace_path, {"__location__": "library_client"})
|
||||
|
||||
async def gen():
|
||||
try:
|
||||
async for chunk in await func(**body):
|
||||
data = json.dumps(convert_pydantic_to_json_value(chunk))
|
||||
sse_event = f"data: {data}\n\n"
|
||||
yield sse_event.encode("utf-8")
|
||||
finally:
|
||||
await end_trace()
|
||||
async for chunk in await func(**body):
|
||||
data = json.dumps(convert_pydantic_to_json_value(chunk))
|
||||
sse_event = f"data: {data}\n\n"
|
||||
yield sse_event.encode("utf-8")
|
||||
|
||||
wrapped_gen = preserve_contexts_async_generator(gen(), [CURRENT_TRACE_CONTEXT, PROVIDER_DATA_VAR])
|
||||
wrapped_gen = preserve_contexts_async_generator(gen(), [PROVIDER_DATA_VAR])
|
||||
|
||||
mock_response = httpx.Response(
|
||||
status_code=httpx.codes.OK,
|
||||
|
|
|
|||
|
|
@ -392,8 +392,6 @@ async def instantiate_provider(
|
|||
args = [config, deps]
|
||||
if "policy" in inspect.signature(getattr(module, method)).parameters:
|
||||
args.append(policy)
|
||||
if "telemetry_enabled" in inspect.signature(getattr(module, method)).parameters and run_config.telemetry:
|
||||
args.append(run_config.telemetry.enabled)
|
||||
|
||||
fn = getattr(module, method)
|
||||
impl = await fn(*args)
|
||||
|
|
@ -401,18 +399,6 @@ async def instantiate_provider(
|
|||
impl.__provider_spec__ = provider_spec
|
||||
impl.__provider_config__ = config
|
||||
|
||||
# Apply tracing if telemetry is enabled and any base class has __marked_for_tracing__ marker
|
||||
if run_config.telemetry.enabled:
|
||||
traced_classes = [
|
||||
base for base in reversed(impl.__class__.__mro__) if getattr(base, "__marked_for_tracing__", False)
|
||||
]
|
||||
|
||||
if traced_classes:
|
||||
from llama_stack.core.telemetry.trace_protocol import trace_protocol
|
||||
|
||||
for cls in traced_classes:
|
||||
trace_protocol(cls)
|
||||
|
||||
protocols = api_protocol_map_for_compliance_check(run_config)
|
||||
additional_protocols = additional_protocols_map()
|
||||
# TODO: check compliance for special tool groups
|
||||
|
|
|
|||
|
|
@ -85,8 +85,6 @@ async def get_auto_router_impl(
|
|||
)
|
||||
await inference_store.initialize()
|
||||
api_to_dep_impl["store"] = inference_store
|
||||
api_to_dep_impl["telemetry_enabled"] = run_config.telemetry.enabled
|
||||
|
||||
elif api == Api.vector_io:
|
||||
api_to_dep_impl["vector_stores_config"] = run_config.vector_stores
|
||||
elif api == Api.safety:
|
||||
|
|
|
|||
|
|
@ -7,7 +7,6 @@
|
|||
import asyncio
|
||||
import time
|
||||
from collections.abc import AsyncIterator
|
||||
from datetime import UTC, datetime
|
||||
from typing import Annotated, Any
|
||||
|
||||
from fastapi import Body
|
||||
|
|
@ -15,11 +14,7 @@ from openai.types.chat import ChatCompletionToolChoiceOptionParam as OpenAIChatC
|
|||
from openai.types.chat import ChatCompletionToolParam as OpenAIChatCompletionToolParam
|
||||
from pydantic import TypeAdapter
|
||||
|
||||
from llama_stack.core.telemetry.telemetry import MetricEvent
|
||||
from llama_stack.core.telemetry.tracing import enqueue_event, get_current_span
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.models.llama.llama3.chat_format import ChatFormat
|
||||
from llama_stack.models.llama.llama3.tokenizer import Tokenizer
|
||||
from llama_stack.providers.utils.inference.inference_store import InferenceStore
|
||||
from llama_stack_api import (
|
||||
HealthResponse,
|
||||
|
|
@ -60,15 +55,10 @@ class InferenceRouter(Inference):
|
|||
self,
|
||||
routing_table: RoutingTable,
|
||||
store: InferenceStore | None = None,
|
||||
telemetry_enabled: bool = False,
|
||||
) -> None:
|
||||
logger.debug("Initializing InferenceRouter")
|
||||
self.routing_table = routing_table
|
||||
self.telemetry_enabled = telemetry_enabled
|
||||
self.store = store
|
||||
if self.telemetry_enabled:
|
||||
self.tokenizer = Tokenizer.get_instance()
|
||||
self.formatter = ChatFormat(self.tokenizer)
|
||||
|
||||
async def initialize(self) -> None:
|
||||
logger.debug("InferenceRouter.initialize")
|
||||
|
|
@ -94,54 +84,6 @@ class InferenceRouter(Inference):
|
|||
)
|
||||
await self.routing_table.register_model(model_id, provider_model_id, provider_id, metadata, model_type)
|
||||
|
||||
def _construct_metrics(
|
||||
self,
|
||||
prompt_tokens: int,
|
||||
completion_tokens: int,
|
||||
total_tokens: int,
|
||||
fully_qualified_model_id: str,
|
||||
provider_id: str,
|
||||
) -> list[MetricEvent]:
|
||||
"""Constructs a list of MetricEvent objects containing token usage metrics.
|
||||
|
||||
Args:
|
||||
prompt_tokens: Number of tokens in the prompt
|
||||
completion_tokens: Number of tokens in the completion
|
||||
total_tokens: Total number of tokens used
|
||||
fully_qualified_model_id:
|
||||
provider_id: The provider identifier
|
||||
|
||||
Returns:
|
||||
List of MetricEvent objects with token usage metrics
|
||||
"""
|
||||
span = get_current_span()
|
||||
if span is None:
|
||||
logger.warning("No span found for token usage metrics")
|
||||
return []
|
||||
|
||||
metrics = [
|
||||
("prompt_tokens", prompt_tokens),
|
||||
("completion_tokens", completion_tokens),
|
||||
("total_tokens", total_tokens),
|
||||
]
|
||||
metric_events = []
|
||||
for metric_name, value in metrics:
|
||||
metric_events.append(
|
||||
MetricEvent(
|
||||
trace_id=span.trace_id,
|
||||
span_id=span.span_id,
|
||||
metric=metric_name,
|
||||
value=value,
|
||||
timestamp=datetime.now(UTC),
|
||||
unit="tokens",
|
||||
attributes={
|
||||
"model_id": fully_qualified_model_id,
|
||||
"provider_id": provider_id,
|
||||
},
|
||||
)
|
||||
)
|
||||
return metric_events
|
||||
|
||||
async def _get_model_provider(self, model_id: str, expected_model_type: str) -> tuple[Inference, str]:
|
||||
model = await self.routing_table.get_object_by_identifier("model", model_id)
|
||||
if model:
|
||||
|
|
@ -186,26 +128,9 @@ class InferenceRouter(Inference):
|
|||
|
||||
if params.stream:
|
||||
return await provider.openai_completion(params)
|
||||
# TODO: Metrics do NOT work with openai_completion stream=True due to the fact
|
||||
# that we do not return an AsyncIterator, our tests expect a stream of chunks we cannot intercept currently.
|
||||
|
||||
response = await provider.openai_completion(params)
|
||||
response.model = request_model_id
|
||||
if self.telemetry_enabled and response.usage is not None:
|
||||
metrics = self._construct_metrics(
|
||||
prompt_tokens=response.usage.prompt_tokens,
|
||||
completion_tokens=response.usage.completion_tokens,
|
||||
total_tokens=response.usage.total_tokens,
|
||||
fully_qualified_model_id=request_model_id,
|
||||
provider_id=provider.__provider_id__,
|
||||
)
|
||||
for metric in metrics:
|
||||
enqueue_event(metric)
|
||||
|
||||
# these metrics will show up in the client response.
|
||||
response.metrics = (
|
||||
metrics if not hasattr(response, "metrics") or response.metrics is None else response.metrics + metrics
|
||||
)
|
||||
return response
|
||||
|
||||
async def openai_chat_completion(
|
||||
|
|
@ -254,20 +179,6 @@ class InferenceRouter(Inference):
|
|||
if self.store:
|
||||
asyncio.create_task(self.store.store_chat_completion(response, params.messages))
|
||||
|
||||
if self.telemetry_enabled and response.usage is not None:
|
||||
metrics = self._construct_metrics(
|
||||
prompt_tokens=response.usage.prompt_tokens,
|
||||
completion_tokens=response.usage.completion_tokens,
|
||||
total_tokens=response.usage.total_tokens,
|
||||
fully_qualified_model_id=request_model_id,
|
||||
provider_id=provider.__provider_id__,
|
||||
)
|
||||
for metric in metrics:
|
||||
enqueue_event(metric)
|
||||
# these metrics will show up in the client response.
|
||||
response.metrics = (
|
||||
metrics if not hasattr(response, "metrics") or response.metrics is None else response.metrics + metrics
|
||||
)
|
||||
return response
|
||||
|
||||
async def openai_embeddings(
|
||||
|
|
@ -411,18 +322,6 @@ class InferenceRouter(Inference):
|
|||
for choice_data in choices_data.values():
|
||||
completion_text += "".join(choice_data["content_parts"])
|
||||
|
||||
# Add metrics to the chunk
|
||||
if self.telemetry_enabled and hasattr(chunk, "usage") and chunk.usage:
|
||||
metrics = self._construct_metrics(
|
||||
prompt_tokens=chunk.usage.prompt_tokens,
|
||||
completion_tokens=chunk.usage.completion_tokens,
|
||||
total_tokens=chunk.usage.total_tokens,
|
||||
fully_qualified_model_id=fully_qualified_model_id,
|
||||
provider_id=provider_id,
|
||||
)
|
||||
for metric in metrics:
|
||||
enqueue_event(metric)
|
||||
|
||||
yield chunk
|
||||
finally:
|
||||
# Store the final assembled completion
|
||||
|
|
|
|||
|
|
@ -6,11 +6,15 @@
|
|||
|
||||
from typing import Any
|
||||
|
||||
from opentelemetry import trace
|
||||
|
||||
from llama_stack.core.datatypes import SafetyConfig
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.telemetry.helpers import safety_request_span_attributes, safety_span_name
|
||||
from llama_stack_api import ModerationObject, OpenAIMessageParam, RoutingTable, RunShieldResponse, Safety, Shield
|
||||
|
||||
logger = get_logger(name=__name__, category="core::routers")
|
||||
tracer = trace.get_tracer(__name__)
|
||||
|
||||
|
||||
class SafetyRouter(Safety):
|
||||
|
|
@ -51,13 +55,17 @@ class SafetyRouter(Safety):
|
|||
messages: list[OpenAIMessageParam],
|
||||
params: dict[str, Any] = None,
|
||||
) -> RunShieldResponse:
|
||||
logger.debug(f"SafetyRouter.run_shield: {shield_id}")
|
||||
provider = await self.routing_table.get_provider_impl(shield_id)
|
||||
return await provider.run_shield(
|
||||
shield_id=shield_id,
|
||||
messages=messages,
|
||||
params=params,
|
||||
)
|
||||
with tracer.start_as_current_span(name=safety_span_name(shield_id)):
|
||||
logger.debug(f"SafetyRouter.run_shield: {shield_id}")
|
||||
provider = await self.routing_table.get_provider_impl(shield_id)
|
||||
response = await provider.run_shield(
|
||||
shield_id=shield_id,
|
||||
messages=messages,
|
||||
params=params,
|
||||
)
|
||||
|
||||
safety_request_span_attributes(shield_id, messages, response)
|
||||
return response
|
||||
|
||||
async def run_moderation(self, input: str | list[str], model: str | None = None) -> ModerationObject:
|
||||
list_shields_response = await self.routing_table.list_shields()
|
||||
|
|
|
|||
|
|
@ -50,8 +50,6 @@ from llama_stack.core.stack import (
|
|||
cast_image_name_to_string,
|
||||
replace_env_vars,
|
||||
)
|
||||
from llama_stack.core.telemetry import Telemetry
|
||||
from llama_stack.core.telemetry.tracing import CURRENT_TRACE_CONTEXT, setup_logger
|
||||
from llama_stack.core.utils.config import redact_sensitive_fields
|
||||
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
|
||||
from llama_stack.core.utils.context import preserve_contexts_async_generator
|
||||
|
|
@ -60,7 +58,6 @@ from llama_stack_api import Api, ConflictError, PaginatedResponse, ResourceNotFo
|
|||
|
||||
from .auth import AuthenticationMiddleware
|
||||
from .quota import QuotaMiddleware
|
||||
from .tracing import TracingMiddleware
|
||||
|
||||
REPO_ROOT = Path(__file__).parent.parent.parent.parent
|
||||
|
||||
|
|
@ -263,7 +260,7 @@ def create_dynamic_typed_route(func: Any, method: str, route: str) -> Callable:
|
|||
|
||||
try:
|
||||
if is_streaming:
|
||||
context_vars = [CURRENT_TRACE_CONTEXT, PROVIDER_DATA_VAR]
|
||||
context_vars = [PROVIDER_DATA_VAR]
|
||||
if test_context_var is not None:
|
||||
context_vars.append(test_context_var)
|
||||
gen = preserve_contexts_async_generator(sse_generator(func(**kwargs)), context_vars)
|
||||
|
|
@ -441,9 +438,6 @@ def create_app() -> StackApp:
|
|||
if cors_config:
|
||||
app.add_middleware(CORSMiddleware, **cors_config.model_dump())
|
||||
|
||||
if config.telemetry.enabled:
|
||||
setup_logger(Telemetry())
|
||||
|
||||
# Load external APIs if configured
|
||||
external_apis = load_external_apis(config)
|
||||
all_routes = get_all_api_routes(external_apis)
|
||||
|
|
@ -500,9 +494,6 @@ def create_app() -> StackApp:
|
|||
app.exception_handler(RequestValidationError)(global_exception_handler)
|
||||
app.exception_handler(Exception)(global_exception_handler)
|
||||
|
||||
if config.telemetry.enabled:
|
||||
app.add_middleware(TracingMiddleware, impls=impls, external_apis=external_apis)
|
||||
|
||||
return app
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,80 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
from aiohttp import hdrs
|
||||
|
||||
from llama_stack.core.external import ExternalApiSpec
|
||||
from llama_stack.core.server.routes import find_matching_route, initialize_route_impls
|
||||
from llama_stack.core.telemetry.tracing import end_trace, start_trace
|
||||
from llama_stack.log import get_logger
|
||||
|
||||
logger = get_logger(name=__name__, category="core::server")
|
||||
|
||||
|
||||
class TracingMiddleware:
    """ASGI middleware that wraps requests to known API routes in a trace.

    Lifespan events, FastAPI built-in paths and requests that match no registered
    route are forwarded to the wrapped app untouched. For a matched route a trace
    is started before the app runs and ended afterwards, and the trace id is sent
    back to the client in an ``x-trace-id`` response header.

    :param app: The wrapped ASGI application
    :param impls: API implementations passed to ``initialize_route_impls``
    :param external_apis: External API specs passed to ``initialize_route_impls``
    """

    def __init__(self, app, impls, external_apis: dict[str, ExternalApiSpec]):
        self.app = app
        self.impls = impls
        self.external_apis = external_apis
        # FastAPI built-in paths that should bypass custom routing
        self.fastapi_paths = ("/docs", "/redoc", "/openapi.json", "/favicon.ico", "/static")

    async def __call__(self, scope, receive, send):
        """Handle one ASGI event, tracing it when it maps to a registered route."""
        if scope.get("type") == "lifespan":
            return await self.app(scope, receive, send)

        path = scope.get("path", "")

        # Check if the path is a FastAPI built-in path
        if path.startswith(self.fastapi_paths):
            # Pass through to FastAPI's built-in handlers
            logger.debug(f"Bypassing custom routing for FastAPI built-in path: {path}")
            return await self.app(scope, receive, send)

        # The route table is built lazily on the first request that needs it.
        if not hasattr(self, "route_impls"):
            self.route_impls = initialize_route_impls(self.impls, self.external_apis)

        try:
            _, _, route_path, webmethod = find_matching_route(
                scope.get("method", hdrs.METH_GET), path, self.route_impls
            )
        except ValueError:
            # If no matching endpoint is found, pass through to FastAPI
            logger.debug(f"No matching route found for path: {path}, falling back to FastAPI")
            return await self.app(scope, receive, send)

        # Log deprecation warning if route is deprecated
        if getattr(webmethod, "deprecated", False):
            logger.warning(
                f"DEPRECATED ROUTE USED: {scope.get('method', 'GET')} {path} - "
                f"This route is deprecated and may be removed in a future version. "
                f"Please check the docs for the supported version."
            )

        trace_attributes = {"__location__": "server", "raw_path": path}

        # Extract W3C trace context headers and store as trace attributes.
        # Header values arrive as raw client-controlled bytes. latin-1 maps every
        # byte to a character, so a malformed value can no longer abort the request
        # with a UnicodeDecodeError (valid ASCII values decode exactly as before).
        headers = dict(scope.get("headers", []))
        traceparent = headers.get(b"traceparent", b"").decode("latin-1")
        if traceparent:
            trace_attributes["traceparent"] = traceparent
        tracestate = headers.get(b"tracestate", b"").decode("latin-1")
        if tracestate:
            trace_attributes["tracestate"] = tracestate

        trace_path = webmethod.descriptive_name or route_path
        trace_context = await start_trace(trace_path, trace_attributes)

        async def send_with_trace_id(message):
            if message["type"] == "http.response.start":
                # Work on a copy so the sender's own header list is not mutated
                # (and so an immutable header sequence is also accepted).
                response_headers = list(message.get("headers", []))
                response_headers.append([b"x-trace-id", str(trace_context.trace_id).encode()])
                message["headers"] = response_headers
            await send(message)

        try:
            return await self.app(scope, receive, send_with_trace_id)
        finally:
            # Always close the trace, including when the app raises.
            await end_trace()
|
||||
|
|
@ -1,32 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .telemetry import Telemetry
|
||||
from .trace_protocol import serialize_value, trace_protocol
|
||||
from .tracing import (
|
||||
CURRENT_TRACE_CONTEXT,
|
||||
ROOT_SPAN_MARKERS,
|
||||
end_trace,
|
||||
enqueue_event,
|
||||
get_current_span,
|
||||
setup_logger,
|
||||
span,
|
||||
start_trace,
|
||||
)
|
||||
|
||||
# Public surface of the telemetry package, grouped by the module each name comes from.
__all__ = [
    # .telemetry
    "Telemetry",
    # .trace_protocol
    "trace_protocol",
    "serialize_value",
    # .tracing
    "CURRENT_TRACE_CONTEXT",
    "ROOT_SPAN_MARKERS",
    "end_trace",
    "enqueue_event",
    "get_current_span",
    "setup_logger",
    "span",
    "start_trace",
]
|
||||
|
|
@ -1,629 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import os
|
||||
import threading
|
||||
from collections.abc import Mapping, Sequence
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import (
|
||||
Annotated,
|
||||
Any,
|
||||
Literal,
|
||||
cast,
|
||||
)
|
||||
|
||||
from opentelemetry import metrics, trace
|
||||
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
|
||||
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
|
||||
from opentelemetry.sdk.metrics import MeterProvider
|
||||
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
|
||||
from opentelemetry.sdk.trace import TracerProvider
|
||||
from opentelemetry.sdk.trace.export import BatchSpanProcessor
|
||||
from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.models.llama.datatypes import Primitive
|
||||
from llama_stack_api import json_schema_type, register_schema
|
||||
|
||||
ROOT_SPAN_MARKERS = ["__root__", "__root_span__"]
|
||||
|
||||
# Type alias for OpenTelemetry attribute values (excludes None)
|
||||
AttributeValue = str | bool | int | float | Sequence[str] | Sequence[bool] | Sequence[int] | Sequence[float]
|
||||
Attributes = Mapping[str, AttributeValue]
|
||||
|
||||
|
||||
@json_schema_type
class SpanStatus(Enum):
    """Final outcome of a span: success or failure.

    :cvar OK: Span completed successfully without errors
    :cvar ERROR: Span completed with an error or failure
    """

    OK = "ok"
    ERROR = "error"
|
||||
|
||||
|
||||
@json_schema_type
class Span(BaseModel):
    """One operation recorded inside a trace.

    :param span_id: Unique identifier for the span
    :param trace_id: Unique identifier for the trace this span belongs to
    :param parent_span_id: (Optional) Unique identifier for the parent span, if this is a child span
    :param name: Human-readable name describing the operation this span represents
    :param start_time: Timestamp when the operation began
    :param end_time: (Optional) Timestamp when the operation finished, if completed
    :param attributes: (Optional) Key-value pairs containing additional metadata about the span
    """

    span_id: str
    trace_id: str
    parent_span_id: str | None = None
    name: str
    start_time: datetime
    end_time: datetime | None = None
    # Each instance gets its own empty dict by default.
    attributes: dict[str, Any] | None = Field(default_factory=dict)

    def set_attribute(self, key: str, value: Any):
        """Store ``value`` under ``key``, creating the attribute dict if it is None."""
        if self.attributes is None:
            self.attributes = {}
        self.attributes[key] = value
|
||||
|
||||
|
||||
@json_schema_type
class Trace(BaseModel):
    """The full execution path of one request, made up of spans.

    :param trace_id: Unique identifier for the trace
    :param root_span_id: Unique identifier for the root span that started this trace
    :param start_time: Timestamp when the trace began
    :param end_time: (Optional) Timestamp when the trace finished, if completed
    """

    trace_id: str
    root_span_id: str
    start_time: datetime
    end_time: datetime | None = None
|
||||
|
||||
|
||||
@json_schema_type
class EventType(Enum):
    """Discriminator for the kinds of telemetry event.

    :cvar UNSTRUCTURED_LOG: A simple log message with severity level
    :cvar STRUCTURED_LOG: A structured log event with typed payload data
    :cvar METRIC: A metric measurement with value and unit
    """

    UNSTRUCTURED_LOG = "unstructured_log"
    STRUCTURED_LOG = "structured_log"
    METRIC = "metric"
|
||||
|
||||
|
||||
@json_schema_type
class LogSeverity(Enum):
    """Severity levels attached to unstructured log events.

    :cvar VERBOSE: Detailed diagnostic information for troubleshooting
    :cvar DEBUG: Debug information useful during development
    :cvar INFO: General informational messages about normal operation
    :cvar WARN: Warning messages about potentially problematic situations
    :cvar ERROR: Error messages indicating failures that don't stop execution
    :cvar CRITICAL: Critical error messages indicating severe failures
    """

    VERBOSE = "verbose"
    DEBUG = "debug"
    INFO = "info"
    WARN = "warn"
    ERROR = "error"
    CRITICAL = "critical"
|
||||
|
||||
|
||||
class EventCommon(BaseModel):
    """Base fields carried by every telemetry event.

    :param trace_id: Unique identifier for the trace this event belongs to
    :param span_id: Unique identifier for the span this event belongs to
    :param timestamp: Timestamp when the event occurred
    :param attributes: (Optional) Key-value pairs containing additional metadata about the event
    """

    trace_id: str
    span_id: str
    timestamp: datetime
    # Each instance gets its own empty dict by default.
    attributes: dict[str, Primitive] | None = Field(default_factory=dict)
|
||||
|
||||
|
||||
@json_schema_type
class UnstructuredLogEvent(EventCommon):
    """A plain-text log message event.

    :param type: Event type identifier set to UNSTRUCTURED_LOG
    :param message: The log message text
    :param severity: The severity level of the log message
    """

    type: Literal[EventType.UNSTRUCTURED_LOG] = EventType.UNSTRUCTURED_LOG
    message: str
    severity: LogSeverity
|
||||
|
||||
|
||||
@json_schema_type
class MetricEvent(EventCommon):
    """An event reporting one measured metric value.

    :param type: Event type identifier set to METRIC
    :param metric: The name of the metric being measured
    :param value: The numeric value of the metric measurement
    :param unit: The unit of measurement for the metric value
    """

    type: Literal[EventType.METRIC] = EventType.METRIC
    metric: str  # this would be an enum
    value: int | float
    unit: str
|
||||
|
||||
|
||||
@json_schema_type
class StructuredLogType(Enum):
    """Discriminator for structured log payloads.

    :cvar SPAN_START: Event indicating the start of a new span
    :cvar SPAN_END: Event indicating the completion of a span
    """

    SPAN_START = "span_start"
    SPAN_END = "span_end"
|
||||
|
||||
|
||||
@json_schema_type
class SpanStartPayload(BaseModel):
    """Structured payload announcing that a span has started.

    :param type: Payload type identifier set to SPAN_START
    :param name: Human-readable name describing the operation this span represents
    :param parent_span_id: (Optional) Unique identifier for the parent span, if this is a child span
    """

    type: Literal[StructuredLogType.SPAN_START] = StructuredLogType.SPAN_START
    name: str
    parent_span_id: str | None = None
|
||||
|
||||
|
||||
@json_schema_type
class SpanEndPayload(BaseModel):
    """Structured payload announcing that a span has finished.

    :param type: Payload type identifier set to SPAN_END
    :param status: The final status of the span indicating success or failure
    """

    type: Literal[StructuredLogType.SPAN_END] = StructuredLogType.SPAN_END
    status: SpanStatus
|
||||
|
||||
|
||||
# Union of the structured payloads, discriminated on their ``type`` field.
StructuredLogPayload = Annotated[SpanStartPayload | SpanEndPayload, Field(discriminator="type")]
register_schema(StructuredLogPayload, name="StructuredLogPayload")
|
||||
|
||||
|
||||
@json_schema_type
class StructuredLogEvent(EventCommon):
    """A log event whose content is a typed payload.

    :param type: Event type identifier set to STRUCTURED_LOG
    :param payload: The structured payload data for the log event
    """

    type: Literal[EventType.STRUCTURED_LOG] = EventType.STRUCTURED_LOG
    payload: StructuredLogPayload
|
||||
|
||||
|
||||
# Union of all telemetry events, discriminated on their ``type`` field.
Event = Annotated[UnstructuredLogEvent | MetricEvent | StructuredLogEvent, Field(discriminator="type")]
register_schema(Event, name="Event")
|
||||
|
||||
|
||||
@json_schema_type
class EvalTrace(BaseModel):
    """One recorded step of an evaluation run.

    :param session_id: Unique identifier for the evaluation session
    :param step: The evaluation step or phase identifier
    :param input: The input data for the evaluation
    :param output: The actual output produced during evaluation
    :param expected_output: The expected output for comparison during evaluation
    """

    session_id: str
    step: str
    input: str
    output: str
    expected_output: str
|
||||
|
||||
|
||||
@json_schema_type
class SpanWithStatus(Span):
    """A ``Span`` extended with an optional status.

    :param status: (Optional) The current status of the span
    """

    status: SpanStatus | None = None
|
||||
|
||||
|
||||
@json_schema_type
class QueryConditionOp(Enum):
    """Comparison operators usable in a query condition.

    :cvar EQ: Equal to comparison
    :cvar NE: Not equal to comparison
    :cvar GT: Greater than comparison
    :cvar LT: Less than comparison
    """

    EQ = "eq"
    NE = "ne"
    GT = "gt"
    LT = "lt"
|
||||
|
||||
|
||||
@json_schema_type
class QueryCondition(BaseModel):
    """A single key/operator/value filter applied to query results.

    :param key: The attribute key to filter on
    :param op: The comparison operator to apply
    :param value: The value to compare against
    """

    key: str
    op: QueryConditionOp
    value: Any
|
||||
|
||||
|
||||
class QueryTracesResponse(BaseModel):
    """Result envelope for a trace query.

    :param data: List of traces matching the query criteria
    """

    data: list[Trace]
|
||||
|
||||
|
||||
class QuerySpansResponse(BaseModel):
    """Result envelope for a span query.

    :param data: List of spans matching the query criteria
    """

    data: list[Span]
|
||||
|
||||
|
||||
class QuerySpanTreeResponse(BaseModel):
    """Result envelope for a span-tree query.

    :param data: Dictionary mapping span IDs to spans with status information
    """

    data: dict[str, SpanWithStatus]
|
||||
|
||||
|
||||
class MetricQueryType(Enum):
    """How a metric query selects its time window.

    :cvar RANGE: Query metrics over a time range
    :cvar INSTANT: Query metrics at a specific point in time
    """

    RANGE = "range"
    INSTANT = "instant"
|
||||
|
||||
|
||||
class MetricLabelOperator(Enum):
    """Operators available when matching a metric label.

    :cvar EQUALS: Label value must equal the specified value
    :cvar NOT_EQUALS: Label value must not equal the specified value
    :cvar REGEX_MATCH: Label value must match the specified regular expression
    :cvar REGEX_NOT_MATCH: Label value must not match the specified regular expression
    """

    EQUALS = "="
    NOT_EQUALS = "!="
    REGEX_MATCH = "=~"
    REGEX_NOT_MATCH = "!~"
|
||||
|
||||
|
||||
class MetricLabelMatcher(BaseModel):
    """Filter that selects metrics by comparing one label's value.

    :param name: The name of the label to match
    :param value: The value to match against
    :param operator: The comparison operator to use for matching
    """

    name: str
    value: str
    operator: MetricLabelOperator = MetricLabelOperator.EQUALS
|
||||
|
||||
|
||||
@json_schema_type
class MetricLabel(BaseModel):
    """A name/value label attached to a metric.

    :param name: The name of the label
    :param value: The value of the label
    """

    name: str
    value: str
|
||||
|
||||
|
||||
@json_schema_type
class MetricDataPoint(BaseModel):
    """A single data point in a metric time series.

    :param timestamp: Unix timestamp when the metric value was recorded
    :param value: The numeric value of the metric at this timestamp
    :param unit: The unit of measurement for the metric value
    """

    timestamp: int
    value: float
    unit: str
|
||||
|
||||
|
||||
@json_schema_type
class MetricSeries(BaseModel):
    """The data points of one metric together with its labels.

    :param metric: The name of the metric
    :param labels: List of labels associated with this metric series
    :param values: List of data points in chronological order
    """

    metric: str
    labels: list[MetricLabel]
    values: list[MetricDataPoint]
|
||||
|
||||
|
||||
class QueryMetricsResponse(BaseModel):
    """Result envelope for a metrics query.

    :param data: List of metric series matching the query criteria
    """

    data: list[MetricSeries]
|
||||
|
||||
|
||||
# Process-wide registries shared by every Telemetry instance: open spans keyed by
# integer span id, and metric instruments keyed by metric name.
_GLOBAL_STORAGE: dict[str, dict[str | int, Any]] = {
    bucket: {} for bucket in ("active_spans", "counters", "gauges", "up_down_counters", "histograms")
}
# Lock used by Telemetry when it reads or updates the active-span registry.
_global_lock = threading.Lock()
# Set once by the first Telemetry instance that installs a tracer provider.
_TRACER_PROVIDER = None

logger = get_logger(name=__name__, category="telemetry")
|
||||
|
||||
|
||||
def _clean_attributes(attrs: dict[str, Any] | None) -> Attributes | None:
    """Drop entries whose value is None so the dict fits OpenTelemetry's attribute type.

    :param attrs: Attribute dict to filter, or None
    :returns: A new dict without None values, or None when ``attrs`` is None
    """
    if attrs is None:
        return None
    cleaned = {}
    for key, val in attrs.items():
        if val is not None:
            cleaned[key] = val
    return cleaned
|
||||
|
||||
|
||||
def is_tracing_enabled(tracer):
    """Report whether ``tracer`` records spans, by opening a throwaway ``check_tracing`` span.

    :param tracer: Object exposing ``start_as_current_span`` as a context manager
    :returns: The result of ``is_recording()`` on the probe span
    """
    with tracer.start_as_current_span("check_tracing") as probe_span:
        recording = probe_span.is_recording()
    return recording
|
||||
|
||||
|
||||
class Telemetry:
    """Routes telemetry events to OpenTelemetry spans and metric instruments.

    OTLP exporters are configured only when ``OTEL_EXPORTER_OTLP_ENDPOINT`` is set.
    Open spans and created instruments live in the module-level ``_GLOBAL_STORAGE``,
    so they are shared by every instance; span bookkeeping is done while holding
    the module-level lock.
    """

    def __init__(self) -> None:
        self.meter = None

        global _TRACER_PROVIDER
        # Initialize the correct span processor based on the provider state.
        # This is needed since once the span processor is set, it cannot be unset.
        # Recreating the telemetry adapter multiple times will result in duplicate span processors.
        # Since the library client can be recreated multiple times in a notebook,
        # the kernel will hold on to the span processor and cause duplicate spans to be written.
        # NOTE(review): nesting below was reconstructed from a flattened listing; the
        # exporter setup is assumed to sit inside the provider-is-None guard, matching
        # the duplicate-processor comment above. Confirm against the original file.
        if os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT"):
            if _TRACER_PROVIDER is None:
                provider = TracerProvider()
                trace.set_tracer_provider(provider)
                _TRACER_PROVIDER = provider

                # Use single OTLP endpoint for all telemetry signals

                # Let OpenTelemetry SDK handle endpoint construction automatically
                # The SDK will read OTEL_EXPORTER_OTLP_ENDPOINT and construct appropriate URLs
                # https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter
                span_exporter = OTLPSpanExporter()
                span_processor = BatchSpanProcessor(span_exporter)
                cast(TracerProvider, trace.get_tracer_provider()).add_span_processor(span_processor)

                metric_reader = PeriodicExportingMetricReader(OTLPMetricExporter())
                metric_provider = MeterProvider(metric_readers=[metric_reader])
                metrics.set_meter_provider(metric_provider)
            self.is_otel_endpoint_set = True
        else:
            logger.warning("OTEL_EXPORTER_OTLP_ENDPOINT is not set, skipping telemetry")
            self.is_otel_endpoint_set = False

        self.meter = metrics.get_meter(__name__)
        self._lock = _global_lock

    async def initialize(self) -> None:
        """No-op; all setup happens in ``__init__``."""
        pass

    async def shutdown(self) -> None:
        """Flush pending spans when an OTLP endpoint was configured."""
        if self.is_otel_endpoint_set:
            cast(TracerProvider, trace.get_tracer_provider()).force_flush()

    async def log_event(self, event: Event, ttl_seconds: int = 604800) -> None:
        """Dispatch ``event`` to the handler for its concrete type.

        :param event: The telemetry event to record
        :param ttl_seconds: Stored on log events as the ``__ttl__`` attribute
        :raises ValueError: If ``event`` is none of the known event classes
        """
        if isinstance(event, UnstructuredLogEvent):
            self._log_unstructured(event, ttl_seconds)
        elif isinstance(event, MetricEvent):
            self._log_metric(event)
        elif isinstance(event, StructuredLogEvent):
            self._log_structured(event, ttl_seconds)
        else:
            raise ValueError(f"Unknown event type: {event}")

    def _log_unstructured(self, event: UnstructuredLogEvent, ttl_seconds: int) -> None:
        """Attach a log message as an event on its active span, or drop it with a warning."""
        with self._lock:
            # Use global storage instead of instance storage
            span_id = int(event.span_id, 16)
            span = _GLOBAL_STORAGE["active_spans"].get(span_id)

            if span:
                timestamp_ns = int(event.timestamp.timestamp() * 1e9)
                span.add_event(
                    name=event.type.value,
                    attributes={
                        "message": event.message,
                        "severity": event.severity.value,
                        "__ttl__": ttl_seconds,
                        **(event.attributes or {}),
                    },
                    timestamp=timestamp_ns,
                )
            else:
                print(f"Warning: No active span found for span_id {span_id}. Dropping event: {event}")

    def _get_or_create_counter(self, name: str, unit: str) -> metrics.Counter:
        """Return the shared counter for ``name``, creating it on first use."""
        assert self.meter is not None
        if name not in _GLOBAL_STORAGE["counters"]:
            _GLOBAL_STORAGE["counters"][name] = self.meter.create_counter(
                name=name,
                unit=unit,
                description=f"Counter for {name}",
            )
        return cast(metrics.Counter, _GLOBAL_STORAGE["counters"][name])

    def _get_or_create_gauge(self, name: str, unit: str) -> metrics.ObservableGauge:
        """Return the shared gauge for ``name``, creating it on first use."""
        assert self.meter is not None
        if name not in _GLOBAL_STORAGE["gauges"]:
            _GLOBAL_STORAGE["gauges"][name] = self.meter.create_gauge(
                name=name,
                unit=unit,
                description=f"Gauge for {name}",
            )
        return cast(metrics.ObservableGauge, _GLOBAL_STORAGE["gauges"][name])

    def _get_or_create_histogram(self, name: str, unit: str) -> metrics.Histogram:
        """Return the shared histogram for ``name``, creating it on first use."""
        assert self.meter is not None
        if name not in _GLOBAL_STORAGE["histograms"]:
            _GLOBAL_STORAGE["histograms"][name] = self.meter.create_histogram(
                name=name,
                unit=unit,
                description=f"Histogram for {name}",
            )
        return cast(metrics.Histogram, _GLOBAL_STORAGE["histograms"][name])

    def _log_metric(self, event: MetricEvent) -> None:
        """Record a metric on its active span (best effort) and on an OpenTelemetry instrument."""
        # Add metric as an event to the current span
        try:
            with self._lock:
                # Only try to add to span if we have a valid span_id
                if event.span_id:
                    try:
                        span_id = int(event.span_id, 16)
                        span = _GLOBAL_STORAGE["active_spans"].get(span_id)

                        if span:
                            timestamp_ns = int(event.timestamp.timestamp() * 1e9)
                            span.add_event(
                                name=f"metric.{event.metric}",
                                attributes={
                                    "value": event.value,
                                    "unit": event.unit,
                                    **(event.attributes or {}),
                                },
                                timestamp=timestamp_ns,
                            )
                    except (ValueError, KeyError):
                        # Malformed span_id: skip the span event, the metric is
                        # still recorded on the meter below.
                        pass
        except Exception as exc:
            # Attaching to the span is best effort; report the real failure rather
            # than a generic message so it can be diagnosed.
            logger.debug(f"Failed to add metric {event.metric} to span: {exc}")

        # Log to OpenTelemetry meter if available
        if self.meter is None:
            return

        # Use histograms for token-related metrics (per-request measurements)
        # Use counters for other cumulative metrics
        token_metrics = {"prompt_tokens", "completion_tokens", "total_tokens"}

        if event.metric in token_metrics:
            # Token metrics are per-request measurements, use histogram
            histogram = self._get_or_create_histogram(event.metric, event.unit)
            histogram.record(event.value, attributes=_clean_attributes(event.attributes))
        elif isinstance(event.value, int):
            counter = self._get_or_create_counter(event.metric, event.unit)
            counter.add(event.value, attributes=_clean_attributes(event.attributes))
        elif isinstance(event.value, float):
            up_down_counter = self._get_or_create_up_down_counter(event.metric, event.unit)
            up_down_counter.add(event.value, attributes=_clean_attributes(event.attributes))

    def _get_or_create_up_down_counter(self, name: str, unit: str) -> metrics.UpDownCounter:
        """Return the shared up/down counter for ``name``, creating it on first use."""
        assert self.meter is not None
        if name not in _GLOBAL_STORAGE["up_down_counters"]:
            _GLOBAL_STORAGE["up_down_counters"][name] = self.meter.create_up_down_counter(
                name=name,
                unit=unit,
                description=f"UpDownCounter for {name}",
            )
        return cast(metrics.UpDownCounter, _GLOBAL_STORAGE["up_down_counters"][name])

    def _log_structured(self, event: StructuredLogEvent, ttl_seconds: int) -> None:
        """Start or end the OpenTelemetry span described by a structured event.

        Note that ``event.attributes`` is modified in place: ``__ttl__`` is added
        and the trace-context keys are removed.

        :raises ValueError: If the payload is neither a span start nor a span end
        """
        with self._lock:
            span_id = int(event.span_id, 16)
            tracer = trace.get_tracer(__name__)
            if event.attributes is None:
                event.attributes = {}
            event.attributes["__ttl__"] = ttl_seconds

            # Extract these W3C trace context attributes so they are not written to
            # underlying storage, as we just need them to propagate the trace context.
            traceparent = event.attributes.pop("traceparent", None)
            tracestate = event.attributes.pop("tracestate", None)
            if traceparent:
                # If we have a traceparent header value, we're not the root span.
                for root_attribute in ROOT_SPAN_MARKERS:
                    event.attributes.pop(root_attribute, None)

            if isinstance(event.payload, SpanStartPayload):
                # Check if span already exists to prevent duplicates
                if span_id in _GLOBAL_STORAGE["active_spans"]:
                    return

                # Parent resolution: a known local parent span wins; otherwise fall
                # back to the remote context carried by the traceparent header.
                context = None
                if event.payload.parent_span_id:
                    parent_span_id = int(event.payload.parent_span_id, 16)
                    parent_span = _GLOBAL_STORAGE["active_spans"].get(parent_span_id)
                    if parent_span:
                        context = trace.set_span_in_context(parent_span)
                elif traceparent:
                    carrier = {
                        "traceparent": traceparent,
                        "tracestate": tracestate,
                    }
                    context = TraceContextTextMapPropagator().extract(carrier=carrier)

                span = tracer.start_span(
                    name=event.payload.name,
                    context=context,
                    attributes=_clean_attributes(event.attributes),
                )
                _GLOBAL_STORAGE["active_spans"][span_id] = span

            elif isinstance(event.payload, SpanEndPayload):
                span = _GLOBAL_STORAGE["active_spans"].get(span_id)  # type: ignore[assignment]
                if span:
                    if event.attributes:
                        cleaned_attrs = _clean_attributes(event.attributes)
                        if cleaned_attrs:
                            span.set_attributes(cleaned_attrs)

                    status = (
                        trace.Status(status_code=trace.StatusCode.OK)
                        if event.payload.status == SpanStatus.OK
                        else trace.Status(status_code=trace.StatusCode.ERROR)
                    )
                    span.set_status(status)
                    span.end()
                    _GLOBAL_STORAGE["active_spans"].pop(span_id, None)
            else:
                raise ValueError(f"Unknown structured log event: {event}")
|
||||
|
|
@ -1,154 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import asyncio
|
||||
import inspect
|
||||
import json
|
||||
from collections.abc import AsyncGenerator, Callable
|
||||
from functools import wraps
|
||||
from typing import Any, cast
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.models.llama.datatypes import Primitive
|
||||
|
||||
type JSONValue = Primitive | list["JSONValue"] | dict[str, "JSONValue"]
|
||||
|
||||
|
||||
def serialize_value(value: Any) -> str:
    """Return ``str()`` of ``value`` after it has been reduced to JSON-compatible data."""
    prepared = _prepare_for_json(value)
    return str(prepared)
|
||||
|
||||
|
||||
def _prepare_for_json(value: Any) -> JSONValue:
    """Reduce one value to JSON-compatible data.

    None becomes an empty string, primitives pass through, objects with a
    ``_name_`` attribute yield that name, pydantic models are dumped to plain
    data, and lists/tuples/sets/dicts are converted recursively. Anything else
    is returned unchanged if ``json.dumps`` accepts it and stringified otherwise.
    """
    if value is None:
        return ""
    if isinstance(value, str | int | float | bool):
        return value
    if hasattr(value, "_name_"):
        return cast(str, value._name_)
    if isinstance(value, BaseModel):
        return cast(JSONValue, json.loads(value.model_dump_json()))
    if isinstance(value, list | tuple | set):
        return [_prepare_for_json(element) for element in value]
    if isinstance(value, dict):
        return {str(key): _prepare_for_json(item) for key, item in value.items()}

    # Unknown type: probe whether the json module can encode it as-is.
    try:
        json.dumps(value)
    except Exception:
        return str(value)
    return cast(JSONValue, value)
|
||||
|
||||
|
||||
def trace_protocol[T: type[Any]](cls: T) -> T:
|
||||
"""
|
||||
A class decorator that automatically traces all methods in a protocol/base class
|
||||
and its inheriting classes.
|
||||
"""
|
||||
|
||||
def trace_method(method: Callable[..., Any]) -> Callable[..., Any]:
|
||||
is_async = asyncio.iscoroutinefunction(method)
|
||||
is_async_gen = inspect.isasyncgenfunction(method)
|
||||
|
||||
def create_span_context(self: Any, *args: Any, **kwargs: Any) -> tuple[str, str, dict[str, Primitive]]:
|
||||
class_name = self.__class__.__name__
|
||||
method_name = method.__name__
|
||||
span_type = "async_generator" if is_async_gen else "async" if is_async else "sync"
|
||||
sig = inspect.signature(method)
|
||||
param_names = list(sig.parameters.keys())[1:] # Skip 'self'
|
||||
combined_args: dict[str, str] = {}
|
||||
for i, arg in enumerate(args):
|
||||
param_name = param_names[i] if i < len(param_names) else f"position_{i + 1}"
|
||||
combined_args[param_name] = serialize_value(arg)
|
||||
for k, v in kwargs.items():
|
||||
combined_args[str(k)] = serialize_value(v)
|
||||
|
||||
span_attributes: dict[str, Primitive] = {
|
||||
"__autotraced__": True,
|
||||
"__class__": class_name,
|
||||
"__method__": method_name,
|
||||
"__type__": span_type,
|
||||
"__args__": json.dumps(combined_args),
|
||||
}
|
||||
|
||||
return class_name, method_name, span_attributes
|
||||
|
||||
@wraps(method)
|
||||
async def async_gen_wrapper(self: Any, *args: Any, **kwargs: Any) -> AsyncGenerator[Any, None]:
|
||||
from llama_stack.core.telemetry import tracing
|
||||
|
||||
class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs)
|
||||
|
||||
with tracing.span(f"{class_name}.{method_name}", span_attributes) as span:
|
||||
count = 0
|
||||
try:
|
||||
async for item in method(self, *args, **kwargs):
|
||||
yield item
|
||||
count += 1
|
||||
finally:
|
||||
span.set_attribute("chunk_count", count)
|
||||
|
||||
@wraps(method)
|
||||
async def async_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any:
|
||||
from llama_stack.core.telemetry import tracing
|
||||
|
||||
class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs)
|
||||
|
||||
with tracing.span(f"{class_name}.{method_name}", span_attributes) as span:
|
||||
try:
|
||||
result = await method(self, *args, **kwargs)
|
||||
span.set_attribute("output", serialize_value(result))
|
||||
return result
|
||||
except Exception as e:
|
||||
span.set_attribute("error", str(e))
|
||||
raise
|
||||
|
||||
@wraps(method)
|
||||
def sync_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any:
|
||||
from llama_stack.core.telemetry import tracing
|
||||
|
||||
class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs)
|
||||
|
||||
with tracing.span(f"{class_name}.{method_name}", span_attributes) as span:
|
||||
try:
|
||||
result = method(self, *args, **kwargs)
|
||||
span.set_attribute("output", serialize_value(result))
|
||||
return result
|
||||
except Exception as e:
|
||||
span.set_attribute("error", str(e))
|
||||
raise
|
||||
|
||||
if is_async_gen:
|
||||
return async_gen_wrapper
|
||||
elif is_async:
|
||||
return async_wrapper
|
||||
else:
|
||||
return sync_wrapper
|
||||
|
||||
# Wrap methods on the class itself (for classes applied at runtime)
|
||||
# Skip if already wrapped (indicated by __wrapped__ attribute)
|
||||
for name, method in vars(cls).items():
|
||||
if inspect.isfunction(method) and not name.startswith("_"):
|
||||
if not hasattr(method, "__wrapped__"):
|
||||
wrapped = trace_method(method)
|
||||
setattr(cls, name, wrapped) # noqa: B010
|
||||
|
||||
# Also set up __init_subclass__ for future subclasses
|
||||
original_init_subclass = cast(Callable[..., Any] | None, getattr(cls, "__init_subclass__", None))
|
||||
|
||||
def __init_subclass__(cls_child: type[Any], **kwargs: Any) -> None: # noqa: N807
|
||||
if original_init_subclass:
|
||||
cast(Callable[..., None], original_init_subclass)(**kwargs)
|
||||
|
||||
for name, method in vars(cls_child).items():
|
||||
if inspect.isfunction(method) and not name.startswith("_"):
|
||||
setattr(cls_child, name, trace_method(method)) # noqa: B010
|
||||
|
||||
cls_any = cast(Any, cls)
|
||||
cls_any.__init_subclass__ = classmethod(__init_subclass__)
|
||||
|
||||
return cls
|
||||
|
|
@ -1,388 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import asyncio
|
||||
import contextvars
|
||||
import logging # allow-direct-logging
|
||||
import queue
|
||||
import secrets
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
from collections.abc import Callable
|
||||
from datetime import UTC, datetime
|
||||
from functools import wraps
|
||||
from typing import Any, Self
|
||||
|
||||
from llama_stack.core.telemetry.telemetry import (
|
||||
ROOT_SPAN_MARKERS,
|
||||
Event,
|
||||
LogSeverity,
|
||||
Span,
|
||||
SpanEndPayload,
|
||||
SpanStartPayload,
|
||||
SpanStatus,
|
||||
StructuredLogEvent,
|
||||
Telemetry,
|
||||
UnstructuredLogEvent,
|
||||
)
|
||||
from llama_stack.core.telemetry.trace_protocol import serialize_value
|
||||
from llama_stack.log import get_logger
|
||||
|
||||
logger = get_logger(__name__, category="core")
|
||||
|
||||
# Dedicated stderr logger for the telemetry background machinery. It does NOT
# propagate to its parent loggers, so messages written here never reach a
# TelemetryHandler and cannot trigger recursive telemetry emission.
_fallback_logger = logging.getLogger("llama_stack.telemetry.background")
if len(_fallback_logger.handlers) == 0:
    # Configure only when no handler is attached yet, so the handler is not
    # added a second time if this code runs again.
    _fallback_handler = logging.StreamHandler(sys.stderr)
    _fallback_handler.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] %(name)s: %(message)s"))
    _fallback_handler.setLevel(logging.ERROR)
    _fallback_logger.addHandler(_fallback_handler)
    _fallback_logger.setLevel(logging.ERROR)
    _fallback_logger.propagate = False
|
||||
|
||||
|
||||
INVALID_SPAN_ID = 0x0000000000000000
|
||||
INVALID_TRACE_ID = 0x00000000000000000000000000000000
|
||||
|
||||
# The logical root span may not be visible to this process if a parent context
|
||||
# is passed in. The local root span is the first local span in a trace.
|
||||
LOCAL_ROOT_SPAN_MARKER = "__local_root_span__"
|
||||
|
||||
|
||||
def trace_id_to_str(trace_id: int) -> str:
    """Render an integer trace ID as lowercase hexadecimal text.

    Args:
        trace_id: Trace ID as an int

    Returns:
        The trace ID in lowercase hex, zero-padded on the left to at least
        32 characters
    """
    return f"{trace_id:032x}"
|
||||
|
||||
|
||||
def span_id_to_str(span_id: int) -> str:
    """Render an integer span ID as lowercase hexadecimal text.

    Args:
        span_id: Span ID as an int

    Returns:
        The span ID in lowercase hex, zero-padded on the left to at least
        16 characters
    """
    return f"{span_id:016x}"
|
||||
|
||||
|
||||
def generate_span_id() -> str:
    """Draw a random 64-bit span ID and return it as hex text.

    Values equal to ``INVALID_SPAN_ID`` are rejected and redrawn.

    Returns:
        The new span ID formatted by ``span_id_to_str``
    """
    while True:
        candidate = secrets.randbits(64)
        if candidate != INVALID_SPAN_ID:
            return span_id_to_str(candidate)
|
||||
|
||||
|
||||
def generate_trace_id() -> str:
    """Draw a random 128-bit trace ID and return it as hex text.

    Values equal to ``INVALID_TRACE_ID`` are rejected and redrawn.

    Returns:
        The new trace ID formatted by ``trace_id_to_str``
    """
    while True:
        candidate = secrets.randbits(128)
        if candidate != INVALID_TRACE_ID:
            return trace_id_to_str(candidate)
|
||||
|
||||
|
||||
LOG_QUEUE_FULL_LOG_INTERVAL_SECONDS = 60.0
|
||||
|
||||
|
||||
class BackgroundLogger:
    """Forwards telemetry events to a ``Telemetry`` API from a daemon thread.

    Producers call :meth:`log_event`, which never blocks: events go onto a
    bounded queue and are dropped (with a rate-limited notice) when the queue
    is full. A daemon thread running its own asyncio event loop takes events
    off the queue and awaits ``api.log_event`` for each one.
    """

    def __init__(self, api: Telemetry, capacity: int = 100000):
        """Create the queue and start the worker thread.

        Args:
            api: Telemetry implementation whose ``log_event`` is awaited per event
            capacity: Maximum number of queued events before new ones are dropped
        """
        self.api = api
        self.log_queue: queue.Queue[Any] = queue.Queue(maxsize=capacity)
        # Initialise the drop-accounting state before the worker thread starts,
        # so the instance is fully constructed once another thread is running.
        self._last_queue_full_log_time: float = 0.0
        self._dropped_since_last_notice: int = 0
        self.worker_thread = threading.Thread(target=self._worker, daemon=True)
        self.worker_thread.start()

    def log_event(self, event: Event) -> None:
        """Queue ``event`` without blocking; drop it if the queue is full."""
        try:
            self.log_queue.put_nowait(event)
        except queue.Full:
            # Aggregate drops and emit at most once per interval via fallback logger
            self._dropped_since_last_notice += 1
            current_time = time.time()
            if current_time - self._last_queue_full_log_time >= LOG_QUEUE_FULL_LOG_INTERVAL_SECONDS:
                _fallback_logger.error(
                    "Log queue is full; dropped %d events since last notice",
                    self._dropped_since_last_notice,
                )
                self._last_queue_full_log_time = current_time
                self._dropped_since_last_notice = 0

    def _worker(self):
        """Thread target: run ``_process_logs`` on a fresh event loop."""
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        loop.run_until_complete(self._process_logs())

    async def _process_logs(self):
        """Forever take events off the queue and hand them to the Telemetry API."""
        while True:
            # The blocking get() stays outside the try/finally so that
            # task_done() is only ever called for an item that was dequeued.
            event = self.log_queue.get()
            try:
                await self.api.log_event(event)
            except Exception:
                # Report through the non-propagating fallback logger rather
                # than print(), and keep the worker alive for later events.
                _fallback_logger.exception("Error processing log event")
            finally:
                self.log_queue.task_done()

    def __del__(self) -> None:
        # Block until every queued event has been marked done by the worker.
        self.log_queue.join()
|
||||
|
||||
|
||||
BACKGROUND_LOGGER: BackgroundLogger | None = None
|
||||
|
||||
|
||||
def enqueue_event(event: Event) -> None:
    """Hand a telemetry event to the background logger.

    This is the non-blocking path for routers and other hot paths: the event
    is queued for the background worker instead of awaiting the Telemetry API
    on the caller's event loop.

    Args:
        event: The telemetry event to queue

    Raises:
        RuntimeError: If no background logger has been set up yet
    """
    background_logger = BACKGROUND_LOGGER
    if background_logger is not None:
        background_logger.log_event(event)
        return
    raise RuntimeError("Telemetry API not initialized")
|
||||
|
||||
|
||||
class TraceContext:
    """Stack of currently open spans that belong to one trace.

    Pushing a span emits a ``StructuredLogEvent`` carrying a
    ``SpanStartPayload`` to the background logger; popping emits one carrying
    a ``SpanEndPayload``. The most recently pushed span is the "current" span
    and becomes the parent of the next span pushed.
    """

    def __init__(self, logger: BackgroundLogger, trace_id: str):
        """
        Args:
            logger: Background logger that receives span start/end events
            trace_id: Identifier shared by every span created in this context
        """
        self.logger = logger
        self.trace_id = trace_id
        self.spans: list[Span] = []

    def push_span(self, name: str, attributes: dict[str, Any] | None = None) -> Span:
        """Open a new span as a child of the current span and make it current.

        Args:
            name: Span name
            attributes: Optional attributes attached to the span

        Returns:
            The newly created span
        """
        current_span = self.get_current_span()
        span = Span(
            span_id=generate_span_id(),
            trace_id=self.trace_id,
            name=name,
            start_time=datetime.now(UTC),
            # The first span of the trace has no parent.
            parent_span_id=current_span.span_id if current_span else None,
            attributes=attributes,
        )

        self.logger.log_event(
            StructuredLogEvent(
                trace_id=span.trace_id,
                span_id=span.span_id,
                timestamp=span.start_time,
                attributes=span.attributes,
                payload=SpanStartPayload(
                    name=span.name,
                    parent_span_id=span.parent_span_id,
                ),
            )
        )

        self.spans.append(span)
        return span

    def pop_span(self, status: SpanStatus = SpanStatus.OK) -> None:
        """Close the current span and emit its end event.

        Does nothing when no span is open.

        Args:
            status: Status recorded in the span's end payload
        """
        if not self.spans:
            # list.pop() raises IndexError on an empty list instead of
            # returning None, so the "nothing to pop" case is checked up front.
            return

        span = self.spans.pop()
        self.logger.log_event(
            StructuredLogEvent(
                trace_id=span.trace_id,
                span_id=span.span_id,
                # NOTE(review): the end event reuses the span's start_time as
                # its timestamp; confirm the consumer expects this.
                timestamp=span.start_time,
                attributes=span.attributes,
                payload=SpanEndPayload(
                    status=status,
                ),
            )
        )

    def get_current_span(self) -> Span | None:
        """Return the most recently pushed open span, or ``None`` if there is none."""
        return self.spans[-1] if self.spans else None
|
||||
|
||||
|
||||
CURRENT_TRACE_CONTEXT: contextvars.ContextVar[TraceContext | None] = contextvars.ContextVar(
|
||||
"trace_context", default=None
|
||||
)
|
||||
|
||||
|
||||
def setup_logger(api: Telemetry, level: int = logging.INFO):
    """Install the background telemetry logger once.

    The first call creates the module-wide ``BackgroundLogger``, sets the root
    logger's level, and attaches a ``TelemetryHandler`` to it. Later calls
    find the background logger already set and do nothing.

    Args:
        api: Telemetry implementation handed to the background logger
        level: Level applied to the root logger
    """
    global BACKGROUND_LOGGER

    if BACKGROUND_LOGGER is not None:
        return

    BACKGROUND_LOGGER = BackgroundLogger(api)
    root = logging.getLogger()
    root.setLevel(level)
    root.addHandler(TelemetryHandler())
|
||||
|
||||
|
||||
async def start_trace(name: str, attributes: dict[str, Any] | None = None) -> TraceContext | None:
    """Begin a new trace and make it the current trace context.

    Args:
        name: Name of the trace's first (root) span
        attributes: Optional attributes for the root span; these override the
            marker attributes on key collision

    Returns:
        The new trace context, or ``None`` when no background logger is set up
    """
    background_logger = BACKGROUND_LOGGER
    if background_logger is None:
        logger.debug("No Telemetry implementation set. Skipping trace initialization...")
        return None

    context = TraceContext(background_logger, generate_trace_id())

    # Mark this span as the root for the trace for now. Processing of a
    # supplied traceparent context comes later and results in the
    # ROOT_SPAN_MARKERS being removed. The span is also marked as the 'local'
    # root, i.e. the root of the spans originating in this process, which is
    # needed so that this local root span's id is inserted into the trace
    # record in the sqlite store.
    root_attributes: dict[str, Any] = {marker: True for marker in ROOT_SPAN_MARKERS}
    root_attributes[LOCAL_ROOT_SPAN_MARKER] = True
    root_attributes.update(attributes or {})
    context.push_span(name, root_attributes)

    CURRENT_TRACE_CONTEXT.set(context)
    return context
|
||||
|
||||
|
||||
async def end_trace(status: SpanStatus = SpanStatus.OK):
    """Close the current span of the active trace and clear the trace context.

    Args:
        status: Status passed to the trace context's ``pop_span``
    """
    active = CURRENT_TRACE_CONTEXT.get()
    if active is not None:
        active.pop_span(status)
        CURRENT_TRACE_CONTEXT.set(None)
        return

    logger.debug("No trace context to end")
|
||||
|
||||
|
||||
def severity(levelname: str) -> LogSeverity:
|
||||
if levelname == "DEBUG":
|
||||
return LogSeverity.DEBUG
|
||||
elif levelname == "INFO":
|
||||
return LogSeverity.INFO
|
||||
elif levelname == "WARNING":
|
||||
return LogSeverity.WARN
|
||||
elif levelname == "ERROR":
|
||||
return LogSeverity.ERROR
|
||||
elif levelname == "CRITICAL":
|
||||
return LogSeverity.CRITICAL
|
||||
else:
|
||||
raise ValueError(f"Unknown log level: {levelname}")
|
||||
|
||||
|
||||
# TODO: ideally, the actual emitting should be done inside a separate daemon
|
||||
# process completely isolated from the server
|
||||
class TelemetryHandler(logging.Handler):
|
||||
def emit(self, record: logging.LogRecord) -> None:
|
||||
# horrendous hack to avoid logging from asyncio and getting into an infinite loop
|
||||
if record.module in ("asyncio", "selector_events"):
|
||||
return
|
||||
|
||||
global CURRENT_TRACE_CONTEXT
|
||||
context = CURRENT_TRACE_CONTEXT.get()
|
||||
if context is None:
|
||||
return
|
||||
|
||||
span = context.get_current_span()
|
||||
if span is None:
|
||||
return
|
||||
|
||||
enqueue_event(
|
||||
UnstructuredLogEvent(
|
||||
trace_id=span.trace_id,
|
||||
span_id=span.span_id,
|
||||
timestamp=datetime.now(UTC),
|
||||
message=self.format(record),
|
||||
severity=severity(record.levelname),
|
||||
)
|
||||
)
|
||||
|
||||
def close(self) -> None:
|
||||
pass
|
||||
|
||||
|
||||
class SpanContextManager:
|
||||
def __init__(self, name: str, attributes: dict[str, Any] | None = None):
|
||||
self.name = name
|
||||
self.attributes = attributes
|
||||
self.span: Span | None = None
|
||||
|
||||
def __enter__(self) -> Self:
|
||||
global CURRENT_TRACE_CONTEXT
|
||||
context = CURRENT_TRACE_CONTEXT.get()
|
||||
if not context:
|
||||
logger.debug("No trace context to push span")
|
||||
return self
|
||||
|
||||
self.span = context.push_span(self.name, self.attributes)
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_value, traceback) -> None:
|
||||
global CURRENT_TRACE_CONTEXT
|
||||
context = CURRENT_TRACE_CONTEXT.get()
|
||||
if not context:
|
||||
logger.debug("No trace context to pop span")
|
||||
return
|
||||
|
||||
context.pop_span()
|
||||
|
||||
def set_attribute(self, key: str, value: Any) -> None:
|
||||
if self.span:
|
||||
if self.span.attributes is None:
|
||||
self.span.attributes = {}
|
||||
self.span.attributes[key] = serialize_value(value)
|
||||
|
||||
async def __aenter__(self) -> Self:
|
||||
global CURRENT_TRACE_CONTEXT
|
||||
context = CURRENT_TRACE_CONTEXT.get()
|
||||
if not context:
|
||||
logger.debug("No trace context to push span")
|
||||
return self
|
||||
|
||||
self.span = context.push_span(self.name, self.attributes)
|
||||
return self
|
||||
|
||||
async def __aexit__(self, exc_type, exc_value, traceback) -> None:
|
||||
global CURRENT_TRACE_CONTEXT
|
||||
context = CURRENT_TRACE_CONTEXT.get()
|
||||
if not context:
|
||||
logger.debug("No trace context to pop span")
|
||||
return
|
||||
|
||||
context.pop_span()
|
||||
|
||||
def __call__(self, func: Callable[..., Any]) -> Callable[..., Any]:
|
||||
@wraps(func)
|
||||
def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
|
||||
with self:
|
||||
return func(*args, **kwargs)
|
||||
|
||||
@wraps(func)
|
||||
async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
|
||||
async with self:
|
||||
return await func(*args, **kwargs)
|
||||
|
||||
@wraps(func)
|
||||
def wrapper(*args: Any, **kwargs: Any) -> Any:
|
||||
if asyncio.iscoroutinefunction(func):
|
||||
return async_wrapper(*args, **kwargs)
|
||||
else:
|
||||
return sync_wrapper(*args, **kwargs)
|
||||
|
||||
return wrapper
|
||||
|
||||
|
||||
def span(name: str, attributes: dict[str, Any] | None = None) -> SpanContextManager:
    """Build a ``SpanContextManager`` for a span with the given name and attributes.

    Args:
        name: Name of the span
        attributes: Optional attributes for the span

    Returns:
        A new, not yet entered ``SpanContextManager``
    """
    return SpanContextManager(name=name, attributes=attributes)
|
||||
|
||||
|
||||
def get_current_span() -> Span | None:
    """Return the current span of the active trace context.

    Returns:
        The span reported by the active trace context's ``get_current_span``,
        or ``None`` when no trace context is active
    """
    # CURRENT_TRACE_CONTEXT is bound to a ContextVar object, so only the value
    # it holds can be None; the variable itself needs no None check.
    context = CURRENT_TRACE_CONTEXT.get()
    if context:
        return context.get_current_span()
    return None
|
||||
|
|
@ -7,8 +7,6 @@
|
|||
from collections.abc import AsyncGenerator
|
||||
from contextvars import ContextVar
|
||||
|
||||
from llama_stack.core.telemetry.tracing import CURRENT_TRACE_CONTEXT
|
||||
|
||||
_MISSING = object()
|
||||
|
||||
|
||||
|
|
@ -69,16 +67,12 @@ def preserve_contexts_async_generator[T](
|
|||
try:
|
||||
yield item
|
||||
# Update our tracked values with any changes made during this iteration
|
||||
# Only for non-trace context vars - trace context must persist across yields
|
||||
# to allow nested span tracking for telemetry
|
||||
# This allows context changes to persist across generator iterations
|
||||
for context_var in context_vars:
|
||||
if context_var is not CURRENT_TRACE_CONTEXT:
|
||||
initial_context_values[context_var.name] = context_var.get()
|
||||
initial_context_values[context_var.name] = context_var.get()
|
||||
finally:
|
||||
# Restore non-trace context vars after each yield to prevent leaks between requests
|
||||
# CURRENT_TRACE_CONTEXT is NOT restored here to preserve telemetry span stack
|
||||
# Restore context vars after each yield to prevent leaks between requests
|
||||
for context_var in context_vars:
|
||||
if context_var is not CURRENT_TRACE_CONTEXT:
|
||||
_restore_context_var(context_var)
|
||||
_restore_context_var(context_var)
|
||||
|
||||
return wrapper()
|
||||
|
|
|
|||
|
|
@ -281,8 +281,6 @@ registered_resources:
|
|||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
enabled: true
|
||||
vector_stores:
|
||||
default_provider_id: faiss
|
||||
default_embedding_model:
|
||||
|
|
|
|||
|
|
@ -272,8 +272,6 @@ registered_resources:
|
|||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
enabled: true
|
||||
vector_stores:
|
||||
default_provider_id: faiss
|
||||
default_embedding_model:
|
||||
|
|
|
|||
|
|
@ -140,5 +140,3 @@ registered_resources:
|
|||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
enabled: true
|
||||
|
|
|
|||
|
|
@ -131,5 +131,3 @@ registered_resources:
|
|||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
enabled: true
|
||||
|
|
|
|||
|
|
@ -153,5 +153,3 @@ registered_resources:
|
|||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
enabled: true
|
||||
|
|
|
|||
|
|
@ -138,5 +138,3 @@ registered_resources:
|
|||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
enabled: true
|
||||
|
|
|
|||
|
|
@ -135,5 +135,3 @@ registered_resources:
|
|||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
enabled: true
|
||||
|
|
|
|||
|
|
@ -114,5 +114,3 @@ registered_resources:
|
|||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
enabled: true
|
||||
|
|
|
|||
|
|
@ -132,5 +132,3 @@ registered_resources:
|
|||
provider_id: tavily-search
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
enabled: true
|
||||
|
|
|
|||
|
|
@ -251,5 +251,3 @@ registered_resources:
|
|||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
enabled: true
|
||||
|
|
|
|||
|
|
@ -114,5 +114,3 @@ registered_resources:
|
|||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
enabled: true
|
||||
|
|
|
|||
|
|
@ -284,8 +284,6 @@ registered_resources:
|
|||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
enabled: true
|
||||
vector_stores:
|
||||
default_provider_id: faiss
|
||||
default_embedding_model:
|
||||
|
|
|
|||
|
|
@ -275,8 +275,6 @@ registered_resources:
|
|||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
enabled: true
|
||||
vector_stores:
|
||||
default_provider_id: faiss
|
||||
default_embedding_model:
|
||||
|
|
|
|||
|
|
@ -281,8 +281,6 @@ registered_resources:
|
|||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
enabled: true
|
||||
vector_stores:
|
||||
default_provider_id: faiss
|
||||
default_embedding_model:
|
||||
|
|
|
|||
|
|
@ -272,8 +272,6 @@ registered_resources:
|
|||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
enabled: true
|
||||
vector_stores:
|
||||
default_provider_id: faiss
|
||||
default_embedding_model:
|
||||
|
|
|
|||
|
|
@ -24,7 +24,6 @@ from llama_stack.core.datatypes import (
|
|||
Provider,
|
||||
SafetyConfig,
|
||||
ShieldInput,
|
||||
TelemetryConfig,
|
||||
ToolGroupInput,
|
||||
VectorStoresConfig,
|
||||
)
|
||||
|
|
@ -189,7 +188,6 @@ class RunConfigSettings(BaseModel):
|
|||
default_benchmarks: list[BenchmarkInput] | None = None
|
||||
vector_stores_config: VectorStoresConfig | None = None
|
||||
safety_config: SafetyConfig | None = None
|
||||
telemetry: TelemetryConfig = Field(default_factory=lambda: TelemetryConfig(enabled=True))
|
||||
storage_backends: dict[str, Any] | None = None
|
||||
storage_stores: dict[str, Any] | None = None
|
||||
|
||||
|
|
@ -289,7 +287,6 @@ class RunConfigSettings(BaseModel):
|
|||
"server": {
|
||||
"port": 8321,
|
||||
},
|
||||
"telemetry": self.telemetry.model_dump(exclude_none=True) if self.telemetry else None,
|
||||
}
|
||||
|
||||
if self.vector_stores_config:
|
||||
|
|
|
|||
|
|
@ -132,5 +132,3 @@ registered_resources:
|
|||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
enabled: true
|
||||
|
|
|
|||
|
|
@ -37,7 +37,6 @@ CATEGORIES = [
|
|||
"eval",
|
||||
"tools",
|
||||
"client",
|
||||
"telemetry",
|
||||
"openai",
|
||||
"openai_responses",
|
||||
"openai_conversations",
|
||||
|
|
|
|||
|
|
@ -15,7 +15,6 @@ async def get_provider_impl(
|
|||
config: MetaReferenceAgentsImplConfig,
|
||||
deps: dict[Api, Any],
|
||||
policy: list[AccessRule],
|
||||
telemetry_enabled: bool = False,
|
||||
):
|
||||
from .agents import MetaReferenceAgentsImpl
|
||||
|
||||
|
|
@ -29,7 +28,6 @@ async def get_provider_impl(
|
|||
deps[Api.conversations],
|
||||
deps[Api.prompts],
|
||||
deps[Api.files],
|
||||
telemetry_enabled,
|
||||
policy,
|
||||
)
|
||||
await impl.initialize()
|
||||
|
|
|
|||
|
|
@ -50,7 +50,6 @@ class MetaReferenceAgentsImpl(Agents):
|
|||
prompts_api: Prompts,
|
||||
files_api: Files,
|
||||
policy: list[AccessRule],
|
||||
telemetry_enabled: bool = False,
|
||||
):
|
||||
self.config = config
|
||||
self.inference_api = inference_api
|
||||
|
|
@ -59,7 +58,6 @@ class MetaReferenceAgentsImpl(Agents):
|
|||
self.tool_runtime_api = tool_runtime_api
|
||||
self.tool_groups_api = tool_groups_api
|
||||
self.conversations_api = conversations_api
|
||||
self.telemetry_enabled = telemetry_enabled
|
||||
self.prompts_api = prompts_api
|
||||
self.files_api = files_api
|
||||
self.in_memory_store = InmemoryKVStoreImpl()
|
||||
|
|
@ -111,6 +109,7 @@ class MetaReferenceAgentsImpl(Agents):
|
|||
max_infer_iters: int | None = 10,
|
||||
guardrails: list[ResponseGuardrail] | None = None,
|
||||
max_tool_calls: int | None = None,
|
||||
metadata: dict[str, str] | None = None,
|
||||
) -> OpenAIResponseObject:
|
||||
assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
|
||||
result = await self.openai_responses_impl.create_openai_response(
|
||||
|
|
@ -130,6 +129,7 @@ class MetaReferenceAgentsImpl(Agents):
|
|||
guardrails,
|
||||
parallel_tool_calls,
|
||||
max_tool_calls,
|
||||
metadata,
|
||||
)
|
||||
return result # type: ignore[no-any-return]
|
||||
|
||||
|
|
|
|||
|
|
@ -336,6 +336,7 @@ class OpenAIResponsesImpl:
|
|||
guardrails: list[str | ResponseGuardrailSpec] | None = None,
|
||||
parallel_tool_calls: bool | None = None,
|
||||
max_tool_calls: int | None = None,
|
||||
metadata: dict[str, str] | None = None,
|
||||
):
|
||||
stream = bool(stream)
|
||||
text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) if text is None else text
|
||||
|
|
@ -390,6 +391,7 @@ class OpenAIResponsesImpl:
|
|||
guardrail_ids=guardrail_ids,
|
||||
parallel_tool_calls=parallel_tool_calls,
|
||||
max_tool_calls=max_tool_calls,
|
||||
metadata=metadata,
|
||||
)
|
||||
|
||||
if stream:
|
||||
|
|
@ -442,6 +444,7 @@ class OpenAIResponsesImpl:
|
|||
guardrail_ids: list[str] | None = None,
|
||||
parallel_tool_calls: bool | None = True,
|
||||
max_tool_calls: int | None = None,
|
||||
metadata: dict[str, str] | None = None,
|
||||
) -> AsyncIterator[OpenAIResponseObjectStream]:
|
||||
# These should never be None when called from create_openai_response (which sets defaults)
|
||||
# but we assert here to help mypy understand the types
|
||||
|
|
@ -490,6 +493,7 @@ class OpenAIResponsesImpl:
|
|||
guardrail_ids=guardrail_ids,
|
||||
instructions=instructions,
|
||||
max_tool_calls=max_tool_calls,
|
||||
metadata=metadata,
|
||||
)
|
||||
|
||||
# Stream the response
|
||||
|
|
|
|||
|
|
@ -8,7 +8,8 @@ import uuid
|
|||
from collections.abc import AsyncIterator
|
||||
from typing import Any
|
||||
|
||||
from llama_stack.core.telemetry import tracing
|
||||
from opentelemetry import trace
|
||||
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
|
||||
from llama_stack_api import (
|
||||
|
|
@ -79,6 +80,7 @@ from .utils import (
|
|||
)
|
||||
|
||||
logger = get_logger(name=__name__, category="agents::meta_reference")
|
||||
tracer = trace.get_tracer(__name__)
|
||||
|
||||
|
||||
def convert_tooldef_to_chat_tool(tool_def):
|
||||
|
|
@ -118,6 +120,7 @@ class StreamingResponseOrchestrator:
|
|||
prompt: OpenAIResponsePrompt | None = None,
|
||||
parallel_tool_calls: bool | None = None,
|
||||
max_tool_calls: int | None = None,
|
||||
metadata: dict[str, str] | None = None,
|
||||
):
|
||||
self.inference_api = inference_api
|
||||
self.ctx = ctx
|
||||
|
|
@ -135,6 +138,7 @@ class StreamingResponseOrchestrator:
|
|||
self.parallel_tool_calls = parallel_tool_calls
|
||||
# Max number of total calls to built-in tools that can be processed in a response
|
||||
self.max_tool_calls = max_tool_calls
|
||||
self.metadata = metadata
|
||||
self.sequence_number = 0
|
||||
# Store MCP tool mapping that gets built during tool processing
|
||||
self.mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] = (
|
||||
|
|
@ -162,6 +166,7 @@ class StreamingResponseOrchestrator:
|
|||
model=self.ctx.model,
|
||||
status="completed",
|
||||
output=[OpenAIResponseMessage(role="assistant", content=[refusal_content], type="message")],
|
||||
metadata=self.metadata,
|
||||
)
|
||||
|
||||
return OpenAIResponseObjectStreamResponseCompleted(response=refusal_response)
|
||||
|
|
@ -197,6 +202,7 @@ class StreamingResponseOrchestrator:
|
|||
prompt=self.prompt,
|
||||
parallel_tool_calls=self.parallel_tool_calls,
|
||||
max_tool_calls=self.max_tool_calls,
|
||||
metadata=self.metadata,
|
||||
)
|
||||
|
||||
async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:
|
||||
|
|
@ -1106,8 +1112,10 @@ class StreamingResponseOrchestrator:
|
|||
"server_url": mcp_tool.server_url,
|
||||
"mcp_list_tools_id": list_id,
|
||||
}
|
||||
# List MCP tools with authorization from tool config
|
||||
async with tracing.span("list_mcp_tools", attributes):
|
||||
|
||||
# TODO: follow semantic conventions for Open Telemetry tool spans
|
||||
# https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#execute-tool-span
|
||||
with tracer.start_as_current_span("list_mcp_tools", attributes=attributes):
|
||||
tool_defs = await list_mcp_tools(
|
||||
endpoint=mcp_tool.server_url,
|
||||
headers=mcp_tool.headers,
|
||||
|
|
@ -1183,9 +1191,9 @@ class StreamingResponseOrchestrator:
|
|||
if mcp_server.require_approval == "never":
|
||||
return False
|
||||
if isinstance(mcp_server, ApprovalFilter):
|
||||
if tool_name in mcp_server.always:
|
||||
if mcp_server.always and tool_name in mcp_server.always:
|
||||
return True
|
||||
if tool_name in mcp_server.never:
|
||||
if mcp_server.never and tool_name in mcp_server.never:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
|
|
|||
|
|
@ -9,7 +9,8 @@ import json
|
|||
from collections.abc import AsyncIterator
|
||||
from typing import Any
|
||||
|
||||
from llama_stack.core.telemetry import tracing
|
||||
from opentelemetry import trace
|
||||
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack_api import (
|
||||
ImageContentItem,
|
||||
|
|
@ -42,6 +43,7 @@ from llama_stack_api import (
|
|||
from .types import ChatCompletionContext, ToolExecutionResult
|
||||
|
||||
logger = get_logger(name=__name__, category="agents::meta_reference")
|
||||
tracer = trace.get_tracer(__name__)
|
||||
|
||||
|
||||
class ToolExecutor:
|
||||
|
|
@ -296,8 +298,9 @@ class ToolExecutor:
|
|||
"server_url": mcp_tool.server_url,
|
||||
"tool_name": function_name,
|
||||
}
|
||||
# Invoke MCP tool with authorization from tool config
|
||||
async with tracing.span("invoke_mcp_tool", attributes):
|
||||
# TODO: follow semantic conventions for Open Telemetry tool spans
|
||||
# https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#execute-tool-span
|
||||
with tracer.start_as_current_span("invoke_mcp_tool", attributes=attributes):
|
||||
result = await invoke_mcp_tool(
|
||||
endpoint=mcp_tool.server_url,
|
||||
tool_name=function_name,
|
||||
|
|
@ -318,7 +321,7 @@ class ToolExecutor:
|
|||
# Use vector_stores.search API instead of knowledge_search tool
|
||||
# to support filters and ranking_options
|
||||
query = tool_kwargs.get("query", "")
|
||||
async with tracing.span("knowledge_search", {}):
|
||||
with tracer.start_as_current_span("knowledge_search"):
|
||||
result = await self._execute_knowledge_search_via_vector_store(
|
||||
query=query,
|
||||
response_file_search_tool=response_file_search_tool,
|
||||
|
|
@ -327,7 +330,9 @@ class ToolExecutor:
|
|||
attributes = {
|
||||
"tool_name": function_name,
|
||||
}
|
||||
async with tracing.span("invoke_tool", attributes):
|
||||
# TODO: follow semantic conventions for Open Telemetry tool spans
|
||||
# https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#execute-tool-span
|
||||
with tracer.start_as_current_span("invoke_tool", attributes=attributes):
|
||||
result = await self.tool_runtime_api.invoke_tool(
|
||||
tool_name=function_name,
|
||||
kwargs=tool_kwargs,
|
||||
|
|
|
|||
|
|
@ -6,7 +6,6 @@
|
|||
|
||||
import asyncio
|
||||
|
||||
from llama_stack.core.telemetry import tracing
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack_api import OpenAIMessageParam, Safety, SafetyViolation, ViolationLevel
|
||||
|
||||
|
|
@ -31,15 +30,12 @@ class ShieldRunnerMixin:
|
|||
self.output_shields = output_shields
|
||||
|
||||
async def run_multiple_shields(self, messages: list[OpenAIMessageParam], identifiers: list[str]) -> None:
|
||||
async def run_shield_with_span(identifier: str):
|
||||
async with tracing.span(f"run_shield_{identifier}"):
|
||||
return await self.safety_api.run_shield(
|
||||
shield_id=identifier,
|
||||
messages=messages,
|
||||
params={},
|
||||
)
|
||||
|
||||
responses = await asyncio.gather(*[run_shield_with_span(identifier) for identifier in identifiers])
|
||||
responses = await asyncio.gather(
|
||||
*[
|
||||
self.safety_api.run_shield(shield_id=identifier, messages=messages, params={})
|
||||
for identifier in identifiers
|
||||
]
|
||||
)
|
||||
for identifier, response in zip(identifiers, responses, strict=False):
|
||||
if not response.violation:
|
||||
continue
|
||||
|
|
|
|||
|
|
@ -4,8 +4,6 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from datetime import UTC, datetime
|
||||
from typing import TYPE_CHECKING, Annotated, Any, cast
|
||||
|
|
@ -39,7 +37,7 @@ from .config import S3FilesImplConfig
|
|||
# TODO: provider data for S3 credentials
|
||||
|
||||
|
||||
def _create_s3_client(config: S3FilesImplConfig) -> S3Client:
|
||||
def _create_s3_client(config: S3FilesImplConfig) -> "S3Client":
|
||||
try:
|
||||
s3_config = {
|
||||
"region_name": config.region,
|
||||
|
|
@ -66,7 +64,7 @@ def _create_s3_client(config: S3FilesImplConfig) -> S3Client:
|
|||
raise RuntimeError(f"Failed to initialize S3 client: {e}") from e
|
||||
|
||||
|
||||
async def _create_bucket_if_not_exists(client: S3Client, config: S3FilesImplConfig) -> None:
|
||||
async def _create_bucket_if_not_exists(client: "S3Client", config: S3FilesImplConfig) -> None:
|
||||
try:
|
||||
client.head_bucket(Bucket=config.bucket_name)
|
||||
except ClientError as e:
|
||||
|
|
@ -192,7 +190,7 @@ class S3FilesImpl(Files):
|
|||
pass
|
||||
|
||||
@property
|
||||
def client(self) -> S3Client:
|
||||
def client(self) -> "S3Client":
|
||||
assert self._client is not None, "Provider not initialized"
|
||||
return self._client
|
||||
|
||||
|
|
|
|||
|
|
@ -8,7 +8,6 @@ from collections.abc import AsyncIterator, Iterable
|
|||
|
||||
from openai import AuthenticationError
|
||||
|
||||
from llama_stack.core.telemetry.tracing import get_current_span
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||
from llama_stack_api import (
|
||||
|
|
@ -84,7 +83,7 @@ class BedrockInferenceAdapter(OpenAIMixin):
|
|||
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
|
||||
"""Override to enable streaming usage metrics and handle authentication errors."""
|
||||
# Enable streaming usage metrics when telemetry is active
|
||||
if params.stream and get_current_span() is not None:
|
||||
if params.stream:
|
||||
if params.stream_options is None:
|
||||
params.stream_options = {"include_usage": True}
|
||||
elif "include_usage" not in params.stream_options:
|
||||
|
|
|
|||
|
|
@ -10,7 +10,6 @@ from typing import Any
|
|||
import litellm
|
||||
import requests
|
||||
|
||||
from llama_stack.core.telemetry.tracing import get_current_span
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig
|
||||
from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
|
||||
|
|
@ -59,7 +58,7 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
|
|||
|
||||
# Add usage tracking for streaming when telemetry is active
|
||||
stream_options = params.stream_options
|
||||
if params.stream and get_current_span() is not None:
|
||||
if params.stream:
|
||||
if stream_options is None:
|
||||
stream_options = {"include_usage": True}
|
||||
elif "include_usage" not in stream_options:
|
||||
|
|
|
|||
|
|
@ -217,10 +217,9 @@ class LiteLLMOpenAIMixin(
|
|||
params: OpenAIChatCompletionRequestWithExtraBody,
|
||||
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
|
||||
# Always add usage tracking for streaming requests (no longer conditional on an active telemetry span)
|
||||
from llama_stack.core.telemetry.tracing import get_current_span
|
||||
|
||||
stream_options = params.stream_options
|
||||
if params.stream and get_current_span() is not None:
|
||||
if params.stream:
|
||||
if stream_options is None:
|
||||
stream_options = {"include_usage": True}
|
||||
elif "include_usage" not in stream_options:
|
||||
|
|
|
|||
|
|
@ -89,6 +89,7 @@ async def client_wrapper(endpoint: str, headers: dict[str, str]) -> AsyncGenerat
|
|||
# sse_client and streamablehttp_client have different signatures, but both
|
||||
# are called the same way here, so we cast to Any to avoid type errors
|
||||
client = cast(Any, sse_client)
|
||||
|
||||
async with client(endpoint, headers=headers) as client_streams:
|
||||
async with ClientSession(read_stream=client_streams[0], write_stream=client_streams[1]) as session:
|
||||
await session.initialize()
|
||||
|
|
|
|||
5
src/llama_stack/telemetry/__init__.py
Normal file
5
src/llama_stack/telemetry/__init__.py
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
27
src/llama_stack/telemetry/constants.py
Normal file
27
src/llama_stack/telemetry/constants.py
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
"""
|
||||
This file contains constants used for naming data captured for telemetry.
|
||||
|
||||
This is used to ensure that the data captured for telemetry is consistent and can be used to
|
||||
identify and correlate data. If custom telemetry data is added to llama stack, please add
|
||||
constants for it here.
|
||||
"""
|
||||
|
||||
llama_stack_prefix = "llama_stack"
|
||||
|
||||
# Safety Attributes
|
||||
RUN_SHIELD_OPERATION_NAME = "run_shield"
|
||||
|
||||
SAFETY_REQUEST_PREFIX = f"{llama_stack_prefix}.safety.request"
|
||||
SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE = f"{SAFETY_REQUEST_PREFIX}.shield_id"
|
||||
SAFETY_REQUEST_MESSAGES_ATTRIBUTE = f"{SAFETY_REQUEST_PREFIX}.messages"
|
||||
|
||||
SAFETY_RESPONSE_PREFIX = f"{llama_stack_prefix}.safety.response"
|
||||
SAFETY_RESPONSE_METADATA_ATTRIBUTE = f"{SAFETY_RESPONSE_PREFIX}.metadata"
|
||||
SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE = f"{SAFETY_RESPONSE_PREFIX}.violation.level"
|
||||
SAFETY_RESPONSE_USER_MESSAGE_ATTRIBUTE = f"{SAFETY_RESPONSE_PREFIX}.violation.user_message"
|
||||
43
src/llama_stack/telemetry/helpers.py
Normal file
43
src/llama_stack/telemetry/helpers.py
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import json
|
||||
|
||||
from opentelemetry import trace
|
||||
|
||||
from llama_stack_api import OpenAIMessageParam, RunShieldResponse
|
||||
|
||||
from .constants import (
|
||||
RUN_SHIELD_OPERATION_NAME,
|
||||
SAFETY_REQUEST_MESSAGES_ATTRIBUTE,
|
||||
SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE,
|
||||
SAFETY_RESPONSE_METADATA_ATTRIBUTE,
|
||||
SAFETY_RESPONSE_USER_MESSAGE_ATTRIBUTE,
|
||||
SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE,
|
||||
)
|
||||
|
||||
|
||||
def safety_span_name(shield_id: str) -> str:
|
||||
return f"{RUN_SHIELD_OPERATION_NAME} {shield_id}"
|
||||
|
||||
|
||||
# TODO: Consider using Wrapt to automatically instrument code
|
||||
# This is the industry-standard way to package automatic instrumentation in Python.
|
||||
def safety_request_span_attributes(
|
||||
shield_id: str, messages: list[OpenAIMessageParam], response: RunShieldResponse
|
||||
) -> None:
|
||||
span = trace.get_current_span()
|
||||
span.set_attribute(SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE, shield_id)
|
||||
messages_json = json.dumps([msg.model_dump() for msg in messages])
|
||||
span.set_attribute(SAFETY_REQUEST_MESSAGES_ATTRIBUTE, messages_json)
|
||||
|
||||
if response.violation:
|
||||
if response.violation.metadata:
|
||||
metadata_json = json.dumps(response.violation.metadata)
|
||||
span.set_attribute(SAFETY_RESPONSE_METADATA_ATTRIBUTE, metadata_json)
|
||||
if response.violation.user_message:
|
||||
span.set_attribute(SAFETY_RESPONSE_USER_MESSAGE_ATTRIBUTE, response.violation.user_message)
|
||||
span.set_attribute(SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE, response.violation.violation_level.value)
|
||||
|
|
@ -89,6 +89,7 @@ class Agents(Protocol):
|
|||
),
|
||||
] = None,
|
||||
max_tool_calls: int | None = None,
|
||||
metadata: dict[str, str] | None = None,
|
||||
) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
|
||||
"""Create a model response.
|
||||
|
||||
|
|
@ -100,6 +101,7 @@ class Agents(Protocol):
|
|||
:param include: (Optional) Additional fields to include in the response.
|
||||
:param guardrails: (Optional) List of guardrails to apply during response generation. Can be guardrail IDs (strings) or guardrail specifications.
|
||||
:param max_tool_calls: (Optional) Max number of total calls to built-in tools that can be processed in a response.
|
||||
:param metadata: (Optional) Dictionary of metadata key-value pairs to attach to the response.
|
||||
:returns: An OpenAIResponseObject.
|
||||
"""
|
||||
...
|
||||
|
|
|
|||
|
|
@ -1,22 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
|
||||
def telemetry_traceable(cls):
|
||||
"""
|
||||
Mark a protocol for automatic tracing when telemetry is enabled.
|
||||
|
||||
This is a metadata-only decorator with no dependencies on core.
|
||||
Actual tracing is applied by core routers at runtime if telemetry is enabled.
|
||||
|
||||
Usage:
|
||||
@runtime_checkable
|
||||
@telemetry_traceable
|
||||
class MyProtocol(Protocol):
|
||||
...
|
||||
"""
|
||||
cls.__marked_for_tracing__ = True
|
||||
return cls
|
||||
|
|
@ -9,7 +9,6 @@ from typing import Annotated, Literal, Protocol, runtime_checkable
|
|||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack_api.common.tracing import telemetry_traceable
|
||||
from llama_stack_api.openai_responses import (
|
||||
OpenAIResponseInputFunctionToolCallOutput,
|
||||
OpenAIResponseMCPApprovalRequest,
|
||||
|
|
@ -157,7 +156,6 @@ class ConversationItemDeletedResource(BaseModel):
|
|||
|
||||
|
||||
@runtime_checkable
|
||||
@telemetry_traceable
|
||||
class Conversations(Protocol):
|
||||
"""Conversations
|
||||
|
||||
|
|
|
|||
|
|
@ -11,7 +11,6 @@ from fastapi import File, Form, Response, UploadFile
|
|||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack_api.common.responses import Order
|
||||
from llama_stack_api.common.tracing import telemetry_traceable
|
||||
from llama_stack_api.schema_utils import json_schema_type, webmethod
|
||||
from llama_stack_api.version import LLAMA_STACK_API_V1
|
||||
|
||||
|
|
@ -102,7 +101,6 @@ class OpenAIFileDeleteResponse(BaseModel):
|
|||
|
||||
|
||||
@runtime_checkable
|
||||
@telemetry_traceable
|
||||
class Files(Protocol):
|
||||
"""Files
|
||||
|
||||
|
|
|
|||
|
|
@ -22,7 +22,6 @@ from llama_stack_api.common.content_types import InterleavedContent
|
|||
from llama_stack_api.common.responses import (
|
||||
Order,
|
||||
)
|
||||
from llama_stack_api.common.tracing import telemetry_traceable
|
||||
from llama_stack_api.models import Model
|
||||
from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
|
||||
from llama_stack_api.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
|
||||
|
|
@ -989,7 +988,6 @@ class OpenAIEmbeddingsRequestWithExtraBody(BaseModel, extra="allow"):
|
|||
|
||||
|
||||
@runtime_checkable
|
||||
@telemetry_traceable
|
||||
class InferenceProvider(Protocol):
|
||||
"""
|
||||
This protocol defines the interface that should be implemented by all inference providers.
|
||||
|
|
|
|||
|
|
@ -9,7 +9,6 @@ from typing import Any, Literal, Protocol, runtime_checkable
|
|||
|
||||
from pydantic import BaseModel, ConfigDict, Field, field_validator
|
||||
|
||||
from llama_stack_api.common.tracing import telemetry_traceable
|
||||
from llama_stack_api.resource import Resource, ResourceType
|
||||
from llama_stack_api.schema_utils import json_schema_type, webmethod
|
||||
from llama_stack_api.version import LLAMA_STACK_API_V1
|
||||
|
|
@ -106,7 +105,6 @@ class OpenAIListModelsResponse(BaseModel):
|
|||
|
||||
|
||||
@runtime_checkable
|
||||
@telemetry_traceable
|
||||
class Models(Protocol):
|
||||
async def list_models(self) -> ListModelsResponse:
|
||||
"""List all models.
|
||||
|
|
|
|||
|
|
@ -597,6 +597,7 @@ class OpenAIResponseObject(BaseModel):
|
|||
:param usage: (Optional) Token usage information for the response
|
||||
:param instructions: (Optional) System message inserted into the model's context
|
||||
:param max_tool_calls: (Optional) Max number of total calls to built-in tools that can be processed in a response
|
||||
:param metadata: (Optional) Dictionary of metadata key-value pairs
|
||||
"""
|
||||
|
||||
created_at: int
|
||||
|
|
@ -619,6 +620,7 @@ class OpenAIResponseObject(BaseModel):
|
|||
usage: OpenAIResponseUsage | None = None
|
||||
instructions: str | None = None
|
||||
max_tool_calls: int | None = None
|
||||
metadata: dict[str, str] | None = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
|
|
|
|||
|
|
@ -10,7 +10,6 @@ from typing import Protocol, runtime_checkable
|
|||
|
||||
from pydantic import BaseModel, Field, field_validator, model_validator
|
||||
|
||||
from llama_stack_api.common.tracing import telemetry_traceable
|
||||
from llama_stack_api.schema_utils import json_schema_type, webmethod
|
||||
from llama_stack_api.version import LLAMA_STACK_API_V1
|
||||
|
||||
|
|
@ -93,7 +92,6 @@ class ListPromptsResponse(BaseModel):
|
|||
|
||||
|
||||
@runtime_checkable
|
||||
@telemetry_traceable
|
||||
class Prompts(Protocol):
|
||||
"""Prompts
|
||||
|
||||
|
|
|
|||
|
|
@ -9,7 +9,6 @@ from typing import Any, Protocol, runtime_checkable
|
|||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack_api.common.tracing import telemetry_traceable
|
||||
from llama_stack_api.inference import OpenAIMessageParam
|
||||
from llama_stack_api.schema_utils import json_schema_type, webmethod
|
||||
from llama_stack_api.shields import Shield
|
||||
|
|
@ -94,7 +93,6 @@ class ShieldStore(Protocol):
|
|||
|
||||
|
||||
@runtime_checkable
|
||||
@telemetry_traceable
|
||||
class Safety(Protocol):
|
||||
"""Safety
|
||||
|
||||
|
|
|
|||
|
|
@ -8,7 +8,6 @@ from typing import Any, Literal, Protocol, runtime_checkable
|
|||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack_api.common.tracing import telemetry_traceable
|
||||
from llama_stack_api.resource import Resource, ResourceType
|
||||
from llama_stack_api.schema_utils import json_schema_type, webmethod
|
||||
from llama_stack_api.version import LLAMA_STACK_API_V1
|
||||
|
|
@ -49,7 +48,6 @@ class ListShieldsResponse(BaseModel):
|
|||
|
||||
|
||||
@runtime_checkable
|
||||
@telemetry_traceable
|
||||
class Shields(Protocol):
|
||||
@webmethod(route="/shields", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_shields(self) -> ListShieldsResponse:
|
||||
|
|
|
|||
|
|
@ -11,7 +11,6 @@ from pydantic import BaseModel
|
|||
from typing_extensions import runtime_checkable
|
||||
|
||||
from llama_stack_api.common.content_types import URL, InterleavedContent
|
||||
from llama_stack_api.common.tracing import telemetry_traceable
|
||||
from llama_stack_api.resource import Resource, ResourceType
|
||||
from llama_stack_api.schema_utils import json_schema_type, webmethod
|
||||
from llama_stack_api.version import LLAMA_STACK_API_V1
|
||||
|
|
@ -109,7 +108,6 @@ class ListToolDefsResponse(BaseModel):
|
|||
|
||||
|
||||
@runtime_checkable
|
||||
@telemetry_traceable
|
||||
class ToolGroups(Protocol):
|
||||
@webmethod(route="/toolgroups", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
async def register_tool_group(
|
||||
|
|
@ -128,7 +126,7 @@ class ToolGroups(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/toolgroups/{toolgroup_id:path}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/toolgroups/{toolgroup_id:path}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
async def get_tool_group(
|
||||
self,
|
||||
toolgroup_id: str,
|
||||
|
|
@ -140,7 +138,7 @@ class ToolGroups(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/toolgroups", method="GET", level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/toolgroups", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
async def list_tool_groups(self) -> ListToolGroupsResponse:
|
||||
"""List tool groups with optional provider.
|
||||
|
||||
|
|
@ -148,7 +146,7 @@ class ToolGroups(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/tools", method="GET", level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/tools", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
async def list_tools(self, toolgroup_id: str | None = None) -> ListToolDefsResponse:
|
||||
"""List tools with optional tool group.
|
||||
|
||||
|
|
@ -157,7 +155,7 @@ class ToolGroups(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/tools/{tool_name:path}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/tools/{tool_name:path}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
async def get_tool(
|
||||
self,
|
||||
tool_name: str,
|
||||
|
|
@ -191,12 +189,11 @@ class SpecialToolGroup(Enum):
|
|||
|
||||
|
||||
@runtime_checkable
|
||||
@telemetry_traceable
|
||||
class ToolRuntime(Protocol):
|
||||
tool_store: ToolStore | None = None
|
||||
|
||||
# TODO: This needs to be renamed once the OpenAPI generator name conflict issue is fixed.
|
||||
@webmethod(route="/tool-runtime/list-tools", method="GET", level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/tool-runtime/list-tools", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
async def list_runtime_tools(
|
||||
self,
|
||||
tool_group_id: str | None = None,
|
||||
|
|
@ -212,7 +209,7 @@ class ToolRuntime(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/tool-runtime/invoke", method="POST", level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/tool-runtime/invoke", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
async def invoke_tool(
|
||||
self,
|
||||
tool_name: str,
|
||||
|
|
|
|||
|
|
@ -13,7 +13,6 @@ from typing import Annotated, Any, Literal, Protocol, runtime_checkable
|
|||
from fastapi import Body, Query
|
||||
from pydantic import BaseModel, Field, field_validator
|
||||
|
||||
from llama_stack_api.common.tracing import telemetry_traceable
|
||||
from llama_stack_api.inference import InterleavedContent
|
||||
from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
|
||||
from llama_stack_api.vector_stores import VectorStore
|
||||
|
|
@ -572,7 +571,6 @@ class VectorStoreTable(Protocol):
|
|||
|
||||
|
||||
@runtime_checkable
|
||||
@telemetry_traceable
|
||||
class VectorIO(Protocol):
|
||||
vector_store_table: VectorStoreTable | None = None
|
||||
|
||||
|
|
|
|||
|
|
@ -17,7 +17,6 @@ from unittest.mock import AsyncMock, patch
|
|||
import pytest
|
||||
|
||||
from llama_stack.core.library_client import LlamaStackAsLibraryClient
|
||||
from llama_stack.core.telemetry.telemetry import MetricEvent
|
||||
from llama_stack_api import (
|
||||
Api,
|
||||
OpenAIAssistantMessageParam,
|
||||
|
|
@ -27,10 +26,6 @@ from llama_stack_api import (
|
|||
)
|
||||
|
||||
|
||||
class OpenAIChatCompletionWithMetrics(OpenAIChatCompletion):
|
||||
metrics: list[MetricEvent] | None = None
|
||||
|
||||
|
||||
def test_unregistered_model_routing_with_provider_data(client_with_models):
|
||||
"""
|
||||
Test that a model can be routed using provider_id/model_id format
|
||||
|
|
@ -72,7 +67,7 @@ def test_unregistered_model_routing_with_provider_data(client_with_models):
|
|||
# The inference router's routing_table.impls_by_provider_id should have anthropic
|
||||
# Let's patch the anthropic provider's openai_chat_completion method
|
||||
# to avoid making real API calls
|
||||
mock_response = OpenAIChatCompletionWithMetrics(
|
||||
mock_response = OpenAIChatCompletion(
|
||||
id="chatcmpl-test-123",
|
||||
created=1234567890,
|
||||
model="claude-3-5-sonnet-20241022",
|
||||
|
|
|
|||
|
|
@ -15,11 +15,10 @@ from opentelemetry.sdk.trace import TracerProvider
|
|||
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
|
||||
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
|
||||
|
||||
import llama_stack.core.telemetry.telemetry as telemetry_module
|
||||
|
||||
from .base import BaseTelemetryCollector, MetricStub, SpanStub
|
||||
|
||||
|
||||
# TODO: Fix this to work with Automatic Instrumentation
|
||||
class InMemoryTelemetryCollector(BaseTelemetryCollector):
|
||||
"""In-memory telemetry collector for library-client tests.
|
||||
|
||||
|
|
@ -75,13 +74,10 @@ class InMemoryTelemetryManager:
|
|||
meter_provider = MeterProvider(metric_readers=[metric_reader])
|
||||
metrics.set_meter_provider(meter_provider)
|
||||
|
||||
telemetry_module._TRACER_PROVIDER = tracer_provider
|
||||
|
||||
self.collector = InMemoryTelemetryCollector(span_exporter, metric_reader)
|
||||
self._tracer_provider = tracer_provider
|
||||
self._meter_provider = meter_provider
|
||||
|
||||
def shutdown(self) -> None:
|
||||
telemetry_module._TRACER_PROVIDER = None
|
||||
self._tracer_provider.shutdown()
|
||||
self._meter_provider.shutdown()
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ from tests.integration.fixtures.common import instantiate_llama_stack_client
|
|||
from tests.integration.telemetry.collectors import InMemoryTelemetryManager, OtlpHttpTestCollector
|
||||
|
||||
|
||||
# TODO: Fix this to work with Automatic Instrumentation
|
||||
@pytest.fixture(scope="session")
|
||||
def telemetry_test_collector():
|
||||
stack_mode = os.environ.get("LLAMA_STACK_TEST_STACK_CONFIG_TYPE", "library_client")
|
||||
|
|
@ -48,6 +49,7 @@ def telemetry_test_collector():
|
|||
manager.shutdown()
|
||||
|
||||
|
||||
# TODO: Fix this to work with Automatic Instrumentation
|
||||
@pytest.fixture(scope="session")
|
||||
def llama_stack_client(telemetry_test_collector, request):
|
||||
"""Ensure telemetry collector is ready before initializing the stack client."""
|
||||
|
|
|
|||
|
|
@ -155,9 +155,6 @@ def old_config():
|
|||
provider_type: inline::meta-reference
|
||||
config: {{}}
|
||||
api_providers:
|
||||
telemetry:
|
||||
provider_type: noop
|
||||
config: {{}}
|
||||
"""
|
||||
)
|
||||
|
||||
|
|
@ -181,7 +178,7 @@ def test_parse_and_maybe_upgrade_config_up_to_date(up_to_date_config):
|
|||
def test_parse_and_maybe_upgrade_config_old_format(old_config):
|
||||
result = parse_and_maybe_upgrade_config(old_config)
|
||||
assert result.version == LLAMA_STACK_RUN_CONFIG_VERSION
|
||||
assert all(api in result.providers for api in ["inference", "safety", "memory", "telemetry"])
|
||||
assert all(api in result.providers for api in ["inference", "safety", "memory"])
|
||||
safety_provider = result.providers["safety"][0]
|
||||
assert safety_provider.provider_type == "inline::meta-reference"
|
||||
assert "llama_guard_shield" in safety_provider.config
|
||||
|
|
|
|||
|
|
@ -83,7 +83,7 @@ class TestProviderInitialization:
|
|||
new_callable=AsyncMock,
|
||||
):
|
||||
# Should not raise any exception
|
||||
provider = await get_provider_impl(config, mock_deps, policy=[], telemetry_enabled=False)
|
||||
provider = await get_provider_impl(config, mock_deps, policy=[])
|
||||
assert provider is not None
|
||||
|
||||
async def test_initialization_without_safety_api(self, mock_persistence_config, mock_deps):
|
||||
|
|
@ -97,7 +97,7 @@ class TestProviderInitialization:
|
|||
new_callable=AsyncMock,
|
||||
):
|
||||
# Should not raise any exception
|
||||
provider = await get_provider_impl(config, mock_deps, policy=[], telemetry_enabled=False)
|
||||
provider = await get_provider_impl(config, mock_deps, policy=[])
|
||||
assert provider is not None
|
||||
assert provider.safety_api is None
|
||||
|
||||
|
|
|
|||
|
|
@ -364,23 +364,6 @@ def test_invalid_auth_header_format_oauth2(oauth2_client):
|
|||
assert "Invalid Authorization header format" in response.json()["error"]["message"]
|
||||
|
||||
|
||||
async def mock_jwks_response(*args, **kwargs):
|
||||
return MockResponse(
|
||||
200,
|
||||
{
|
||||
"keys": [
|
||||
{
|
||||
"kid": "1234567890",
|
||||
"kty": "oct",
|
||||
"alg": "HS256",
|
||||
"use": "sig",
|
||||
"k": base64.b64encode(b"foobarbaz").decode(),
|
||||
}
|
||||
]
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def jwt_token_valid():
|
||||
import jwt
|
||||
|
|
@ -421,28 +404,60 @@ def mock_jwks_urlopen():
|
|||
yield mock_urlopen
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_jwks_urlopen_with_auth_required():
|
||||
"""Mock urllib.request.urlopen that requires Bearer token for JWKS requests."""
|
||||
with patch("urllib.request.urlopen") as mock_urlopen:
|
||||
|
||||
def side_effect(request, **kwargs):
|
||||
# Check if Authorization header is present
|
||||
auth_header = request.headers.get("Authorization") if hasattr(request, "headers") else None
|
||||
|
||||
if not auth_header or not auth_header.startswith("Bearer "):
|
||||
# Simulate 401 Unauthorized
|
||||
import urllib.error
|
||||
|
||||
raise urllib.error.HTTPError(
|
||||
url=request.full_url if hasattr(request, "full_url") else "",
|
||||
code=401,
|
||||
msg="Unauthorized",
|
||||
hdrs={},
|
||||
fp=None,
|
||||
)
|
||||
|
||||
# Mock the JWKS response for PyJWKClient
|
||||
mock_response = Mock()
|
||||
mock_response.read.return_value = json.dumps(
|
||||
{
|
||||
"keys": [
|
||||
{
|
||||
"kid": "1234567890",
|
||||
"kty": "oct",
|
||||
"alg": "HS256",
|
||||
"use": "sig",
|
||||
"k": base64.b64encode(b"foobarbaz").decode(),
|
||||
}
|
||||
]
|
||||
}
|
||||
).encode()
|
||||
return mock_response
|
||||
|
||||
mock_urlopen.side_effect = side_effect
|
||||
yield mock_urlopen
|
||||
|
||||
|
||||
def test_valid_oauth2_authentication(oauth2_client, jwt_token_valid, mock_jwks_urlopen):
|
||||
response = oauth2_client.get("/test", headers={"Authorization": f"Bearer {jwt_token_valid}"})
|
||||
assert response.status_code == 200
|
||||
assert response.json() == {"message": "Authentication successful"}
|
||||
|
||||
|
||||
@patch("httpx.AsyncClient.get", new=mock_jwks_response)
|
||||
def test_invalid_oauth2_authentication(oauth2_client, invalid_token, suppress_auth_errors):
|
||||
def test_invalid_oauth2_authentication(oauth2_client, invalid_token, mock_jwks_urlopen, suppress_auth_errors):
|
||||
response = oauth2_client.get("/test", headers={"Authorization": f"Bearer {invalid_token}"})
|
||||
assert response.status_code == 401
|
||||
assert "Invalid JWT token" in response.json()["error"]["message"]
|
||||
|
||||
|
||||
async def mock_auth_jwks_response(*args, **kwargs):
|
||||
if "headers" not in kwargs or "Authorization" not in kwargs["headers"]:
|
||||
return MockResponse(401, {})
|
||||
authz = kwargs["headers"]["Authorization"]
|
||||
if authz != "Bearer my-jwks-token":
|
||||
return MockResponse(401, {})
|
||||
return await mock_jwks_response(args, kwargs)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def oauth2_app_with_jwks_token():
|
||||
app = FastAPI()
|
||||
|
|
@ -472,8 +487,9 @@ def oauth2_client_with_jwks_token(oauth2_app_with_jwks_token):
|
|||
return TestClient(oauth2_app_with_jwks_token)
|
||||
|
||||
|
||||
@patch("httpx.AsyncClient.get", new=mock_auth_jwks_response)
|
||||
def test_oauth2_with_jwks_token_expected(oauth2_client, jwt_token_valid, suppress_auth_errors):
|
||||
def test_oauth2_with_jwks_token_expected(
|
||||
oauth2_client, jwt_token_valid, mock_jwks_urlopen_with_auth_required, suppress_auth_errors
|
||||
):
|
||||
response = oauth2_client.get("/test", headers={"Authorization": f"Bearer {jwt_token_valid}"})
|
||||
assert response.status_code == 401
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue