mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-03 01:48:05 +00:00
Merge branch 'main' into routeur
This commit is contained in:
commit
3ce509e94a
87 changed files with 67526 additions and 4478 deletions
4
.github/workflows/backward-compat.yml
vendored
4
.github/workflows/backward-compat.yml
vendored
|
|
@ -32,7 +32,7 @@ jobs:
|
||||||
fetch-depth: 0 # Need full history to access main branch
|
fetch-depth: 0 # Need full history to access main branch
|
||||||
|
|
||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
|
uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
|
||||||
with:
|
with:
|
||||||
python-version: '3.12'
|
python-version: '3.12'
|
||||||
|
|
||||||
|
|
@ -410,7 +410,7 @@ jobs:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
|
|
||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
|
uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
|
||||||
with:
|
with:
|
||||||
python-version: '3.12'
|
python-version: '3.12'
|
||||||
|
|
||||||
|
|
|
||||||
18
.github/workflows/conformance.yml
vendored
18
.github/workflows/conformance.yml
vendored
|
|
@ -64,6 +64,7 @@ jobs:
|
||||||
ref: ${{ github.event.pull_request.base.ref }}
|
ref: ${{ github.event.pull_request.base.ref }}
|
||||||
path: 'base'
|
path: 'base'
|
||||||
|
|
||||||
|
|
||||||
# Cache oasdiff to avoid checksum failures and speed up builds
|
# Cache oasdiff to avoid checksum failures and speed up builds
|
||||||
- name: Cache oasdiff
|
- name: Cache oasdiff
|
||||||
if: steps.skip-check.outputs.skip != 'true'
|
if: steps.skip-check.outputs.skip != 'true'
|
||||||
|
|
@ -136,6 +137,23 @@ jobs:
|
||||||
run: |
|
run: |
|
||||||
oasdiff breaking --fail-on ERR $BASE_SPEC $CURRENT_SPEC --match-path '^/v1/'
|
oasdiff breaking --fail-on ERR $BASE_SPEC $CURRENT_SPEC --match-path '^/v1/'
|
||||||
|
|
||||||
|
# Run oasdiff to detect breaking changes in the API specification when compared to the OpenAI openAPI spec
|
||||||
|
- name: Run OpenAPI Breaking Change Diff Against OpenAI API
|
||||||
|
if: steps.skip-check.outputs.skip != 'true'
|
||||||
|
continue-on-error: true
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
OPENAI_SPEC=docs/static/openai-spec-2.3.0.yml
|
||||||
|
LLAMA_STACK_SPEC=docs/static/llama-stack-spec.yaml
|
||||||
|
|
||||||
|
# Compare Llama Stack spec against OpenAI spec.
|
||||||
|
# This finds breaking changes in our implementation of common endpoints.
|
||||||
|
# By using our spec as the base, we avoid errors for endpoints we don't implement.
|
||||||
|
oasdiff breaking --fail-on ERR \
|
||||||
|
"$LLAMA_STACK_SPEC" \
|
||||||
|
"$OPENAI_SPEC" \
|
||||||
|
--strip-prefix-base "/v1"
|
||||||
|
|
||||||
# Report when test is skipped
|
# Report when test is skipped
|
||||||
- name: Report skip reason
|
- name: Report skip reason
|
||||||
if: steps.skip-check.outputs.skip == 'true'
|
if: steps.skip-check.outputs.skip == 'true'
|
||||||
|
|
|
||||||
2
.github/workflows/pre-commit.yml
vendored
2
.github/workflows/pre-commit.yml
vendored
|
|
@ -30,7 +30,7 @@ jobs:
|
||||||
fetch-depth: ${{ github.actor == 'dependabot[bot]' && 0 || 1 }}
|
fetch-depth: ${{ github.actor == 'dependabot[bot]' && 0 || 1 }}
|
||||||
|
|
||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
|
uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
|
||||||
with:
|
with:
|
||||||
python-version: '3.12'
|
python-version: '3.12'
|
||||||
cache: pip
|
cache: pip
|
||||||
|
|
|
||||||
|
|
@ -19,6 +19,7 @@ repos:
|
||||||
- id: no-commit-to-branch
|
- id: no-commit-to-branch
|
||||||
- id: check-yaml
|
- id: check-yaml
|
||||||
args: ["--unsafe"]
|
args: ["--unsafe"]
|
||||||
|
exclude: 'docs/static/openai-spec-2.3.0.yml'
|
||||||
- id: detect-private-key
|
- id: detect-private-key
|
||||||
- id: mixed-line-ending
|
- id: mixed-line-ending
|
||||||
args: [--fix=lf] # Forces to replace line ending by LF (line feed)
|
args: [--fix=lf] # Forces to replace line ending by LF (line feed)
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,6 @@ data:
|
||||||
- inference
|
- inference
|
||||||
- files
|
- files
|
||||||
- safety
|
- safety
|
||||||
- telemetry
|
|
||||||
- tool_runtime
|
- tool_runtime
|
||||||
- vector_io
|
- vector_io
|
||||||
providers:
|
providers:
|
||||||
|
|
@ -67,12 +66,6 @@ data:
|
||||||
db: ${env.POSTGRES_DB:=llamastack}
|
db: ${env.POSTGRES_DB:=llamastack}
|
||||||
user: ${env.POSTGRES_USER:=llamastack}
|
user: ${env.POSTGRES_USER:=llamastack}
|
||||||
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
||||||
telemetry:
|
|
||||||
- provider_id: meta-reference
|
|
||||||
provider_type: inline::meta-reference
|
|
||||||
config:
|
|
||||||
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
|
|
||||||
sinks: ${env.TELEMETRY_SINKS:=console}
|
|
||||||
tool_runtime:
|
tool_runtime:
|
||||||
- provider_id: brave-search
|
- provider_id: brave-search
|
||||||
provider_type: remote::brave-search
|
provider_type: remote::brave-search
|
||||||
|
|
|
||||||
|
|
@ -126,8 +126,6 @@ registered_resources:
|
||||||
provider_id: rag-runtime
|
provider_id: rag-runtime
|
||||||
server:
|
server:
|
||||||
port: 8323
|
port: 8323
|
||||||
telemetry:
|
|
||||||
enabled: true
|
|
||||||
vector_stores:
|
vector_stores:
|
||||||
default_provider_id: chromadb
|
default_provider_id: chromadb
|
||||||
default_embedding_model:
|
default_embedding_model:
|
||||||
|
|
|
||||||
|
|
@ -2101,6 +2101,7 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/components/schemas/InvokeToolRequest'
|
$ref: '#/components/schemas/InvokeToolRequest'
|
||||||
required: true
|
required: true
|
||||||
|
deprecated: true
|
||||||
/v1/tool-runtime/list-tools:
|
/v1/tool-runtime/list-tools:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -2152,6 +2153,7 @@ paths:
|
||||||
- $ref: '#/components/schemas/URL'
|
- $ref: '#/components/schemas/URL'
|
||||||
- type: 'null'
|
- type: 'null'
|
||||||
title: Mcp Endpoint
|
title: Mcp Endpoint
|
||||||
|
deprecated: true
|
||||||
/v1/toolgroups:
|
/v1/toolgroups:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -2178,6 +2180,7 @@ paths:
|
||||||
summary: List Tool Groups
|
summary: List Tool Groups
|
||||||
description: List tool groups with optional provider.
|
description: List tool groups with optional provider.
|
||||||
operationId: list_tool_groups_v1_toolgroups_get
|
operationId: list_tool_groups_v1_toolgroups_get
|
||||||
|
deprecated: true
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
'400':
|
'400':
|
||||||
|
|
@ -2239,6 +2242,7 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
description: 'Path parameter: toolgroup_id'
|
description: 'Path parameter: toolgroup_id'
|
||||||
|
deprecated: true
|
||||||
delete:
|
delete:
|
||||||
responses:
|
responses:
|
||||||
'400':
|
'400':
|
||||||
|
|
@ -2303,6 +2307,7 @@ paths:
|
||||||
- type: string
|
- type: string
|
||||||
- type: 'null'
|
- type: 'null'
|
||||||
title: Toolgroup Id
|
title: Toolgroup Id
|
||||||
|
deprecated: true
|
||||||
/v1/tools/{tool_name}:
|
/v1/tools/{tool_name}:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -2336,6 +2341,7 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
description: 'Path parameter: tool_name'
|
description: 'Path parameter: tool_name'
|
||||||
|
deprecated: true
|
||||||
/v1/vector-io/insert:
|
/v1/vector-io/insert:
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -6812,6 +6818,12 @@ components:
|
||||||
anyOf:
|
anyOf:
|
||||||
- type: integer
|
- type: integer
|
||||||
- type: 'null'
|
- type: 'null'
|
||||||
|
metadata:
|
||||||
|
anyOf:
|
||||||
|
- additionalProperties:
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
- type: 'null'
|
||||||
input:
|
input:
|
||||||
items:
|
items:
|
||||||
anyOf:
|
anyOf:
|
||||||
|
|
@ -7215,6 +7227,12 @@ components:
|
||||||
anyOf:
|
anyOf:
|
||||||
- type: integer
|
- type: integer
|
||||||
- type: 'null'
|
- type: 'null'
|
||||||
|
metadata:
|
||||||
|
anyOf:
|
||||||
|
- additionalProperties:
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
- type: 'null'
|
||||||
type: object
|
type: object
|
||||||
required:
|
required:
|
||||||
- input
|
- input
|
||||||
|
|
@ -7346,6 +7364,12 @@ components:
|
||||||
anyOf:
|
anyOf:
|
||||||
- type: integer
|
- type: integer
|
||||||
- type: 'null'
|
- type: 'null'
|
||||||
|
metadata:
|
||||||
|
anyOf:
|
||||||
|
- additionalProperties:
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
- type: 'null'
|
||||||
type: object
|
type: object
|
||||||
required:
|
required:
|
||||||
- created_at
|
- created_at
|
||||||
|
|
@ -12196,227 +12220,6 @@ components:
|
||||||
- $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
|
- $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
|
||||||
title: OpenAIResponseContentPartReasoningText
|
title: OpenAIResponseContentPartReasoningText
|
||||||
title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
|
title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
|
||||||
SpanEndPayload:
|
|
||||||
description: Payload for a span end event.
|
|
||||||
properties:
|
|
||||||
type:
|
|
||||||
const: span_end
|
|
||||||
default: span_end
|
|
||||||
title: Type
|
|
||||||
type: string
|
|
||||||
status:
|
|
||||||
$ref: '#/components/schemas/SpanStatus'
|
|
||||||
required:
|
|
||||||
- status
|
|
||||||
title: SpanEndPayload
|
|
||||||
type: object
|
|
||||||
SpanStartPayload:
|
|
||||||
description: Payload for a span start event.
|
|
||||||
properties:
|
|
||||||
type:
|
|
||||||
const: span_start
|
|
||||||
default: span_start
|
|
||||||
title: Type
|
|
||||||
type: string
|
|
||||||
name:
|
|
||||||
title: Name
|
|
||||||
type: string
|
|
||||||
parent_span_id:
|
|
||||||
anyOf:
|
|
||||||
- type: string
|
|
||||||
- type: 'null'
|
|
||||||
nullable: true
|
|
||||||
required:
|
|
||||||
- name
|
|
||||||
title: SpanStartPayload
|
|
||||||
type: object
|
|
||||||
SpanStatus:
|
|
||||||
description: The status of a span indicating whether it completed successfully or with an error.
|
|
||||||
enum:
|
|
||||||
- ok
|
|
||||||
- error
|
|
||||||
title: SpanStatus
|
|
||||||
type: string
|
|
||||||
StructuredLogPayload:
|
|
||||||
discriminator:
|
|
||||||
mapping:
|
|
||||||
span_end: '#/components/schemas/SpanEndPayload'
|
|
||||||
span_start: '#/components/schemas/SpanStartPayload'
|
|
||||||
propertyName: type
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/SpanStartPayload'
|
|
||||||
title: SpanStartPayload
|
|
||||||
- $ref: '#/components/schemas/SpanEndPayload'
|
|
||||||
title: SpanEndPayload
|
|
||||||
title: SpanStartPayload | SpanEndPayload
|
|
||||||
LogSeverity:
|
|
||||||
description: The severity level of a log message.
|
|
||||||
enum:
|
|
||||||
- verbose
|
|
||||||
- debug
|
|
||||||
- info
|
|
||||||
- warn
|
|
||||||
- error
|
|
||||||
- critical
|
|
||||||
title: LogSeverity
|
|
||||||
type: string
|
|
||||||
MetricEvent:
|
|
||||||
description: A metric event containing a measured value.
|
|
||||||
properties:
|
|
||||||
trace_id:
|
|
||||||
title: Trace Id
|
|
||||||
type: string
|
|
||||||
span_id:
|
|
||||||
title: Span Id
|
|
||||||
type: string
|
|
||||||
timestamp:
|
|
||||||
format: date-time
|
|
||||||
title: Timestamp
|
|
||||||
type: string
|
|
||||||
attributes:
|
|
||||||
anyOf:
|
|
||||||
- additionalProperties:
|
|
||||||
anyOf:
|
|
||||||
- type: string
|
|
||||||
- type: integer
|
|
||||||
- type: number
|
|
||||||
- type: boolean
|
|
||||||
- type: 'null'
|
|
||||||
title: string | ... (4 variants)
|
|
||||||
type: object
|
|
||||||
- type: 'null'
|
|
||||||
type:
|
|
||||||
const: metric
|
|
||||||
default: metric
|
|
||||||
title: Type
|
|
||||||
type: string
|
|
||||||
metric:
|
|
||||||
title: Metric
|
|
||||||
type: string
|
|
||||||
value:
|
|
||||||
anyOf:
|
|
||||||
- type: integer
|
|
||||||
- type: number
|
|
||||||
title: integer | number
|
|
||||||
unit:
|
|
||||||
title: Unit
|
|
||||||
type: string
|
|
||||||
required:
|
|
||||||
- trace_id
|
|
||||||
- span_id
|
|
||||||
- timestamp
|
|
||||||
- metric
|
|
||||||
- value
|
|
||||||
- unit
|
|
||||||
title: MetricEvent
|
|
||||||
type: object
|
|
||||||
StructuredLogEvent:
|
|
||||||
description: A structured log event containing typed payload data.
|
|
||||||
properties:
|
|
||||||
trace_id:
|
|
||||||
title: Trace Id
|
|
||||||
type: string
|
|
||||||
span_id:
|
|
||||||
title: Span Id
|
|
||||||
type: string
|
|
||||||
timestamp:
|
|
||||||
format: date-time
|
|
||||||
title: Timestamp
|
|
||||||
type: string
|
|
||||||
attributes:
|
|
||||||
anyOf:
|
|
||||||
- additionalProperties:
|
|
||||||
anyOf:
|
|
||||||
- type: string
|
|
||||||
- type: integer
|
|
||||||
- type: number
|
|
||||||
- type: boolean
|
|
||||||
- type: 'null'
|
|
||||||
title: string | ... (4 variants)
|
|
||||||
type: object
|
|
||||||
- type: 'null'
|
|
||||||
type:
|
|
||||||
const: structured_log
|
|
||||||
default: structured_log
|
|
||||||
title: Type
|
|
||||||
type: string
|
|
||||||
payload:
|
|
||||||
discriminator:
|
|
||||||
mapping:
|
|
||||||
span_end: '#/components/schemas/SpanEndPayload'
|
|
||||||
span_start: '#/components/schemas/SpanStartPayload'
|
|
||||||
propertyName: type
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/SpanStartPayload'
|
|
||||||
title: SpanStartPayload
|
|
||||||
- $ref: '#/components/schemas/SpanEndPayload'
|
|
||||||
title: SpanEndPayload
|
|
||||||
title: SpanStartPayload | SpanEndPayload
|
|
||||||
required:
|
|
||||||
- trace_id
|
|
||||||
- span_id
|
|
||||||
- timestamp
|
|
||||||
- payload
|
|
||||||
title: StructuredLogEvent
|
|
||||||
type: object
|
|
||||||
UnstructuredLogEvent:
|
|
||||||
description: An unstructured log event containing a simple text message.
|
|
||||||
properties:
|
|
||||||
trace_id:
|
|
||||||
title: Trace Id
|
|
||||||
type: string
|
|
||||||
span_id:
|
|
||||||
title: Span Id
|
|
||||||
type: string
|
|
||||||
timestamp:
|
|
||||||
format: date-time
|
|
||||||
title: Timestamp
|
|
||||||
type: string
|
|
||||||
attributes:
|
|
||||||
anyOf:
|
|
||||||
- additionalProperties:
|
|
||||||
anyOf:
|
|
||||||
- type: string
|
|
||||||
- type: integer
|
|
||||||
- type: number
|
|
||||||
- type: boolean
|
|
||||||
- type: 'null'
|
|
||||||
title: string | ... (4 variants)
|
|
||||||
type: object
|
|
||||||
- type: 'null'
|
|
||||||
type:
|
|
||||||
const: unstructured_log
|
|
||||||
default: unstructured_log
|
|
||||||
title: Type
|
|
||||||
type: string
|
|
||||||
message:
|
|
||||||
title: Message
|
|
||||||
type: string
|
|
||||||
severity:
|
|
||||||
$ref: '#/components/schemas/LogSeverity'
|
|
||||||
required:
|
|
||||||
- trace_id
|
|
||||||
- span_id
|
|
||||||
- timestamp
|
|
||||||
- message
|
|
||||||
- severity
|
|
||||||
title: UnstructuredLogEvent
|
|
||||||
type: object
|
|
||||||
Event:
|
|
||||||
discriminator:
|
|
||||||
mapping:
|
|
||||||
metric: '#/components/schemas/MetricEvent'
|
|
||||||
structured_log: '#/components/schemas/StructuredLogEvent'
|
|
||||||
unstructured_log: '#/components/schemas/UnstructuredLogEvent'
|
|
||||||
propertyName: type
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/UnstructuredLogEvent'
|
|
||||||
title: UnstructuredLogEvent
|
|
||||||
- $ref: '#/components/schemas/MetricEvent'
|
|
||||||
title: MetricEvent
|
|
||||||
- $ref: '#/components/schemas/StructuredLogEvent'
|
|
||||||
title: StructuredLogEvent
|
|
||||||
title: UnstructuredLogEvent | MetricEvent | StructuredLogEvent
|
|
||||||
MetricInResponse:
|
MetricInResponse:
|
||||||
description: A metric value included in API responses.
|
description: A metric value included in API responses.
|
||||||
properties:
|
properties:
|
||||||
|
|
@ -13279,236 +13082,6 @@ components:
|
||||||
- logger_config
|
- logger_config
|
||||||
title: PostTrainingRLHFRequest
|
title: PostTrainingRLHFRequest
|
||||||
type: object
|
type: object
|
||||||
Span:
|
|
||||||
description: A span representing a single operation within a trace.
|
|
||||||
properties:
|
|
||||||
span_id:
|
|
||||||
title: Span Id
|
|
||||||
type: string
|
|
||||||
trace_id:
|
|
||||||
title: Trace Id
|
|
||||||
type: string
|
|
||||||
parent_span_id:
|
|
||||||
anyOf:
|
|
||||||
- type: string
|
|
||||||
- type: 'null'
|
|
||||||
nullable: true
|
|
||||||
name:
|
|
||||||
title: Name
|
|
||||||
type: string
|
|
||||||
start_time:
|
|
||||||
format: date-time
|
|
||||||
title: Start Time
|
|
||||||
type: string
|
|
||||||
end_time:
|
|
||||||
anyOf:
|
|
||||||
- format: date-time
|
|
||||||
type: string
|
|
||||||
- type: 'null'
|
|
||||||
nullable: true
|
|
||||||
attributes:
|
|
||||||
anyOf:
|
|
||||||
- additionalProperties: true
|
|
||||||
type: object
|
|
||||||
- type: 'null'
|
|
||||||
required:
|
|
||||||
- span_id
|
|
||||||
- trace_id
|
|
||||||
- name
|
|
||||||
- start_time
|
|
||||||
title: Span
|
|
||||||
type: object
|
|
||||||
Trace:
|
|
||||||
description: A trace representing the complete execution path of a request across multiple operations.
|
|
||||||
properties:
|
|
||||||
trace_id:
|
|
||||||
title: Trace Id
|
|
||||||
type: string
|
|
||||||
root_span_id:
|
|
||||||
title: Root Span Id
|
|
||||||
type: string
|
|
||||||
start_time:
|
|
||||||
format: date-time
|
|
||||||
title: Start Time
|
|
||||||
type: string
|
|
||||||
end_time:
|
|
||||||
anyOf:
|
|
||||||
- format: date-time
|
|
||||||
type: string
|
|
||||||
- type: 'null'
|
|
||||||
nullable: true
|
|
||||||
required:
|
|
||||||
- trace_id
|
|
||||||
- root_span_id
|
|
||||||
- start_time
|
|
||||||
title: Trace
|
|
||||||
type: object
|
|
||||||
EventType:
|
|
||||||
description: The type of telemetry event being logged.
|
|
||||||
enum:
|
|
||||||
- unstructured_log
|
|
||||||
- structured_log
|
|
||||||
- metric
|
|
||||||
title: EventType
|
|
||||||
type: string
|
|
||||||
StructuredLogType:
|
|
||||||
description: The type of structured log event payload.
|
|
||||||
enum:
|
|
||||||
- span_start
|
|
||||||
- span_end
|
|
||||||
title: StructuredLogType
|
|
||||||
type: string
|
|
||||||
EvalTrace:
|
|
||||||
description: A trace record for evaluation purposes.
|
|
||||||
properties:
|
|
||||||
session_id:
|
|
||||||
title: Session Id
|
|
||||||
type: string
|
|
||||||
step:
|
|
||||||
title: Step
|
|
||||||
type: string
|
|
||||||
input:
|
|
||||||
title: Input
|
|
||||||
type: string
|
|
||||||
output:
|
|
||||||
title: Output
|
|
||||||
type: string
|
|
||||||
expected_output:
|
|
||||||
title: Expected Output
|
|
||||||
type: string
|
|
||||||
required:
|
|
||||||
- session_id
|
|
||||||
- step
|
|
||||||
- input
|
|
||||||
- output
|
|
||||||
- expected_output
|
|
||||||
title: EvalTrace
|
|
||||||
type: object
|
|
||||||
SpanWithStatus:
|
|
||||||
description: A span that includes status information.
|
|
||||||
properties:
|
|
||||||
span_id:
|
|
||||||
title: Span Id
|
|
||||||
type: string
|
|
||||||
trace_id:
|
|
||||||
title: Trace Id
|
|
||||||
type: string
|
|
||||||
parent_span_id:
|
|
||||||
anyOf:
|
|
||||||
- type: string
|
|
||||||
- type: 'null'
|
|
||||||
nullable: true
|
|
||||||
name:
|
|
||||||
title: Name
|
|
||||||
type: string
|
|
||||||
start_time:
|
|
||||||
format: date-time
|
|
||||||
title: Start Time
|
|
||||||
type: string
|
|
||||||
end_time:
|
|
||||||
anyOf:
|
|
||||||
- format: date-time
|
|
||||||
type: string
|
|
||||||
- type: 'null'
|
|
||||||
nullable: true
|
|
||||||
attributes:
|
|
||||||
anyOf:
|
|
||||||
- additionalProperties: true
|
|
||||||
type: object
|
|
||||||
- type: 'null'
|
|
||||||
status:
|
|
||||||
anyOf:
|
|
||||||
- $ref: '#/components/schemas/SpanStatus'
|
|
||||||
title: SpanStatus
|
|
||||||
- type: 'null'
|
|
||||||
nullable: true
|
|
||||||
title: SpanStatus
|
|
||||||
required:
|
|
||||||
- span_id
|
|
||||||
- trace_id
|
|
||||||
- name
|
|
||||||
- start_time
|
|
||||||
title: SpanWithStatus
|
|
||||||
type: object
|
|
||||||
QueryConditionOp:
|
|
||||||
description: Comparison operators for query conditions.
|
|
||||||
enum:
|
|
||||||
- eq
|
|
||||||
- ne
|
|
||||||
- gt
|
|
||||||
- lt
|
|
||||||
title: QueryConditionOp
|
|
||||||
type: string
|
|
||||||
QueryCondition:
|
|
||||||
description: A condition for filtering query results.
|
|
||||||
properties:
|
|
||||||
key:
|
|
||||||
title: Key
|
|
||||||
type: string
|
|
||||||
op:
|
|
||||||
$ref: '#/components/schemas/QueryConditionOp'
|
|
||||||
value:
|
|
||||||
title: Value
|
|
||||||
required:
|
|
||||||
- key
|
|
||||||
- op
|
|
||||||
- value
|
|
||||||
title: QueryCondition
|
|
||||||
type: object
|
|
||||||
MetricLabel:
|
|
||||||
description: A label associated with a metric.
|
|
||||||
properties:
|
|
||||||
name:
|
|
||||||
title: Name
|
|
||||||
type: string
|
|
||||||
value:
|
|
||||||
title: Value
|
|
||||||
type: string
|
|
||||||
required:
|
|
||||||
- name
|
|
||||||
- value
|
|
||||||
title: MetricLabel
|
|
||||||
type: object
|
|
||||||
MetricDataPoint:
|
|
||||||
description: A single data point in a metric time series.
|
|
||||||
properties:
|
|
||||||
timestamp:
|
|
||||||
title: Timestamp
|
|
||||||
type: integer
|
|
||||||
value:
|
|
||||||
title: Value
|
|
||||||
type: number
|
|
||||||
unit:
|
|
||||||
title: Unit
|
|
||||||
type: string
|
|
||||||
required:
|
|
||||||
- timestamp
|
|
||||||
- value
|
|
||||||
- unit
|
|
||||||
title: MetricDataPoint
|
|
||||||
type: object
|
|
||||||
MetricSeries:
|
|
||||||
description: A time series of metric data points.
|
|
||||||
properties:
|
|
||||||
metric:
|
|
||||||
title: Metric
|
|
||||||
type: string
|
|
||||||
labels:
|
|
||||||
items:
|
|
||||||
$ref: '#/components/schemas/MetricLabel'
|
|
||||||
title: Labels
|
|
||||||
type: array
|
|
||||||
values:
|
|
||||||
items:
|
|
||||||
$ref: '#/components/schemas/MetricDataPoint'
|
|
||||||
title: Values
|
|
||||||
type: array
|
|
||||||
required:
|
|
||||||
- metric
|
|
||||||
- labels
|
|
||||||
- values
|
|
||||||
title: MetricSeries
|
|
||||||
type: object
|
|
||||||
responses:
|
responses:
|
||||||
BadRequest400:
|
BadRequest400:
|
||||||
description: The request was invalid or malformed
|
description: The request was invalid or malformed
|
||||||
|
|
|
||||||
|
|
@ -10,203 +10,34 @@ import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
# Telemetry
|
# Telemetry
|
||||||
|
|
||||||
The Llama Stack uses OpenTelemetry to provide comprehensive tracing, metrics, and logging capabilities.
|
The preferred way to instrument Llama Stack is with OpenTelemetry. Llama Stack enriches the data
|
||||||
|
collected by OpenTelemetry to capture helpful information about the performance and behavior of your
|
||||||
|
application. Here is an example of how to forward your telemetry to an OTLP collector from Llama Stack:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
export OTEL_EXPORTER_OTLP_ENDPOINT="http://127.0.0.1:4318"
|
||||||
|
export OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf
|
||||||
|
export OTEL_SERVICE_NAME="llama-stack-server"
|
||||||
|
|
||||||
## Automatic Metrics Generation
|
uv pip install opentelemetry-distro opentelemetry-exporter-otlp
|
||||||
|
uv run opentelemetry-bootstrap -a requirements | uv pip install --requirement -
|
||||||
|
|
||||||
Llama Stack automatically generates metrics during inference operations. These metrics are aggregated at the **inference request level** and provide insights into token usage and model performance.
|
uv run opentelemetry-instrument llama stack run run.yaml
|
||||||
|
|
||||||
### Available Metrics
|
|
||||||
|
|
||||||
The following metrics are automatically generated for each inference request:
|
|
||||||
|
|
||||||
| Metric Name | Type | Unit | Description | Labels |
|
|
||||||
|-------------|------|------|-------------|--------|
|
|
||||||
| `llama_stack_prompt_tokens_total` | Counter | `tokens` | Number of tokens in the input prompt | `model_id`, `provider_id` |
|
|
||||||
| `llama_stack_completion_tokens_total` | Counter | `tokens` | Number of tokens in the generated response | `model_id`, `provider_id` |
|
|
||||||
| `llama_stack_tokens_total` | Counter | `tokens` | Total tokens used (prompt + completion) | `model_id`, `provider_id` |
|
|
||||||
|
|
||||||
### Metric Generation Flow
|
|
||||||
|
|
||||||
1. **Token Counting**: During inference operations (chat completion, completion, etc.), the system counts tokens in both input prompts and generated responses
|
|
||||||
2. **Metric Construction**: For each request, `MetricEvent` objects are created with the token counts
|
|
||||||
3. **Telemetry Logging**: Metrics are sent to the configured telemetry sinks
|
|
||||||
4. **OpenTelemetry Export**: When OpenTelemetry is enabled, metrics are exposed as standard OpenTelemetry counters
|
|
||||||
|
|
||||||
### Metric Aggregation Level
|
|
||||||
|
|
||||||
All metrics are generated and aggregated at the **inference request level**. This means:
|
|
||||||
|
|
||||||
- Each individual inference request generates its own set of metrics
|
|
||||||
- Metrics are not pre-aggregated across multiple requests
|
|
||||||
- Aggregation (sums, averages, etc.) can be performed by your observability tools (Prometheus, Grafana, etc.)
|
|
||||||
- Each metric includes labels for `model_id` and `provider_id` to enable filtering and grouping
|
|
||||||
|
|
||||||
### Example Metric Event
|
|
||||||
|
|
||||||
```python
|
|
||||||
MetricEvent(
|
|
||||||
trace_id="1234567890abcdef",
|
|
||||||
span_id="abcdef1234567890",
|
|
||||||
metric="total_tokens",
|
|
||||||
value=150,
|
|
||||||
timestamp=1703123456.789,
|
|
||||||
unit="tokens",
|
|
||||||
attributes={
|
|
||||||
"model_id": "meta-llama/Llama-3.2-3B-Instruct",
|
|
||||||
"provider_id": "tgi"
|
|
||||||
},
|
|
||||||
)
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Telemetry Sinks
|
|
||||||
|
|
||||||
Choose from multiple sink types based on your observability needs:
|
### Known issues
|
||||||
|
|
||||||
<Tabs>
|
Some database instrumentation libraries have a known bug where spans get wrapped twice, or do not get connected to a trace.
|
||||||
<TabItem value="opentelemetry" label="OpenTelemetry">
|
To prevent this, you can disable database specific tracing, and rely just on the SQLAlchemy tracing. If you are using
|
||||||
|
`sqlite3` as your database, for example, you can disable the additional tracing like this:
|
||||||
|
|
||||||
Send events to an OpenTelemetry Collector for integration with observability platforms:
|
```sh
|
||||||
|
export OTEL_PYTHON_DISABLED_INSTRUMENTATIONS="sqlite3"
|
||||||
**Use Cases:**
|
|
||||||
- Visualizing traces in tools like Jaeger
|
|
||||||
- Collecting metrics for Prometheus
|
|
||||||
- Integration with enterprise observability stacks
|
|
||||||
|
|
||||||
**Features:**
|
|
||||||
- Standard OpenTelemetry format
|
|
||||||
- Compatible with all OpenTelemetry collectors
|
|
||||||
- Supports both traces and metrics
|
|
||||||
|
|
||||||
</TabItem>
|
|
||||||
<TabItem value="console" label="Console">
|
|
||||||
|
|
||||||
Print events to the console for immediate debugging:
|
|
||||||
|
|
||||||
**Use Cases:**
|
|
||||||
- Development and testing
|
|
||||||
- Quick debugging sessions
|
|
||||||
- Simple logging without external tools
|
|
||||||
|
|
||||||
**Features:**
|
|
||||||
- Immediate output visibility
|
|
||||||
- No setup required
|
|
||||||
- Human-readable format
|
|
||||||
|
|
||||||
</TabItem>
|
|
||||||
</Tabs>
|
|
||||||
|
|
||||||
## Configuration
|
|
||||||
|
|
||||||
### Meta-Reference Provider
|
|
||||||
|
|
||||||
Currently, only the meta-reference provider is implemented. It can be configured to send events to multiple sink types:
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
telemetry:
|
|
||||||
- provider_id: meta-reference
|
|
||||||
provider_type: inline::meta-reference
|
|
||||||
config:
|
|
||||||
service_name: "llama-stack-service"
|
|
||||||
sinks: ['console', 'otel_trace', 'otel_metric']
|
|
||||||
otel_exporter_otlp_endpoint: "http://localhost:4318"
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### Environment Variables
|
|
||||||
|
|
||||||
Configure telemetry behavior using environment variables:
|
|
||||||
|
|
||||||
- **`OTEL_EXPORTER_OTLP_ENDPOINT`**: OpenTelemetry Collector endpoint (default: `http://localhost:4318`)
|
|
||||||
- **`OTEL_SERVICE_NAME`**: Service name for telemetry (default: empty string)
|
|
||||||
- **`TELEMETRY_SINKS`**: Comma-separated list of sinks (default: `[]`)
|
|
||||||
|
|
||||||
### Quick Setup: Complete Telemetry Stack
|
|
||||||
|
|
||||||
Use the automated setup script to launch the complete telemetry stack (Jaeger, OpenTelemetry Collector, Prometheus, and Grafana):
|
|
||||||
|
|
||||||
```bash
|
|
||||||
./scripts/telemetry/setup_telemetry.sh
|
|
||||||
```
|
|
||||||
|
|
||||||
This sets up:
|
|
||||||
- **Jaeger UI**: http://localhost:16686 (traces visualization)
|
|
||||||
- **Prometheus**: http://localhost:9090 (metrics)
|
|
||||||
- **Grafana**: http://localhost:3000 (dashboards with auto-configured data sources)
|
|
||||||
- **OTEL Collector**: http://localhost:4318 (OTLP endpoint)
|
|
||||||
|
|
||||||
Once running, you can visualize traces by navigating to [Grafana](http://localhost:3000/) and login with login `admin` and password `admin`.
|
|
||||||
|
|
||||||
## Querying Metrics
|
|
||||||
|
|
||||||
When using the OpenTelemetry sink, metrics are exposed in standard format and can be queried through various tools:
|
|
||||||
|
|
||||||
<Tabs>
|
|
||||||
<TabItem value="prometheus" label="Prometheus Queries">
|
|
||||||
|
|
||||||
Example Prometheus queries for analyzing token usage:
|
|
||||||
|
|
||||||
```promql
|
|
||||||
# Total tokens used across all models
|
|
||||||
sum(llama_stack_tokens_total)
|
|
||||||
|
|
||||||
# Tokens per model
|
|
||||||
sum by (model_id) (llama_stack_tokens_total)
|
|
||||||
|
|
||||||
# Average tokens per request over 5 minutes
|
|
||||||
rate(llama_stack_tokens_total[5m])
|
|
||||||
|
|
||||||
# Token usage by provider
|
|
||||||
sum by (provider_id) (llama_stack_tokens_total)
|
|
||||||
```
|
|
||||||
|
|
||||||
</TabItem>
|
|
||||||
<TabItem value="grafana" label="Grafana Dashboards">
|
|
||||||
|
|
||||||
Create dashboards using Prometheus as a data source:
|
|
||||||
|
|
||||||
- **Token Usage Over Time**: Line charts showing token consumption trends
|
|
||||||
- **Model Performance**: Comparison of different models by token efficiency
|
|
||||||
- **Provider Analysis**: Breakdown of usage across different providers
|
|
||||||
- **Request Patterns**: Understanding peak usage times and patterns
|
|
||||||
|
|
||||||
</TabItem>
|
|
||||||
<TabItem value="otlp" label="OpenTelemetry Collector">
|
|
||||||
|
|
||||||
Forward metrics to other observability systems:
|
|
||||||
|
|
||||||
- Export to multiple backends simultaneously
|
|
||||||
- Apply transformations and filtering
|
|
||||||
- Integrate with existing monitoring infrastructure
|
|
||||||
|
|
||||||
</TabItem>
|
|
||||||
</Tabs>
|
|
||||||
|
|
||||||
## Best Practices
|
|
||||||
|
|
||||||
### 🔍 **Monitoring Strategy**
|
|
||||||
- Use OpenTelemetry for production environments
|
|
||||||
- Set up alerts on key metrics like token usage and error rates
|
|
||||||
|
|
||||||
### 📊 **Metrics Analysis**
|
|
||||||
- Track token usage trends to optimize costs
|
|
||||||
- Monitor response times across different models
|
|
||||||
- Analyze usage patterns to improve resource allocation
|
|
||||||
|
|
||||||
### 🚨 **Alerting & Debugging**
|
|
||||||
- Set up alerts for unusual token consumption spikes
|
|
||||||
- Use trace data to debug performance issues
|
|
||||||
- Monitor error rates and failure patterns
|
|
||||||
|
|
||||||
### 🔧 **Configuration Management**
|
|
||||||
- Use environment variables for flexible deployment
|
|
||||||
- Ensure proper network access to OpenTelemetry collectors
|
|
||||||
|
|
||||||
|
|
||||||
## Related Resources
|
## Related Resources
|
||||||
|
|
||||||
- **[Agents](./agent)** - Monitoring agent execution with telemetry
|
|
||||||
- **[Evaluations](./evals)** - Using telemetry data for performance evaluation
|
|
||||||
- **[Getting Started Notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb)** - Telemetry examples and queries
|
|
||||||
- **[OpenTelemetry Documentation](https://opentelemetry.io/)** - Comprehensive observability framework
|
- **[OpenTelemetry Documentation](https://opentelemetry.io/)** - Comprehensive observability framework
|
||||||
- **[Jaeger Documentation](https://www.jaegertracing.io/)** - Distributed tracing visualization
|
- **[Jaeger Documentation](https://www.jaegertracing.io/)** - Distributed tracing visualization
|
||||||
|
|
|
||||||
|
|
@ -17,7 +17,6 @@ A Llama Stack API is described as a collection of REST endpoints following OpenA
|
||||||
- **Eval**: generate outputs (via Inference or Agents) and perform scoring
|
- **Eval**: generate outputs (via Inference or Agents) and perform scoring
|
||||||
- **VectorIO**: perform operations on vector stores, such as adding documents, searching, and deleting documents
|
- **VectorIO**: perform operations on vector stores, such as adding documents, searching, and deleting documents
|
||||||
- **Files**: manage file uploads, storage, and retrieval
|
- **Files**: manage file uploads, storage, and retrieval
|
||||||
- **Telemetry**: collect telemetry data from the system
|
|
||||||
- **Post Training**: fine-tune a model
|
- **Post Training**: fine-tune a model
|
||||||
- **Tool Runtime**: interact with various tools and protocols
|
- **Tool Runtime**: interact with various tools and protocols
|
||||||
- **Responses**: generate responses from an LLM
|
- **Responses**: generate responses from an LLM
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,6 @@ data:
|
||||||
- inference
|
- inference
|
||||||
- files
|
- files
|
||||||
- safety
|
- safety
|
||||||
- telemetry
|
|
||||||
- tool_runtime
|
- tool_runtime
|
||||||
- vector_io
|
- vector_io
|
||||||
providers:
|
providers:
|
||||||
|
|
@ -73,12 +72,6 @@ data:
|
||||||
db: ${env.POSTGRES_DB:=llamastack}
|
db: ${env.POSTGRES_DB:=llamastack}
|
||||||
user: ${env.POSTGRES_USER:=llamastack}
|
user: ${env.POSTGRES_USER:=llamastack}
|
||||||
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
||||||
telemetry:
|
|
||||||
- provider_id: meta-reference
|
|
||||||
provider_type: inline::meta-reference
|
|
||||||
config:
|
|
||||||
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
|
|
||||||
sinks: ${env.TELEMETRY_SINKS:=console}
|
|
||||||
tool_runtime:
|
tool_runtime:
|
||||||
- provider_id: brave-search
|
- provider_id: brave-search
|
||||||
provider_type: remote::brave-search
|
provider_type: remote::brave-search
|
||||||
|
|
|
||||||
|
|
@ -140,8 +140,6 @@ server:
|
||||||
auth:
|
auth:
|
||||||
provider_config:
|
provider_config:
|
||||||
type: github_token
|
type: github_token
|
||||||
telemetry:
|
|
||||||
enabled: true
|
|
||||||
vector_stores:
|
vector_stores:
|
||||||
default_provider_id: chromadb
|
default_provider_id: chromadb
|
||||||
default_embedding_model:
|
default_embedding_model:
|
||||||
|
|
|
||||||
|
|
@ -28,7 +28,7 @@ Llama Stack provides several pre-configured distributions to help you get starte
|
||||||
- Run locally with Ollama for development
|
- Run locally with Ollama for development
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker pull llama-stack/distribution-starter
|
docker pull llamastack/distribution-starter
|
||||||
```
|
```
|
||||||
|
|
||||||
**Guides:** [Starter Distribution Guide](self_hosted_distro/starter)
|
**Guides:** [Starter Distribution Guide](self_hosted_distro/starter)
|
||||||
|
|
@ -41,7 +41,7 @@ docker pull llama-stack/distribution-starter
|
||||||
- Need to run inference locally
|
- Need to run inference locally
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker pull llama-stack/distribution-meta-reference-gpu
|
docker pull llamastack/distribution-meta-reference-gpu
|
||||||
```
|
```
|
||||||
|
|
||||||
**Guides:** [Meta Reference GPU Guide](self_hosted_distro/meta-reference-gpu)
|
**Guides:** [Meta Reference GPU Guide](self_hosted_distro/meta-reference-gpu)
|
||||||
|
|
|
||||||
|
|
@ -116,10 +116,6 @@ The following environment variables can be configured:
|
||||||
- `BRAVE_SEARCH_API_KEY`: Brave Search API key
|
- `BRAVE_SEARCH_API_KEY`: Brave Search API key
|
||||||
- `TAVILY_SEARCH_API_KEY`: Tavily Search API key
|
- `TAVILY_SEARCH_API_KEY`: Tavily Search API key
|
||||||
|
|
||||||
### Telemetry Configuration
|
|
||||||
- `OTEL_SERVICE_NAME`: OpenTelemetry service name
|
|
||||||
- `OTEL_EXPORTER_OTLP_ENDPOINT`: OpenTelemetry collector endpoint URL
|
|
||||||
|
|
||||||
## Enabling Providers
|
## Enabling Providers
|
||||||
|
|
||||||
You can enable specific providers by setting appropriate environment variables. For example,
|
You can enable specific providers by setting appropriate environment variables. For example,
|
||||||
|
|
@ -265,7 +261,7 @@ The starter distribution uses SQLite for local storage of various components:
|
||||||
2. **Flexible Configuration**: Easy to enable/disable providers based on your needs
|
2. **Flexible Configuration**: Easy to enable/disable providers based on your needs
|
||||||
3. **No Local GPU Required**: Most providers are cloud-based, making it accessible to developers without high-end hardware
|
3. **No Local GPU Required**: Most providers are cloud-based, making it accessible to developers without high-end hardware
|
||||||
4. **Easy Migration**: Start with hosted providers and gradually move to local ones as needed
|
4. **Easy Migration**: Start with hosted providers and gradually move to local ones as needed
|
||||||
5. **Production Ready**: Includes safety, evaluation, and telemetry components
|
5. **Production Ready**: Includes safety and evaluation
|
||||||
6. **Tool Integration**: Comes with web search, RAG, and model context protocol tools
|
6. **Tool Integration**: Comes with web search, RAG, and model context protocol tools
|
||||||
|
|
||||||
The starter distribution is ideal for developers who want to experiment with different AI providers, build prototypes quickly, or create applications that can work with multiple AI backends.
|
The starter distribution is ideal for developers who want to experiment with different AI providers, build prototypes quickly, or create applications that can work with multiple AI backends.
|
||||||
|
|
|
||||||
|
|
@ -27,7 +27,7 @@ If you have built a container image and want to deploy it in a Kubernetes cluste
|
||||||
|
|
||||||
Control log output via environment variables before starting the server.
|
Control log output via environment variables before starting the server.
|
||||||
|
|
||||||
- `LLAMA_STACK_LOGGING` sets per-component levels, e.g. `LLAMA_STACK_LOGGING=server=debug;core=info`.
|
- `LLAMA_STACK_LOGGING` sets per-component levels, e.g. `LLAMA_STACK_LOGGING=server=debug,core=info`.
|
||||||
- Supported categories: `all`, `core`, `server`, `router`, `inference`, `agents`, `safety`, `eval`, `tools`, `client`.
|
- Supported categories: `all`, `core`, `server`, `router`, `inference`, `agents`, `safety`, `eval`, `tools`, `client`.
|
||||||
- Levels: `debug`, `info`, `warning`, `error`, `critical` (default is `info`). Use `all=<level>` to apply globally.
|
- Levels: `debug`, `info`, `warning`, `error`, `critical` (default is `info`). Use `all=<level>` to apply globally.
|
||||||
- `LLAMA_STACK_LOG_FILE=/path/to/log` mirrors logs to a file while still printing to stdout.
|
- `LLAMA_STACK_LOG_FILE=/path/to/log` mirrors logs to a file while still printing to stdout.
|
||||||
|
|
|
||||||
|
|
@ -360,32 +360,6 @@ Methods:
|
||||||
|
|
||||||
- <code title="post /v1/synthetic-data-generation/generate">client.synthetic_data_generation.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/synthetic_data_generation.py">generate</a>(\*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/synthetic_data_generation_generate_params.py">params</a>) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/synthetic_data_generation_response.py">SyntheticDataGenerationResponse</a></code>
|
- <code title="post /v1/synthetic-data-generation/generate">client.synthetic_data_generation.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/synthetic_data_generation.py">generate</a>(\*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/synthetic_data_generation_generate_params.py">params</a>) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/synthetic_data_generation_response.py">SyntheticDataGenerationResponse</a></code>
|
||||||
|
|
||||||
## Telemetry
|
|
||||||
|
|
||||||
Types:
|
|
||||||
|
|
||||||
```python
|
|
||||||
from llama_stack_client.types import (
|
|
||||||
QuerySpansResponse,
|
|
||||||
SpanWithStatus,
|
|
||||||
Trace,
|
|
||||||
TelemetryGetSpanResponse,
|
|
||||||
TelemetryGetSpanTreeResponse,
|
|
||||||
TelemetryQuerySpansResponse,
|
|
||||||
TelemetryQueryTracesResponse,
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
Methods:
|
|
||||||
|
|
||||||
- <code title="get /v1/telemetry/traces/{trace_id}/spans/{span_id}">client.telemetry.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/telemetry.py">get_span</a>(span_id, \*, trace_id) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/telemetry_get_span_response.py">TelemetryGetSpanResponse</a></code>
|
|
||||||
- <code title="get /v1/telemetry/spans/{span_id}/tree">client.telemetry.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/telemetry.py">get_span_tree</a>(span_id, \*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/telemetry_get_span_tree_params.py">params</a>) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/telemetry_get_span_tree_response.py">TelemetryGetSpanTreeResponse</a></code>
|
|
||||||
- <code title="get /v1/telemetry/traces/{trace_id}">client.telemetry.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/telemetry.py">get_trace</a>(trace_id) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/trace.py">Trace</a></code>
|
|
||||||
- <code title="post /v1/telemetry/events">client.telemetry.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/telemetry.py">log_event</a>(\*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/telemetry_log_event_params.py">params</a>) -> None</code>
|
|
||||||
- <code title="get /v1/telemetry/spans">client.telemetry.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/telemetry.py">query_spans</a>(\*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/telemetry_query_spans_params.py">params</a>) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/telemetry_query_spans_response.py">TelemetryQuerySpansResponse</a></code>
|
|
||||||
- <code title="get /v1/telemetry/traces">client.telemetry.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/telemetry.py">query_traces</a>(\*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/telemetry_query_traces_params.py">params</a>) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/telemetry_query_traces_response.py">TelemetryQueryTracesResponse</a></code>
|
|
||||||
- <code title="post /v1/telemetry/spans/export">client.telemetry.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/telemetry.py">save_spans_to_dataset</a>(\*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/telemetry_save_spans_to_dataset_params.py">params</a>) -> None</code>
|
|
||||||
|
|
||||||
## Datasetio
|
## Datasetio
|
||||||
|
|
||||||
Types:
|
Types:
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,7 @@ function HomepageHeader() {
|
||||||
<div className={styles.heroContent}>
|
<div className={styles.heroContent}>
|
||||||
<h1 className={styles.heroTitle}>Build AI Applications with Llama Stack</h1>
|
<h1 className={styles.heroTitle}>Build AI Applications with Llama Stack</h1>
|
||||||
<p className={styles.heroSubtitle}>
|
<p className={styles.heroSubtitle}>
|
||||||
Unified APIs for Inference, RAG, Agents, Tools, Safety, and Telemetry
|
Unified APIs for Inference, RAG, Agents, Tools, and Safety
|
||||||
</p>
|
</p>
|
||||||
<div className={styles.buttons}>
|
<div className={styles.buttons}>
|
||||||
<Link
|
<Link
|
||||||
|
|
@ -206,7 +206,7 @@ export default function Home() {
|
||||||
return (
|
return (
|
||||||
<Layout
|
<Layout
|
||||||
title="Build AI Applications"
|
title="Build AI Applications"
|
||||||
description="The open-source framework for building generative AI applications with unified APIs for Inference, RAG, Agents, Tools, Safety, and Telemetry.">
|
description="The open-source framework for building generative AI applications with unified APIs for Inference, RAG, Agents, Tools, Safety, and Evals.">
|
||||||
<HomepageHeader />
|
<HomepageHeader />
|
||||||
<main>
|
<main>
|
||||||
<QuickStart />
|
<QuickStart />
|
||||||
|
|
|
||||||
626
docs/static/deprecated-llama-stack-spec.yaml
vendored
626
docs/static/deprecated-llama-stack-spec.yaml
vendored
|
|
@ -378,6 +378,91 @@ paths:
|
||||||
type: string
|
type: string
|
||||||
description: 'Path parameter: identifier'
|
description: 'Path parameter: identifier'
|
||||||
deprecated: true
|
deprecated: true
|
||||||
|
/v1/tool-runtime/invoke:
|
||||||
|
post:
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: A ToolInvocationResult.
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/ToolInvocationResult'
|
||||||
|
'400':
|
||||||
|
description: Bad Request
|
||||||
|
$ref: '#/components/responses/BadRequest400'
|
||||||
|
'429':
|
||||||
|
description: Too Many Requests
|
||||||
|
$ref: '#/components/responses/TooManyRequests429'
|
||||||
|
'500':
|
||||||
|
description: Internal Server Error
|
||||||
|
$ref: '#/components/responses/InternalServerError500'
|
||||||
|
default:
|
||||||
|
description: Default Response
|
||||||
|
$ref: '#/components/responses/DefaultError'
|
||||||
|
tags:
|
||||||
|
- Tool Runtime
|
||||||
|
summary: Invoke Tool
|
||||||
|
description: Run a tool with the given arguments.
|
||||||
|
operationId: invoke_tool_v1_tool_runtime_invoke_post
|
||||||
|
requestBody:
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/InvokeToolRequest'
|
||||||
|
required: true
|
||||||
|
deprecated: true
|
||||||
|
/v1/tool-runtime/list-tools:
|
||||||
|
get:
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: A ListToolDefsResponse.
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/ListToolDefsResponse'
|
||||||
|
'400':
|
||||||
|
$ref: '#/components/responses/BadRequest400'
|
||||||
|
description: Bad Request
|
||||||
|
'429':
|
||||||
|
$ref: '#/components/responses/TooManyRequests429'
|
||||||
|
description: Too Many Requests
|
||||||
|
'500':
|
||||||
|
$ref: '#/components/responses/InternalServerError500'
|
||||||
|
description: Internal Server Error
|
||||||
|
default:
|
||||||
|
$ref: '#/components/responses/DefaultError'
|
||||||
|
description: Default Response
|
||||||
|
tags:
|
||||||
|
- Tool Runtime
|
||||||
|
summary: List Runtime Tools
|
||||||
|
description: List all tools in the runtime.
|
||||||
|
operationId: list_runtime_tools_v1_tool_runtime_list_tools_get
|
||||||
|
parameters:
|
||||||
|
- name: authorization
|
||||||
|
in: query
|
||||||
|
required: false
|
||||||
|
schema:
|
||||||
|
anyOf:
|
||||||
|
- type: string
|
||||||
|
- type: 'null'
|
||||||
|
title: Authorization
|
||||||
|
- name: tool_group_id
|
||||||
|
in: query
|
||||||
|
required: false
|
||||||
|
schema:
|
||||||
|
anyOf:
|
||||||
|
- type: string
|
||||||
|
- type: 'null'
|
||||||
|
title: Tool Group Id
|
||||||
|
- name: mcp_endpoint
|
||||||
|
in: query
|
||||||
|
required: false
|
||||||
|
schema:
|
||||||
|
anyOf:
|
||||||
|
- $ref: '#/components/schemas/URL'
|
||||||
|
- type: 'null'
|
||||||
|
title: Mcp Endpoint
|
||||||
|
deprecated: true
|
||||||
/v1/toolgroups:
|
/v1/toolgroups:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -404,6 +489,7 @@ paths:
|
||||||
summary: List Tool Groups
|
summary: List Tool Groups
|
||||||
description: List tool groups with optional provider.
|
description: List tool groups with optional provider.
|
||||||
operationId: list_tool_groups_v1_toolgroups_get
|
operationId: list_tool_groups_v1_toolgroups_get
|
||||||
|
deprecated: true
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
'400':
|
'400':
|
||||||
|
|
@ -465,6 +551,7 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
description: 'Path parameter: toolgroup_id'
|
description: 'Path parameter: toolgroup_id'
|
||||||
|
deprecated: true
|
||||||
delete:
|
delete:
|
||||||
responses:
|
responses:
|
||||||
'400':
|
'400':
|
||||||
|
|
@ -494,6 +581,76 @@ paths:
|
||||||
type: string
|
type: string
|
||||||
description: 'Path parameter: toolgroup_id'
|
description: 'Path parameter: toolgroup_id'
|
||||||
deprecated: true
|
deprecated: true
|
||||||
|
/v1/tools:
|
||||||
|
get:
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: A ListToolDefsResponse.
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/ListToolDefsResponse'
|
||||||
|
'400':
|
||||||
|
$ref: '#/components/responses/BadRequest400'
|
||||||
|
description: Bad Request
|
||||||
|
'429':
|
||||||
|
$ref: '#/components/responses/TooManyRequests429'
|
||||||
|
description: Too Many Requests
|
||||||
|
'500':
|
||||||
|
$ref: '#/components/responses/InternalServerError500'
|
||||||
|
description: Internal Server Error
|
||||||
|
default:
|
||||||
|
$ref: '#/components/responses/DefaultError'
|
||||||
|
description: Default Response
|
||||||
|
tags:
|
||||||
|
- Tool Groups
|
||||||
|
summary: List Tools
|
||||||
|
description: List tools with optional tool group.
|
||||||
|
operationId: list_tools_v1_tools_get
|
||||||
|
parameters:
|
||||||
|
- name: toolgroup_id
|
||||||
|
in: query
|
||||||
|
required: false
|
||||||
|
schema:
|
||||||
|
anyOf:
|
||||||
|
- type: string
|
||||||
|
- type: 'null'
|
||||||
|
title: Toolgroup Id
|
||||||
|
deprecated: true
|
||||||
|
/v1/tools/{tool_name}:
|
||||||
|
get:
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: A ToolDef.
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/ToolDef'
|
||||||
|
'400':
|
||||||
|
description: Bad Request
|
||||||
|
$ref: '#/components/responses/BadRequest400'
|
||||||
|
'429':
|
||||||
|
description: Too Many Requests
|
||||||
|
$ref: '#/components/responses/TooManyRequests429'
|
||||||
|
'500':
|
||||||
|
description: Internal Server Error
|
||||||
|
$ref: '#/components/responses/InternalServerError500'
|
||||||
|
default:
|
||||||
|
description: Default Response
|
||||||
|
$ref: '#/components/responses/DefaultError'
|
||||||
|
tags:
|
||||||
|
- Tool Groups
|
||||||
|
summary: Get Tool
|
||||||
|
description: Get a tool by its name.
|
||||||
|
operationId: get_tool_v1_tools__tool_name__get
|
||||||
|
parameters:
|
||||||
|
- name: tool_name
|
||||||
|
in: path
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
description: 'Path parameter: tool_name'
|
||||||
|
deprecated: true
|
||||||
/v1beta/datasets:
|
/v1beta/datasets:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -3645,6 +3802,12 @@ components:
|
||||||
anyOf:
|
anyOf:
|
||||||
- type: integer
|
- type: integer
|
||||||
- type: 'null'
|
- type: 'null'
|
||||||
|
metadata:
|
||||||
|
anyOf:
|
||||||
|
- additionalProperties:
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
- type: 'null'
|
||||||
input:
|
input:
|
||||||
items:
|
items:
|
||||||
anyOf:
|
anyOf:
|
||||||
|
|
@ -4048,6 +4211,12 @@ components:
|
||||||
anyOf:
|
anyOf:
|
||||||
- type: integer
|
- type: integer
|
||||||
- type: 'null'
|
- type: 'null'
|
||||||
|
metadata:
|
||||||
|
anyOf:
|
||||||
|
- additionalProperties:
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
- type: 'null'
|
||||||
type: object
|
type: object
|
||||||
required:
|
required:
|
||||||
- input
|
- input
|
||||||
|
|
@ -4179,6 +4348,12 @@ components:
|
||||||
anyOf:
|
anyOf:
|
||||||
- type: integer
|
- type: integer
|
||||||
- type: 'null'
|
- type: 'null'
|
||||||
|
metadata:
|
||||||
|
anyOf:
|
||||||
|
- additionalProperties:
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
- type: 'null'
|
||||||
type: object
|
type: object
|
||||||
required:
|
required:
|
||||||
- created_at
|
- created_at
|
||||||
|
|
@ -9029,227 +9204,6 @@ components:
|
||||||
- $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
|
- $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
|
||||||
title: OpenAIResponseContentPartReasoningText
|
title: OpenAIResponseContentPartReasoningText
|
||||||
title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
|
title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
|
||||||
SpanEndPayload:
|
|
||||||
description: Payload for a span end event.
|
|
||||||
properties:
|
|
||||||
type:
|
|
||||||
const: span_end
|
|
||||||
default: span_end
|
|
||||||
title: Type
|
|
||||||
type: string
|
|
||||||
status:
|
|
||||||
$ref: '#/components/schemas/SpanStatus'
|
|
||||||
required:
|
|
||||||
- status
|
|
||||||
title: SpanEndPayload
|
|
||||||
type: object
|
|
||||||
SpanStartPayload:
|
|
||||||
description: Payload for a span start event.
|
|
||||||
properties:
|
|
||||||
type:
|
|
||||||
const: span_start
|
|
||||||
default: span_start
|
|
||||||
title: Type
|
|
||||||
type: string
|
|
||||||
name:
|
|
||||||
title: Name
|
|
||||||
type: string
|
|
||||||
parent_span_id:
|
|
||||||
anyOf:
|
|
||||||
- type: string
|
|
||||||
- type: 'null'
|
|
||||||
nullable: true
|
|
||||||
required:
|
|
||||||
- name
|
|
||||||
title: SpanStartPayload
|
|
||||||
type: object
|
|
||||||
SpanStatus:
|
|
||||||
description: The status of a span indicating whether it completed successfully or with an error.
|
|
||||||
enum:
|
|
||||||
- ok
|
|
||||||
- error
|
|
||||||
title: SpanStatus
|
|
||||||
type: string
|
|
||||||
StructuredLogPayload:
|
|
||||||
discriminator:
|
|
||||||
mapping:
|
|
||||||
span_end: '#/components/schemas/SpanEndPayload'
|
|
||||||
span_start: '#/components/schemas/SpanStartPayload'
|
|
||||||
propertyName: type
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/SpanStartPayload'
|
|
||||||
title: SpanStartPayload
|
|
||||||
- $ref: '#/components/schemas/SpanEndPayload'
|
|
||||||
title: SpanEndPayload
|
|
||||||
title: SpanStartPayload | SpanEndPayload
|
|
||||||
LogSeverity:
|
|
||||||
description: The severity level of a log message.
|
|
||||||
enum:
|
|
||||||
- verbose
|
|
||||||
- debug
|
|
||||||
- info
|
|
||||||
- warn
|
|
||||||
- error
|
|
||||||
- critical
|
|
||||||
title: LogSeverity
|
|
||||||
type: string
|
|
||||||
MetricEvent:
|
|
||||||
description: A metric event containing a measured value.
|
|
||||||
properties:
|
|
||||||
trace_id:
|
|
||||||
title: Trace Id
|
|
||||||
type: string
|
|
||||||
span_id:
|
|
||||||
title: Span Id
|
|
||||||
type: string
|
|
||||||
timestamp:
|
|
||||||
format: date-time
|
|
||||||
title: Timestamp
|
|
||||||
type: string
|
|
||||||
attributes:
|
|
||||||
anyOf:
|
|
||||||
- additionalProperties:
|
|
||||||
anyOf:
|
|
||||||
- type: string
|
|
||||||
- type: integer
|
|
||||||
- type: number
|
|
||||||
- type: boolean
|
|
||||||
- type: 'null'
|
|
||||||
title: string | ... (4 variants)
|
|
||||||
type: object
|
|
||||||
- type: 'null'
|
|
||||||
type:
|
|
||||||
const: metric
|
|
||||||
default: metric
|
|
||||||
title: Type
|
|
||||||
type: string
|
|
||||||
metric:
|
|
||||||
title: Metric
|
|
||||||
type: string
|
|
||||||
value:
|
|
||||||
anyOf:
|
|
||||||
- type: integer
|
|
||||||
- type: number
|
|
||||||
title: integer | number
|
|
||||||
unit:
|
|
||||||
title: Unit
|
|
||||||
type: string
|
|
||||||
required:
|
|
||||||
- trace_id
|
|
||||||
- span_id
|
|
||||||
- timestamp
|
|
||||||
- metric
|
|
||||||
- value
|
|
||||||
- unit
|
|
||||||
title: MetricEvent
|
|
||||||
type: object
|
|
||||||
StructuredLogEvent:
|
|
||||||
description: A structured log event containing typed payload data.
|
|
||||||
properties:
|
|
||||||
trace_id:
|
|
||||||
title: Trace Id
|
|
||||||
type: string
|
|
||||||
span_id:
|
|
||||||
title: Span Id
|
|
||||||
type: string
|
|
||||||
timestamp:
|
|
||||||
format: date-time
|
|
||||||
title: Timestamp
|
|
||||||
type: string
|
|
||||||
attributes:
|
|
||||||
anyOf:
|
|
||||||
- additionalProperties:
|
|
||||||
anyOf:
|
|
||||||
- type: string
|
|
||||||
- type: integer
|
|
||||||
- type: number
|
|
||||||
- type: boolean
|
|
||||||
- type: 'null'
|
|
||||||
title: string | ... (4 variants)
|
|
||||||
type: object
|
|
||||||
- type: 'null'
|
|
||||||
type:
|
|
||||||
const: structured_log
|
|
||||||
default: structured_log
|
|
||||||
title: Type
|
|
||||||
type: string
|
|
||||||
payload:
|
|
||||||
discriminator:
|
|
||||||
mapping:
|
|
||||||
span_end: '#/components/schemas/SpanEndPayload'
|
|
||||||
span_start: '#/components/schemas/SpanStartPayload'
|
|
||||||
propertyName: type
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/SpanStartPayload'
|
|
||||||
title: SpanStartPayload
|
|
||||||
- $ref: '#/components/schemas/SpanEndPayload'
|
|
||||||
title: SpanEndPayload
|
|
||||||
title: SpanStartPayload | SpanEndPayload
|
|
||||||
required:
|
|
||||||
- trace_id
|
|
||||||
- span_id
|
|
||||||
- timestamp
|
|
||||||
- payload
|
|
||||||
title: StructuredLogEvent
|
|
||||||
type: object
|
|
||||||
UnstructuredLogEvent:
|
|
||||||
description: An unstructured log event containing a simple text message.
|
|
||||||
properties:
|
|
||||||
trace_id:
|
|
||||||
title: Trace Id
|
|
||||||
type: string
|
|
||||||
span_id:
|
|
||||||
title: Span Id
|
|
||||||
type: string
|
|
||||||
timestamp:
|
|
||||||
format: date-time
|
|
||||||
title: Timestamp
|
|
||||||
type: string
|
|
||||||
attributes:
|
|
||||||
anyOf:
|
|
||||||
- additionalProperties:
|
|
||||||
anyOf:
|
|
||||||
- type: string
|
|
||||||
- type: integer
|
|
||||||
- type: number
|
|
||||||
- type: boolean
|
|
||||||
- type: 'null'
|
|
||||||
title: string | ... (4 variants)
|
|
||||||
type: object
|
|
||||||
- type: 'null'
|
|
||||||
type:
|
|
||||||
const: unstructured_log
|
|
||||||
default: unstructured_log
|
|
||||||
title: Type
|
|
||||||
type: string
|
|
||||||
message:
|
|
||||||
title: Message
|
|
||||||
type: string
|
|
||||||
severity:
|
|
||||||
$ref: '#/components/schemas/LogSeverity'
|
|
||||||
required:
|
|
||||||
- trace_id
|
|
||||||
- span_id
|
|
||||||
- timestamp
|
|
||||||
- message
|
|
||||||
- severity
|
|
||||||
title: UnstructuredLogEvent
|
|
||||||
type: object
|
|
||||||
Event:
|
|
||||||
discriminator:
|
|
||||||
mapping:
|
|
||||||
metric: '#/components/schemas/MetricEvent'
|
|
||||||
structured_log: '#/components/schemas/StructuredLogEvent'
|
|
||||||
unstructured_log: '#/components/schemas/UnstructuredLogEvent'
|
|
||||||
propertyName: type
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/UnstructuredLogEvent'
|
|
||||||
title: UnstructuredLogEvent
|
|
||||||
- $ref: '#/components/schemas/MetricEvent'
|
|
||||||
title: MetricEvent
|
|
||||||
- $ref: '#/components/schemas/StructuredLogEvent'
|
|
||||||
title: StructuredLogEvent
|
|
||||||
title: UnstructuredLogEvent | MetricEvent | StructuredLogEvent
|
|
||||||
MetricInResponse:
|
MetricInResponse:
|
||||||
description: A metric value included in API responses.
|
description: A metric value included in API responses.
|
||||||
properties:
|
properties:
|
||||||
|
|
@ -10112,236 +10066,6 @@ components:
|
||||||
- logger_config
|
- logger_config
|
||||||
title: PostTrainingRLHFRequest
|
title: PostTrainingRLHFRequest
|
||||||
type: object
|
type: object
|
||||||
Span:
|
|
||||||
description: A span representing a single operation within a trace.
|
|
||||||
properties:
|
|
||||||
span_id:
|
|
||||||
title: Span Id
|
|
||||||
type: string
|
|
||||||
trace_id:
|
|
||||||
title: Trace Id
|
|
||||||
type: string
|
|
||||||
parent_span_id:
|
|
||||||
anyOf:
|
|
||||||
- type: string
|
|
||||||
- type: 'null'
|
|
||||||
nullable: true
|
|
||||||
name:
|
|
||||||
title: Name
|
|
||||||
type: string
|
|
||||||
start_time:
|
|
||||||
format: date-time
|
|
||||||
title: Start Time
|
|
||||||
type: string
|
|
||||||
end_time:
|
|
||||||
anyOf:
|
|
||||||
- format: date-time
|
|
||||||
type: string
|
|
||||||
- type: 'null'
|
|
||||||
nullable: true
|
|
||||||
attributes:
|
|
||||||
anyOf:
|
|
||||||
- additionalProperties: true
|
|
||||||
type: object
|
|
||||||
- type: 'null'
|
|
||||||
required:
|
|
||||||
- span_id
|
|
||||||
- trace_id
|
|
||||||
- name
|
|
||||||
- start_time
|
|
||||||
title: Span
|
|
||||||
type: object
|
|
||||||
Trace:
|
|
||||||
description: A trace representing the complete execution path of a request across multiple operations.
|
|
||||||
properties:
|
|
||||||
trace_id:
|
|
||||||
title: Trace Id
|
|
||||||
type: string
|
|
||||||
root_span_id:
|
|
||||||
title: Root Span Id
|
|
||||||
type: string
|
|
||||||
start_time:
|
|
||||||
format: date-time
|
|
||||||
title: Start Time
|
|
||||||
type: string
|
|
||||||
end_time:
|
|
||||||
anyOf:
|
|
||||||
- format: date-time
|
|
||||||
type: string
|
|
||||||
- type: 'null'
|
|
||||||
nullable: true
|
|
||||||
required:
|
|
||||||
- trace_id
|
|
||||||
- root_span_id
|
|
||||||
- start_time
|
|
||||||
title: Trace
|
|
||||||
type: object
|
|
||||||
EventType:
|
|
||||||
description: The type of telemetry event being logged.
|
|
||||||
enum:
|
|
||||||
- unstructured_log
|
|
||||||
- structured_log
|
|
||||||
- metric
|
|
||||||
title: EventType
|
|
||||||
type: string
|
|
||||||
StructuredLogType:
|
|
||||||
description: The type of structured log event payload.
|
|
||||||
enum:
|
|
||||||
- span_start
|
|
||||||
- span_end
|
|
||||||
title: StructuredLogType
|
|
||||||
type: string
|
|
||||||
EvalTrace:
|
|
||||||
description: A trace record for evaluation purposes.
|
|
||||||
properties:
|
|
||||||
session_id:
|
|
||||||
title: Session Id
|
|
||||||
type: string
|
|
||||||
step:
|
|
||||||
title: Step
|
|
||||||
type: string
|
|
||||||
input:
|
|
||||||
title: Input
|
|
||||||
type: string
|
|
||||||
output:
|
|
||||||
title: Output
|
|
||||||
type: string
|
|
||||||
expected_output:
|
|
||||||
title: Expected Output
|
|
||||||
type: string
|
|
||||||
required:
|
|
||||||
- session_id
|
|
||||||
- step
|
|
||||||
- input
|
|
||||||
- output
|
|
||||||
- expected_output
|
|
||||||
title: EvalTrace
|
|
||||||
type: object
|
|
||||||
SpanWithStatus:
|
|
||||||
description: A span that includes status information.
|
|
||||||
properties:
|
|
||||||
span_id:
|
|
||||||
title: Span Id
|
|
||||||
type: string
|
|
||||||
trace_id:
|
|
||||||
title: Trace Id
|
|
||||||
type: string
|
|
||||||
parent_span_id:
|
|
||||||
anyOf:
|
|
||||||
- type: string
|
|
||||||
- type: 'null'
|
|
||||||
nullable: true
|
|
||||||
name:
|
|
||||||
title: Name
|
|
||||||
type: string
|
|
||||||
start_time:
|
|
||||||
format: date-time
|
|
||||||
title: Start Time
|
|
||||||
type: string
|
|
||||||
end_time:
|
|
||||||
anyOf:
|
|
||||||
- format: date-time
|
|
||||||
type: string
|
|
||||||
- type: 'null'
|
|
||||||
nullable: true
|
|
||||||
attributes:
|
|
||||||
anyOf:
|
|
||||||
- additionalProperties: true
|
|
||||||
type: object
|
|
||||||
- type: 'null'
|
|
||||||
status:
|
|
||||||
anyOf:
|
|
||||||
- $ref: '#/components/schemas/SpanStatus'
|
|
||||||
title: SpanStatus
|
|
||||||
- type: 'null'
|
|
||||||
nullable: true
|
|
||||||
title: SpanStatus
|
|
||||||
required:
|
|
||||||
- span_id
|
|
||||||
- trace_id
|
|
||||||
- name
|
|
||||||
- start_time
|
|
||||||
title: SpanWithStatus
|
|
||||||
type: object
|
|
||||||
QueryConditionOp:
|
|
||||||
description: Comparison operators for query conditions.
|
|
||||||
enum:
|
|
||||||
- eq
|
|
||||||
- ne
|
|
||||||
- gt
|
|
||||||
- lt
|
|
||||||
title: QueryConditionOp
|
|
||||||
type: string
|
|
||||||
QueryCondition:
|
|
||||||
description: A condition for filtering query results.
|
|
||||||
properties:
|
|
||||||
key:
|
|
||||||
title: Key
|
|
||||||
type: string
|
|
||||||
op:
|
|
||||||
$ref: '#/components/schemas/QueryConditionOp'
|
|
||||||
value:
|
|
||||||
title: Value
|
|
||||||
required:
|
|
||||||
- key
|
|
||||||
- op
|
|
||||||
- value
|
|
||||||
title: QueryCondition
|
|
||||||
type: object
|
|
||||||
MetricLabel:
|
|
||||||
description: A label associated with a metric.
|
|
||||||
properties:
|
|
||||||
name:
|
|
||||||
title: Name
|
|
||||||
type: string
|
|
||||||
value:
|
|
||||||
title: Value
|
|
||||||
type: string
|
|
||||||
required:
|
|
||||||
- name
|
|
||||||
- value
|
|
||||||
title: MetricLabel
|
|
||||||
type: object
|
|
||||||
MetricDataPoint:
|
|
||||||
description: A single data point in a metric time series.
|
|
||||||
properties:
|
|
||||||
timestamp:
|
|
||||||
title: Timestamp
|
|
||||||
type: integer
|
|
||||||
value:
|
|
||||||
title: Value
|
|
||||||
type: number
|
|
||||||
unit:
|
|
||||||
title: Unit
|
|
||||||
type: string
|
|
||||||
required:
|
|
||||||
- timestamp
|
|
||||||
- value
|
|
||||||
- unit
|
|
||||||
title: MetricDataPoint
|
|
||||||
type: object
|
|
||||||
MetricSeries:
|
|
||||||
description: A time series of metric data points.
|
|
||||||
properties:
|
|
||||||
metric:
|
|
||||||
title: Metric
|
|
||||||
type: string
|
|
||||||
labels:
|
|
||||||
items:
|
|
||||||
$ref: '#/components/schemas/MetricLabel'
|
|
||||||
title: Labels
|
|
||||||
type: array
|
|
||||||
values:
|
|
||||||
items:
|
|
||||||
$ref: '#/components/schemas/MetricDataPoint'
|
|
||||||
title: Values
|
|
||||||
type: array
|
|
||||||
required:
|
|
||||||
- metric
|
|
||||||
- labels
|
|
||||||
- values
|
|
||||||
title: MetricSeries
|
|
||||||
type: object
|
|
||||||
responses:
|
responses:
|
||||||
BadRequest400:
|
BadRequest400:
|
||||||
description: The request was invalid or malformed
|
description: The request was invalid or malformed
|
||||||
|
|
|
||||||
463
docs/static/experimental-llama-stack-spec.yaml
vendored
463
docs/static/experimental-llama-stack-spec.yaml
vendored
|
|
@ -3370,6 +3370,12 @@ components:
|
||||||
anyOf:
|
anyOf:
|
||||||
- type: integer
|
- type: integer
|
||||||
- type: 'null'
|
- type: 'null'
|
||||||
|
metadata:
|
||||||
|
anyOf:
|
||||||
|
- additionalProperties:
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
- type: 'null'
|
||||||
input:
|
input:
|
||||||
items:
|
items:
|
||||||
anyOf:
|
anyOf:
|
||||||
|
|
@ -3770,6 +3776,12 @@ components:
|
||||||
anyOf:
|
anyOf:
|
||||||
- type: integer
|
- type: integer
|
||||||
- type: 'null'
|
- type: 'null'
|
||||||
|
metadata:
|
||||||
|
anyOf:
|
||||||
|
- additionalProperties:
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
- type: 'null'
|
||||||
type: object
|
type: object
|
||||||
required:
|
required:
|
||||||
- created_at
|
- created_at
|
||||||
|
|
@ -7986,227 +7998,6 @@ components:
|
||||||
- $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
|
- $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
|
||||||
title: OpenAIResponseContentPartReasoningText
|
title: OpenAIResponseContentPartReasoningText
|
||||||
title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
|
title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
|
||||||
SpanEndPayload:
|
|
||||||
description: Payload for a span end event.
|
|
||||||
properties:
|
|
||||||
type:
|
|
||||||
const: span_end
|
|
||||||
default: span_end
|
|
||||||
title: Type
|
|
||||||
type: string
|
|
||||||
status:
|
|
||||||
$ref: '#/components/schemas/SpanStatus'
|
|
||||||
required:
|
|
||||||
- status
|
|
||||||
title: SpanEndPayload
|
|
||||||
type: object
|
|
||||||
SpanStartPayload:
|
|
||||||
description: Payload for a span start event.
|
|
||||||
properties:
|
|
||||||
type:
|
|
||||||
const: span_start
|
|
||||||
default: span_start
|
|
||||||
title: Type
|
|
||||||
type: string
|
|
||||||
name:
|
|
||||||
title: Name
|
|
||||||
type: string
|
|
||||||
parent_span_id:
|
|
||||||
anyOf:
|
|
||||||
- type: string
|
|
||||||
- type: 'null'
|
|
||||||
nullable: true
|
|
||||||
required:
|
|
||||||
- name
|
|
||||||
title: SpanStartPayload
|
|
||||||
type: object
|
|
||||||
SpanStatus:
|
|
||||||
description: The status of a span indicating whether it completed successfully or with an error.
|
|
||||||
enum:
|
|
||||||
- ok
|
|
||||||
- error
|
|
||||||
title: SpanStatus
|
|
||||||
type: string
|
|
||||||
StructuredLogPayload:
|
|
||||||
discriminator:
|
|
||||||
mapping:
|
|
||||||
span_end: '#/components/schemas/SpanEndPayload'
|
|
||||||
span_start: '#/components/schemas/SpanStartPayload'
|
|
||||||
propertyName: type
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/SpanStartPayload'
|
|
||||||
title: SpanStartPayload
|
|
||||||
- $ref: '#/components/schemas/SpanEndPayload'
|
|
||||||
title: SpanEndPayload
|
|
||||||
title: SpanStartPayload | SpanEndPayload
|
|
||||||
LogSeverity:
|
|
||||||
description: The severity level of a log message.
|
|
||||||
enum:
|
|
||||||
- verbose
|
|
||||||
- debug
|
|
||||||
- info
|
|
||||||
- warn
|
|
||||||
- error
|
|
||||||
- critical
|
|
||||||
title: LogSeverity
|
|
||||||
type: string
|
|
||||||
MetricEvent:
|
|
||||||
description: A metric event containing a measured value.
|
|
||||||
properties:
|
|
||||||
trace_id:
|
|
||||||
title: Trace Id
|
|
||||||
type: string
|
|
||||||
span_id:
|
|
||||||
title: Span Id
|
|
||||||
type: string
|
|
||||||
timestamp:
|
|
||||||
format: date-time
|
|
||||||
title: Timestamp
|
|
||||||
type: string
|
|
||||||
attributes:
|
|
||||||
anyOf:
|
|
||||||
- additionalProperties:
|
|
||||||
anyOf:
|
|
||||||
- type: string
|
|
||||||
- type: integer
|
|
||||||
- type: number
|
|
||||||
- type: boolean
|
|
||||||
- type: 'null'
|
|
||||||
title: string | ... (4 variants)
|
|
||||||
type: object
|
|
||||||
- type: 'null'
|
|
||||||
type:
|
|
||||||
const: metric
|
|
||||||
default: metric
|
|
||||||
title: Type
|
|
||||||
type: string
|
|
||||||
metric:
|
|
||||||
title: Metric
|
|
||||||
type: string
|
|
||||||
value:
|
|
||||||
anyOf:
|
|
||||||
- type: integer
|
|
||||||
- type: number
|
|
||||||
title: integer | number
|
|
||||||
unit:
|
|
||||||
title: Unit
|
|
||||||
type: string
|
|
||||||
required:
|
|
||||||
- trace_id
|
|
||||||
- span_id
|
|
||||||
- timestamp
|
|
||||||
- metric
|
|
||||||
- value
|
|
||||||
- unit
|
|
||||||
title: MetricEvent
|
|
||||||
type: object
|
|
||||||
StructuredLogEvent:
|
|
||||||
description: A structured log event containing typed payload data.
|
|
||||||
properties:
|
|
||||||
trace_id:
|
|
||||||
title: Trace Id
|
|
||||||
type: string
|
|
||||||
span_id:
|
|
||||||
title: Span Id
|
|
||||||
type: string
|
|
||||||
timestamp:
|
|
||||||
format: date-time
|
|
||||||
title: Timestamp
|
|
||||||
type: string
|
|
||||||
attributes:
|
|
||||||
anyOf:
|
|
||||||
- additionalProperties:
|
|
||||||
anyOf:
|
|
||||||
- type: string
|
|
||||||
- type: integer
|
|
||||||
- type: number
|
|
||||||
- type: boolean
|
|
||||||
- type: 'null'
|
|
||||||
title: string | ... (4 variants)
|
|
||||||
type: object
|
|
||||||
- type: 'null'
|
|
||||||
type:
|
|
||||||
const: structured_log
|
|
||||||
default: structured_log
|
|
||||||
title: Type
|
|
||||||
type: string
|
|
||||||
payload:
|
|
||||||
discriminator:
|
|
||||||
mapping:
|
|
||||||
span_end: '#/components/schemas/SpanEndPayload'
|
|
||||||
span_start: '#/components/schemas/SpanStartPayload'
|
|
||||||
propertyName: type
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/SpanStartPayload'
|
|
||||||
title: SpanStartPayload
|
|
||||||
- $ref: '#/components/schemas/SpanEndPayload'
|
|
||||||
title: SpanEndPayload
|
|
||||||
title: SpanStartPayload | SpanEndPayload
|
|
||||||
required:
|
|
||||||
- trace_id
|
|
||||||
- span_id
|
|
||||||
- timestamp
|
|
||||||
- payload
|
|
||||||
title: StructuredLogEvent
|
|
||||||
type: object
|
|
||||||
UnstructuredLogEvent:
|
|
||||||
description: An unstructured log event containing a simple text message.
|
|
||||||
properties:
|
|
||||||
trace_id:
|
|
||||||
title: Trace Id
|
|
||||||
type: string
|
|
||||||
span_id:
|
|
||||||
title: Span Id
|
|
||||||
type: string
|
|
||||||
timestamp:
|
|
||||||
format: date-time
|
|
||||||
title: Timestamp
|
|
||||||
type: string
|
|
||||||
attributes:
|
|
||||||
anyOf:
|
|
||||||
- additionalProperties:
|
|
||||||
anyOf:
|
|
||||||
- type: string
|
|
||||||
- type: integer
|
|
||||||
- type: number
|
|
||||||
- type: boolean
|
|
||||||
- type: 'null'
|
|
||||||
title: string | ... (4 variants)
|
|
||||||
type: object
|
|
||||||
- type: 'null'
|
|
||||||
type:
|
|
||||||
const: unstructured_log
|
|
||||||
default: unstructured_log
|
|
||||||
title: Type
|
|
||||||
type: string
|
|
||||||
message:
|
|
||||||
title: Message
|
|
||||||
type: string
|
|
||||||
severity:
|
|
||||||
$ref: '#/components/schemas/LogSeverity'
|
|
||||||
required:
|
|
||||||
- trace_id
|
|
||||||
- span_id
|
|
||||||
- timestamp
|
|
||||||
- message
|
|
||||||
- severity
|
|
||||||
title: UnstructuredLogEvent
|
|
||||||
type: object
|
|
||||||
Event:
|
|
||||||
discriminator:
|
|
||||||
mapping:
|
|
||||||
metric: '#/components/schemas/MetricEvent'
|
|
||||||
structured_log: '#/components/schemas/StructuredLogEvent'
|
|
||||||
unstructured_log: '#/components/schemas/UnstructuredLogEvent'
|
|
||||||
propertyName: type
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/UnstructuredLogEvent'
|
|
||||||
title: UnstructuredLogEvent
|
|
||||||
- $ref: '#/components/schemas/MetricEvent'
|
|
||||||
title: MetricEvent
|
|
||||||
- $ref: '#/components/schemas/StructuredLogEvent'
|
|
||||||
title: StructuredLogEvent
|
|
||||||
title: UnstructuredLogEvent | MetricEvent | StructuredLogEvent
|
|
||||||
MetricInResponse:
|
MetricInResponse:
|
||||||
description: A metric value included in API responses.
|
description: A metric value included in API responses.
|
||||||
properties:
|
properties:
|
||||||
|
|
@ -9069,236 +8860,6 @@ components:
|
||||||
- logger_config
|
- logger_config
|
||||||
title: PostTrainingRLHFRequest
|
title: PostTrainingRLHFRequest
|
||||||
type: object
|
type: object
|
||||||
Span:
|
|
||||||
description: A span representing a single operation within a trace.
|
|
||||||
properties:
|
|
||||||
span_id:
|
|
||||||
title: Span Id
|
|
||||||
type: string
|
|
||||||
trace_id:
|
|
||||||
title: Trace Id
|
|
||||||
type: string
|
|
||||||
parent_span_id:
|
|
||||||
anyOf:
|
|
||||||
- type: string
|
|
||||||
- type: 'null'
|
|
||||||
nullable: true
|
|
||||||
name:
|
|
||||||
title: Name
|
|
||||||
type: string
|
|
||||||
start_time:
|
|
||||||
format: date-time
|
|
||||||
title: Start Time
|
|
||||||
type: string
|
|
||||||
end_time:
|
|
||||||
anyOf:
|
|
||||||
- format: date-time
|
|
||||||
type: string
|
|
||||||
- type: 'null'
|
|
||||||
nullable: true
|
|
||||||
attributes:
|
|
||||||
anyOf:
|
|
||||||
- additionalProperties: true
|
|
||||||
type: object
|
|
||||||
- type: 'null'
|
|
||||||
required:
|
|
||||||
- span_id
|
|
||||||
- trace_id
|
|
||||||
- name
|
|
||||||
- start_time
|
|
||||||
title: Span
|
|
||||||
type: object
|
|
||||||
Trace:
|
|
||||||
description: A trace representing the complete execution path of a request across multiple operations.
|
|
||||||
properties:
|
|
||||||
trace_id:
|
|
||||||
title: Trace Id
|
|
||||||
type: string
|
|
||||||
root_span_id:
|
|
||||||
title: Root Span Id
|
|
||||||
type: string
|
|
||||||
start_time:
|
|
||||||
format: date-time
|
|
||||||
title: Start Time
|
|
||||||
type: string
|
|
||||||
end_time:
|
|
||||||
anyOf:
|
|
||||||
- format: date-time
|
|
||||||
type: string
|
|
||||||
- type: 'null'
|
|
||||||
nullable: true
|
|
||||||
required:
|
|
||||||
- trace_id
|
|
||||||
- root_span_id
|
|
||||||
- start_time
|
|
||||||
title: Trace
|
|
||||||
type: object
|
|
||||||
EventType:
|
|
||||||
description: The type of telemetry event being logged.
|
|
||||||
enum:
|
|
||||||
- unstructured_log
|
|
||||||
- structured_log
|
|
||||||
- metric
|
|
||||||
title: EventType
|
|
||||||
type: string
|
|
||||||
StructuredLogType:
|
|
||||||
description: The type of structured log event payload.
|
|
||||||
enum:
|
|
||||||
- span_start
|
|
||||||
- span_end
|
|
||||||
title: StructuredLogType
|
|
||||||
type: string
|
|
||||||
EvalTrace:
|
|
||||||
description: A trace record for evaluation purposes.
|
|
||||||
properties:
|
|
||||||
session_id:
|
|
||||||
title: Session Id
|
|
||||||
type: string
|
|
||||||
step:
|
|
||||||
title: Step
|
|
||||||
type: string
|
|
||||||
input:
|
|
||||||
title: Input
|
|
||||||
type: string
|
|
||||||
output:
|
|
||||||
title: Output
|
|
||||||
type: string
|
|
||||||
expected_output:
|
|
||||||
title: Expected Output
|
|
||||||
type: string
|
|
||||||
required:
|
|
||||||
- session_id
|
|
||||||
- step
|
|
||||||
- input
|
|
||||||
- output
|
|
||||||
- expected_output
|
|
||||||
title: EvalTrace
|
|
||||||
type: object
|
|
||||||
SpanWithStatus:
|
|
||||||
description: A span that includes status information.
|
|
||||||
properties:
|
|
||||||
span_id:
|
|
||||||
title: Span Id
|
|
||||||
type: string
|
|
||||||
trace_id:
|
|
||||||
title: Trace Id
|
|
||||||
type: string
|
|
||||||
parent_span_id:
|
|
||||||
anyOf:
|
|
||||||
- type: string
|
|
||||||
- type: 'null'
|
|
||||||
nullable: true
|
|
||||||
name:
|
|
||||||
title: Name
|
|
||||||
type: string
|
|
||||||
start_time:
|
|
||||||
format: date-time
|
|
||||||
title: Start Time
|
|
||||||
type: string
|
|
||||||
end_time:
|
|
||||||
anyOf:
|
|
||||||
- format: date-time
|
|
||||||
type: string
|
|
||||||
- type: 'null'
|
|
||||||
nullable: true
|
|
||||||
attributes:
|
|
||||||
anyOf:
|
|
||||||
- additionalProperties: true
|
|
||||||
type: object
|
|
||||||
- type: 'null'
|
|
||||||
status:
|
|
||||||
anyOf:
|
|
||||||
- $ref: '#/components/schemas/SpanStatus'
|
|
||||||
title: SpanStatus
|
|
||||||
- type: 'null'
|
|
||||||
nullable: true
|
|
||||||
title: SpanStatus
|
|
||||||
required:
|
|
||||||
- span_id
|
|
||||||
- trace_id
|
|
||||||
- name
|
|
||||||
- start_time
|
|
||||||
title: SpanWithStatus
|
|
||||||
type: object
|
|
||||||
QueryConditionOp:
|
|
||||||
description: Comparison operators for query conditions.
|
|
||||||
enum:
|
|
||||||
- eq
|
|
||||||
- ne
|
|
||||||
- gt
|
|
||||||
- lt
|
|
||||||
title: QueryConditionOp
|
|
||||||
type: string
|
|
||||||
QueryCondition:
|
|
||||||
description: A condition for filtering query results.
|
|
||||||
properties:
|
|
||||||
key:
|
|
||||||
title: Key
|
|
||||||
type: string
|
|
||||||
op:
|
|
||||||
$ref: '#/components/schemas/QueryConditionOp'
|
|
||||||
value:
|
|
||||||
title: Value
|
|
||||||
required:
|
|
||||||
- key
|
|
||||||
- op
|
|
||||||
- value
|
|
||||||
title: QueryCondition
|
|
||||||
type: object
|
|
||||||
MetricLabel:
|
|
||||||
description: A label associated with a metric.
|
|
||||||
properties:
|
|
||||||
name:
|
|
||||||
title: Name
|
|
||||||
type: string
|
|
||||||
value:
|
|
||||||
title: Value
|
|
||||||
type: string
|
|
||||||
required:
|
|
||||||
- name
|
|
||||||
- value
|
|
||||||
title: MetricLabel
|
|
||||||
type: object
|
|
||||||
MetricDataPoint:
|
|
||||||
description: A single data point in a metric time series.
|
|
||||||
properties:
|
|
||||||
timestamp:
|
|
||||||
title: Timestamp
|
|
||||||
type: integer
|
|
||||||
value:
|
|
||||||
title: Value
|
|
||||||
type: number
|
|
||||||
unit:
|
|
||||||
title: Unit
|
|
||||||
type: string
|
|
||||||
required:
|
|
||||||
- timestamp
|
|
||||||
- value
|
|
||||||
- unit
|
|
||||||
title: MetricDataPoint
|
|
||||||
type: object
|
|
||||||
MetricSeries:
|
|
||||||
description: A time series of metric data points.
|
|
||||||
properties:
|
|
||||||
metric:
|
|
||||||
title: Metric
|
|
||||||
type: string
|
|
||||||
labels:
|
|
||||||
items:
|
|
||||||
$ref: '#/components/schemas/MetricLabel'
|
|
||||||
title: Labels
|
|
||||||
type: array
|
|
||||||
values:
|
|
||||||
items:
|
|
||||||
$ref: '#/components/schemas/MetricDataPoint'
|
|
||||||
title: Values
|
|
||||||
type: array
|
|
||||||
required:
|
|
||||||
- metric
|
|
||||||
- labels
|
|
||||||
- values
|
|
||||||
title: MetricSeries
|
|
||||||
type: object
|
|
||||||
responses:
|
responses:
|
||||||
BadRequest400:
|
BadRequest400:
|
||||||
description: The request was invalid or malformed
|
description: The request was invalid or malformed
|
||||||
|
|
|
||||||
697
docs/static/llama-stack-spec.yaml
vendored
697
docs/static/llama-stack-spec.yaml
vendored
|
|
@ -1882,216 +1882,6 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
description: 'Path parameter: identifier'
|
description: 'Path parameter: identifier'
|
||||||
/v1/tool-runtime/invoke:
|
|
||||||
post:
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
description: A ToolInvocationResult.
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/ToolInvocationResult'
|
|
||||||
'400':
|
|
||||||
description: Bad Request
|
|
||||||
$ref: '#/components/responses/BadRequest400'
|
|
||||||
'429':
|
|
||||||
description: Too Many Requests
|
|
||||||
$ref: '#/components/responses/TooManyRequests429'
|
|
||||||
'500':
|
|
||||||
description: Internal Server Error
|
|
||||||
$ref: '#/components/responses/InternalServerError500'
|
|
||||||
default:
|
|
||||||
description: Default Response
|
|
||||||
$ref: '#/components/responses/DefaultError'
|
|
||||||
tags:
|
|
||||||
- Tool Runtime
|
|
||||||
summary: Invoke Tool
|
|
||||||
description: Run a tool with the given arguments.
|
|
||||||
operationId: invoke_tool_v1_tool_runtime_invoke_post
|
|
||||||
requestBody:
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/InvokeToolRequest'
|
|
||||||
required: true
|
|
||||||
/v1/tool-runtime/list-tools:
|
|
||||||
get:
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
description: A ListToolDefsResponse.
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/ListToolDefsResponse'
|
|
||||||
'400':
|
|
||||||
$ref: '#/components/responses/BadRequest400'
|
|
||||||
description: Bad Request
|
|
||||||
'429':
|
|
||||||
$ref: '#/components/responses/TooManyRequests429'
|
|
||||||
description: Too Many Requests
|
|
||||||
'500':
|
|
||||||
$ref: '#/components/responses/InternalServerError500'
|
|
||||||
description: Internal Server Error
|
|
||||||
default:
|
|
||||||
$ref: '#/components/responses/DefaultError'
|
|
||||||
description: Default Response
|
|
||||||
tags:
|
|
||||||
- Tool Runtime
|
|
||||||
summary: List Runtime Tools
|
|
||||||
description: List all tools in the runtime.
|
|
||||||
operationId: list_runtime_tools_v1_tool_runtime_list_tools_get
|
|
||||||
parameters:
|
|
||||||
- name: authorization
|
|
||||||
in: query
|
|
||||||
required: false
|
|
||||||
schema:
|
|
||||||
anyOf:
|
|
||||||
- type: string
|
|
||||||
- type: 'null'
|
|
||||||
title: Authorization
|
|
||||||
- name: tool_group_id
|
|
||||||
in: query
|
|
||||||
required: false
|
|
||||||
schema:
|
|
||||||
anyOf:
|
|
||||||
- type: string
|
|
||||||
- type: 'null'
|
|
||||||
title: Tool Group Id
|
|
||||||
- name: mcp_endpoint
|
|
||||||
in: query
|
|
||||||
required: false
|
|
||||||
schema:
|
|
||||||
anyOf:
|
|
||||||
- $ref: '#/components/schemas/URL'
|
|
||||||
- type: 'null'
|
|
||||||
title: Mcp Endpoint
|
|
||||||
/v1/toolgroups:
|
|
||||||
get:
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
description: A ListToolGroupsResponse.
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/ListToolGroupsResponse'
|
|
||||||
'400':
|
|
||||||
description: Bad Request
|
|
||||||
$ref: '#/components/responses/BadRequest400'
|
|
||||||
'429':
|
|
||||||
description: Too Many Requests
|
|
||||||
$ref: '#/components/responses/TooManyRequests429'
|
|
||||||
'500':
|
|
||||||
description: Internal Server Error
|
|
||||||
$ref: '#/components/responses/InternalServerError500'
|
|
||||||
default:
|
|
||||||
description: Default Response
|
|
||||||
$ref: '#/components/responses/DefaultError'
|
|
||||||
tags:
|
|
||||||
- Tool Groups
|
|
||||||
summary: List Tool Groups
|
|
||||||
description: List tool groups with optional provider.
|
|
||||||
operationId: list_tool_groups_v1_toolgroups_get
|
|
||||||
/v1/toolgroups/{toolgroup_id}:
|
|
||||||
get:
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
description: A ToolGroup.
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/ToolGroup'
|
|
||||||
'400':
|
|
||||||
description: Bad Request
|
|
||||||
$ref: '#/components/responses/BadRequest400'
|
|
||||||
'429':
|
|
||||||
description: Too Many Requests
|
|
||||||
$ref: '#/components/responses/TooManyRequests429'
|
|
||||||
'500':
|
|
||||||
description: Internal Server Error
|
|
||||||
$ref: '#/components/responses/InternalServerError500'
|
|
||||||
default:
|
|
||||||
description: Default Response
|
|
||||||
$ref: '#/components/responses/DefaultError'
|
|
||||||
tags:
|
|
||||||
- Tool Groups
|
|
||||||
summary: Get Tool Group
|
|
||||||
description: Get a tool group by its ID.
|
|
||||||
operationId: get_tool_group_v1_toolgroups__toolgroup_id__get
|
|
||||||
parameters:
|
|
||||||
- name: toolgroup_id
|
|
||||||
in: path
|
|
||||||
required: true
|
|
||||||
schema:
|
|
||||||
type: string
|
|
||||||
description: 'Path parameter: toolgroup_id'
|
|
||||||
/v1/tools:
|
|
||||||
get:
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
description: A ListToolDefsResponse.
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/ListToolDefsResponse'
|
|
||||||
'400':
|
|
||||||
$ref: '#/components/responses/BadRequest400'
|
|
||||||
description: Bad Request
|
|
||||||
'429':
|
|
||||||
$ref: '#/components/responses/TooManyRequests429'
|
|
||||||
description: Too Many Requests
|
|
||||||
'500':
|
|
||||||
$ref: '#/components/responses/InternalServerError500'
|
|
||||||
description: Internal Server Error
|
|
||||||
default:
|
|
||||||
$ref: '#/components/responses/DefaultError'
|
|
||||||
description: Default Response
|
|
||||||
tags:
|
|
||||||
- Tool Groups
|
|
||||||
summary: List Tools
|
|
||||||
description: List tools with optional tool group.
|
|
||||||
operationId: list_tools_v1_tools_get
|
|
||||||
parameters:
|
|
||||||
- name: toolgroup_id
|
|
||||||
in: query
|
|
||||||
required: false
|
|
||||||
schema:
|
|
||||||
anyOf:
|
|
||||||
- type: string
|
|
||||||
- type: 'null'
|
|
||||||
title: Toolgroup Id
|
|
||||||
/v1/tools/{tool_name}:
|
|
||||||
get:
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
description: A ToolDef.
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/ToolDef'
|
|
||||||
'400':
|
|
||||||
description: Bad Request
|
|
||||||
$ref: '#/components/responses/BadRequest400'
|
|
||||||
'429':
|
|
||||||
description: Too Many Requests
|
|
||||||
$ref: '#/components/responses/TooManyRequests429'
|
|
||||||
'500':
|
|
||||||
description: Internal Server Error
|
|
||||||
$ref: '#/components/responses/InternalServerError500'
|
|
||||||
default:
|
|
||||||
description: Default Response
|
|
||||||
$ref: '#/components/responses/DefaultError'
|
|
||||||
tags:
|
|
||||||
- Tool Groups
|
|
||||||
summary: Get Tool
|
|
||||||
description: Get a tool by its name.
|
|
||||||
operationId: get_tool_v1_tools__tool_name__get
|
|
||||||
parameters:
|
|
||||||
- name: tool_name
|
|
||||||
in: path
|
|
||||||
required: true
|
|
||||||
schema:
|
|
||||||
type: string
|
|
||||||
description: 'Path parameter: tool_name'
|
|
||||||
/v1/vector-io/insert:
|
/v1/vector-io/insert:
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -5833,6 +5623,12 @@ components:
|
||||||
anyOf:
|
anyOf:
|
||||||
- type: integer
|
- type: integer
|
||||||
- type: 'null'
|
- type: 'null'
|
||||||
|
metadata:
|
||||||
|
anyOf:
|
||||||
|
- additionalProperties:
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
- type: 'null'
|
||||||
input:
|
input:
|
||||||
items:
|
items:
|
||||||
anyOf:
|
anyOf:
|
||||||
|
|
@ -6236,6 +6032,12 @@ components:
|
||||||
anyOf:
|
anyOf:
|
||||||
- type: integer
|
- type: integer
|
||||||
- type: 'null'
|
- type: 'null'
|
||||||
|
metadata:
|
||||||
|
anyOf:
|
||||||
|
- additionalProperties:
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
- type: 'null'
|
||||||
type: object
|
type: object
|
||||||
required:
|
required:
|
||||||
- input
|
- input
|
||||||
|
|
@ -6367,6 +6169,12 @@ components:
|
||||||
anyOf:
|
anyOf:
|
||||||
- type: integer
|
- type: integer
|
||||||
- type: 'null'
|
- type: 'null'
|
||||||
|
metadata:
|
||||||
|
anyOf:
|
||||||
|
- additionalProperties:
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
- type: 'null'
|
||||||
type: object
|
type: object
|
||||||
required:
|
required:
|
||||||
- created_at
|
- created_at
|
||||||
|
|
@ -8131,24 +7939,6 @@ components:
|
||||||
required:
|
required:
|
||||||
- data
|
- data
|
||||||
title: ListShieldsResponse
|
title: ListShieldsResponse
|
||||||
InvokeToolRequest:
|
|
||||||
properties:
|
|
||||||
tool_name:
|
|
||||||
type: string
|
|
||||||
title: Tool Name
|
|
||||||
kwargs:
|
|
||||||
additionalProperties: true
|
|
||||||
type: object
|
|
||||||
title: Kwargs
|
|
||||||
authorization:
|
|
||||||
anyOf:
|
|
||||||
- type: string
|
|
||||||
- type: 'null'
|
|
||||||
type: object
|
|
||||||
required:
|
|
||||||
- tool_name
|
|
||||||
- kwargs
|
|
||||||
title: InvokeToolRequest
|
|
||||||
ImageContentItem:
|
ImageContentItem:
|
||||||
description: A image content item
|
description: A image content item
|
||||||
properties:
|
properties:
|
||||||
|
|
@ -10866,227 +10656,6 @@ components:
|
||||||
- $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
|
- $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
|
||||||
title: OpenAIResponseContentPartReasoningText
|
title: OpenAIResponseContentPartReasoningText
|
||||||
title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
|
title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
|
||||||
SpanEndPayload:
|
|
||||||
description: Payload for a span end event.
|
|
||||||
properties:
|
|
||||||
type:
|
|
||||||
const: span_end
|
|
||||||
default: span_end
|
|
||||||
title: Type
|
|
||||||
type: string
|
|
||||||
status:
|
|
||||||
$ref: '#/components/schemas/SpanStatus'
|
|
||||||
required:
|
|
||||||
- status
|
|
||||||
title: SpanEndPayload
|
|
||||||
type: object
|
|
||||||
SpanStartPayload:
|
|
||||||
description: Payload for a span start event.
|
|
||||||
properties:
|
|
||||||
type:
|
|
||||||
const: span_start
|
|
||||||
default: span_start
|
|
||||||
title: Type
|
|
||||||
type: string
|
|
||||||
name:
|
|
||||||
title: Name
|
|
||||||
type: string
|
|
||||||
parent_span_id:
|
|
||||||
anyOf:
|
|
||||||
- type: string
|
|
||||||
- type: 'null'
|
|
||||||
nullable: true
|
|
||||||
required:
|
|
||||||
- name
|
|
||||||
title: SpanStartPayload
|
|
||||||
type: object
|
|
||||||
SpanStatus:
|
|
||||||
description: The status of a span indicating whether it completed successfully or with an error.
|
|
||||||
enum:
|
|
||||||
- ok
|
|
||||||
- error
|
|
||||||
title: SpanStatus
|
|
||||||
type: string
|
|
||||||
StructuredLogPayload:
|
|
||||||
discriminator:
|
|
||||||
mapping:
|
|
||||||
span_end: '#/components/schemas/SpanEndPayload'
|
|
||||||
span_start: '#/components/schemas/SpanStartPayload'
|
|
||||||
propertyName: type
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/SpanStartPayload'
|
|
||||||
title: SpanStartPayload
|
|
||||||
- $ref: '#/components/schemas/SpanEndPayload'
|
|
||||||
title: SpanEndPayload
|
|
||||||
title: SpanStartPayload | SpanEndPayload
|
|
||||||
LogSeverity:
|
|
||||||
description: The severity level of a log message.
|
|
||||||
enum:
|
|
||||||
- verbose
|
|
||||||
- debug
|
|
||||||
- info
|
|
||||||
- warn
|
|
||||||
- error
|
|
||||||
- critical
|
|
||||||
title: LogSeverity
|
|
||||||
type: string
|
|
||||||
MetricEvent:
|
|
||||||
description: A metric event containing a measured value.
|
|
||||||
properties:
|
|
||||||
trace_id:
|
|
||||||
title: Trace Id
|
|
||||||
type: string
|
|
||||||
span_id:
|
|
||||||
title: Span Id
|
|
||||||
type: string
|
|
||||||
timestamp:
|
|
||||||
format: date-time
|
|
||||||
title: Timestamp
|
|
||||||
type: string
|
|
||||||
attributes:
|
|
||||||
anyOf:
|
|
||||||
- additionalProperties:
|
|
||||||
anyOf:
|
|
||||||
- type: string
|
|
||||||
- type: integer
|
|
||||||
- type: number
|
|
||||||
- type: boolean
|
|
||||||
- type: 'null'
|
|
||||||
title: string | ... (4 variants)
|
|
||||||
type: object
|
|
||||||
- type: 'null'
|
|
||||||
type:
|
|
||||||
const: metric
|
|
||||||
default: metric
|
|
||||||
title: Type
|
|
||||||
type: string
|
|
||||||
metric:
|
|
||||||
title: Metric
|
|
||||||
type: string
|
|
||||||
value:
|
|
||||||
anyOf:
|
|
||||||
- type: integer
|
|
||||||
- type: number
|
|
||||||
title: integer | number
|
|
||||||
unit:
|
|
||||||
title: Unit
|
|
||||||
type: string
|
|
||||||
required:
|
|
||||||
- trace_id
|
|
||||||
- span_id
|
|
||||||
- timestamp
|
|
||||||
- metric
|
|
||||||
- value
|
|
||||||
- unit
|
|
||||||
title: MetricEvent
|
|
||||||
type: object
|
|
||||||
StructuredLogEvent:
|
|
||||||
description: A structured log event containing typed payload data.
|
|
||||||
properties:
|
|
||||||
trace_id:
|
|
||||||
title: Trace Id
|
|
||||||
type: string
|
|
||||||
span_id:
|
|
||||||
title: Span Id
|
|
||||||
type: string
|
|
||||||
timestamp:
|
|
||||||
format: date-time
|
|
||||||
title: Timestamp
|
|
||||||
type: string
|
|
||||||
attributes:
|
|
||||||
anyOf:
|
|
||||||
- additionalProperties:
|
|
||||||
anyOf:
|
|
||||||
- type: string
|
|
||||||
- type: integer
|
|
||||||
- type: number
|
|
||||||
- type: boolean
|
|
||||||
- type: 'null'
|
|
||||||
title: string | ... (4 variants)
|
|
||||||
type: object
|
|
||||||
- type: 'null'
|
|
||||||
type:
|
|
||||||
const: structured_log
|
|
||||||
default: structured_log
|
|
||||||
title: Type
|
|
||||||
type: string
|
|
||||||
payload:
|
|
||||||
discriminator:
|
|
||||||
mapping:
|
|
||||||
span_end: '#/components/schemas/SpanEndPayload'
|
|
||||||
span_start: '#/components/schemas/SpanStartPayload'
|
|
||||||
propertyName: type
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/SpanStartPayload'
|
|
||||||
title: SpanStartPayload
|
|
||||||
- $ref: '#/components/schemas/SpanEndPayload'
|
|
||||||
title: SpanEndPayload
|
|
||||||
title: SpanStartPayload | SpanEndPayload
|
|
||||||
required:
|
|
||||||
- trace_id
|
|
||||||
- span_id
|
|
||||||
- timestamp
|
|
||||||
- payload
|
|
||||||
title: StructuredLogEvent
|
|
||||||
type: object
|
|
||||||
UnstructuredLogEvent:
|
|
||||||
description: An unstructured log event containing a simple text message.
|
|
||||||
properties:
|
|
||||||
trace_id:
|
|
||||||
title: Trace Id
|
|
||||||
type: string
|
|
||||||
span_id:
|
|
||||||
title: Span Id
|
|
||||||
type: string
|
|
||||||
timestamp:
|
|
||||||
format: date-time
|
|
||||||
title: Timestamp
|
|
||||||
type: string
|
|
||||||
attributes:
|
|
||||||
anyOf:
|
|
||||||
- additionalProperties:
|
|
||||||
anyOf:
|
|
||||||
- type: string
|
|
||||||
- type: integer
|
|
||||||
- type: number
|
|
||||||
- type: boolean
|
|
||||||
- type: 'null'
|
|
||||||
title: string | ... (4 variants)
|
|
||||||
type: object
|
|
||||||
- type: 'null'
|
|
||||||
type:
|
|
||||||
const: unstructured_log
|
|
||||||
default: unstructured_log
|
|
||||||
title: Type
|
|
||||||
type: string
|
|
||||||
message:
|
|
||||||
title: Message
|
|
||||||
type: string
|
|
||||||
severity:
|
|
||||||
$ref: '#/components/schemas/LogSeverity'
|
|
||||||
required:
|
|
||||||
- trace_id
|
|
||||||
- span_id
|
|
||||||
- timestamp
|
|
||||||
- message
|
|
||||||
- severity
|
|
||||||
title: UnstructuredLogEvent
|
|
||||||
type: object
|
|
||||||
Event:
|
|
||||||
discriminator:
|
|
||||||
mapping:
|
|
||||||
metric: '#/components/schemas/MetricEvent'
|
|
||||||
structured_log: '#/components/schemas/StructuredLogEvent'
|
|
||||||
unstructured_log: '#/components/schemas/UnstructuredLogEvent'
|
|
||||||
propertyName: type
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/UnstructuredLogEvent'
|
|
||||||
title: UnstructuredLogEvent
|
|
||||||
- $ref: '#/components/schemas/MetricEvent'
|
|
||||||
title: MetricEvent
|
|
||||||
- $ref: '#/components/schemas/StructuredLogEvent'
|
|
||||||
title: StructuredLogEvent
|
|
||||||
title: UnstructuredLogEvent | MetricEvent | StructuredLogEvent
|
|
||||||
MetricInResponse:
|
MetricInResponse:
|
||||||
description: A metric value included in API responses.
|
description: A metric value included in API responses.
|
||||||
properties:
|
properties:
|
||||||
|
|
@ -11946,236 +11515,6 @@ components:
|
||||||
- logger_config
|
- logger_config
|
||||||
title: PostTrainingRLHFRequest
|
title: PostTrainingRLHFRequest
|
||||||
type: object
|
type: object
|
||||||
Span:
|
|
||||||
description: A span representing a single operation within a trace.
|
|
||||||
properties:
|
|
||||||
span_id:
|
|
||||||
title: Span Id
|
|
||||||
type: string
|
|
||||||
trace_id:
|
|
||||||
title: Trace Id
|
|
||||||
type: string
|
|
||||||
parent_span_id:
|
|
||||||
anyOf:
|
|
||||||
- type: string
|
|
||||||
- type: 'null'
|
|
||||||
nullable: true
|
|
||||||
name:
|
|
||||||
title: Name
|
|
||||||
type: string
|
|
||||||
start_time:
|
|
||||||
format: date-time
|
|
||||||
title: Start Time
|
|
||||||
type: string
|
|
||||||
end_time:
|
|
||||||
anyOf:
|
|
||||||
- format: date-time
|
|
||||||
type: string
|
|
||||||
- type: 'null'
|
|
||||||
nullable: true
|
|
||||||
attributes:
|
|
||||||
anyOf:
|
|
||||||
- additionalProperties: true
|
|
||||||
type: object
|
|
||||||
- type: 'null'
|
|
||||||
required:
|
|
||||||
- span_id
|
|
||||||
- trace_id
|
|
||||||
- name
|
|
||||||
- start_time
|
|
||||||
title: Span
|
|
||||||
type: object
|
|
||||||
Trace:
|
|
||||||
description: A trace representing the complete execution path of a request across multiple operations.
|
|
||||||
properties:
|
|
||||||
trace_id:
|
|
||||||
title: Trace Id
|
|
||||||
type: string
|
|
||||||
root_span_id:
|
|
||||||
title: Root Span Id
|
|
||||||
type: string
|
|
||||||
start_time:
|
|
||||||
format: date-time
|
|
||||||
title: Start Time
|
|
||||||
type: string
|
|
||||||
end_time:
|
|
||||||
anyOf:
|
|
||||||
- format: date-time
|
|
||||||
type: string
|
|
||||||
- type: 'null'
|
|
||||||
nullable: true
|
|
||||||
required:
|
|
||||||
- trace_id
|
|
||||||
- root_span_id
|
|
||||||
- start_time
|
|
||||||
title: Trace
|
|
||||||
type: object
|
|
||||||
EventType:
|
|
||||||
description: The type of telemetry event being logged.
|
|
||||||
enum:
|
|
||||||
- unstructured_log
|
|
||||||
- structured_log
|
|
||||||
- metric
|
|
||||||
title: EventType
|
|
||||||
type: string
|
|
||||||
StructuredLogType:
|
|
||||||
description: The type of structured log event payload.
|
|
||||||
enum:
|
|
||||||
- span_start
|
|
||||||
- span_end
|
|
||||||
title: StructuredLogType
|
|
||||||
type: string
|
|
||||||
EvalTrace:
|
|
||||||
description: A trace record for evaluation purposes.
|
|
||||||
properties:
|
|
||||||
session_id:
|
|
||||||
title: Session Id
|
|
||||||
type: string
|
|
||||||
step:
|
|
||||||
title: Step
|
|
||||||
type: string
|
|
||||||
input:
|
|
||||||
title: Input
|
|
||||||
type: string
|
|
||||||
output:
|
|
||||||
title: Output
|
|
||||||
type: string
|
|
||||||
expected_output:
|
|
||||||
title: Expected Output
|
|
||||||
type: string
|
|
||||||
required:
|
|
||||||
- session_id
|
|
||||||
- step
|
|
||||||
- input
|
|
||||||
- output
|
|
||||||
- expected_output
|
|
||||||
title: EvalTrace
|
|
||||||
type: object
|
|
||||||
SpanWithStatus:
|
|
||||||
description: A span that includes status information.
|
|
||||||
properties:
|
|
||||||
span_id:
|
|
||||||
title: Span Id
|
|
||||||
type: string
|
|
||||||
trace_id:
|
|
||||||
title: Trace Id
|
|
||||||
type: string
|
|
||||||
parent_span_id:
|
|
||||||
anyOf:
|
|
||||||
- type: string
|
|
||||||
- type: 'null'
|
|
||||||
nullable: true
|
|
||||||
name:
|
|
||||||
title: Name
|
|
||||||
type: string
|
|
||||||
start_time:
|
|
||||||
format: date-time
|
|
||||||
title: Start Time
|
|
||||||
type: string
|
|
||||||
end_time:
|
|
||||||
anyOf:
|
|
||||||
- format: date-time
|
|
||||||
type: string
|
|
||||||
- type: 'null'
|
|
||||||
nullable: true
|
|
||||||
attributes:
|
|
||||||
anyOf:
|
|
||||||
- additionalProperties: true
|
|
||||||
type: object
|
|
||||||
- type: 'null'
|
|
||||||
status:
|
|
||||||
anyOf:
|
|
||||||
- $ref: '#/components/schemas/SpanStatus'
|
|
||||||
title: SpanStatus
|
|
||||||
- type: 'null'
|
|
||||||
nullable: true
|
|
||||||
title: SpanStatus
|
|
||||||
required:
|
|
||||||
- span_id
|
|
||||||
- trace_id
|
|
||||||
- name
|
|
||||||
- start_time
|
|
||||||
title: SpanWithStatus
|
|
||||||
type: object
|
|
||||||
QueryConditionOp:
|
|
||||||
description: Comparison operators for query conditions.
|
|
||||||
enum:
|
|
||||||
- eq
|
|
||||||
- ne
|
|
||||||
- gt
|
|
||||||
- lt
|
|
||||||
title: QueryConditionOp
|
|
||||||
type: string
|
|
||||||
QueryCondition:
|
|
||||||
description: A condition for filtering query results.
|
|
||||||
properties:
|
|
||||||
key:
|
|
||||||
title: Key
|
|
||||||
type: string
|
|
||||||
op:
|
|
||||||
$ref: '#/components/schemas/QueryConditionOp'
|
|
||||||
value:
|
|
||||||
title: Value
|
|
||||||
required:
|
|
||||||
- key
|
|
||||||
- op
|
|
||||||
- value
|
|
||||||
title: QueryCondition
|
|
||||||
type: object
|
|
||||||
MetricLabel:
|
|
||||||
description: A label associated with a metric.
|
|
||||||
properties:
|
|
||||||
name:
|
|
||||||
title: Name
|
|
||||||
type: string
|
|
||||||
value:
|
|
||||||
title: Value
|
|
||||||
type: string
|
|
||||||
required:
|
|
||||||
- name
|
|
||||||
- value
|
|
||||||
title: MetricLabel
|
|
||||||
type: object
|
|
||||||
MetricDataPoint:
|
|
||||||
description: A single data point in a metric time series.
|
|
||||||
properties:
|
|
||||||
timestamp:
|
|
||||||
title: Timestamp
|
|
||||||
type: integer
|
|
||||||
value:
|
|
||||||
title: Value
|
|
||||||
type: number
|
|
||||||
unit:
|
|
||||||
title: Unit
|
|
||||||
type: string
|
|
||||||
required:
|
|
||||||
- timestamp
|
|
||||||
- value
|
|
||||||
- unit
|
|
||||||
title: MetricDataPoint
|
|
||||||
type: object
|
|
||||||
MetricSeries:
|
|
||||||
description: A time series of metric data points.
|
|
||||||
properties:
|
|
||||||
metric:
|
|
||||||
title: Metric
|
|
||||||
type: string
|
|
||||||
labels:
|
|
||||||
items:
|
|
||||||
$ref: '#/components/schemas/MetricLabel'
|
|
||||||
title: Labels
|
|
||||||
type: array
|
|
||||||
values:
|
|
||||||
items:
|
|
||||||
$ref: '#/components/schemas/MetricDataPoint'
|
|
||||||
title: Values
|
|
||||||
type: array
|
|
||||||
required:
|
|
||||||
- metric
|
|
||||||
- labels
|
|
||||||
- values
|
|
||||||
title: MetricSeries
|
|
||||||
type: object
|
|
||||||
responses:
|
responses:
|
||||||
BadRequest400:
|
BadRequest400:
|
||||||
description: The request was invalid or malformed
|
description: The request was invalid or malformed
|
||||||
|
|
|
||||||
66741
docs/static/openai-spec-2.3.0.yml
vendored
Normal file
66741
docs/static/openai-spec-2.3.0.yml
vendored
Normal file
File diff suppressed because it is too large
Load diff
475
docs/static/stainless-llama-stack-spec.yaml
vendored
475
docs/static/stainless-llama-stack-spec.yaml
vendored
|
|
@ -2101,6 +2101,7 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/components/schemas/InvokeToolRequest'
|
$ref: '#/components/schemas/InvokeToolRequest'
|
||||||
required: true
|
required: true
|
||||||
|
deprecated: true
|
||||||
/v1/tool-runtime/list-tools:
|
/v1/tool-runtime/list-tools:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -2152,6 +2153,7 @@ paths:
|
||||||
- $ref: '#/components/schemas/URL'
|
- $ref: '#/components/schemas/URL'
|
||||||
- type: 'null'
|
- type: 'null'
|
||||||
title: Mcp Endpoint
|
title: Mcp Endpoint
|
||||||
|
deprecated: true
|
||||||
/v1/toolgroups:
|
/v1/toolgroups:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -2178,6 +2180,7 @@ paths:
|
||||||
summary: List Tool Groups
|
summary: List Tool Groups
|
||||||
description: List tool groups with optional provider.
|
description: List tool groups with optional provider.
|
||||||
operationId: list_tool_groups_v1_toolgroups_get
|
operationId: list_tool_groups_v1_toolgroups_get
|
||||||
|
deprecated: true
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
'400':
|
'400':
|
||||||
|
|
@ -2239,6 +2242,7 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
description: 'Path parameter: toolgroup_id'
|
description: 'Path parameter: toolgroup_id'
|
||||||
|
deprecated: true
|
||||||
delete:
|
delete:
|
||||||
responses:
|
responses:
|
||||||
'400':
|
'400':
|
||||||
|
|
@ -2303,6 +2307,7 @@ paths:
|
||||||
- type: string
|
- type: string
|
||||||
- type: 'null'
|
- type: 'null'
|
||||||
title: Toolgroup Id
|
title: Toolgroup Id
|
||||||
|
deprecated: true
|
||||||
/v1/tools/{tool_name}:
|
/v1/tools/{tool_name}:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -2336,6 +2341,7 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
description: 'Path parameter: tool_name'
|
description: 'Path parameter: tool_name'
|
||||||
|
deprecated: true
|
||||||
/v1/vector-io/insert:
|
/v1/vector-io/insert:
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -6812,6 +6818,12 @@ components:
|
||||||
anyOf:
|
anyOf:
|
||||||
- type: integer
|
- type: integer
|
||||||
- type: 'null'
|
- type: 'null'
|
||||||
|
metadata:
|
||||||
|
anyOf:
|
||||||
|
- additionalProperties:
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
- type: 'null'
|
||||||
input:
|
input:
|
||||||
items:
|
items:
|
||||||
anyOf:
|
anyOf:
|
||||||
|
|
@ -7215,6 +7227,12 @@ components:
|
||||||
anyOf:
|
anyOf:
|
||||||
- type: integer
|
- type: integer
|
||||||
- type: 'null'
|
- type: 'null'
|
||||||
|
metadata:
|
||||||
|
anyOf:
|
||||||
|
- additionalProperties:
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
- type: 'null'
|
||||||
type: object
|
type: object
|
||||||
required:
|
required:
|
||||||
- input
|
- input
|
||||||
|
|
@ -7346,6 +7364,12 @@ components:
|
||||||
anyOf:
|
anyOf:
|
||||||
- type: integer
|
- type: integer
|
||||||
- type: 'null'
|
- type: 'null'
|
||||||
|
metadata:
|
||||||
|
anyOf:
|
||||||
|
- additionalProperties:
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
- type: 'null'
|
||||||
type: object
|
type: object
|
||||||
required:
|
required:
|
||||||
- created_at
|
- created_at
|
||||||
|
|
@ -12196,227 +12220,6 @@ components:
|
||||||
- $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
|
- $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
|
||||||
title: OpenAIResponseContentPartReasoningText
|
title: OpenAIResponseContentPartReasoningText
|
||||||
title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
|
title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
|
||||||
SpanEndPayload:
|
|
||||||
description: Payload for a span end event.
|
|
||||||
properties:
|
|
||||||
type:
|
|
||||||
const: span_end
|
|
||||||
default: span_end
|
|
||||||
title: Type
|
|
||||||
type: string
|
|
||||||
status:
|
|
||||||
$ref: '#/components/schemas/SpanStatus'
|
|
||||||
required:
|
|
||||||
- status
|
|
||||||
title: SpanEndPayload
|
|
||||||
type: object
|
|
||||||
SpanStartPayload:
|
|
||||||
description: Payload for a span start event.
|
|
||||||
properties:
|
|
||||||
type:
|
|
||||||
const: span_start
|
|
||||||
default: span_start
|
|
||||||
title: Type
|
|
||||||
type: string
|
|
||||||
name:
|
|
||||||
title: Name
|
|
||||||
type: string
|
|
||||||
parent_span_id:
|
|
||||||
anyOf:
|
|
||||||
- type: string
|
|
||||||
- type: 'null'
|
|
||||||
nullable: true
|
|
||||||
required:
|
|
||||||
- name
|
|
||||||
title: SpanStartPayload
|
|
||||||
type: object
|
|
||||||
SpanStatus:
|
|
||||||
description: The status of a span indicating whether it completed successfully or with an error.
|
|
||||||
enum:
|
|
||||||
- ok
|
|
||||||
- error
|
|
||||||
title: SpanStatus
|
|
||||||
type: string
|
|
||||||
StructuredLogPayload:
|
|
||||||
discriminator:
|
|
||||||
mapping:
|
|
||||||
span_end: '#/components/schemas/SpanEndPayload'
|
|
||||||
span_start: '#/components/schemas/SpanStartPayload'
|
|
||||||
propertyName: type
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/SpanStartPayload'
|
|
||||||
title: SpanStartPayload
|
|
||||||
- $ref: '#/components/schemas/SpanEndPayload'
|
|
||||||
title: SpanEndPayload
|
|
||||||
title: SpanStartPayload | SpanEndPayload
|
|
||||||
LogSeverity:
|
|
||||||
description: The severity level of a log message.
|
|
||||||
enum:
|
|
||||||
- verbose
|
|
||||||
- debug
|
|
||||||
- info
|
|
||||||
- warn
|
|
||||||
- error
|
|
||||||
- critical
|
|
||||||
title: LogSeverity
|
|
||||||
type: string
|
|
||||||
MetricEvent:
|
|
||||||
description: A metric event containing a measured value.
|
|
||||||
properties:
|
|
||||||
trace_id:
|
|
||||||
title: Trace Id
|
|
||||||
type: string
|
|
||||||
span_id:
|
|
||||||
title: Span Id
|
|
||||||
type: string
|
|
||||||
timestamp:
|
|
||||||
format: date-time
|
|
||||||
title: Timestamp
|
|
||||||
type: string
|
|
||||||
attributes:
|
|
||||||
anyOf:
|
|
||||||
- additionalProperties:
|
|
||||||
anyOf:
|
|
||||||
- type: string
|
|
||||||
- type: integer
|
|
||||||
- type: number
|
|
||||||
- type: boolean
|
|
||||||
- type: 'null'
|
|
||||||
title: string | ... (4 variants)
|
|
||||||
type: object
|
|
||||||
- type: 'null'
|
|
||||||
type:
|
|
||||||
const: metric
|
|
||||||
default: metric
|
|
||||||
title: Type
|
|
||||||
type: string
|
|
||||||
metric:
|
|
||||||
title: Metric
|
|
||||||
type: string
|
|
||||||
value:
|
|
||||||
anyOf:
|
|
||||||
- type: integer
|
|
||||||
- type: number
|
|
||||||
title: integer | number
|
|
||||||
unit:
|
|
||||||
title: Unit
|
|
||||||
type: string
|
|
||||||
required:
|
|
||||||
- trace_id
|
|
||||||
- span_id
|
|
||||||
- timestamp
|
|
||||||
- metric
|
|
||||||
- value
|
|
||||||
- unit
|
|
||||||
title: MetricEvent
|
|
||||||
type: object
|
|
||||||
StructuredLogEvent:
|
|
||||||
description: A structured log event containing typed payload data.
|
|
||||||
properties:
|
|
||||||
trace_id:
|
|
||||||
title: Trace Id
|
|
||||||
type: string
|
|
||||||
span_id:
|
|
||||||
title: Span Id
|
|
||||||
type: string
|
|
||||||
timestamp:
|
|
||||||
format: date-time
|
|
||||||
title: Timestamp
|
|
||||||
type: string
|
|
||||||
attributes:
|
|
||||||
anyOf:
|
|
||||||
- additionalProperties:
|
|
||||||
anyOf:
|
|
||||||
- type: string
|
|
||||||
- type: integer
|
|
||||||
- type: number
|
|
||||||
- type: boolean
|
|
||||||
- type: 'null'
|
|
||||||
title: string | ... (4 variants)
|
|
||||||
type: object
|
|
||||||
- type: 'null'
|
|
||||||
type:
|
|
||||||
const: structured_log
|
|
||||||
default: structured_log
|
|
||||||
title: Type
|
|
||||||
type: string
|
|
||||||
payload:
|
|
||||||
discriminator:
|
|
||||||
mapping:
|
|
||||||
span_end: '#/components/schemas/SpanEndPayload'
|
|
||||||
span_start: '#/components/schemas/SpanStartPayload'
|
|
||||||
propertyName: type
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/SpanStartPayload'
|
|
||||||
title: SpanStartPayload
|
|
||||||
- $ref: '#/components/schemas/SpanEndPayload'
|
|
||||||
title: SpanEndPayload
|
|
||||||
title: SpanStartPayload | SpanEndPayload
|
|
||||||
required:
|
|
||||||
- trace_id
|
|
||||||
- span_id
|
|
||||||
- timestamp
|
|
||||||
- payload
|
|
||||||
title: StructuredLogEvent
|
|
||||||
type: object
|
|
||||||
UnstructuredLogEvent:
|
|
||||||
description: An unstructured log event containing a simple text message.
|
|
||||||
properties:
|
|
||||||
trace_id:
|
|
||||||
title: Trace Id
|
|
||||||
type: string
|
|
||||||
span_id:
|
|
||||||
title: Span Id
|
|
||||||
type: string
|
|
||||||
timestamp:
|
|
||||||
format: date-time
|
|
||||||
title: Timestamp
|
|
||||||
type: string
|
|
||||||
attributes:
|
|
||||||
anyOf:
|
|
||||||
- additionalProperties:
|
|
||||||
anyOf:
|
|
||||||
- type: string
|
|
||||||
- type: integer
|
|
||||||
- type: number
|
|
||||||
- type: boolean
|
|
||||||
- type: 'null'
|
|
||||||
title: string | ... (4 variants)
|
|
||||||
type: object
|
|
||||||
- type: 'null'
|
|
||||||
type:
|
|
||||||
const: unstructured_log
|
|
||||||
default: unstructured_log
|
|
||||||
title: Type
|
|
||||||
type: string
|
|
||||||
message:
|
|
||||||
title: Message
|
|
||||||
type: string
|
|
||||||
severity:
|
|
||||||
$ref: '#/components/schemas/LogSeverity'
|
|
||||||
required:
|
|
||||||
- trace_id
|
|
||||||
- span_id
|
|
||||||
- timestamp
|
|
||||||
- message
|
|
||||||
- severity
|
|
||||||
title: UnstructuredLogEvent
|
|
||||||
type: object
|
|
||||||
Event:
|
|
||||||
discriminator:
|
|
||||||
mapping:
|
|
||||||
metric: '#/components/schemas/MetricEvent'
|
|
||||||
structured_log: '#/components/schemas/StructuredLogEvent'
|
|
||||||
unstructured_log: '#/components/schemas/UnstructuredLogEvent'
|
|
||||||
propertyName: type
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/UnstructuredLogEvent'
|
|
||||||
title: UnstructuredLogEvent
|
|
||||||
- $ref: '#/components/schemas/MetricEvent'
|
|
||||||
title: MetricEvent
|
|
||||||
- $ref: '#/components/schemas/StructuredLogEvent'
|
|
||||||
title: StructuredLogEvent
|
|
||||||
title: UnstructuredLogEvent | MetricEvent | StructuredLogEvent
|
|
||||||
MetricInResponse:
|
MetricInResponse:
|
||||||
description: A metric value included in API responses.
|
description: A metric value included in API responses.
|
||||||
properties:
|
properties:
|
||||||
|
|
@ -13279,236 +13082,6 @@ components:
|
||||||
- logger_config
|
- logger_config
|
||||||
title: PostTrainingRLHFRequest
|
title: PostTrainingRLHFRequest
|
||||||
type: object
|
type: object
|
||||||
Span:
|
|
||||||
description: A span representing a single operation within a trace.
|
|
||||||
properties:
|
|
||||||
span_id:
|
|
||||||
title: Span Id
|
|
||||||
type: string
|
|
||||||
trace_id:
|
|
||||||
title: Trace Id
|
|
||||||
type: string
|
|
||||||
parent_span_id:
|
|
||||||
anyOf:
|
|
||||||
- type: string
|
|
||||||
- type: 'null'
|
|
||||||
nullable: true
|
|
||||||
name:
|
|
||||||
title: Name
|
|
||||||
type: string
|
|
||||||
start_time:
|
|
||||||
format: date-time
|
|
||||||
title: Start Time
|
|
||||||
type: string
|
|
||||||
end_time:
|
|
||||||
anyOf:
|
|
||||||
- format: date-time
|
|
||||||
type: string
|
|
||||||
- type: 'null'
|
|
||||||
nullable: true
|
|
||||||
attributes:
|
|
||||||
anyOf:
|
|
||||||
- additionalProperties: true
|
|
||||||
type: object
|
|
||||||
- type: 'null'
|
|
||||||
required:
|
|
||||||
- span_id
|
|
||||||
- trace_id
|
|
||||||
- name
|
|
||||||
- start_time
|
|
||||||
title: Span
|
|
||||||
type: object
|
|
||||||
Trace:
|
|
||||||
description: A trace representing the complete execution path of a request across multiple operations.
|
|
||||||
properties:
|
|
||||||
trace_id:
|
|
||||||
title: Trace Id
|
|
||||||
type: string
|
|
||||||
root_span_id:
|
|
||||||
title: Root Span Id
|
|
||||||
type: string
|
|
||||||
start_time:
|
|
||||||
format: date-time
|
|
||||||
title: Start Time
|
|
||||||
type: string
|
|
||||||
end_time:
|
|
||||||
anyOf:
|
|
||||||
- format: date-time
|
|
||||||
type: string
|
|
||||||
- type: 'null'
|
|
||||||
nullable: true
|
|
||||||
required:
|
|
||||||
- trace_id
|
|
||||||
- root_span_id
|
|
||||||
- start_time
|
|
||||||
title: Trace
|
|
||||||
type: object
|
|
||||||
EventType:
|
|
||||||
description: The type of telemetry event being logged.
|
|
||||||
enum:
|
|
||||||
- unstructured_log
|
|
||||||
- structured_log
|
|
||||||
- metric
|
|
||||||
title: EventType
|
|
||||||
type: string
|
|
||||||
StructuredLogType:
|
|
||||||
description: The type of structured log event payload.
|
|
||||||
enum:
|
|
||||||
- span_start
|
|
||||||
- span_end
|
|
||||||
title: StructuredLogType
|
|
||||||
type: string
|
|
||||||
EvalTrace:
|
|
||||||
description: A trace record for evaluation purposes.
|
|
||||||
properties:
|
|
||||||
session_id:
|
|
||||||
title: Session Id
|
|
||||||
type: string
|
|
||||||
step:
|
|
||||||
title: Step
|
|
||||||
type: string
|
|
||||||
input:
|
|
||||||
title: Input
|
|
||||||
type: string
|
|
||||||
output:
|
|
||||||
title: Output
|
|
||||||
type: string
|
|
||||||
expected_output:
|
|
||||||
title: Expected Output
|
|
||||||
type: string
|
|
||||||
required:
|
|
||||||
- session_id
|
|
||||||
- step
|
|
||||||
- input
|
|
||||||
- output
|
|
||||||
- expected_output
|
|
||||||
title: EvalTrace
|
|
||||||
type: object
|
|
||||||
SpanWithStatus:
|
|
||||||
description: A span that includes status information.
|
|
||||||
properties:
|
|
||||||
span_id:
|
|
||||||
title: Span Id
|
|
||||||
type: string
|
|
||||||
trace_id:
|
|
||||||
title: Trace Id
|
|
||||||
type: string
|
|
||||||
parent_span_id:
|
|
||||||
anyOf:
|
|
||||||
- type: string
|
|
||||||
- type: 'null'
|
|
||||||
nullable: true
|
|
||||||
name:
|
|
||||||
title: Name
|
|
||||||
type: string
|
|
||||||
start_time:
|
|
||||||
format: date-time
|
|
||||||
title: Start Time
|
|
||||||
type: string
|
|
||||||
end_time:
|
|
||||||
anyOf:
|
|
||||||
- format: date-time
|
|
||||||
type: string
|
|
||||||
- type: 'null'
|
|
||||||
nullable: true
|
|
||||||
attributes:
|
|
||||||
anyOf:
|
|
||||||
- additionalProperties: true
|
|
||||||
type: object
|
|
||||||
- type: 'null'
|
|
||||||
status:
|
|
||||||
anyOf:
|
|
||||||
- $ref: '#/components/schemas/SpanStatus'
|
|
||||||
title: SpanStatus
|
|
||||||
- type: 'null'
|
|
||||||
nullable: true
|
|
||||||
title: SpanStatus
|
|
||||||
required:
|
|
||||||
- span_id
|
|
||||||
- trace_id
|
|
||||||
- name
|
|
||||||
- start_time
|
|
||||||
title: SpanWithStatus
|
|
||||||
type: object
|
|
||||||
QueryConditionOp:
|
|
||||||
description: Comparison operators for query conditions.
|
|
||||||
enum:
|
|
||||||
- eq
|
|
||||||
- ne
|
|
||||||
- gt
|
|
||||||
- lt
|
|
||||||
title: QueryConditionOp
|
|
||||||
type: string
|
|
||||||
QueryCondition:
|
|
||||||
description: A condition for filtering query results.
|
|
||||||
properties:
|
|
||||||
key:
|
|
||||||
title: Key
|
|
||||||
type: string
|
|
||||||
op:
|
|
||||||
$ref: '#/components/schemas/QueryConditionOp'
|
|
||||||
value:
|
|
||||||
title: Value
|
|
||||||
required:
|
|
||||||
- key
|
|
||||||
- op
|
|
||||||
- value
|
|
||||||
title: QueryCondition
|
|
||||||
type: object
|
|
||||||
MetricLabel:
|
|
||||||
description: A label associated with a metric.
|
|
||||||
properties:
|
|
||||||
name:
|
|
||||||
title: Name
|
|
||||||
type: string
|
|
||||||
value:
|
|
||||||
title: Value
|
|
||||||
type: string
|
|
||||||
required:
|
|
||||||
- name
|
|
||||||
- value
|
|
||||||
title: MetricLabel
|
|
||||||
type: object
|
|
||||||
MetricDataPoint:
|
|
||||||
description: A single data point in a metric time series.
|
|
||||||
properties:
|
|
||||||
timestamp:
|
|
||||||
title: Timestamp
|
|
||||||
type: integer
|
|
||||||
value:
|
|
||||||
title: Value
|
|
||||||
type: number
|
|
||||||
unit:
|
|
||||||
title: Unit
|
|
||||||
type: string
|
|
||||||
required:
|
|
||||||
- timestamp
|
|
||||||
- value
|
|
||||||
- unit
|
|
||||||
title: MetricDataPoint
|
|
||||||
type: object
|
|
||||||
MetricSeries:
|
|
||||||
description: A time series of metric data points.
|
|
||||||
properties:
|
|
||||||
metric:
|
|
||||||
title: Metric
|
|
||||||
type: string
|
|
||||||
labels:
|
|
||||||
items:
|
|
||||||
$ref: '#/components/schemas/MetricLabel'
|
|
||||||
title: Labels
|
|
||||||
type: array
|
|
||||||
values:
|
|
||||||
items:
|
|
||||||
$ref: '#/components/schemas/MetricDataPoint'
|
|
||||||
title: Values
|
|
||||||
type: array
|
|
||||||
required:
|
|
||||||
- metric
|
|
||||||
- labels
|
|
||||||
- values
|
|
||||||
title: MetricSeries
|
|
||||||
type: object
|
|
||||||
responses:
|
responses:
|
||||||
BadRequest400:
|
BadRequest400:
|
||||||
description: The request was invalid or malformed
|
description: The request was invalid or malformed
|
||||||
|
|
|
||||||
|
|
@ -171,10 +171,18 @@ if [[ "$COLLECT_ONLY" == false ]]; then
|
||||||
|
|
||||||
# Set MCP host for in-process MCP server tests
|
# Set MCP host for in-process MCP server tests
|
||||||
# - For library client and server mode: localhost (both on same host)
|
# - For library client and server mode: localhost (both on same host)
|
||||||
# - For docker mode: host.docker.internal (container needs to reach host)
|
# - For docker mode on Linux: localhost (container uses host network, shares network namespace)
|
||||||
|
# - For docker mode on macOS/Windows: host.docker.internal (container uses bridge network)
|
||||||
if [[ "$STACK_CONFIG" == docker:* ]]; then
|
if [[ "$STACK_CONFIG" == docker:* ]]; then
|
||||||
export LLAMA_STACK_TEST_MCP_HOST="host.docker.internal"
|
if [[ "$(uname)" != "Darwin" ]] && [[ "$(uname)" != *"MINGW"* ]]; then
|
||||||
echo "Setting MCP host: host.docker.internal (docker mode)"
|
# On Linux with host network mode, container shares host network namespace
|
||||||
|
export LLAMA_STACK_TEST_MCP_HOST="localhost"
|
||||||
|
echo "Setting MCP host: localhost (docker mode with host network)"
|
||||||
|
else
|
||||||
|
# On macOS/Windows with bridge network, need special host access
|
||||||
|
export LLAMA_STACK_TEST_MCP_HOST="host.docker.internal"
|
||||||
|
echo "Setting MCP host: host.docker.internal (docker mode with bridge network)"
|
||||||
|
fi
|
||||||
else
|
else
|
||||||
export LLAMA_STACK_TEST_MCP_HOST="localhost"
|
export LLAMA_STACK_TEST_MCP_HOST="localhost"
|
||||||
echo "Setting MCP host: localhost (library/server mode)"
|
echo "Setting MCP host: localhost (library/server mode)"
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,6 @@
|
||||||
Schema discovery and collection for OpenAPI generation.
|
Schema discovery and collection for OpenAPI generation.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import importlib
|
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -20,23 +19,6 @@ def _ensure_components_schemas(openapi_schema: dict[str, Any]) -> None:
|
||||||
openapi_schema["components"]["schemas"] = {}
|
openapi_schema["components"]["schemas"] = {}
|
||||||
|
|
||||||
|
|
||||||
def _load_extra_schema_modules() -> None:
|
|
||||||
"""
|
|
||||||
Import modules outside llama_stack_api that use schema_utils to register schemas.
|
|
||||||
|
|
||||||
The API package already imports its submodules via __init__, but server-side modules
|
|
||||||
like telemetry need to be imported explicitly so their decorator side effects run.
|
|
||||||
"""
|
|
||||||
extra_modules = [
|
|
||||||
"llama_stack.core.telemetry.telemetry",
|
|
||||||
]
|
|
||||||
for module_name in extra_modules:
|
|
||||||
try:
|
|
||||||
importlib.import_module(module_name)
|
|
||||||
except ImportError:
|
|
||||||
continue
|
|
||||||
|
|
||||||
|
|
||||||
def _extract_and_fix_defs(schema: dict[str, Any], openapi_schema: dict[str, Any]) -> None:
|
def _extract_and_fix_defs(schema: dict[str, Any], openapi_schema: dict[str, Any]) -> None:
|
||||||
"""
|
"""
|
||||||
Extract $defs from a schema, move them to components/schemas, and fix references.
|
Extract $defs from a schema, move them to components/schemas, and fix references.
|
||||||
|
|
@ -79,9 +61,6 @@ def _ensure_json_schema_types_included(openapi_schema: dict[str, Any]) -> dict[s
|
||||||
iter_registered_schema_types,
|
iter_registered_schema_types,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Import extra modules (e.g., telemetry) whose schema registrations live outside llama_stack_api
|
|
||||||
_load_extra_schema_modules()
|
|
||||||
|
|
||||||
# Handle explicitly registered schemas first (union types, Annotated structs, etc.)
|
# Handle explicitly registered schemas first (union types, Annotated structs, etc.)
|
||||||
for registration_info in iter_registered_schema_types():
|
for registration_info in iter_registered_schema_types():
|
||||||
schema_type = registration_info.type
|
schema_type = registration_info.type
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,24 @@
|
||||||
{
|
{
|
||||||
"annotations": {
|
"annotations": {
|
||||||
"list": []
|
"list": [
|
||||||
|
{
|
||||||
|
"builtIn": 1,
|
||||||
|
"datasource": {
|
||||||
|
"type": "grafana",
|
||||||
|
"uid": "-- Grafana --"
|
||||||
|
},
|
||||||
|
"enable": true,
|
||||||
|
"hide": true,
|
||||||
|
"iconColor": "rgba(0, 211, 255, 1)",
|
||||||
|
"name": "Annotations & Alerts",
|
||||||
|
"type": "dashboard"
|
||||||
|
}
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"editable": true,
|
"editable": true,
|
||||||
"fiscalYearStartMonth": 0,
|
"fiscalYearStartMonth": 0,
|
||||||
"graphTooltip": 0,
|
"graphTooltip": 0,
|
||||||
"id": null,
|
"id": 1,
|
||||||
"links": [],
|
"links": [],
|
||||||
"liveNow": false,
|
"liveNow": false,
|
||||||
"panels": [
|
"panels": [
|
||||||
|
|
@ -16,11 +29,40 @@
|
||||||
},
|
},
|
||||||
"fieldConfig": {
|
"fieldConfig": {
|
||||||
"defaults": {
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
"custom": {
|
"custom": {
|
||||||
|
"axisBorderShow": false,
|
||||||
|
"axisCenteredZero": false,
|
||||||
|
"axisColorMode": "text",
|
||||||
|
"axisLabel": "",
|
||||||
|
"axisPlacement": "auto",
|
||||||
|
"barAlignment": 0,
|
||||||
"drawStyle": "line",
|
"drawStyle": "line",
|
||||||
|
"fillOpacity": 10,
|
||||||
|
"gradientMode": "none",
|
||||||
|
"hideFrom": {
|
||||||
|
"legend": false,
|
||||||
|
"tooltip": false,
|
||||||
|
"viz": false
|
||||||
|
},
|
||||||
|
"insertNulls": false,
|
||||||
"lineInterpolation": "linear",
|
"lineInterpolation": "linear",
|
||||||
|
"lineWidth": 1,
|
||||||
|
"pointSize": 5,
|
||||||
|
"scaleDistribution": {
|
||||||
|
"type": "linear"
|
||||||
|
},
|
||||||
"showPoints": "auto",
|
"showPoints": "auto",
|
||||||
"fillOpacity": 10
|
"spanNulls": false,
|
||||||
|
"stacking": {
|
||||||
|
"group": "A",
|
||||||
|
"mode": "none"
|
||||||
|
},
|
||||||
|
"thresholdsStyle": {
|
||||||
|
"mode": "off"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"mappings": [],
|
"mappings": [],
|
||||||
"thresholds": {
|
"thresholds": {
|
||||||
|
|
@ -32,7 +74,8 @@
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
"overrides": []
|
||||||
},
|
},
|
||||||
"gridPos": {
|
"gridPos": {
|
||||||
"h": 8,
|
"h": 8,
|
||||||
|
|
@ -40,15 +83,16 @@
|
||||||
"x": 0,
|
"x": 0,
|
||||||
"y": 0
|
"y": 0
|
||||||
},
|
},
|
||||||
"id": 1,
|
"id": 2,
|
||||||
"options": {
|
"options": {
|
||||||
"legend": {
|
"legend": {
|
||||||
"calcs": [],
|
"calcs": [],
|
||||||
"displayMode": "table",
|
"displayMode": "list",
|
||||||
"placement": "bottom",
|
"placement": "bottom",
|
||||||
"showLegend": true
|
"showLegend": true
|
||||||
},
|
},
|
||||||
"tooltip": {
|
"tooltip": {
|
||||||
|
"maxHeight": 600,
|
||||||
"mode": "multi",
|
"mode": "multi",
|
||||||
"sort": "none"
|
"sort": "none"
|
||||||
}
|
}
|
||||||
|
|
@ -59,9 +103,112 @@
|
||||||
"type": "prometheus",
|
"type": "prometheus",
|
||||||
"uid": "prometheus"
|
"uid": "prometheus"
|
||||||
},
|
},
|
||||||
"expr": "llama_stack_completion_tokens_total",
|
"disableTextWrap": false,
|
||||||
"legendFormat": "{{model_id}} ({{provider_id}})",
|
"editorMode": "builder",
|
||||||
"refId": "A"
|
"expr": "sum by(gen_ai_request_model) (llama_stack_gen_ai_client_token_usage_sum{gen_ai_token_type=\"input\"})",
|
||||||
|
"fullMetaSearch": false,
|
||||||
|
"includeNullMetadata": true,
|
||||||
|
"legendFormat": "__auto",
|
||||||
|
"range": true,
|
||||||
|
"refId": "A",
|
||||||
|
"useBackend": false
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Prompt Tokens",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "prometheus"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"custom": {
|
||||||
|
"axisBorderShow": false,
|
||||||
|
"axisCenteredZero": false,
|
||||||
|
"axisColorMode": "text",
|
||||||
|
"axisLabel": "",
|
||||||
|
"axisPlacement": "auto",
|
||||||
|
"barAlignment": 0,
|
||||||
|
"drawStyle": "line",
|
||||||
|
"fillOpacity": 10,
|
||||||
|
"gradientMode": "none",
|
||||||
|
"hideFrom": {
|
||||||
|
"legend": false,
|
||||||
|
"tooltip": false,
|
||||||
|
"viz": false
|
||||||
|
},
|
||||||
|
"insertNulls": false,
|
||||||
|
"lineInterpolation": "linear",
|
||||||
|
"lineWidth": 1,
|
||||||
|
"pointSize": 5,
|
||||||
|
"scaleDistribution": {
|
||||||
|
"type": "linear"
|
||||||
|
},
|
||||||
|
"showPoints": "auto",
|
||||||
|
"spanNulls": false,
|
||||||
|
"stacking": {
|
||||||
|
"group": "A",
|
||||||
|
"mode": "none"
|
||||||
|
},
|
||||||
|
"thresholdsStyle": {
|
||||||
|
"mode": "off"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"mappings": [],
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{
|
||||||
|
"color": "green",
|
||||||
|
"value": null
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 12,
|
||||||
|
"y": 0
|
||||||
|
},
|
||||||
|
"id": 1,
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"calcs": [],
|
||||||
|
"displayMode": "list",
|
||||||
|
"placement": "bottom",
|
||||||
|
"showLegend": true
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"maxHeight": 600,
|
||||||
|
"mode": "multi",
|
||||||
|
"sort": "none"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "prometheus"
|
||||||
|
},
|
||||||
|
"disableTextWrap": false,
|
||||||
|
"editorMode": "builder",
|
||||||
|
"exemplar": false,
|
||||||
|
"expr": "sum by(gen_ai_request_model) (llama_stack_gen_ai_client_token_usage_sum{gen_ai_token_type=\"output\"})",
|
||||||
|
"fullMetaSearch": false,
|
||||||
|
"includeNullMetadata": true,
|
||||||
|
"interval": "",
|
||||||
|
"legendFormat": "__auto",
|
||||||
|
"range": true,
|
||||||
|
"refId": "A",
|
||||||
|
"useBackend": false
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"title": "Completion Tokens",
|
"title": "Completion Tokens",
|
||||||
|
|
@ -74,78 +221,40 @@
|
||||||
},
|
},
|
||||||
"fieldConfig": {
|
"fieldConfig": {
|
||||||
"defaults": {
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
"custom": {
|
"custom": {
|
||||||
|
"axisBorderShow": false,
|
||||||
|
"axisCenteredZero": false,
|
||||||
|
"axisColorMode": "text",
|
||||||
|
"axisLabel": "",
|
||||||
|
"axisPlacement": "auto",
|
||||||
|
"barAlignment": 0,
|
||||||
"drawStyle": "line",
|
"drawStyle": "line",
|
||||||
|
"fillOpacity": 10,
|
||||||
|
"gradientMode": "none",
|
||||||
|
"hideFrom": {
|
||||||
|
"legend": false,
|
||||||
|
"tooltip": false,
|
||||||
|
"viz": false
|
||||||
|
},
|
||||||
|
"insertNulls": false,
|
||||||
"lineInterpolation": "linear",
|
"lineInterpolation": "linear",
|
||||||
|
"lineWidth": 1,
|
||||||
|
"pointSize": 5,
|
||||||
|
"scaleDistribution": {
|
||||||
|
"type": "linear"
|
||||||
|
},
|
||||||
"showPoints": "auto",
|
"showPoints": "auto",
|
||||||
"fillOpacity": 10
|
"spanNulls": false,
|
||||||
},
|
"stacking": {
|
||||||
"mappings": [],
|
"group": "A",
|
||||||
"thresholds": {
|
"mode": "none"
|
||||||
"mode": "absolute",
|
},
|
||||||
"steps": [
|
"thresholdsStyle": {
|
||||||
{
|
"mode": "off"
|
||||||
"color": "green",
|
}
|
||||||
"value": null
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"gridPos": {
|
|
||||||
"h": 8,
|
|
||||||
"w": 12,
|
|
||||||
"x": 12,
|
|
||||||
"y": 0
|
|
||||||
},
|
|
||||||
"id": 2,
|
|
||||||
"options": {
|
|
||||||
"legend": {
|
|
||||||
"calcs": [],
|
|
||||||
"displayMode": "table",
|
|
||||||
"placement": "bottom",
|
|
||||||
"showLegend": true
|
|
||||||
},
|
|
||||||
"tooltip": {
|
|
||||||
"mode": "multi",
|
|
||||||
"sort": "none"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"targets": [
|
|
||||||
{
|
|
||||||
"datasource": {
|
|
||||||
"type": "prometheus",
|
|
||||||
"uid": "prometheus"
|
|
||||||
},
|
|
||||||
"expr": "llama_stack_prompt_tokens_total",
|
|
||||||
"legendFormat": "Prompt - {{model_id}}",
|
|
||||||
"refId": "A"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"datasource": {
|
|
||||||
"type": "prometheus",
|
|
||||||
"uid": "prometheus"
|
|
||||||
},
|
|
||||||
"expr": "llama_stack_tokens_total",
|
|
||||||
"legendFormat": "Total - {{model_id}}",
|
|
||||||
"refId": "B"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"title": "Prompt & Total Tokens",
|
|
||||||
"type": "timeseries"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"datasource": {
|
|
||||||
"type": "prometheus",
|
|
||||||
"uid": "prometheus"
|
|
||||||
},
|
|
||||||
"fieldConfig": {
|
|
||||||
"defaults": {
|
|
||||||
"custom": {
|
|
||||||
"drawStyle": "line",
|
|
||||||
"lineInterpolation": "linear",
|
|
||||||
"showPoints": "auto",
|
|
||||||
"fillOpacity": 10
|
|
||||||
},
|
},
|
||||||
"mappings": [],
|
"mappings": [],
|
||||||
"thresholds": {
|
"thresholds": {
|
||||||
|
|
@ -158,7 +267,8 @@
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"unit": "ms"
|
"unit": "ms"
|
||||||
}
|
},
|
||||||
|
"overrides": []
|
||||||
},
|
},
|
||||||
"gridPos": {
|
"gridPos": {
|
||||||
"h": 8,
|
"h": 8,
|
||||||
|
|
@ -175,6 +285,7 @@
|
||||||
"showLegend": true
|
"showLegend": true
|
||||||
},
|
},
|
||||||
"tooltip": {
|
"tooltip": {
|
||||||
|
"maxHeight": 600,
|
||||||
"mode": "multi",
|
"mode": "multi",
|
||||||
"sort": "none"
|
"sort": "none"
|
||||||
}
|
}
|
||||||
|
|
@ -219,7 +330,8 @@
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
"overrides": []
|
||||||
},
|
},
|
||||||
"gridPos": {
|
"gridPos": {
|
||||||
"h": 8,
|
"h": 8,
|
||||||
|
|
@ -240,8 +352,11 @@
|
||||||
"fields": "",
|
"fields": "",
|
||||||
"values": false
|
"values": false
|
||||||
},
|
},
|
||||||
"textMode": "auto"
|
"showPercentChange": false,
|
||||||
|
"textMode": "auto",
|
||||||
|
"wideLayout": true
|
||||||
},
|
},
|
||||||
|
"pluginVersion": "11.0.0",
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"datasource": {
|
"datasource": {
|
||||||
|
|
@ -272,7 +387,8 @@
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
"overrides": []
|
||||||
},
|
},
|
||||||
"gridPos": {
|
"gridPos": {
|
||||||
"h": 8,
|
"h": 8,
|
||||||
|
|
@ -293,8 +409,11 @@
|
||||||
"fields": "",
|
"fields": "",
|
||||||
"values": false
|
"values": false
|
||||||
},
|
},
|
||||||
"textMode": "auto"
|
"showPercentChange": false,
|
||||||
|
"textMode": "auto",
|
||||||
|
"wideLayout": true
|
||||||
},
|
},
|
||||||
|
"pluginVersion": "11.0.0",
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"datasource": {
|
"datasource": {
|
||||||
|
|
@ -315,11 +434,40 @@
|
||||||
},
|
},
|
||||||
"fieldConfig": {
|
"fieldConfig": {
|
||||||
"defaults": {
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
"custom": {
|
"custom": {
|
||||||
|
"axisBorderShow": false,
|
||||||
|
"axisCenteredZero": false,
|
||||||
|
"axisColorMode": "text",
|
||||||
|
"axisLabel": "",
|
||||||
|
"axisPlacement": "auto",
|
||||||
|
"barAlignment": 0,
|
||||||
"drawStyle": "line",
|
"drawStyle": "line",
|
||||||
|
"fillOpacity": 10,
|
||||||
|
"gradientMode": "none",
|
||||||
|
"hideFrom": {
|
||||||
|
"legend": false,
|
||||||
|
"tooltip": false,
|
||||||
|
"viz": false
|
||||||
|
},
|
||||||
|
"insertNulls": false,
|
||||||
"lineInterpolation": "linear",
|
"lineInterpolation": "linear",
|
||||||
|
"lineWidth": 1,
|
||||||
|
"pointSize": 5,
|
||||||
|
"scaleDistribution": {
|
||||||
|
"type": "linear"
|
||||||
|
},
|
||||||
"showPoints": "auto",
|
"showPoints": "auto",
|
||||||
"fillOpacity": 10
|
"spanNulls": false,
|
||||||
|
"stacking": {
|
||||||
|
"group": "A",
|
||||||
|
"mode": "none"
|
||||||
|
},
|
||||||
|
"thresholdsStyle": {
|
||||||
|
"mode": "off"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"mappings": [],
|
"mappings": [],
|
||||||
"thresholds": {
|
"thresholds": {
|
||||||
|
|
@ -332,7 +480,8 @@
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"unit": "reqps"
|
"unit": "reqps"
|
||||||
}
|
},
|
||||||
|
"overrides": []
|
||||||
},
|
},
|
||||||
"gridPos": {
|
"gridPos": {
|
||||||
"h": 8,
|
"h": 8,
|
||||||
|
|
@ -349,6 +498,7 @@
|
||||||
"showLegend": true
|
"showLegend": true
|
||||||
},
|
},
|
||||||
"tooltip": {
|
"tooltip": {
|
||||||
|
"maxHeight": 600,
|
||||||
"mode": "multi",
|
"mode": "multi",
|
||||||
"sort": "none"
|
"sort": "none"
|
||||||
}
|
}
|
||||||
|
|
@ -374,11 +524,40 @@
|
||||||
},
|
},
|
||||||
"fieldConfig": {
|
"fieldConfig": {
|
||||||
"defaults": {
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
"custom": {
|
"custom": {
|
||||||
|
"axisBorderShow": false,
|
||||||
|
"axisCenteredZero": false,
|
||||||
|
"axisColorMode": "text",
|
||||||
|
"axisLabel": "",
|
||||||
|
"axisPlacement": "auto",
|
||||||
|
"barAlignment": 0,
|
||||||
"drawStyle": "line",
|
"drawStyle": "line",
|
||||||
|
"fillOpacity": 10,
|
||||||
|
"gradientMode": "none",
|
||||||
|
"hideFrom": {
|
||||||
|
"legend": false,
|
||||||
|
"tooltip": false,
|
||||||
|
"viz": false
|
||||||
|
},
|
||||||
|
"insertNulls": false,
|
||||||
"lineInterpolation": "linear",
|
"lineInterpolation": "linear",
|
||||||
|
"lineWidth": 1,
|
||||||
|
"pointSize": 5,
|
||||||
|
"scaleDistribution": {
|
||||||
|
"type": "linear"
|
||||||
|
},
|
||||||
"showPoints": "auto",
|
"showPoints": "auto",
|
||||||
"fillOpacity": 10
|
"spanNulls": false,
|
||||||
|
"stacking": {
|
||||||
|
"group": "A",
|
||||||
|
"mode": "none"
|
||||||
|
},
|
||||||
|
"thresholdsStyle": {
|
||||||
|
"mode": "off"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"mappings": [],
|
"mappings": [],
|
||||||
"thresholds": {
|
"thresholds": {
|
||||||
|
|
@ -391,7 +570,8 @@
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"unit": "Bps"
|
"unit": "Bps"
|
||||||
}
|
},
|
||||||
|
"overrides": []
|
||||||
},
|
},
|
||||||
"gridPos": {
|
"gridPos": {
|
||||||
"h": 8,
|
"h": 8,
|
||||||
|
|
@ -408,6 +588,7 @@
|
||||||
"showLegend": true
|
"showLegend": true
|
||||||
},
|
},
|
||||||
"tooltip": {
|
"tooltip": {
|
||||||
|
"maxHeight": 600,
|
||||||
"mode": "multi",
|
"mode": "multi",
|
||||||
"sort": "none"
|
"sort": "none"
|
||||||
}
|
}
|
||||||
|
|
@ -437,7 +618,7 @@
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"refresh": "5s",
|
"refresh": "5s",
|
||||||
"schemaVersion": 38,
|
"schemaVersion": 39,
|
||||||
"tags": [
|
"tags": [
|
||||||
"llama-stack"
|
"llama-stack"
|
||||||
],
|
],
|
||||||
|
|
@ -445,13 +626,14 @@
|
||||||
"list": []
|
"list": []
|
||||||
},
|
},
|
||||||
"time": {
|
"time": {
|
||||||
"from": "now-15m",
|
"from": "now-3h",
|
||||||
"to": "now"
|
"to": "now"
|
||||||
},
|
},
|
||||||
|
"timeRangeUpdatedDuringEditOrView": false,
|
||||||
"timepicker": {},
|
"timepicker": {},
|
||||||
"timezone": "browser",
|
"timezone": "browser",
|
||||||
"title": "Llama Stack Metrics",
|
"title": "Llama Stack Metrics",
|
||||||
"uid": "llama-stack-metrics",
|
"uid": "llama-stack-metrics",
|
||||||
"version": 0,
|
"version": 17,
|
||||||
"weekStart": ""
|
"weekStart": ""
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -191,22 +191,6 @@ class DistributionSpec(BaseModel):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TelemetryConfig(BaseModel):
|
|
||||||
"""
|
|
||||||
Configuration for telemetry.
|
|
||||||
|
|
||||||
Llama Stack uses OpenTelemetry for telemetry. Please refer to https://opentelemetry.io/docs/languages/sdk-configuration/
|
|
||||||
for env variables to configure the OpenTelemetry SDK.
|
|
||||||
|
|
||||||
Example:
|
|
||||||
```bash
|
|
||||||
OTEL_SERVICE_NAME=llama-stack OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 uv run llama stack run starter
|
|
||||||
```
|
|
||||||
"""
|
|
||||||
|
|
||||||
enabled: bool = Field(default=False, description="enable or disable telemetry")
|
|
||||||
|
|
||||||
|
|
||||||
class OAuth2JWKSConfig(BaseModel):
|
class OAuth2JWKSConfig(BaseModel):
|
||||||
# The JWKS URI for collecting public keys
|
# The JWKS URI for collecting public keys
|
||||||
uri: str
|
uri: str
|
||||||
|
|
@ -527,8 +511,6 @@ can be instantiated multiple times (with different configs) if necessary.
|
||||||
|
|
||||||
logging: LoggingConfig | None = Field(default=None, description="Configuration for Llama Stack Logging")
|
logging: LoggingConfig | None = Field(default=None, description="Configuration for Llama Stack Logging")
|
||||||
|
|
||||||
telemetry: TelemetryConfig = Field(default_factory=TelemetryConfig, description="Configuration for telemetry")
|
|
||||||
|
|
||||||
server: ServerConfig = Field(
|
server: ServerConfig = Field(
|
||||||
default_factory=ServerConfig,
|
default_factory=ServerConfig,
|
||||||
description="Configuration for the HTTP(S) server",
|
description="Configuration for the HTTP(S) server",
|
||||||
|
|
|
||||||
|
|
@ -46,8 +46,6 @@ from llama_stack.core.request_headers import PROVIDER_DATA_VAR, request_provider
|
||||||
from llama_stack.core.resolver import ProviderRegistry
|
from llama_stack.core.resolver import ProviderRegistry
|
||||||
from llama_stack.core.server.routes import RouteImpls, find_matching_route, initialize_route_impls
|
from llama_stack.core.server.routes import RouteImpls, find_matching_route, initialize_route_impls
|
||||||
from llama_stack.core.stack import Stack, get_stack_run_config_from_distro, replace_env_vars
|
from llama_stack.core.stack import Stack, get_stack_run_config_from_distro, replace_env_vars
|
||||||
from llama_stack.core.telemetry import Telemetry
|
|
||||||
from llama_stack.core.telemetry.tracing import CURRENT_TRACE_CONTEXT, end_trace, setup_logger, start_trace
|
|
||||||
from llama_stack.core.utils.config import redact_sensitive_fields
|
from llama_stack.core.utils.config import redact_sensitive_fields
|
||||||
from llama_stack.core.utils.context import preserve_contexts_async_generator
|
from llama_stack.core.utils.context import preserve_contexts_async_generator
|
||||||
from llama_stack.core.utils.exec import in_notebook
|
from llama_stack.core.utils.exec import in_notebook
|
||||||
|
|
@ -204,13 +202,6 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
# Initialize logging from environment variables first
|
# Initialize logging from environment variables first
|
||||||
setup_logging()
|
setup_logging()
|
||||||
|
|
||||||
# when using the library client, we should not log to console since many
|
|
||||||
# of our logs are intended for server-side usage
|
|
||||||
if sinks_from_env := os.environ.get("TELEMETRY_SINKS", None):
|
|
||||||
current_sinks = sinks_from_env.strip().lower().split(",")
|
|
||||||
os.environ["TELEMETRY_SINKS"] = ",".join(sink for sink in current_sinks if sink != "console")
|
|
||||||
|
|
||||||
if in_notebook():
|
if in_notebook():
|
||||||
import nest_asyncio
|
import nest_asyncio
|
||||||
|
|
||||||
|
|
@ -295,8 +286,6 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
|
||||||
raise _e
|
raise _e
|
||||||
|
|
||||||
assert self.impls is not None
|
assert self.impls is not None
|
||||||
if self.config.telemetry.enabled:
|
|
||||||
setup_logger(Telemetry())
|
|
||||||
|
|
||||||
if not os.environ.get("PYTEST_CURRENT_TEST"):
|
if not os.environ.get("PYTEST_CURRENT_TEST"):
|
||||||
console = Console()
|
console = Console()
|
||||||
|
|
@ -392,13 +381,7 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
|
||||||
body, field_names = self._handle_file_uploads(options, body)
|
body, field_names = self._handle_file_uploads(options, body)
|
||||||
|
|
||||||
body = self._convert_body(matched_func, body, exclude_params=set(field_names))
|
body = self._convert_body(matched_func, body, exclude_params=set(field_names))
|
||||||
|
result = await matched_func(**body)
|
||||||
trace_path = webmethod.descriptive_name or route_path
|
|
||||||
await start_trace(trace_path, {"__location__": "library_client"})
|
|
||||||
try:
|
|
||||||
result = await matched_func(**body)
|
|
||||||
finally:
|
|
||||||
await end_trace()
|
|
||||||
|
|
||||||
# Handle FastAPI Response objects (e.g., from file content retrieval)
|
# Handle FastAPI Response objects (e.g., from file content retrieval)
|
||||||
if isinstance(result, FastAPIResponse):
|
if isinstance(result, FastAPIResponse):
|
||||||
|
|
@ -457,19 +440,13 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
|
||||||
# Prepare body for the function call (handles both Pydantic and traditional params)
|
# Prepare body for the function call (handles both Pydantic and traditional params)
|
||||||
body = self._convert_body(func, body)
|
body = self._convert_body(func, body)
|
||||||
|
|
||||||
trace_path = webmethod.descriptive_name or route_path
|
|
||||||
await start_trace(trace_path, {"__location__": "library_client"})
|
|
||||||
|
|
||||||
async def gen():
|
async def gen():
|
||||||
try:
|
async for chunk in await func(**body):
|
||||||
async for chunk in await func(**body):
|
data = json.dumps(convert_pydantic_to_json_value(chunk))
|
||||||
data = json.dumps(convert_pydantic_to_json_value(chunk))
|
sse_event = f"data: {data}\n\n"
|
||||||
sse_event = f"data: {data}\n\n"
|
yield sse_event.encode("utf-8")
|
||||||
yield sse_event.encode("utf-8")
|
|
||||||
finally:
|
|
||||||
await end_trace()
|
|
||||||
|
|
||||||
wrapped_gen = preserve_contexts_async_generator(gen(), [CURRENT_TRACE_CONTEXT, PROVIDER_DATA_VAR])
|
wrapped_gen = preserve_contexts_async_generator(gen(), [PROVIDER_DATA_VAR])
|
||||||
|
|
||||||
mock_response = httpx.Response(
|
mock_response = httpx.Response(
|
||||||
status_code=httpx.codes.OK,
|
status_code=httpx.codes.OK,
|
||||||
|
|
|
||||||
|
|
@ -392,8 +392,6 @@ async def instantiate_provider(
|
||||||
args = [config, deps]
|
args = [config, deps]
|
||||||
if "policy" in inspect.signature(getattr(module, method)).parameters:
|
if "policy" in inspect.signature(getattr(module, method)).parameters:
|
||||||
args.append(policy)
|
args.append(policy)
|
||||||
if "telemetry_enabled" in inspect.signature(getattr(module, method)).parameters and run_config.telemetry:
|
|
||||||
args.append(run_config.telemetry.enabled)
|
|
||||||
|
|
||||||
fn = getattr(module, method)
|
fn = getattr(module, method)
|
||||||
impl = await fn(*args)
|
impl = await fn(*args)
|
||||||
|
|
@ -401,18 +399,6 @@ async def instantiate_provider(
|
||||||
impl.__provider_spec__ = provider_spec
|
impl.__provider_spec__ = provider_spec
|
||||||
impl.__provider_config__ = config
|
impl.__provider_config__ = config
|
||||||
|
|
||||||
# Apply tracing if telemetry is enabled and any base class has __marked_for_tracing__ marker
|
|
||||||
if run_config.telemetry.enabled:
|
|
||||||
traced_classes = [
|
|
||||||
base for base in reversed(impl.__class__.__mro__) if getattr(base, "__marked_for_tracing__", False)
|
|
||||||
]
|
|
||||||
|
|
||||||
if traced_classes:
|
|
||||||
from llama_stack.core.telemetry.trace_protocol import trace_protocol
|
|
||||||
|
|
||||||
for cls in traced_classes:
|
|
||||||
trace_protocol(cls)
|
|
||||||
|
|
||||||
protocols = api_protocol_map_for_compliance_check(run_config)
|
protocols = api_protocol_map_for_compliance_check(run_config)
|
||||||
additional_protocols = additional_protocols_map()
|
additional_protocols = additional_protocols_map()
|
||||||
# TODO: check compliance for special tool groups
|
# TODO: check compliance for special tool groups
|
||||||
|
|
|
||||||
|
|
@ -85,8 +85,6 @@ async def get_auto_router_impl(
|
||||||
)
|
)
|
||||||
await inference_store.initialize()
|
await inference_store.initialize()
|
||||||
api_to_dep_impl["store"] = inference_store
|
api_to_dep_impl["store"] = inference_store
|
||||||
api_to_dep_impl["telemetry_enabled"] = run_config.telemetry.enabled
|
|
||||||
|
|
||||||
elif api == Api.vector_io:
|
elif api == Api.vector_io:
|
||||||
api_to_dep_impl["vector_stores_config"] = run_config.vector_stores
|
api_to_dep_impl["vector_stores_config"] = run_config.vector_stores
|
||||||
elif api == Api.safety:
|
elif api == Api.safety:
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,6 @@
|
||||||
import asyncio
|
import asyncio
|
||||||
import time
|
import time
|
||||||
from collections.abc import AsyncIterator
|
from collections.abc import AsyncIterator
|
||||||
from datetime import UTC, datetime
|
|
||||||
from typing import Annotated, Any
|
from typing import Annotated, Any
|
||||||
|
|
||||||
from fastapi import Body
|
from fastapi import Body
|
||||||
|
|
@ -15,11 +14,7 @@ from openai.types.chat import ChatCompletionToolChoiceOptionParam as OpenAIChatC
|
||||||
from openai.types.chat import ChatCompletionToolParam as OpenAIChatCompletionToolParam
|
from openai.types.chat import ChatCompletionToolParam as OpenAIChatCompletionToolParam
|
||||||
from pydantic import TypeAdapter
|
from pydantic import TypeAdapter
|
||||||
|
|
||||||
from llama_stack.core.telemetry.telemetry import MetricEvent
|
|
||||||
from llama_stack.core.telemetry.tracing import enqueue_event, get_current_span
|
|
||||||
from llama_stack.log import get_logger
|
from llama_stack.log import get_logger
|
||||||
from llama_stack.models.llama.llama3.chat_format import ChatFormat
|
|
||||||
from llama_stack.models.llama.llama3.tokenizer import Tokenizer
|
|
||||||
from llama_stack.providers.utils.inference.inference_store import InferenceStore
|
from llama_stack.providers.utils.inference.inference_store import InferenceStore
|
||||||
from llama_stack_api import (
|
from llama_stack_api import (
|
||||||
HealthResponse,
|
HealthResponse,
|
||||||
|
|
@ -60,15 +55,10 @@ class InferenceRouter(Inference):
|
||||||
self,
|
self,
|
||||||
routing_table: RoutingTable,
|
routing_table: RoutingTable,
|
||||||
store: InferenceStore | None = None,
|
store: InferenceStore | None = None,
|
||||||
telemetry_enabled: bool = False,
|
|
||||||
) -> None:
|
) -> None:
|
||||||
logger.debug("Initializing InferenceRouter")
|
logger.debug("Initializing InferenceRouter")
|
||||||
self.routing_table = routing_table
|
self.routing_table = routing_table
|
||||||
self.telemetry_enabled = telemetry_enabled
|
|
||||||
self.store = store
|
self.store = store
|
||||||
if self.telemetry_enabled:
|
|
||||||
self.tokenizer = Tokenizer.get_instance()
|
|
||||||
self.formatter = ChatFormat(self.tokenizer)
|
|
||||||
|
|
||||||
async def initialize(self) -> None:
|
async def initialize(self) -> None:
|
||||||
logger.debug("InferenceRouter.initialize")
|
logger.debug("InferenceRouter.initialize")
|
||||||
|
|
@ -94,54 +84,6 @@ class InferenceRouter(Inference):
|
||||||
)
|
)
|
||||||
await self.routing_table.register_model(model_id, provider_model_id, provider_id, metadata, model_type)
|
await self.routing_table.register_model(model_id, provider_model_id, provider_id, metadata, model_type)
|
||||||
|
|
||||||
def _construct_metrics(
|
|
||||||
self,
|
|
||||||
prompt_tokens: int,
|
|
||||||
completion_tokens: int,
|
|
||||||
total_tokens: int,
|
|
||||||
fully_qualified_model_id: str,
|
|
||||||
provider_id: str,
|
|
||||||
) -> list[MetricEvent]:
|
|
||||||
"""Constructs a list of MetricEvent objects containing token usage metrics.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
prompt_tokens: Number of tokens in the prompt
|
|
||||||
completion_tokens: Number of tokens in the completion
|
|
||||||
total_tokens: Total number of tokens used
|
|
||||||
fully_qualified_model_id:
|
|
||||||
provider_id: The provider identifier
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
List of MetricEvent objects with token usage metrics
|
|
||||||
"""
|
|
||||||
span = get_current_span()
|
|
||||||
if span is None:
|
|
||||||
logger.warning("No span found for token usage metrics")
|
|
||||||
return []
|
|
||||||
|
|
||||||
metrics = [
|
|
||||||
("prompt_tokens", prompt_tokens),
|
|
||||||
("completion_tokens", completion_tokens),
|
|
||||||
("total_tokens", total_tokens),
|
|
||||||
]
|
|
||||||
metric_events = []
|
|
||||||
for metric_name, value in metrics:
|
|
||||||
metric_events.append(
|
|
||||||
MetricEvent(
|
|
||||||
trace_id=span.trace_id,
|
|
||||||
span_id=span.span_id,
|
|
||||||
metric=metric_name,
|
|
||||||
value=value,
|
|
||||||
timestamp=datetime.now(UTC),
|
|
||||||
unit="tokens",
|
|
||||||
attributes={
|
|
||||||
"model_id": fully_qualified_model_id,
|
|
||||||
"provider_id": provider_id,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
)
|
|
||||||
return metric_events
|
|
||||||
|
|
||||||
async def _get_model_provider(self, model_id: str, expected_model_type: str) -> tuple[Inference, str]:
|
async def _get_model_provider(self, model_id: str, expected_model_type: str) -> tuple[Inference, str]:
|
||||||
model = await self.routing_table.get_object_by_identifier("model", model_id)
|
model = await self.routing_table.get_object_by_identifier("model", model_id)
|
||||||
if model:
|
if model:
|
||||||
|
|
@ -186,26 +128,9 @@ class InferenceRouter(Inference):
|
||||||
|
|
||||||
if params.stream:
|
if params.stream:
|
||||||
return await provider.openai_completion(params)
|
return await provider.openai_completion(params)
|
||||||
# TODO: Metrics do NOT work with openai_completion stream=True due to the fact
|
|
||||||
# that we do not return an AsyncIterator, our tests expect a stream of chunks we cannot intercept currently.
|
|
||||||
|
|
||||||
response = await provider.openai_completion(params)
|
response = await provider.openai_completion(params)
|
||||||
response.model = request_model_id
|
response.model = request_model_id
|
||||||
if self.telemetry_enabled and response.usage is not None:
|
|
||||||
metrics = self._construct_metrics(
|
|
||||||
prompt_tokens=response.usage.prompt_tokens,
|
|
||||||
completion_tokens=response.usage.completion_tokens,
|
|
||||||
total_tokens=response.usage.total_tokens,
|
|
||||||
fully_qualified_model_id=request_model_id,
|
|
||||||
provider_id=provider.__provider_id__,
|
|
||||||
)
|
|
||||||
for metric in metrics:
|
|
||||||
enqueue_event(metric)
|
|
||||||
|
|
||||||
# these metrics will show up in the client response.
|
|
||||||
response.metrics = (
|
|
||||||
metrics if not hasattr(response, "metrics") or response.metrics is None else response.metrics + metrics
|
|
||||||
)
|
|
||||||
return response
|
return response
|
||||||
|
|
||||||
async def openai_chat_completion(
|
async def openai_chat_completion(
|
||||||
|
|
@ -254,20 +179,6 @@ class InferenceRouter(Inference):
|
||||||
if self.store:
|
if self.store:
|
||||||
asyncio.create_task(self.store.store_chat_completion(response, params.messages))
|
asyncio.create_task(self.store.store_chat_completion(response, params.messages))
|
||||||
|
|
||||||
if self.telemetry_enabled and response.usage is not None:
|
|
||||||
metrics = self._construct_metrics(
|
|
||||||
prompt_tokens=response.usage.prompt_tokens,
|
|
||||||
completion_tokens=response.usage.completion_tokens,
|
|
||||||
total_tokens=response.usage.total_tokens,
|
|
||||||
fully_qualified_model_id=request_model_id,
|
|
||||||
provider_id=provider.__provider_id__,
|
|
||||||
)
|
|
||||||
for metric in metrics:
|
|
||||||
enqueue_event(metric)
|
|
||||||
# these metrics will show up in the client response.
|
|
||||||
response.metrics = (
|
|
||||||
metrics if not hasattr(response, "metrics") or response.metrics is None else response.metrics + metrics
|
|
||||||
)
|
|
||||||
return response
|
return response
|
||||||
|
|
||||||
async def openai_embeddings(
|
async def openai_embeddings(
|
||||||
|
|
@ -411,18 +322,6 @@ class InferenceRouter(Inference):
|
||||||
for choice_data in choices_data.values():
|
for choice_data in choices_data.values():
|
||||||
completion_text += "".join(choice_data["content_parts"])
|
completion_text += "".join(choice_data["content_parts"])
|
||||||
|
|
||||||
# Add metrics to the chunk
|
|
||||||
if self.telemetry_enabled and hasattr(chunk, "usage") and chunk.usage:
|
|
||||||
metrics = self._construct_metrics(
|
|
||||||
prompt_tokens=chunk.usage.prompt_tokens,
|
|
||||||
completion_tokens=chunk.usage.completion_tokens,
|
|
||||||
total_tokens=chunk.usage.total_tokens,
|
|
||||||
fully_qualified_model_id=fully_qualified_model_id,
|
|
||||||
provider_id=provider_id,
|
|
||||||
)
|
|
||||||
for metric in metrics:
|
|
||||||
enqueue_event(metric)
|
|
||||||
|
|
||||||
yield chunk
|
yield chunk
|
||||||
finally:
|
finally:
|
||||||
# Store the final assembled completion
|
# Store the final assembled completion
|
||||||
|
|
|
||||||
|
|
@ -6,11 +6,15 @@
|
||||||
|
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
from opentelemetry import trace
|
||||||
|
|
||||||
from llama_stack.core.datatypes import SafetyConfig
|
from llama_stack.core.datatypes import SafetyConfig
|
||||||
from llama_stack.log import get_logger
|
from llama_stack.log import get_logger
|
||||||
|
from llama_stack.telemetry.helpers import safety_request_span_attributes, safety_span_name
|
||||||
from llama_stack_api import ModerationObject, OpenAIMessageParam, RoutingTable, RunShieldResponse, Safety, Shield
|
from llama_stack_api import ModerationObject, OpenAIMessageParam, RoutingTable, RunShieldResponse, Safety, Shield
|
||||||
|
|
||||||
logger = get_logger(name=__name__, category="core::routers")
|
logger = get_logger(name=__name__, category="core::routers")
|
||||||
|
tracer = trace.get_tracer(__name__)
|
||||||
|
|
||||||
|
|
||||||
class SafetyRouter(Safety):
|
class SafetyRouter(Safety):
|
||||||
|
|
@ -51,13 +55,17 @@ class SafetyRouter(Safety):
|
||||||
messages: list[OpenAIMessageParam],
|
messages: list[OpenAIMessageParam],
|
||||||
params: dict[str, Any] = None,
|
params: dict[str, Any] = None,
|
||||||
) -> RunShieldResponse:
|
) -> RunShieldResponse:
|
||||||
logger.debug(f"SafetyRouter.run_shield: {shield_id}")
|
with tracer.start_as_current_span(name=safety_span_name(shield_id)):
|
||||||
provider = await self.routing_table.get_provider_impl(shield_id)
|
logger.debug(f"SafetyRouter.run_shield: {shield_id}")
|
||||||
return await provider.run_shield(
|
provider = await self.routing_table.get_provider_impl(shield_id)
|
||||||
shield_id=shield_id,
|
response = await provider.run_shield(
|
||||||
messages=messages,
|
shield_id=shield_id,
|
||||||
params=params,
|
messages=messages,
|
||||||
)
|
params=params,
|
||||||
|
)
|
||||||
|
|
||||||
|
safety_request_span_attributes(shield_id, messages, response)
|
||||||
|
return response
|
||||||
|
|
||||||
async def run_moderation(self, input: str | list[str], model: str | None = None) -> ModerationObject:
|
async def run_moderation(self, input: str | list[str], model: str | None = None) -> ModerationObject:
|
||||||
list_shields_response = await self.routing_table.list_shields()
|
list_shields_response = await self.routing_table.list_shields()
|
||||||
|
|
|
||||||
|
|
@ -51,8 +51,6 @@ from llama_stack.core.stack import (
|
||||||
cast_image_name_to_string,
|
cast_image_name_to_string,
|
||||||
replace_env_vars,
|
replace_env_vars,
|
||||||
)
|
)
|
||||||
from llama_stack.core.telemetry import Telemetry
|
|
||||||
from llama_stack.core.telemetry.tracing import CURRENT_TRACE_CONTEXT, setup_logger
|
|
||||||
from llama_stack.core.utils.config import redact_sensitive_fields
|
from llama_stack.core.utils.config import redact_sensitive_fields
|
||||||
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
|
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
|
||||||
from llama_stack.core.utils.context import preserve_contexts_async_generator
|
from llama_stack.core.utils.context import preserve_contexts_async_generator
|
||||||
|
|
@ -61,7 +59,6 @@ from llama_stack_api import Api, ConflictError, PaginatedResponse, ResourceNotFo
|
||||||
|
|
||||||
from .auth import AuthenticationMiddleware
|
from .auth import AuthenticationMiddleware
|
||||||
from .quota import QuotaMiddleware
|
from .quota import QuotaMiddleware
|
||||||
from .tracing import TracingMiddleware
|
|
||||||
|
|
||||||
REPO_ROOT = Path(__file__).parent.parent.parent.parent
|
REPO_ROOT = Path(__file__).parent.parent.parent.parent
|
||||||
|
|
||||||
|
|
@ -264,7 +261,7 @@ def create_dynamic_typed_route(func: Any, method: str, route: str) -> Callable:
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if is_streaming:
|
if is_streaming:
|
||||||
context_vars = [CURRENT_TRACE_CONTEXT, PROVIDER_DATA_VAR]
|
context_vars = [PROVIDER_DATA_VAR]
|
||||||
if test_context_var is not None:
|
if test_context_var is not None:
|
||||||
context_vars.append(test_context_var)
|
context_vars.append(test_context_var)
|
||||||
gen = preserve_contexts_async_generator(sse_generator(func(**kwargs)), context_vars)
|
gen = preserve_contexts_async_generator(sse_generator(func(**kwargs)), context_vars)
|
||||||
|
|
@ -442,9 +439,6 @@ def create_app() -> StackApp:
|
||||||
if cors_config:
|
if cors_config:
|
||||||
app.add_middleware(CORSMiddleware, **cors_config.model_dump())
|
app.add_middleware(CORSMiddleware, **cors_config.model_dump())
|
||||||
|
|
||||||
if config.telemetry.enabled:
|
|
||||||
setup_logger(Telemetry())
|
|
||||||
|
|
||||||
# Load external APIs if configured
|
# Load external APIs if configured
|
||||||
external_apis = load_external_apis(config)
|
external_apis = load_external_apis(config)
|
||||||
all_routes = get_all_api_routes(external_apis)
|
all_routes = get_all_api_routes(external_apis)
|
||||||
|
|
@ -516,9 +510,6 @@ def create_app() -> StackApp:
|
||||||
# Generic Exception handler should be last
|
# Generic Exception handler should be last
|
||||||
app.exception_handler(Exception)(global_exception_handler)
|
app.exception_handler(Exception)(global_exception_handler)
|
||||||
|
|
||||||
if config.telemetry.enabled:
|
|
||||||
app.add_middleware(TracingMiddleware, impls=impls, external_apis=external_apis)
|
|
||||||
|
|
||||||
return app
|
return app
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,80 +0,0 @@
|
||||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
||||||
# All rights reserved.
|
|
||||||
#
|
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
|
||||||
# the root directory of this source tree.
|
|
||||||
from aiohttp import hdrs
|
|
||||||
|
|
||||||
from llama_stack.core.external import ExternalApiSpec
|
|
||||||
from llama_stack.core.server.routes import find_matching_route, initialize_route_impls
|
|
||||||
from llama_stack.core.telemetry.tracing import end_trace, start_trace
|
|
||||||
from llama_stack.log import get_logger
|
|
||||||
|
|
||||||
logger = get_logger(name=__name__, category="core::server")
|
|
||||||
|
|
||||||
|
|
||||||
class TracingMiddleware:
|
|
||||||
def __init__(self, app, impls, external_apis: dict[str, ExternalApiSpec]):
|
|
||||||
self.app = app
|
|
||||||
self.impls = impls
|
|
||||||
self.external_apis = external_apis
|
|
||||||
# FastAPI built-in paths that should bypass custom routing
|
|
||||||
self.fastapi_paths = ("/docs", "/redoc", "/openapi.json", "/favicon.ico", "/static")
|
|
||||||
|
|
||||||
async def __call__(self, scope, receive, send):
|
|
||||||
if scope.get("type") == "lifespan":
|
|
||||||
return await self.app(scope, receive, send)
|
|
||||||
|
|
||||||
path = scope.get("path", "")
|
|
||||||
|
|
||||||
# Check if the path is a FastAPI built-in path
|
|
||||||
if path.startswith(self.fastapi_paths):
|
|
||||||
# Pass through to FastAPI's built-in handlers
|
|
||||||
logger.debug(f"Bypassing custom routing for FastAPI built-in path: {path}")
|
|
||||||
return await self.app(scope, receive, send)
|
|
||||||
|
|
||||||
if not hasattr(self, "route_impls"):
|
|
||||||
self.route_impls = initialize_route_impls(self.impls, self.external_apis)
|
|
||||||
|
|
||||||
try:
|
|
||||||
_, _, route_path, webmethod = find_matching_route(
|
|
||||||
scope.get("method", hdrs.METH_GET), path, self.route_impls
|
|
||||||
)
|
|
||||||
except ValueError:
|
|
||||||
# If no matching endpoint is found, pass through to FastAPI
|
|
||||||
logger.debug(f"No matching route found for path: {path}, falling back to FastAPI")
|
|
||||||
return await self.app(scope, receive, send)
|
|
||||||
|
|
||||||
# Log deprecation warning if route is deprecated
|
|
||||||
if getattr(webmethod, "deprecated", False):
|
|
||||||
logger.warning(
|
|
||||||
f"DEPRECATED ROUTE USED: {scope.get('method', 'GET')} {path} - "
|
|
||||||
f"This route is deprecated and may be removed in a future version. "
|
|
||||||
f"Please check the docs for the supported version."
|
|
||||||
)
|
|
||||||
|
|
||||||
trace_attributes = {"__location__": "server", "raw_path": path}
|
|
||||||
|
|
||||||
# Extract W3C trace context headers and store as trace attributes
|
|
||||||
headers = dict(scope.get("headers", []))
|
|
||||||
traceparent = headers.get(b"traceparent", b"").decode()
|
|
||||||
if traceparent:
|
|
||||||
trace_attributes["traceparent"] = traceparent
|
|
||||||
tracestate = headers.get(b"tracestate", b"").decode()
|
|
||||||
if tracestate:
|
|
||||||
trace_attributes["tracestate"] = tracestate
|
|
||||||
|
|
||||||
trace_path = webmethod.descriptive_name or route_path
|
|
||||||
trace_context = await start_trace(trace_path, trace_attributes)
|
|
||||||
|
|
||||||
async def send_with_trace_id(message):
|
|
||||||
if message["type"] == "http.response.start":
|
|
||||||
headers = message.get("headers", [])
|
|
||||||
headers.append([b"x-trace-id", str(trace_context.trace_id).encode()])
|
|
||||||
message["headers"] = headers
|
|
||||||
await send(message)
|
|
||||||
|
|
||||||
try:
|
|
||||||
return await self.app(scope, receive, send_with_trace_id)
|
|
||||||
finally:
|
|
||||||
await end_trace()
|
|
||||||
|
|
@ -1,32 +0,0 @@
|
||||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
||||||
# All rights reserved.
|
|
||||||
#
|
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
|
||||||
# the root directory of this source tree.
|
|
||||||
|
|
||||||
from .telemetry import Telemetry
|
|
||||||
from .trace_protocol import serialize_value, trace_protocol
|
|
||||||
from .tracing import (
|
|
||||||
CURRENT_TRACE_CONTEXT,
|
|
||||||
ROOT_SPAN_MARKERS,
|
|
||||||
end_trace,
|
|
||||||
enqueue_event,
|
|
||||||
get_current_span,
|
|
||||||
setup_logger,
|
|
||||||
span,
|
|
||||||
start_trace,
|
|
||||||
)
|
|
||||||
|
|
||||||
__all__ = [
|
|
||||||
"Telemetry",
|
|
||||||
"trace_protocol",
|
|
||||||
"serialize_value",
|
|
||||||
"CURRENT_TRACE_CONTEXT",
|
|
||||||
"ROOT_SPAN_MARKERS",
|
|
||||||
"end_trace",
|
|
||||||
"enqueue_event",
|
|
||||||
"get_current_span",
|
|
||||||
"setup_logger",
|
|
||||||
"span",
|
|
||||||
"start_trace",
|
|
||||||
]
|
|
||||||
|
|
@ -1,629 +0,0 @@
|
||||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
||||||
# All rights reserved.
|
|
||||||
#
|
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
|
||||||
# the root directory of this source tree.
|
|
||||||
|
|
||||||
import os
|
|
||||||
import threading
|
|
||||||
from collections.abc import Mapping, Sequence
|
|
||||||
from datetime import datetime
|
|
||||||
from enum import Enum
|
|
||||||
from typing import (
|
|
||||||
Annotated,
|
|
||||||
Any,
|
|
||||||
Literal,
|
|
||||||
cast,
|
|
||||||
)
|
|
||||||
|
|
||||||
from opentelemetry import metrics, trace
|
|
||||||
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
|
|
||||||
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
|
|
||||||
from opentelemetry.sdk.metrics import MeterProvider
|
|
||||||
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
|
|
||||||
from opentelemetry.sdk.trace import TracerProvider
|
|
||||||
from opentelemetry.sdk.trace.export import BatchSpanProcessor
|
|
||||||
from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator
|
|
||||||
from pydantic import BaseModel, Field
|
|
||||||
|
|
||||||
from llama_stack.log import get_logger
|
|
||||||
from llama_stack.models.llama.datatypes import Primitive
|
|
||||||
from llama_stack_api import json_schema_type, register_schema
|
|
||||||
|
|
||||||
ROOT_SPAN_MARKERS = ["__root__", "__root_span__"]
|
|
||||||
|
|
||||||
# Type alias for OpenTelemetry attribute values (excludes None)
|
|
||||||
AttributeValue = str | bool | int | float | Sequence[str] | Sequence[bool] | Sequence[int] | Sequence[float]
|
|
||||||
Attributes = Mapping[str, AttributeValue]
|
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
|
||||||
class SpanStatus(Enum):
|
|
||||||
"""The status of a span indicating whether it completed successfully or with an error.
|
|
||||||
:cvar OK: Span completed successfully without errors
|
|
||||||
:cvar ERROR: Span completed with an error or failure
|
|
||||||
"""
|
|
||||||
|
|
||||||
OK = "ok"
|
|
||||||
ERROR = "error"
|
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
|
||||||
class Span(BaseModel):
|
|
||||||
"""A span representing a single operation within a trace.
|
|
||||||
:param span_id: Unique identifier for the span
|
|
||||||
:param trace_id: Unique identifier for the trace this span belongs to
|
|
||||||
:param parent_span_id: (Optional) Unique identifier for the parent span, if this is a child span
|
|
||||||
:param name: Human-readable name describing the operation this span represents
|
|
||||||
:param start_time: Timestamp when the operation began
|
|
||||||
:param end_time: (Optional) Timestamp when the operation finished, if completed
|
|
||||||
:param attributes: (Optional) Key-value pairs containing additional metadata about the span
|
|
||||||
"""
|
|
||||||
|
|
||||||
span_id: str
|
|
||||||
trace_id: str
|
|
||||||
parent_span_id: str | None = None
|
|
||||||
name: str
|
|
||||||
start_time: datetime
|
|
||||||
end_time: datetime | None = None
|
|
||||||
attributes: dict[str, Any] | None = Field(default_factory=lambda: {})
|
|
||||||
|
|
||||||
def set_attribute(self, key: str, value: Any):
|
|
||||||
if self.attributes is None:
|
|
||||||
self.attributes = {}
|
|
||||||
self.attributes[key] = value
|
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
|
||||||
class Trace(BaseModel):
|
|
||||||
"""A trace representing the complete execution path of a request across multiple operations.
|
|
||||||
:param trace_id: Unique identifier for the trace
|
|
||||||
:param root_span_id: Unique identifier for the root span that started this trace
|
|
||||||
:param start_time: Timestamp when the trace began
|
|
||||||
:param end_time: (Optional) Timestamp when the trace finished, if completed
|
|
||||||
"""
|
|
||||||
|
|
||||||
trace_id: str
|
|
||||||
root_span_id: str
|
|
||||||
start_time: datetime
|
|
||||||
end_time: datetime | None = None
|
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
|
||||||
class EventType(Enum):
|
|
||||||
"""The type of telemetry event being logged.
|
|
||||||
:cvar UNSTRUCTURED_LOG: A simple log message with severity level
|
|
||||||
:cvar STRUCTURED_LOG: A structured log event with typed payload data
|
|
||||||
:cvar METRIC: A metric measurement with value and unit
|
|
||||||
"""
|
|
||||||
|
|
||||||
UNSTRUCTURED_LOG = "unstructured_log"
|
|
||||||
STRUCTURED_LOG = "structured_log"
|
|
||||||
METRIC = "metric"
|
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
|
||||||
class LogSeverity(Enum):
|
|
||||||
"""The severity level of a log message.
|
|
||||||
:cvar VERBOSE: Detailed diagnostic information for troubleshooting
|
|
||||||
:cvar DEBUG: Debug information useful during development
|
|
||||||
:cvar INFO: General informational messages about normal operation
|
|
||||||
:cvar WARN: Warning messages about potentially problematic situations
|
|
||||||
:cvar ERROR: Error messages indicating failures that don't stop execution
|
|
||||||
:cvar CRITICAL: Critical error messages indicating severe failures
|
|
||||||
"""
|
|
||||||
|
|
||||||
VERBOSE = "verbose"
|
|
||||||
DEBUG = "debug"
|
|
||||||
INFO = "info"
|
|
||||||
WARN = "warn"
|
|
||||||
ERROR = "error"
|
|
||||||
CRITICAL = "critical"
|
|
||||||
|
|
||||||
|
|
||||||
class EventCommon(BaseModel):
|
|
||||||
"""Common fields shared by all telemetry events.
|
|
||||||
:param trace_id: Unique identifier for the trace this event belongs to
|
|
||||||
:param span_id: Unique identifier for the span this event belongs to
|
|
||||||
:param timestamp: Timestamp when the event occurred
|
|
||||||
:param attributes: (Optional) Key-value pairs containing additional metadata about the event
|
|
||||||
"""
|
|
||||||
|
|
||||||
trace_id: str
|
|
||||||
span_id: str
|
|
||||||
timestamp: datetime
|
|
||||||
attributes: dict[str, Primitive] | None = Field(default_factory=lambda: {})
|
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
|
||||||
class UnstructuredLogEvent(EventCommon):
|
|
||||||
"""An unstructured log event containing a simple text message.
|
|
||||||
:param type: Event type identifier set to UNSTRUCTURED_LOG
|
|
||||||
:param message: The log message text
|
|
||||||
:param severity: The severity level of the log message
|
|
||||||
"""
|
|
||||||
|
|
||||||
type: Literal[EventType.UNSTRUCTURED_LOG] = EventType.UNSTRUCTURED_LOG
|
|
||||||
message: str
|
|
||||||
severity: LogSeverity
|
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
|
||||||
class MetricEvent(EventCommon):
|
|
||||||
"""A metric event containing a measured value.
|
|
||||||
:param type: Event type identifier set to METRIC
|
|
||||||
:param metric: The name of the metric being measured
|
|
||||||
:param value: The numeric value of the metric measurement
|
|
||||||
:param unit: The unit of measurement for the metric value
|
|
||||||
"""
|
|
||||||
|
|
||||||
type: Literal[EventType.METRIC] = EventType.METRIC
|
|
||||||
metric: str # this would be an enum
|
|
||||||
value: int | float
|
|
||||||
unit: str
|
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
|
||||||
class StructuredLogType(Enum):
|
|
||||||
"""The type of structured log event payload.
|
|
||||||
:cvar SPAN_START: Event indicating the start of a new span
|
|
||||||
:cvar SPAN_END: Event indicating the completion of a span
|
|
||||||
"""
|
|
||||||
|
|
||||||
SPAN_START = "span_start"
|
|
||||||
SPAN_END = "span_end"
|
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
|
||||||
class SpanStartPayload(BaseModel):
|
|
||||||
"""Payload for a span start event.
|
|
||||||
:param type: Payload type identifier set to SPAN_START
|
|
||||||
:param name: Human-readable name describing the operation this span represents
|
|
||||||
:param parent_span_id: (Optional) Unique identifier for the parent span, if this is a child span
|
|
||||||
"""
|
|
||||||
|
|
||||||
type: Literal[StructuredLogType.SPAN_START] = StructuredLogType.SPAN_START
|
|
||||||
name: str
|
|
||||||
parent_span_id: str | None = None
|
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
|
||||||
class SpanEndPayload(BaseModel):
|
|
||||||
"""Payload for a span end event.
|
|
||||||
:param type: Payload type identifier set to SPAN_END
|
|
||||||
:param status: The final status of the span indicating success or failure
|
|
||||||
"""
|
|
||||||
|
|
||||||
type: Literal[StructuredLogType.SPAN_END] = StructuredLogType.SPAN_END
|
|
||||||
status: SpanStatus
|
|
||||||
|
|
||||||
|
|
||||||
StructuredLogPayload = Annotated[
|
|
||||||
SpanStartPayload | SpanEndPayload,
|
|
||||||
Field(discriminator="type"),
|
|
||||||
]
|
|
||||||
register_schema(StructuredLogPayload, name="StructuredLogPayload")
|
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
|
||||||
class StructuredLogEvent(EventCommon):
|
|
||||||
"""A structured log event containing typed payload data.
|
|
||||||
:param type: Event type identifier set to STRUCTURED_LOG
|
|
||||||
:param payload: The structured payload data for the log event
|
|
||||||
"""
|
|
||||||
|
|
||||||
type: Literal[EventType.STRUCTURED_LOG] = EventType.STRUCTURED_LOG
|
|
||||||
payload: StructuredLogPayload
|
|
||||||
|
|
||||||
|
|
||||||
Event = Annotated[
|
|
||||||
UnstructuredLogEvent | MetricEvent | StructuredLogEvent,
|
|
||||||
Field(discriminator="type"),
|
|
||||||
]
|
|
||||||
register_schema(Event, name="Event")
|
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
|
||||||
class EvalTrace(BaseModel):
|
|
||||||
"""A trace record for evaluation purposes.
|
|
||||||
:param session_id: Unique identifier for the evaluation session
|
|
||||||
:param step: The evaluation step or phase identifier
|
|
||||||
:param input: The input data for the evaluation
|
|
||||||
:param output: The actual output produced during evaluation
|
|
||||||
:param expected_output: The expected output for comparison during evaluation
|
|
||||||
"""
|
|
||||||
|
|
||||||
session_id: str
|
|
||||||
step: str
|
|
||||||
input: str
|
|
||||||
output: str
|
|
||||||
expected_output: str
|
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
|
||||||
class SpanWithStatus(Span):
|
|
||||||
"""A span that includes status information.
|
|
||||||
:param status: (Optional) The current status of the span
|
|
||||||
"""
|
|
||||||
|
|
||||||
status: SpanStatus | None = None
|
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
|
||||||
class QueryConditionOp(Enum):
|
|
||||||
"""Comparison operators for query conditions.
|
|
||||||
:cvar EQ: Equal to comparison
|
|
||||||
:cvar NE: Not equal to comparison
|
|
||||||
:cvar GT: Greater than comparison
|
|
||||||
:cvar LT: Less than comparison
|
|
||||||
"""
|
|
||||||
|
|
||||||
EQ = "eq"
|
|
||||||
NE = "ne"
|
|
||||||
GT = "gt"
|
|
||||||
LT = "lt"
|
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
|
||||||
class QueryCondition(BaseModel):
|
|
||||||
"""A condition for filtering query results.
|
|
||||||
:param key: The attribute key to filter on
|
|
||||||
:param op: The comparison operator to apply
|
|
||||||
:param value: The value to compare against
|
|
||||||
"""
|
|
||||||
|
|
||||||
key: str
|
|
||||||
op: QueryConditionOp
|
|
||||||
value: Any
|
|
||||||
|
|
||||||
|
|
||||||
class QueryTracesResponse(BaseModel):
|
|
||||||
"""Response containing a list of traces.
|
|
||||||
:param data: List of traces matching the query criteria
|
|
||||||
"""
|
|
||||||
|
|
||||||
data: list[Trace]
|
|
||||||
|
|
||||||
|
|
||||||
class QuerySpansResponse(BaseModel):
|
|
||||||
"""Response containing a list of spans.
|
|
||||||
:param data: List of spans matching the query criteria
|
|
||||||
"""
|
|
||||||
|
|
||||||
data: list[Span]
|
|
||||||
|
|
||||||
|
|
||||||
class QuerySpanTreeResponse(BaseModel):
|
|
||||||
"""Response containing a tree structure of spans.
|
|
||||||
:param data: Dictionary mapping span IDs to spans with status information
|
|
||||||
"""
|
|
||||||
|
|
||||||
data: dict[str, SpanWithStatus]
|
|
||||||
|
|
||||||
|
|
||||||
class MetricQueryType(Enum):
|
|
||||||
"""The type of metric query to perform.
|
|
||||||
:cvar RANGE: Query metrics over a time range
|
|
||||||
:cvar INSTANT: Query metrics at a specific point in time
|
|
||||||
"""
|
|
||||||
|
|
||||||
RANGE = "range"
|
|
||||||
INSTANT = "instant"
|
|
||||||
|
|
||||||
|
|
||||||
class MetricLabelOperator(Enum):
|
|
||||||
"""Operators for matching metric labels.
|
|
||||||
:cvar EQUALS: Label value must equal the specified value
|
|
||||||
:cvar NOT_EQUALS: Label value must not equal the specified value
|
|
||||||
:cvar REGEX_MATCH: Label value must match the specified regular expression
|
|
||||||
:cvar REGEX_NOT_MATCH: Label value must not match the specified regular expression
|
|
||||||
"""
|
|
||||||
|
|
||||||
EQUALS = "="
|
|
||||||
NOT_EQUALS = "!="
|
|
||||||
REGEX_MATCH = "=~"
|
|
||||||
REGEX_NOT_MATCH = "!~"
|
|
||||||
|
|
||||||
|
|
||||||
class MetricLabelMatcher(BaseModel):
|
|
||||||
"""A matcher for filtering metrics by label values.
|
|
||||||
:param name: The name of the label to match
|
|
||||||
:param value: The value to match against
|
|
||||||
:param operator: The comparison operator to use for matching
|
|
||||||
"""
|
|
||||||
|
|
||||||
name: str
|
|
||||||
value: str
|
|
||||||
operator: MetricLabelOperator = MetricLabelOperator.EQUALS
|
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
|
||||||
class MetricLabel(BaseModel):
|
|
||||||
"""A label associated with a metric.
|
|
||||||
:param name: The name of the label
|
|
||||||
:param value: The value of the label
|
|
||||||
"""
|
|
||||||
|
|
||||||
name: str
|
|
||||||
value: str
|
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
|
||||||
class MetricDataPoint(BaseModel):
|
|
||||||
"""A single data point in a metric time series.
|
|
||||||
:param timestamp: Unix timestamp when the metric value was recorded
|
|
||||||
:param value: The numeric value of the metric at this timestamp
|
|
||||||
"""
|
|
||||||
|
|
||||||
timestamp: int
|
|
||||||
value: float
|
|
||||||
unit: str
|
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
|
||||||
class MetricSeries(BaseModel):
|
|
||||||
"""A time series of metric data points.
|
|
||||||
:param metric: The name of the metric
|
|
||||||
:param labels: List of labels associated with this metric series
|
|
||||||
:param values: List of data points in chronological order
|
|
||||||
"""
|
|
||||||
|
|
||||||
metric: str
|
|
||||||
labels: list[MetricLabel]
|
|
||||||
values: list[MetricDataPoint]
|
|
||||||
|
|
||||||
|
|
||||||
class QueryMetricsResponse(BaseModel):
|
|
||||||
"""Response containing metric time series data.
|
|
||||||
:param data: List of metric series matching the query criteria
|
|
||||||
"""
|
|
||||||
|
|
||||||
data: list[MetricSeries]
|
|
||||||
|
|
||||||
|
|
||||||
_GLOBAL_STORAGE: dict[str, dict[str | int, Any]] = {
|
|
||||||
"active_spans": {},
|
|
||||||
"counters": {},
|
|
||||||
"gauges": {},
|
|
||||||
"up_down_counters": {},
|
|
||||||
"histograms": {},
|
|
||||||
}
|
|
||||||
_global_lock = threading.Lock()
|
|
||||||
_TRACER_PROVIDER = None
|
|
||||||
|
|
||||||
logger = get_logger(name=__name__, category="telemetry")
|
|
||||||
|
|
||||||
|
|
||||||
def _clean_attributes(attrs: dict[str, Any] | None) -> Attributes | None:
|
|
||||||
"""Remove None values from attributes dict to match OpenTelemetry's expected type."""
|
|
||||||
if attrs is None:
|
|
||||||
return None
|
|
||||||
return {k: v for k, v in attrs.items() if v is not None}
|
|
||||||
|
|
||||||
|
|
||||||
def is_tracing_enabled(tracer):
|
|
||||||
with tracer.start_as_current_span("check_tracing") as span:
|
|
||||||
return span.is_recording()
|
|
||||||
|
|
||||||
|
|
||||||
class Telemetry:
|
|
||||||
def __init__(self) -> None:
|
|
||||||
self.meter = None
|
|
||||||
|
|
||||||
global _TRACER_PROVIDER
|
|
||||||
# Initialize the correct span processor based on the provider state.
|
|
||||||
# This is needed since once the span processor is set, it cannot be unset.
|
|
||||||
# Recreating the telemetry adapter multiple times will result in duplicate span processors.
|
|
||||||
# Since the library client can be recreated multiple times in a notebook,
|
|
||||||
# the kernel will hold on to the span processor and cause duplicate spans to be written.
|
|
||||||
if os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT"):
|
|
||||||
if _TRACER_PROVIDER is None:
|
|
||||||
provider = TracerProvider()
|
|
||||||
trace.set_tracer_provider(provider)
|
|
||||||
_TRACER_PROVIDER = provider
|
|
||||||
|
|
||||||
# Use single OTLP endpoint for all telemetry signals
|
|
||||||
|
|
||||||
# Let OpenTelemetry SDK handle endpoint construction automatically
|
|
||||||
# The SDK will read OTEL_EXPORTER_OTLP_ENDPOINT and construct appropriate URLs
|
|
||||||
# https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter
|
|
||||||
span_exporter = OTLPSpanExporter()
|
|
||||||
span_processor = BatchSpanProcessor(span_exporter)
|
|
||||||
cast(TracerProvider, trace.get_tracer_provider()).add_span_processor(span_processor)
|
|
||||||
|
|
||||||
metric_reader = PeriodicExportingMetricReader(OTLPMetricExporter())
|
|
||||||
metric_provider = MeterProvider(metric_readers=[metric_reader])
|
|
||||||
metrics.set_meter_provider(metric_provider)
|
|
||||||
self.is_otel_endpoint_set = True
|
|
||||||
else:
|
|
||||||
logger.warning("OTEL_EXPORTER_OTLP_ENDPOINT is not set, skipping telemetry")
|
|
||||||
self.is_otel_endpoint_set = False
|
|
||||||
|
|
||||||
self.meter = metrics.get_meter(__name__)
|
|
||||||
self._lock = _global_lock
|
|
||||||
|
|
||||||
async def initialize(self) -> None:
|
|
||||||
pass
|
|
||||||
|
|
||||||
async def shutdown(self) -> None:
|
|
||||||
if self.is_otel_endpoint_set:
|
|
||||||
cast(TracerProvider, trace.get_tracer_provider()).force_flush()
|
|
||||||
|
|
||||||
async def log_event(self, event: Event, ttl_seconds: int = 604800) -> None:
|
|
||||||
if isinstance(event, UnstructuredLogEvent):
|
|
||||||
self._log_unstructured(event, ttl_seconds)
|
|
||||||
elif isinstance(event, MetricEvent):
|
|
||||||
self._log_metric(event)
|
|
||||||
elif isinstance(event, StructuredLogEvent):
|
|
||||||
self._log_structured(event, ttl_seconds)
|
|
||||||
else:
|
|
||||||
raise ValueError(f"Unknown event type: {event}")
|
|
||||||
|
|
||||||
def _log_unstructured(self, event: UnstructuredLogEvent, ttl_seconds: int) -> None:
|
|
||||||
with self._lock:
|
|
||||||
# Use global storage instead of instance storage
|
|
||||||
span_id = int(event.span_id, 16)
|
|
||||||
span = _GLOBAL_STORAGE["active_spans"].get(span_id)
|
|
||||||
|
|
||||||
if span:
|
|
||||||
timestamp_ns = int(event.timestamp.timestamp() * 1e9)
|
|
||||||
span.add_event(
|
|
||||||
name=event.type.value,
|
|
||||||
attributes={
|
|
||||||
"message": event.message,
|
|
||||||
"severity": event.severity.value,
|
|
||||||
"__ttl__": ttl_seconds,
|
|
||||||
**(event.attributes or {}),
|
|
||||||
},
|
|
||||||
timestamp=timestamp_ns,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
print(f"Warning: No active span found for span_id {span_id}. Dropping event: {event}")
|
|
||||||
|
|
||||||
def _get_or_create_counter(self, name: str, unit: str) -> metrics.Counter:
|
|
||||||
assert self.meter is not None
|
|
||||||
if name not in _GLOBAL_STORAGE["counters"]:
|
|
||||||
_GLOBAL_STORAGE["counters"][name] = self.meter.create_counter(
|
|
||||||
name=name,
|
|
||||||
unit=unit,
|
|
||||||
description=f"Counter for {name}",
|
|
||||||
)
|
|
||||||
return cast(metrics.Counter, _GLOBAL_STORAGE["counters"][name])
|
|
||||||
|
|
||||||
def _get_or_create_gauge(self, name: str, unit: str) -> metrics.ObservableGauge:
|
|
||||||
assert self.meter is not None
|
|
||||||
if name not in _GLOBAL_STORAGE["gauges"]:
|
|
||||||
_GLOBAL_STORAGE["gauges"][name] = self.meter.create_gauge(
|
|
||||||
name=name,
|
|
||||||
unit=unit,
|
|
||||||
description=f"Gauge for {name}",
|
|
||||||
)
|
|
||||||
return cast(metrics.ObservableGauge, _GLOBAL_STORAGE["gauges"][name])
|
|
||||||
|
|
||||||
def _get_or_create_histogram(self, name: str, unit: str) -> metrics.Histogram:
|
|
||||||
assert self.meter is not None
|
|
||||||
if name not in _GLOBAL_STORAGE["histograms"]:
|
|
||||||
_GLOBAL_STORAGE["histograms"][name] = self.meter.create_histogram(
|
|
||||||
name=name,
|
|
||||||
unit=unit,
|
|
||||||
description=f"Histogram for {name}",
|
|
||||||
)
|
|
||||||
return cast(metrics.Histogram, _GLOBAL_STORAGE["histograms"][name])
|
|
||||||
|
|
||||||
def _log_metric(self, event: MetricEvent) -> None:
|
|
||||||
# Add metric as an event to the current span
|
|
||||||
try:
|
|
||||||
with self._lock:
|
|
||||||
# Only try to add to span if we have a valid span_id
|
|
||||||
if event.span_id:
|
|
||||||
try:
|
|
||||||
span_id = int(event.span_id, 16)
|
|
||||||
span = _GLOBAL_STORAGE["active_spans"].get(span_id)
|
|
||||||
|
|
||||||
if span:
|
|
||||||
timestamp_ns = int(event.timestamp.timestamp() * 1e9)
|
|
||||||
span.add_event(
|
|
||||||
name=f"metric.{event.metric}",
|
|
||||||
attributes={
|
|
||||||
"value": event.value,
|
|
||||||
"unit": event.unit,
|
|
||||||
**(event.attributes or {}),
|
|
||||||
},
|
|
||||||
timestamp=timestamp_ns,
|
|
||||||
)
|
|
||||||
except (ValueError, KeyError):
|
|
||||||
# Invalid span_id or span not found, but we already logged to console above
|
|
||||||
pass
|
|
||||||
except Exception:
|
|
||||||
# Lock acquisition failed
|
|
||||||
logger.debug("Failed to acquire lock to add metric to span")
|
|
||||||
|
|
||||||
# Log to OpenTelemetry meter if available
|
|
||||||
if self.meter is None:
|
|
||||||
return
|
|
||||||
|
|
||||||
# Use histograms for token-related metrics (per-request measurements)
|
|
||||||
# Use counters for other cumulative metrics
|
|
||||||
token_metrics = {"prompt_tokens", "completion_tokens", "total_tokens"}
|
|
||||||
|
|
||||||
if event.metric in token_metrics:
|
|
||||||
# Token metrics are per-request measurements, use histogram
|
|
||||||
histogram = self._get_or_create_histogram(event.metric, event.unit)
|
|
||||||
histogram.record(event.value, attributes=_clean_attributes(event.attributes))
|
|
||||||
elif isinstance(event.value, int):
|
|
||||||
counter = self._get_or_create_counter(event.metric, event.unit)
|
|
||||||
counter.add(event.value, attributes=_clean_attributes(event.attributes))
|
|
||||||
elif isinstance(event.value, float):
|
|
||||||
up_down_counter = self._get_or_create_up_down_counter(event.metric, event.unit)
|
|
||||||
up_down_counter.add(event.value, attributes=_clean_attributes(event.attributes))
|
|
||||||
|
|
||||||
def _get_or_create_up_down_counter(self, name: str, unit: str) -> metrics.UpDownCounter:
|
|
||||||
assert self.meter is not None
|
|
||||||
if name not in _GLOBAL_STORAGE["up_down_counters"]:
|
|
||||||
_GLOBAL_STORAGE["up_down_counters"][name] = self.meter.create_up_down_counter(
|
|
||||||
name=name,
|
|
||||||
unit=unit,
|
|
||||||
description=f"UpDownCounter for {name}",
|
|
||||||
)
|
|
||||||
return cast(metrics.UpDownCounter, _GLOBAL_STORAGE["up_down_counters"][name])
|
|
||||||
|
|
||||||
def _log_structured(self, event: StructuredLogEvent, ttl_seconds: int) -> None:
    """Start or end the tracer span described by a structured log event.

    Args:
        event: Event whose payload is a ``SpanStartPayload`` or a
            ``SpanEndPayload``. ``event.attributes`` is modified in place:
            ``__ttl__`` is added and ``traceparent`` / ``tracestate`` are removed.
        ttl_seconds: Value stored under the ``__ttl__`` attribute.

    Raises:
        ValueError: If the payload is neither a span start nor a span end.
    """
    with self._lock:
        # Span IDs arrive as hex strings; active spans are keyed by their int value.
        span_id = int(event.span_id, 16)
        tracer = trace.get_tracer(__name__)
        if event.attributes is None:
            event.attributes = {}
        event.attributes["__ttl__"] = ttl_seconds

        # Extract these W3C trace context attributes so they are not written to
        # underlying storage, as we just need them to propagate the trace context.
        traceparent = event.attributes.pop("traceparent", None)
        tracestate = event.attributes.pop("tracestate", None)
        if traceparent:
            # If we have a traceparent header value, we're not the root span.
            for root_attribute in ROOT_SPAN_MARKERS:
                event.attributes.pop(root_attribute, None)

        if isinstance(event.payload, SpanStartPayload):
            # Check if span already exists to prevent duplicates
            if span_id in _GLOBAL_STORAGE["active_spans"]:
                return

            # Parent resolution: a parent span ID on the payload is looked up among
            # the active spans; otherwise an incoming traceparent is used.
            context = None
            if event.payload.parent_span_id:
                parent_span_id = int(event.payload.parent_span_id, 16)
                parent_span = _GLOBAL_STORAGE["active_spans"].get(parent_span_id)
                if parent_span:
                    context = trace.set_span_in_context(parent_span)
            elif traceparent:
                carrier = {
                    "traceparent": traceparent,
                    "tracestate": tracestate,
                }
                context = TraceContextTextMapPropagator().extract(carrier=carrier)

            span = tracer.start_span(
                name=event.payload.name,
                context=context,
                attributes=_clean_attributes(event.attributes),
            )
            # Remember the span so the matching end event (and child spans) can find it.
            _GLOBAL_STORAGE["active_spans"][span_id] = span

        elif isinstance(event.payload, SpanEndPayload):
            span = _GLOBAL_STORAGE["active_spans"].get(span_id)  # type: ignore[assignment]
            # An end event for a span that is not active is ignored.
            if span:
                if event.attributes:
                    cleaned_attrs = _clean_attributes(event.attributes)
                    if cleaned_attrs:
                        span.set_attributes(cleaned_attrs)

                # Any payload status other than OK is reported as ERROR.
                status = (
                    trace.Status(status_code=trace.StatusCode.OK)
                    if event.payload.status == SpanStatus.OK
                    else trace.Status(status_code=trace.StatusCode.ERROR)
                )
                span.set_status(status)
                span.end()
                _GLOBAL_STORAGE["active_spans"].pop(span_id, None)
        else:
            raise ValueError(f"Unknown structured log event: {event}")
|
|
||||||
|
|
@ -1,154 +0,0 @@
|
||||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
||||||
# All rights reserved.
|
|
||||||
#
|
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
|
||||||
# the root directory of this source tree.
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import inspect
|
|
||||||
import json
|
|
||||||
from collections.abc import AsyncGenerator, Callable
|
|
||||||
from functools import wraps
|
|
||||||
from typing import Any, cast
|
|
||||||
|
|
||||||
from pydantic import BaseModel
|
|
||||||
|
|
||||||
from llama_stack.models.llama.datatypes import Primitive
|
|
||||||
|
|
||||||
# Recursive alias for the values _prepare_for_json returns: a Primitive, or a
# list / string-keyed dict whose elements are themselves JSONValue.
type JSONValue = Primitive | list["JSONValue"] | dict[str, "JSONValue"]
|
|
||||||
|
|
||||||
|
|
||||||
def serialize_value(value: Any) -> str:
    """Normalize ``value`` with ``_prepare_for_json`` and return ``str()`` of the result."""
    json_ready = _prepare_for_json(value)
    return str(json_ready)
|
|
||||||
|
|
||||||
|
|
||||||
def _prepare_for_json(value: Any) -> JSONValue:
    """Convert ``value`` into a structure made only of JSON-compatible pieces.

    Rules, checked in this order:
      * ``None`` becomes the empty string.
      * ``str`` / ``int`` / ``float`` / ``bool`` are returned unchanged.
      * Objects exposing ``_name_`` (presumably Enum members) become that name.
      * Pydantic models are dumped to JSON and parsed back.
      * Lists, tuples and sets become lists, converted element by element.
      * Dicts get string keys and recursively converted values.
      * Anything else is returned as-is when ``json.dumps`` accepts it,
        otherwise as ``str(value)``.
    """
    if value is None:
        return ""
    if isinstance(value, (str, int, float, bool)):
        return value
    if hasattr(value, "_name_"):
        return cast(str, value._name_)
    if isinstance(value, BaseModel):
        return cast(JSONValue, json.loads(value.model_dump_json()))
    if isinstance(value, (list, tuple, set)):
        return [_prepare_for_json(element) for element in value]
    if isinstance(value, dict):
        return {str(key): _prepare_for_json(entry) for key, entry in value.items()}

    # Last resort: probe whether the object is JSON-serializable as it stands.
    try:
        json.dumps(value)
    except Exception:
        return str(value)
    return cast(JSONValue, value)
|
|
||||||
|
|
||||||
|
|
||||||
def trace_protocol[T: type[Any]](cls: T) -> T:
|
|
||||||
"""
|
|
||||||
A class decorator that automatically traces all methods in a protocol/base class
|
|
||||||
and its inheriting classes.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def trace_method(method: Callable[..., Any]) -> Callable[..., Any]:
|
|
||||||
is_async = asyncio.iscoroutinefunction(method)
|
|
||||||
is_async_gen = inspect.isasyncgenfunction(method)
|
|
||||||
|
|
||||||
def create_span_context(self: Any, *args: Any, **kwargs: Any) -> tuple[str, str, dict[str, Primitive]]:
|
|
||||||
class_name = self.__class__.__name__
|
|
||||||
method_name = method.__name__
|
|
||||||
span_type = "async_generator" if is_async_gen else "async" if is_async else "sync"
|
|
||||||
sig = inspect.signature(method)
|
|
||||||
param_names = list(sig.parameters.keys())[1:] # Skip 'self'
|
|
||||||
combined_args: dict[str, str] = {}
|
|
||||||
for i, arg in enumerate(args):
|
|
||||||
param_name = param_names[i] if i < len(param_names) else f"position_{i + 1}"
|
|
||||||
combined_args[param_name] = serialize_value(arg)
|
|
||||||
for k, v in kwargs.items():
|
|
||||||
combined_args[str(k)] = serialize_value(v)
|
|
||||||
|
|
||||||
span_attributes: dict[str, Primitive] = {
|
|
||||||
"__autotraced__": True,
|
|
||||||
"__class__": class_name,
|
|
||||||
"__method__": method_name,
|
|
||||||
"__type__": span_type,
|
|
||||||
"__args__": json.dumps(combined_args),
|
|
||||||
}
|
|
||||||
|
|
||||||
return class_name, method_name, span_attributes
|
|
||||||
|
|
||||||
@wraps(method)
|
|
||||||
async def async_gen_wrapper(self: Any, *args: Any, **kwargs: Any) -> AsyncGenerator[Any, None]:
|
|
||||||
from llama_stack.core.telemetry import tracing
|
|
||||||
|
|
||||||
class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs)
|
|
||||||
|
|
||||||
with tracing.span(f"{class_name}.{method_name}", span_attributes) as span:
|
|
||||||
count = 0
|
|
||||||
try:
|
|
||||||
async for item in method(self, *args, **kwargs):
|
|
||||||
yield item
|
|
||||||
count += 1
|
|
||||||
finally:
|
|
||||||
span.set_attribute("chunk_count", count)
|
|
||||||
|
|
||||||
@wraps(method)
|
|
||||||
async def async_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any:
|
|
||||||
from llama_stack.core.telemetry import tracing
|
|
||||||
|
|
||||||
class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs)
|
|
||||||
|
|
||||||
with tracing.span(f"{class_name}.{method_name}", span_attributes) as span:
|
|
||||||
try:
|
|
||||||
result = await method(self, *args, **kwargs)
|
|
||||||
span.set_attribute("output", serialize_value(result))
|
|
||||||
return result
|
|
||||||
except Exception as e:
|
|
||||||
span.set_attribute("error", str(e))
|
|
||||||
raise
|
|
||||||
|
|
||||||
@wraps(method)
|
|
||||||
def sync_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any:
|
|
||||||
from llama_stack.core.telemetry import tracing
|
|
||||||
|
|
||||||
class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs)
|
|
||||||
|
|
||||||
with tracing.span(f"{class_name}.{method_name}", span_attributes) as span:
|
|
||||||
try:
|
|
||||||
result = method(self, *args, **kwargs)
|
|
||||||
span.set_attribute("output", serialize_value(result))
|
|
||||||
return result
|
|
||||||
except Exception as e:
|
|
||||||
span.set_attribute("error", str(e))
|
|
||||||
raise
|
|
||||||
|
|
||||||
if is_async_gen:
|
|
||||||
return async_gen_wrapper
|
|
||||||
elif is_async:
|
|
||||||
return async_wrapper
|
|
||||||
else:
|
|
||||||
return sync_wrapper
|
|
||||||
|
|
||||||
# Wrap methods on the class itself (for classes applied at runtime)
|
|
||||||
# Skip if already wrapped (indicated by __wrapped__ attribute)
|
|
||||||
for name, method in vars(cls).items():
|
|
||||||
if inspect.isfunction(method) and not name.startswith("_"):
|
|
||||||
if not hasattr(method, "__wrapped__"):
|
|
||||||
wrapped = trace_method(method)
|
|
||||||
setattr(cls, name, wrapped) # noqa: B010
|
|
||||||
|
|
||||||
# Also set up __init_subclass__ for future subclasses
|
|
||||||
original_init_subclass = cast(Callable[..., Any] | None, getattr(cls, "__init_subclass__", None))
|
|
||||||
|
|
||||||
def __init_subclass__(cls_child: type[Any], **kwargs: Any) -> None: # noqa: N807
|
|
||||||
if original_init_subclass:
|
|
||||||
cast(Callable[..., None], original_init_subclass)(**kwargs)
|
|
||||||
|
|
||||||
for name, method in vars(cls_child).items():
|
|
||||||
if inspect.isfunction(method) and not name.startswith("_"):
|
|
||||||
setattr(cls_child, name, trace_method(method)) # noqa: B010
|
|
||||||
|
|
||||||
cls_any = cast(Any, cls)
|
|
||||||
cls_any.__init_subclass__ = classmethod(__init_subclass__)
|
|
||||||
|
|
||||||
return cls
|
|
||||||
|
|
@ -1,388 +0,0 @@
|
||||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
||||||
# All rights reserved.
|
|
||||||
#
|
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
|
||||||
# the root directory of this source tree.
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import contextvars
|
|
||||||
import logging # allow-direct-logging
|
|
||||||
import queue
|
|
||||||
import secrets
|
|
||||||
import sys
|
|
||||||
import threading
|
|
||||||
import time
|
|
||||||
from collections.abc import Callable
|
|
||||||
from datetime import UTC, datetime
|
|
||||||
from functools import wraps
|
|
||||||
from typing import Any, Self
|
|
||||||
|
|
||||||
from llama_stack.core.telemetry.telemetry import (
|
|
||||||
ROOT_SPAN_MARKERS,
|
|
||||||
Event,
|
|
||||||
LogSeverity,
|
|
||||||
Span,
|
|
||||||
SpanEndPayload,
|
|
||||||
SpanStartPayload,
|
|
||||||
SpanStatus,
|
|
||||||
StructuredLogEvent,
|
|
||||||
Telemetry,
|
|
||||||
UnstructuredLogEvent,
|
|
||||||
)
|
|
||||||
from llama_stack.core.telemetry.trace_protocol import serialize_value
|
|
||||||
from llama_stack.log import get_logger
|
|
||||||
|
|
||||||
# Module logger obtained through the project's get_logger helper.
logger = get_logger(__name__, category="core")

# Fallback logger that does NOT propagate to TelemetryHandler to avoid recursion
_fallback_logger = logging.getLogger("llama_stack.telemetry.background")
# Configure only when no handler is attached yet (presumably so that running
# this module more than once does not stack duplicate stderr handlers).
if not _fallback_logger.handlers:
    _fallback_logger.propagate = False
    _fallback_logger.setLevel(logging.ERROR)
    # Errors are written straight to stderr with a timestamped format.
    _fallback_handler = logging.StreamHandler(sys.stderr)
    _fallback_handler.setLevel(logging.ERROR)
    _fallback_handler.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] %(name)s: %(message)s"))
    _fallback_logger.addHandler(_fallback_handler)
|
|
||||||
|
|
||||||
|
|
||||||
# All-zero IDs are treated as invalid: generate_span_id() and
# generate_trace_id() re-draw until they get a different value.
INVALID_SPAN_ID = 0x0000000000000000
INVALID_TRACE_ID = 0x00000000000000000000000000000000

# The logical root span may not be visible to this process if a parent context
# is passed in. The local root span is the first local span in a trace.
LOCAL_ROOT_SPAN_MARKER = "__local_root_span__"
|
|
||||||
|
|
||||||
|
|
||||||
def trace_id_to_str(trace_id: int) -> str:
    """Format a trace ID as lowercase hexadecimal text.

    Args:
        trace_id: Trace ID int

    Returns:
        The trace ID as a hexadecimal string, zero-padded to at least
        32 characters.
    """
    return f"{trace_id:032x}"
|
|
||||||
|
|
||||||
|
|
||||||
def span_id_to_str(span_id: int) -> str:
    """Format a span ID as lowercase hexadecimal text.

    Args:
        span_id: Span ID int

    Returns:
        The span ID as a hexadecimal string, zero-padded to at least
        16 characters.
    """
    return f"{span_id:016x}"
|
|
||||||
|
|
||||||
|
|
||||||
def generate_span_id() -> str:
    """Return a random 64-bit span ID as hex text, never the invalid all-zero ID."""
    while True:
        candidate = secrets.randbits(64)
        if candidate != INVALID_SPAN_ID:
            return span_id_to_str(candidate)
|
|
||||||
|
|
||||||
|
|
||||||
def generate_trace_id() -> str:
    """Return a random 128-bit trace ID as hex text, never the invalid all-zero ID."""
    while True:
        candidate = secrets.randbits(128)
        if candidate != INVALID_TRACE_ID:
            return trace_id_to_str(candidate)
|
|
||||||
|
|
||||||
|
|
||||||
# Minimum number of seconds between two "log queue is full" notices emitted by
# BackgroundLogger.log_event; drops in between are only counted.
LOG_QUEUE_FULL_LOG_INTERVAL_SECONDS = 60.0
|
|
||||||
|
|
||||||
|
|
||||||
class BackgroundLogger:
    """Hands telemetry events to a ``Telemetry`` API from a daemon worker thread.

    Producers call :meth:`log_event`, which never blocks: events go into a
    bounded queue and are dropped (with a rate-limited notice on the fallback
    logger) when the queue is full. The worker thread runs its own event loop
    and awaits ``api.log_event`` for each queued event.
    """

    def __init__(self, api: Telemetry, capacity: int = 100000):
        """
        Args:
            api: Telemetry implementation whose ``log_event`` coroutine receives the events.
            capacity: Maximum number of events held in the queue.
        """
        self.api = api
        self.log_queue: queue.Queue[Any] = queue.Queue(maxsize=capacity)
        self._last_queue_full_log_time: float = 0.0
        self._dropped_since_last_notice: int = 0
        # Start the worker last so the thread never sees a half-initialized instance.
        self.worker_thread = threading.Thread(target=self._worker, daemon=True)
        self.worker_thread.start()

    def log_event(self, event: Event) -> None:
        """Queue ``event`` for the worker; drop it if the queue is full."""
        try:
            self.log_queue.put_nowait(event)
        except queue.Full:
            # Aggregate drops and emit at most once per interval via fallback logger
            self._dropped_since_last_notice += 1
            current_time = time.time()
            if current_time - self._last_queue_full_log_time >= LOG_QUEUE_FULL_LOG_INTERVAL_SECONDS:
                _fallback_logger.error(
                    "Log queue is full; dropped %d events since last notice",
                    self._dropped_since_last_notice,
                )
                self._last_queue_full_log_time = current_time
                self._dropped_since_last_notice = 0

    def _worker(self):
        """Thread target: run ``_process_logs`` on an event loop owned by this thread."""
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        loop.run_until_complete(self._process_logs())

    async def _process_logs(self):
        """Forward queued events to the Telemetry API, one at a time, forever."""
        while True:
            # Blocking get is acceptable here: this loop is the only task on
            # the worker thread's event loop.
            event = self.log_queue.get()
            try:
                await self.api.log_event(event)
            except Exception:
                import traceback

                traceback.print_exc()
                print("Error processing log event")
            finally:
                # Exactly one task_done() per successfully dequeued item, so
                # log_queue.join() can account for every event.
                self.log_queue.task_done()

    def __del__(self) -> None:
        # Waits until every queued event has been marked done by the worker.
        self.log_queue.join()
|
|
||||||
|
|
||||||
|
|
||||||
# Process-wide background logger; stays None until setup_logger() creates it.
BACKGROUND_LOGGER: BackgroundLogger | None = None
|
|
||||||
|
|
||||||
|
|
||||||
def enqueue_event(event: Event) -> None:
    """Hand a telemetry event to the background logger without awaiting anything.

    Routers and other hot paths can use this to submit telemetry without
    going through the Telemetry API on the main event loop, which reduces
    contention there.

    Raises:
        RuntimeError: If no background logger has been set up yet.
    """
    background = BACKGROUND_LOGGER
    if background is None:
        raise RuntimeError("Telemetry API not initialized")
    background.log_event(event)
|
|
||||||
|
|
||||||
|
|
||||||
class TraceContext:
    """Stack of the spans currently open within one trace.

    Pushing a span emits a span-start event to the background logger and
    popping emits the matching span-end event. The innermost open span is the
    parent of the next span pushed.
    """

    def __init__(self, logger: BackgroundLogger, trace_id: str):
        """
        Args:
            logger: Background logger that receives the span start/end events.
            trace_id: ID shared by every span created through this context.
        """
        self.logger = logger
        self.trace_id = trace_id
        self.spans: list[Span] = []

    def push_span(self, name: str, attributes: dict[str, Any] | None = None) -> Span:
        """Open a new span as a child of the current one and return it.

        Args:
            name: Span name.
            attributes: Optional attributes attached to the span and its start event.
        """
        current_span = self.get_current_span()
        span = Span(
            span_id=generate_span_id(),
            trace_id=self.trace_id,
            name=name,
            start_time=datetime.now(UTC),
            parent_span_id=current_span.span_id if current_span else None,
            attributes=attributes,
        )

        self.logger.log_event(
            StructuredLogEvent(
                trace_id=span.trace_id,
                span_id=span.span_id,
                timestamp=span.start_time,
                attributes=span.attributes,
                payload=SpanStartPayload(
                    name=span.name,
                    parent_span_id=span.parent_span_id,
                ),
            )
        )

        self.spans.append(span)
        return span

    def pop_span(self, status: SpanStatus = SpanStatus.OK) -> None:
        """Close the innermost open span and emit its span-end event.

        Args:
            status: Status reported in the span-end payload.

        An unbalanced pop (no span open) is a no-op instead of an IndexError.
        """
        if not self.spans:
            return

        span = self.spans.pop()
        self.logger.log_event(
            StructuredLogEvent(
                trace_id=span.trace_id,
                span_id=span.span_id,
                timestamp=span.start_time,
                attributes=span.attributes,
                payload=SpanEndPayload(
                    status=status,
                ),
            )
        )

    def get_current_span(self) -> Span | None:
        """Return the innermost open span, or ``None`` when no span is open."""
        return self.spans[-1] if self.spans else None
|
|
||||||
|
|
||||||
|
|
||||||
# TraceContext of the trace active in the current execution context. The value
# is None when no trace is active: start_trace() sets it and end_trace() resets it.
CURRENT_TRACE_CONTEXT: contextvars.ContextVar[TraceContext | None] = contextvars.ContextVar(
    "trace_context", default=None
)
|
|
||||||
|
|
||||||
|
|
||||||
def setup_logger(api: Telemetry, level: int = logging.INFO):
    """Create the process-wide background logger and hook telemetry into logging.

    The first call creates ``BACKGROUND_LOGGER`` for ``api``, sets the root
    logger's level to ``level`` and attaches a ``TelemetryHandler`` to it.
    Later calls do nothing.
    """
    global BACKGROUND_LOGGER

    if BACKGROUND_LOGGER is not None:
        return

    BACKGROUND_LOGGER = BackgroundLogger(api)
    root = logging.getLogger()
    root.setLevel(level)
    root.addHandler(TelemetryHandler())
|
|
||||||
|
|
||||||
|
|
||||||
async def start_trace(name: str, attributes: dict[str, Any] | None = None) -> TraceContext | None:
    """Begin a new trace, open its root span and make it the current trace context.

    Args:
        name: Name of the root span.
        attributes: Extra attributes for the root span; they take precedence
            over the root markers added here.

    Returns:
        The new trace context, or ``None`` when no background logger is set up.
    """
    if BACKGROUND_LOGGER is None:
        logger.debug("No Telemetry implementation set. Skipping trace initialization...")
        return None

    new_context = TraceContext(BACKGROUND_LOGGER, generate_trace_id())

    # Tag the span as the trace root for the time being; if a traceparent
    # context is supplied, later processing strips the ROOT_SPAN_MARKERS again.
    # The span is additionally tagged as the 'local' root, meaning the root of
    # the spans that originate in this process, which is required so that this
    # 'local' root span's id ends up in the trace record in the sqlite store.
    root_attributes: dict[str, Any] = {marker: True for marker in ROOT_SPAN_MARKERS}
    root_attributes[LOCAL_ROOT_SPAN_MARKER] = True
    root_attributes.update(attributes or {})
    new_context.push_span(name, root_attributes)

    CURRENT_TRACE_CONTEXT.set(new_context)
    return new_context
|
|
||||||
|
|
||||||
|
|
||||||
async def end_trace(status: SpanStatus = SpanStatus.OK):
    """Close the current trace's innermost span with ``status`` and clear the trace context.

    Does nothing except log a debug message when no trace is active.
    """
    active = CURRENT_TRACE_CONTEXT.get()
    if active is not None:
        active.pop_span(status)
        CURRENT_TRACE_CONTEXT.set(None)
        return

    logger.debug("No trace context to end")
|
|
||||||
|
|
||||||
|
|
||||||
def severity(levelname: str) -> LogSeverity:
    """Translate a standard logging level name into the matching ``LogSeverity``.

    Args:
        levelname: One of "DEBUG", "INFO", "WARNING", "ERROR" or "CRITICAL".

    Raises:
        ValueError: For any other level name.
    """
    by_level_name = {
        "DEBUG": LogSeverity.DEBUG,
        "INFO": LogSeverity.INFO,
        "WARNING": LogSeverity.WARN,
        "ERROR": LogSeverity.ERROR,
        "CRITICAL": LogSeverity.CRITICAL,
    }
    matched = by_level_name.get(levelname)
    if matched is None:
        raise ValueError(f"Unknown log level: {levelname}")
    return matched
|
|
||||||
|
|
||||||
|
|
||||||
# TODO: ideally, the actual emitting should be done inside a separate daemon
|
|
||||||
# process completely isolated from the server
|
|
||||||
class TelemetryHandler(logging.Handler):
|
|
||||||
def emit(self, record: logging.LogRecord) -> None:
|
|
||||||
# horrendous hack to avoid logging from asyncio and getting into an infinite loop
|
|
||||||
if record.module in ("asyncio", "selector_events"):
|
|
||||||
return
|
|
||||||
|
|
||||||
global CURRENT_TRACE_CONTEXT
|
|
||||||
context = CURRENT_TRACE_CONTEXT.get()
|
|
||||||
if context is None:
|
|
||||||
return
|
|
||||||
|
|
||||||
span = context.get_current_span()
|
|
||||||
if span is None:
|
|
||||||
return
|
|
||||||
|
|
||||||
enqueue_event(
|
|
||||||
UnstructuredLogEvent(
|
|
||||||
trace_id=span.trace_id,
|
|
||||||
span_id=span.span_id,
|
|
||||||
timestamp=datetime.now(UTC),
|
|
||||||
message=self.format(record),
|
|
||||||
severity=severity(record.levelname),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
def close(self) -> None:
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class SpanContextManager:
|
|
||||||
def __init__(self, name: str, attributes: dict[str, Any] | None = None):
|
|
||||||
self.name = name
|
|
||||||
self.attributes = attributes
|
|
||||||
self.span: Span | None = None
|
|
||||||
|
|
||||||
def __enter__(self) -> Self:
|
|
||||||
global CURRENT_TRACE_CONTEXT
|
|
||||||
context = CURRENT_TRACE_CONTEXT.get()
|
|
||||||
if not context:
|
|
||||||
logger.debug("No trace context to push span")
|
|
||||||
return self
|
|
||||||
|
|
||||||
self.span = context.push_span(self.name, self.attributes)
|
|
||||||
return self
|
|
||||||
|
|
||||||
def __exit__(self, exc_type, exc_value, traceback) -> None:
|
|
||||||
global CURRENT_TRACE_CONTEXT
|
|
||||||
context = CURRENT_TRACE_CONTEXT.get()
|
|
||||||
if not context:
|
|
||||||
logger.debug("No trace context to pop span")
|
|
||||||
return
|
|
||||||
|
|
||||||
context.pop_span()
|
|
||||||
|
|
||||||
def set_attribute(self, key: str, value: Any) -> None:
|
|
||||||
if self.span:
|
|
||||||
if self.span.attributes is None:
|
|
||||||
self.span.attributes = {}
|
|
||||||
self.span.attributes[key] = serialize_value(value)
|
|
||||||
|
|
||||||
async def __aenter__(self) -> Self:
|
|
||||||
global CURRENT_TRACE_CONTEXT
|
|
||||||
context = CURRENT_TRACE_CONTEXT.get()
|
|
||||||
if not context:
|
|
||||||
logger.debug("No trace context to push span")
|
|
||||||
return self
|
|
||||||
|
|
||||||
self.span = context.push_span(self.name, self.attributes)
|
|
||||||
return self
|
|
||||||
|
|
||||||
async def __aexit__(self, exc_type, exc_value, traceback) -> None:
|
|
||||||
global CURRENT_TRACE_CONTEXT
|
|
||||||
context = CURRENT_TRACE_CONTEXT.get()
|
|
||||||
if not context:
|
|
||||||
logger.debug("No trace context to pop span")
|
|
||||||
return
|
|
||||||
|
|
||||||
context.pop_span()
|
|
||||||
|
|
||||||
def __call__(self, func: Callable[..., Any]) -> Callable[..., Any]:
|
|
||||||
@wraps(func)
|
|
||||||
def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
|
|
||||||
with self:
|
|
||||||
return func(*args, **kwargs)
|
|
||||||
|
|
||||||
@wraps(func)
|
|
||||||
async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
|
|
||||||
async with self:
|
|
||||||
return await func(*args, **kwargs)
|
|
||||||
|
|
||||||
@wraps(func)
|
|
||||||
def wrapper(*args: Any, **kwargs: Any) -> Any:
|
|
||||||
if asyncio.iscoroutinefunction(func):
|
|
||||||
return async_wrapper(*args, **kwargs)
|
|
||||||
else:
|
|
||||||
return sync_wrapper(*args, **kwargs)
|
|
||||||
|
|
||||||
return wrapper
|
|
||||||
|
|
||||||
|
|
||||||
def span(name: str, attributes: dict[str, Any] | None = None) -> SpanContextManager:
    """Build a ``SpanContextManager`` for a span called ``name`` with optional ``attributes``."""
    manager = SpanContextManager(name, attributes)
    return manager
|
|
||||||
|
|
||||||
|
|
||||||
def get_current_span() -> Span | None:
    """Return the innermost open span of the current trace.

    Returns:
        The current span, or ``None`` when no trace context is active or the
        active context has no open span.
    """
    # CURRENT_TRACE_CONTEXT is a module-level ContextVar and is never None
    # itself; only the value it holds can be None.
    context = CURRENT_TRACE_CONTEXT.get()
    if context:
        return context.get_current_span()
    return None
|
|
||||||
|
|
@ -7,8 +7,6 @@
|
||||||
from collections.abc import AsyncGenerator
|
from collections.abc import AsyncGenerator
|
||||||
from contextvars import ContextVar
|
from contextvars import ContextVar
|
||||||
|
|
||||||
from llama_stack.core.telemetry.tracing import CURRENT_TRACE_CONTEXT
|
|
||||||
|
|
||||||
_MISSING = object()
|
_MISSING = object()
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -69,16 +67,12 @@ def preserve_contexts_async_generator[T](
|
||||||
try:
|
try:
|
||||||
yield item
|
yield item
|
||||||
# Update our tracked values with any changes made during this iteration
|
# Update our tracked values with any changes made during this iteration
|
||||||
# Only for non-trace context vars - trace context must persist across yields
|
# This allows context changes to persist across generator iterations
|
||||||
# to allow nested span tracking for telemetry
|
|
||||||
for context_var in context_vars:
|
for context_var in context_vars:
|
||||||
if context_var is not CURRENT_TRACE_CONTEXT:
|
initial_context_values[context_var.name] = context_var.get()
|
||||||
initial_context_values[context_var.name] = context_var.get()
|
|
||||||
finally:
|
finally:
|
||||||
# Restore non-trace context vars after each yield to prevent leaks between requests
|
# Restore context vars after each yield to prevent leaks between requests
|
||||||
# CURRENT_TRACE_CONTEXT is NOT restored here to preserve telemetry span stack
|
|
||||||
for context_var in context_vars:
|
for context_var in context_vars:
|
||||||
if context_var is not CURRENT_TRACE_CONTEXT:
|
_restore_context_var(context_var)
|
||||||
_restore_context_var(context_var)
|
|
||||||
|
|
||||||
return wrapper()
|
return wrapper()
|
||||||
|
|
|
||||||
|
|
@ -281,8 +281,6 @@ registered_resources:
|
||||||
provider_id: rag-runtime
|
provider_id: rag-runtime
|
||||||
server:
|
server:
|
||||||
port: 8321
|
port: 8321
|
||||||
telemetry:
|
|
||||||
enabled: true
|
|
||||||
vector_stores:
|
vector_stores:
|
||||||
default_provider_id: faiss
|
default_provider_id: faiss
|
||||||
default_embedding_model:
|
default_embedding_model:
|
||||||
|
|
|
||||||
|
|
@ -272,8 +272,6 @@ registered_resources:
|
||||||
provider_id: rag-runtime
|
provider_id: rag-runtime
|
||||||
server:
|
server:
|
||||||
port: 8321
|
port: 8321
|
||||||
telemetry:
|
|
||||||
enabled: true
|
|
||||||
vector_stores:
|
vector_stores:
|
||||||
default_provider_id: faiss
|
default_provider_id: faiss
|
||||||
default_embedding_model:
|
default_embedding_model:
|
||||||
|
|
|
||||||
|
|
@ -140,5 +140,3 @@ registered_resources:
|
||||||
provider_id: rag-runtime
|
provider_id: rag-runtime
|
||||||
server:
|
server:
|
||||||
port: 8321
|
port: 8321
|
||||||
telemetry:
|
|
||||||
enabled: true
|
|
||||||
|
|
|
||||||
|
|
@ -131,5 +131,3 @@ registered_resources:
|
||||||
provider_id: rag-runtime
|
provider_id: rag-runtime
|
||||||
server:
|
server:
|
||||||
port: 8321
|
port: 8321
|
||||||
telemetry:
|
|
||||||
enabled: true
|
|
||||||
|
|
|
||||||
|
|
@ -153,5 +153,3 @@ registered_resources:
|
||||||
provider_id: rag-runtime
|
provider_id: rag-runtime
|
||||||
server:
|
server:
|
||||||
port: 8321
|
port: 8321
|
||||||
telemetry:
|
|
||||||
enabled: true
|
|
||||||
|
|
|
||||||
|
|
@ -138,5 +138,3 @@ registered_resources:
|
||||||
provider_id: rag-runtime
|
provider_id: rag-runtime
|
||||||
server:
|
server:
|
||||||
port: 8321
|
port: 8321
|
||||||
telemetry:
|
|
||||||
enabled: true
|
|
||||||
|
|
|
||||||
|
|
@ -135,5 +135,3 @@ registered_resources:
|
||||||
provider_id: rag-runtime
|
provider_id: rag-runtime
|
||||||
server:
|
server:
|
||||||
port: 8321
|
port: 8321
|
||||||
telemetry:
|
|
||||||
enabled: true
|
|
||||||
|
|
|
||||||
|
|
@ -114,5 +114,3 @@ registered_resources:
|
||||||
provider_id: rag-runtime
|
provider_id: rag-runtime
|
||||||
server:
|
server:
|
||||||
port: 8321
|
port: 8321
|
||||||
telemetry:
|
|
||||||
enabled: true
|
|
||||||
|
|
|
||||||
|
|
@ -132,5 +132,3 @@ registered_resources:
|
||||||
provider_id: tavily-search
|
provider_id: tavily-search
|
||||||
server:
|
server:
|
||||||
port: 8321
|
port: 8321
|
||||||
telemetry:
|
|
||||||
enabled: true
|
|
||||||
|
|
|
||||||
|
|
@ -251,5 +251,3 @@ registered_resources:
|
||||||
provider_id: rag-runtime
|
provider_id: rag-runtime
|
||||||
server:
|
server:
|
||||||
port: 8321
|
port: 8321
|
||||||
telemetry:
|
|
||||||
enabled: true
|
|
||||||
|
|
|
||||||
|
|
@ -114,5 +114,3 @@ registered_resources:
|
||||||
provider_id: rag-runtime
|
provider_id: rag-runtime
|
||||||
server:
|
server:
|
||||||
port: 8321
|
port: 8321
|
||||||
telemetry:
|
|
||||||
enabled: true
|
|
||||||
|
|
|
||||||
|
|
@ -284,8 +284,6 @@ registered_resources:
|
||||||
provider_id: rag-runtime
|
provider_id: rag-runtime
|
||||||
server:
|
server:
|
||||||
port: 8321
|
port: 8321
|
||||||
telemetry:
|
|
||||||
enabled: true
|
|
||||||
vector_stores:
|
vector_stores:
|
||||||
default_provider_id: faiss
|
default_provider_id: faiss
|
||||||
default_embedding_model:
|
default_embedding_model:
|
||||||
|
|
|
||||||
|
|
@ -275,8 +275,6 @@ registered_resources:
|
||||||
provider_id: rag-runtime
|
provider_id: rag-runtime
|
||||||
server:
|
server:
|
||||||
port: 8321
|
port: 8321
|
||||||
telemetry:
|
|
||||||
enabled: true
|
|
||||||
vector_stores:
|
vector_stores:
|
||||||
default_provider_id: faiss
|
default_provider_id: faiss
|
||||||
default_embedding_model:
|
default_embedding_model:
|
||||||
|
|
|
||||||
|
|
@ -281,8 +281,6 @@ registered_resources:
|
||||||
provider_id: rag-runtime
|
provider_id: rag-runtime
|
||||||
server:
|
server:
|
||||||
port: 8321
|
port: 8321
|
||||||
telemetry:
|
|
||||||
enabled: true
|
|
||||||
vector_stores:
|
vector_stores:
|
||||||
default_provider_id: faiss
|
default_provider_id: faiss
|
||||||
default_embedding_model:
|
default_embedding_model:
|
||||||
|
|
|
||||||
|
|
@ -272,8 +272,6 @@ registered_resources:
|
||||||
provider_id: rag-runtime
|
provider_id: rag-runtime
|
||||||
server:
|
server:
|
||||||
port: 8321
|
port: 8321
|
||||||
telemetry:
|
|
||||||
enabled: true
|
|
||||||
vector_stores:
|
vector_stores:
|
||||||
default_provider_id: faiss
|
default_provider_id: faiss
|
||||||
default_embedding_model:
|
default_embedding_model:
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,6 @@ from llama_stack.core.datatypes import (
|
||||||
Provider,
|
Provider,
|
||||||
SafetyConfig,
|
SafetyConfig,
|
||||||
ShieldInput,
|
ShieldInput,
|
||||||
TelemetryConfig,
|
|
||||||
ToolGroupInput,
|
ToolGroupInput,
|
||||||
VectorStoresConfig,
|
VectorStoresConfig,
|
||||||
)
|
)
|
||||||
|
|
@ -189,7 +188,6 @@ class RunConfigSettings(BaseModel):
|
||||||
default_benchmarks: list[BenchmarkInput] | None = None
|
default_benchmarks: list[BenchmarkInput] | None = None
|
||||||
vector_stores_config: VectorStoresConfig | None = None
|
vector_stores_config: VectorStoresConfig | None = None
|
||||||
safety_config: SafetyConfig | None = None
|
safety_config: SafetyConfig | None = None
|
||||||
telemetry: TelemetryConfig = Field(default_factory=lambda: TelemetryConfig(enabled=True))
|
|
||||||
storage_backends: dict[str, Any] | None = None
|
storage_backends: dict[str, Any] | None = None
|
||||||
storage_stores: dict[str, Any] | None = None
|
storage_stores: dict[str, Any] | None = None
|
||||||
|
|
||||||
|
|
@ -289,7 +287,6 @@ class RunConfigSettings(BaseModel):
|
||||||
"server": {
|
"server": {
|
||||||
"port": 8321,
|
"port": 8321,
|
||||||
},
|
},
|
||||||
"telemetry": self.telemetry.model_dump(exclude_none=True) if self.telemetry else None,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if self.vector_stores_config:
|
if self.vector_stores_config:
|
||||||
|
|
|
||||||
|
|
@ -132,5 +132,3 @@ registered_resources:
|
||||||
provider_id: rag-runtime
|
provider_id: rag-runtime
|
||||||
server:
|
server:
|
||||||
port: 8321
|
port: 8321
|
||||||
telemetry:
|
|
||||||
enabled: true
|
|
||||||
|
|
|
||||||
|
|
@ -37,7 +37,6 @@ CATEGORIES = [
|
||||||
"eval",
|
"eval",
|
||||||
"tools",
|
"tools",
|
||||||
"client",
|
"client",
|
||||||
"telemetry",
|
|
||||||
"openai",
|
"openai",
|
||||||
"openai_responses",
|
"openai_responses",
|
||||||
"openai_conversations",
|
"openai_conversations",
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,6 @@ async def get_provider_impl(
|
||||||
config: MetaReferenceAgentsImplConfig,
|
config: MetaReferenceAgentsImplConfig,
|
||||||
deps: dict[Api, Any],
|
deps: dict[Api, Any],
|
||||||
policy: list[AccessRule],
|
policy: list[AccessRule],
|
||||||
telemetry_enabled: bool = False,
|
|
||||||
):
|
):
|
||||||
from .agents import MetaReferenceAgentsImpl
|
from .agents import MetaReferenceAgentsImpl
|
||||||
|
|
||||||
|
|
@ -29,7 +28,6 @@ async def get_provider_impl(
|
||||||
deps[Api.conversations],
|
deps[Api.conversations],
|
||||||
deps[Api.prompts],
|
deps[Api.prompts],
|
||||||
deps[Api.files],
|
deps[Api.files],
|
||||||
telemetry_enabled,
|
|
||||||
policy,
|
policy,
|
||||||
)
|
)
|
||||||
await impl.initialize()
|
await impl.initialize()
|
||||||
|
|
|
||||||
|
|
@ -50,7 +50,6 @@ class MetaReferenceAgentsImpl(Agents):
|
||||||
prompts_api: Prompts,
|
prompts_api: Prompts,
|
||||||
files_api: Files,
|
files_api: Files,
|
||||||
policy: list[AccessRule],
|
policy: list[AccessRule],
|
||||||
telemetry_enabled: bool = False,
|
|
||||||
):
|
):
|
||||||
self.config = config
|
self.config = config
|
||||||
self.inference_api = inference_api
|
self.inference_api = inference_api
|
||||||
|
|
@ -59,7 +58,6 @@ class MetaReferenceAgentsImpl(Agents):
|
||||||
self.tool_runtime_api = tool_runtime_api
|
self.tool_runtime_api = tool_runtime_api
|
||||||
self.tool_groups_api = tool_groups_api
|
self.tool_groups_api = tool_groups_api
|
||||||
self.conversations_api = conversations_api
|
self.conversations_api = conversations_api
|
||||||
self.telemetry_enabled = telemetry_enabled
|
|
||||||
self.prompts_api = prompts_api
|
self.prompts_api = prompts_api
|
||||||
self.files_api = files_api
|
self.files_api = files_api
|
||||||
self.in_memory_store = InmemoryKVStoreImpl()
|
self.in_memory_store = InmemoryKVStoreImpl()
|
||||||
|
|
@ -111,6 +109,7 @@ class MetaReferenceAgentsImpl(Agents):
|
||||||
max_infer_iters: int | None = 10,
|
max_infer_iters: int | None = 10,
|
||||||
guardrails: list[ResponseGuardrail] | None = None,
|
guardrails: list[ResponseGuardrail] | None = None,
|
||||||
max_tool_calls: int | None = None,
|
max_tool_calls: int | None = None,
|
||||||
|
metadata: dict[str, str] | None = None,
|
||||||
) -> OpenAIResponseObject:
|
) -> OpenAIResponseObject:
|
||||||
assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
|
assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
|
||||||
result = await self.openai_responses_impl.create_openai_response(
|
result = await self.openai_responses_impl.create_openai_response(
|
||||||
|
|
@ -130,6 +129,7 @@ class MetaReferenceAgentsImpl(Agents):
|
||||||
guardrails,
|
guardrails,
|
||||||
parallel_tool_calls,
|
parallel_tool_calls,
|
||||||
max_tool_calls,
|
max_tool_calls,
|
||||||
|
metadata,
|
||||||
)
|
)
|
||||||
return result # type: ignore[no-any-return]
|
return result # type: ignore[no-any-return]
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -336,6 +336,7 @@ class OpenAIResponsesImpl:
|
||||||
guardrails: list[str | ResponseGuardrailSpec] | None = None,
|
guardrails: list[str | ResponseGuardrailSpec] | None = None,
|
||||||
parallel_tool_calls: bool | None = None,
|
parallel_tool_calls: bool | None = None,
|
||||||
max_tool_calls: int | None = None,
|
max_tool_calls: int | None = None,
|
||||||
|
metadata: dict[str, str] | None = None,
|
||||||
):
|
):
|
||||||
stream = bool(stream)
|
stream = bool(stream)
|
||||||
text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) if text is None else text
|
text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) if text is None else text
|
||||||
|
|
@ -390,6 +391,7 @@ class OpenAIResponsesImpl:
|
||||||
guardrail_ids=guardrail_ids,
|
guardrail_ids=guardrail_ids,
|
||||||
parallel_tool_calls=parallel_tool_calls,
|
parallel_tool_calls=parallel_tool_calls,
|
||||||
max_tool_calls=max_tool_calls,
|
max_tool_calls=max_tool_calls,
|
||||||
|
metadata=metadata,
|
||||||
)
|
)
|
||||||
|
|
||||||
if stream:
|
if stream:
|
||||||
|
|
@ -442,6 +444,7 @@ class OpenAIResponsesImpl:
|
||||||
guardrail_ids: list[str] | None = None,
|
guardrail_ids: list[str] | None = None,
|
||||||
parallel_tool_calls: bool | None = True,
|
parallel_tool_calls: bool | None = True,
|
||||||
max_tool_calls: int | None = None,
|
max_tool_calls: int | None = None,
|
||||||
|
metadata: dict[str, str] | None = None,
|
||||||
) -> AsyncIterator[OpenAIResponseObjectStream]:
|
) -> AsyncIterator[OpenAIResponseObjectStream]:
|
||||||
# These should never be None when called from create_openai_response (which sets defaults)
|
# These should never be None when called from create_openai_response (which sets defaults)
|
||||||
# but we assert here to help mypy understand the types
|
# but we assert here to help mypy understand the types
|
||||||
|
|
@ -490,6 +493,7 @@ class OpenAIResponsesImpl:
|
||||||
guardrail_ids=guardrail_ids,
|
guardrail_ids=guardrail_ids,
|
||||||
instructions=instructions,
|
instructions=instructions,
|
||||||
max_tool_calls=max_tool_calls,
|
max_tool_calls=max_tool_calls,
|
||||||
|
metadata=metadata,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Stream the response
|
# Stream the response
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,8 @@ import uuid
|
||||||
from collections.abc import AsyncIterator
|
from collections.abc import AsyncIterator
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from llama_stack.core.telemetry import tracing
|
from opentelemetry import trace
|
||||||
|
|
||||||
from llama_stack.log import get_logger
|
from llama_stack.log import get_logger
|
||||||
from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
|
from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
|
||||||
from llama_stack_api import (
|
from llama_stack_api import (
|
||||||
|
|
@ -79,6 +80,7 @@ from .utils import (
|
||||||
)
|
)
|
||||||
|
|
||||||
logger = get_logger(name=__name__, category="agents::meta_reference")
|
logger = get_logger(name=__name__, category="agents::meta_reference")
|
||||||
|
tracer = trace.get_tracer(__name__)
|
||||||
|
|
||||||
|
|
||||||
def convert_tooldef_to_chat_tool(tool_def):
|
def convert_tooldef_to_chat_tool(tool_def):
|
||||||
|
|
@ -118,6 +120,7 @@ class StreamingResponseOrchestrator:
|
||||||
prompt: OpenAIResponsePrompt | None = None,
|
prompt: OpenAIResponsePrompt | None = None,
|
||||||
parallel_tool_calls: bool | None = None,
|
parallel_tool_calls: bool | None = None,
|
||||||
max_tool_calls: int | None = None,
|
max_tool_calls: int | None = None,
|
||||||
|
metadata: dict[str, str] | None = None,
|
||||||
):
|
):
|
||||||
self.inference_api = inference_api
|
self.inference_api = inference_api
|
||||||
self.ctx = ctx
|
self.ctx = ctx
|
||||||
|
|
@ -135,6 +138,7 @@ class StreamingResponseOrchestrator:
|
||||||
self.parallel_tool_calls = parallel_tool_calls
|
self.parallel_tool_calls = parallel_tool_calls
|
||||||
# Max number of total calls to built-in tools that can be processed in a response
|
# Max number of total calls to built-in tools that can be processed in a response
|
||||||
self.max_tool_calls = max_tool_calls
|
self.max_tool_calls = max_tool_calls
|
||||||
|
self.metadata = metadata
|
||||||
self.sequence_number = 0
|
self.sequence_number = 0
|
||||||
# Store MCP tool mapping that gets built during tool processing
|
# Store MCP tool mapping that gets built during tool processing
|
||||||
self.mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] = (
|
self.mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] = (
|
||||||
|
|
@ -162,6 +166,7 @@ class StreamingResponseOrchestrator:
|
||||||
model=self.ctx.model,
|
model=self.ctx.model,
|
||||||
status="completed",
|
status="completed",
|
||||||
output=[OpenAIResponseMessage(role="assistant", content=[refusal_content], type="message")],
|
output=[OpenAIResponseMessage(role="assistant", content=[refusal_content], type="message")],
|
||||||
|
metadata=self.metadata,
|
||||||
)
|
)
|
||||||
|
|
||||||
return OpenAIResponseObjectStreamResponseCompleted(response=refusal_response)
|
return OpenAIResponseObjectStreamResponseCompleted(response=refusal_response)
|
||||||
|
|
@ -197,6 +202,7 @@ class StreamingResponseOrchestrator:
|
||||||
prompt=self.prompt,
|
prompt=self.prompt,
|
||||||
parallel_tool_calls=self.parallel_tool_calls,
|
parallel_tool_calls=self.parallel_tool_calls,
|
||||||
max_tool_calls=self.max_tool_calls,
|
max_tool_calls=self.max_tool_calls,
|
||||||
|
metadata=self.metadata,
|
||||||
)
|
)
|
||||||
|
|
||||||
async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:
|
async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:
|
||||||
|
|
@ -1106,8 +1112,10 @@ class StreamingResponseOrchestrator:
|
||||||
"server_url": mcp_tool.server_url,
|
"server_url": mcp_tool.server_url,
|
||||||
"mcp_list_tools_id": list_id,
|
"mcp_list_tools_id": list_id,
|
||||||
}
|
}
|
||||||
# List MCP tools with authorization from tool config
|
|
||||||
async with tracing.span("list_mcp_tools", attributes):
|
# TODO: follow semantic conventions for Open Telemetry tool spans
|
||||||
|
# https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#execute-tool-span
|
||||||
|
with tracer.start_as_current_span("list_mcp_tools", attributes=attributes):
|
||||||
tool_defs = await list_mcp_tools(
|
tool_defs = await list_mcp_tools(
|
||||||
endpoint=mcp_tool.server_url,
|
endpoint=mcp_tool.server_url,
|
||||||
headers=mcp_tool.headers,
|
headers=mcp_tool.headers,
|
||||||
|
|
@ -1183,9 +1191,9 @@ class StreamingResponseOrchestrator:
|
||||||
if mcp_server.require_approval == "never":
|
if mcp_server.require_approval == "never":
|
||||||
return False
|
return False
|
||||||
if isinstance(mcp_server, ApprovalFilter):
|
if isinstance(mcp_server, ApprovalFilter):
|
||||||
if tool_name in mcp_server.always:
|
if mcp_server.always and tool_name in mcp_server.always:
|
||||||
return True
|
return True
|
||||||
if tool_name in mcp_server.never:
|
if mcp_server.never and tool_name in mcp_server.never:
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,8 @@ import json
|
||||||
from collections.abc import AsyncIterator
|
from collections.abc import AsyncIterator
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from llama_stack.core.telemetry import tracing
|
from opentelemetry import trace
|
||||||
|
|
||||||
from llama_stack.log import get_logger
|
from llama_stack.log import get_logger
|
||||||
from llama_stack_api import (
|
from llama_stack_api import (
|
||||||
ImageContentItem,
|
ImageContentItem,
|
||||||
|
|
@ -42,6 +43,7 @@ from llama_stack_api import (
|
||||||
from .types import ChatCompletionContext, ToolExecutionResult
|
from .types import ChatCompletionContext, ToolExecutionResult
|
||||||
|
|
||||||
logger = get_logger(name=__name__, category="agents::meta_reference")
|
logger = get_logger(name=__name__, category="agents::meta_reference")
|
||||||
|
tracer = trace.get_tracer(__name__)
|
||||||
|
|
||||||
|
|
||||||
class ToolExecutor:
|
class ToolExecutor:
|
||||||
|
|
@ -296,8 +298,9 @@ class ToolExecutor:
|
||||||
"server_url": mcp_tool.server_url,
|
"server_url": mcp_tool.server_url,
|
||||||
"tool_name": function_name,
|
"tool_name": function_name,
|
||||||
}
|
}
|
||||||
# Invoke MCP tool with authorization from tool config
|
# TODO: follow semantic conventions for Open Telemetry tool spans
|
||||||
async with tracing.span("invoke_mcp_tool", attributes):
|
# https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#execute-tool-span
|
||||||
|
with tracer.start_as_current_span("invoke_mcp_tool", attributes=attributes):
|
||||||
result = await invoke_mcp_tool(
|
result = await invoke_mcp_tool(
|
||||||
endpoint=mcp_tool.server_url,
|
endpoint=mcp_tool.server_url,
|
||||||
tool_name=function_name,
|
tool_name=function_name,
|
||||||
|
|
@ -318,7 +321,7 @@ class ToolExecutor:
|
||||||
# Use vector_stores.search API instead of knowledge_search tool
|
# Use vector_stores.search API instead of knowledge_search tool
|
||||||
# to support filters and ranking_options
|
# to support filters and ranking_options
|
||||||
query = tool_kwargs.get("query", "")
|
query = tool_kwargs.get("query", "")
|
||||||
async with tracing.span("knowledge_search", {}):
|
with tracer.start_as_current_span("knowledge_search"):
|
||||||
result = await self._execute_knowledge_search_via_vector_store(
|
result = await self._execute_knowledge_search_via_vector_store(
|
||||||
query=query,
|
query=query,
|
||||||
response_file_search_tool=response_file_search_tool,
|
response_file_search_tool=response_file_search_tool,
|
||||||
|
|
@ -327,7 +330,9 @@ class ToolExecutor:
|
||||||
attributes = {
|
attributes = {
|
||||||
"tool_name": function_name,
|
"tool_name": function_name,
|
||||||
}
|
}
|
||||||
async with tracing.span("invoke_tool", attributes):
|
# TODO: follow semantic conventions for Open Telemetry tool spans
|
||||||
|
# https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#execute-tool-span
|
||||||
|
with tracer.start_as_current_span("invoke_tool", attributes=attributes):
|
||||||
result = await self.tool_runtime_api.invoke_tool(
|
result = await self.tool_runtime_api.invoke_tool(
|
||||||
tool_name=function_name,
|
tool_name=function_name,
|
||||||
kwargs=tool_kwargs,
|
kwargs=tool_kwargs,
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,6 @@
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
|
|
||||||
from llama_stack.core.telemetry import tracing
|
|
||||||
from llama_stack.log import get_logger
|
from llama_stack.log import get_logger
|
||||||
from llama_stack_api import OpenAIMessageParam, Safety, SafetyViolation, ViolationLevel
|
from llama_stack_api import OpenAIMessageParam, Safety, SafetyViolation, ViolationLevel
|
||||||
|
|
||||||
|
|
@ -31,15 +30,12 @@ class ShieldRunnerMixin:
|
||||||
self.output_shields = output_shields
|
self.output_shields = output_shields
|
||||||
|
|
||||||
async def run_multiple_shields(self, messages: list[OpenAIMessageParam], identifiers: list[str]) -> None:
|
async def run_multiple_shields(self, messages: list[OpenAIMessageParam], identifiers: list[str]) -> None:
|
||||||
async def run_shield_with_span(identifier: str):
|
responses = await asyncio.gather(
|
||||||
async with tracing.span(f"run_shield_{identifier}"):
|
*[
|
||||||
return await self.safety_api.run_shield(
|
self.safety_api.run_shield(shield_id=identifier, messages=messages, params={})
|
||||||
shield_id=identifier,
|
for identifier in identifiers
|
||||||
messages=messages,
|
]
|
||||||
params={},
|
)
|
||||||
)
|
|
||||||
|
|
||||||
responses = await asyncio.gather(*[run_shield_with_span(identifier) for identifier in identifiers])
|
|
||||||
for identifier, response in zip(identifiers, responses, strict=False):
|
for identifier, response in zip(identifiers, responses, strict=False):
|
||||||
if not response.violation:
|
if not response.violation:
|
||||||
continue
|
continue
|
||||||
|
|
|
||||||
|
|
@ -4,8 +4,6 @@
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
# the root directory of this source tree.
|
# the root directory of this source tree.
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import uuid
|
import uuid
|
||||||
from datetime import UTC, datetime
|
from datetime import UTC, datetime
|
||||||
from typing import TYPE_CHECKING, Annotated, Any, cast
|
from typing import TYPE_CHECKING, Annotated, Any, cast
|
||||||
|
|
@ -39,7 +37,7 @@ from .config import S3FilesImplConfig
|
||||||
# TODO: provider data for S3 credentials
|
# TODO: provider data for S3 credentials
|
||||||
|
|
||||||
|
|
||||||
def _create_s3_client(config: S3FilesImplConfig) -> S3Client:
|
def _create_s3_client(config: S3FilesImplConfig) -> "S3Client":
|
||||||
try:
|
try:
|
||||||
s3_config = {
|
s3_config = {
|
||||||
"region_name": config.region,
|
"region_name": config.region,
|
||||||
|
|
@ -66,7 +64,7 @@ def _create_s3_client(config: S3FilesImplConfig) -> S3Client:
|
||||||
raise RuntimeError(f"Failed to initialize S3 client: {e}") from e
|
raise RuntimeError(f"Failed to initialize S3 client: {e}") from e
|
||||||
|
|
||||||
|
|
||||||
async def _create_bucket_if_not_exists(client: S3Client, config: S3FilesImplConfig) -> None:
|
async def _create_bucket_if_not_exists(client: "S3Client", config: S3FilesImplConfig) -> None:
|
||||||
try:
|
try:
|
||||||
client.head_bucket(Bucket=config.bucket_name)
|
client.head_bucket(Bucket=config.bucket_name)
|
||||||
except ClientError as e:
|
except ClientError as e:
|
||||||
|
|
@ -192,7 +190,7 @@ class S3FilesImpl(Files):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def client(self) -> S3Client:
|
def client(self) -> "S3Client":
|
||||||
assert self._client is not None, "Provider not initialized"
|
assert self._client is not None, "Provider not initialized"
|
||||||
return self._client
|
return self._client
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,6 @@ from collections.abc import AsyncIterator, Iterable
|
||||||
|
|
||||||
from openai import AuthenticationError
|
from openai import AuthenticationError
|
||||||
|
|
||||||
from llama_stack.core.telemetry.tracing import get_current_span
|
|
||||||
from llama_stack.log import get_logger
|
from llama_stack.log import get_logger
|
||||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||||
from llama_stack_api import (
|
from llama_stack_api import (
|
||||||
|
|
@ -84,7 +83,7 @@ class BedrockInferenceAdapter(OpenAIMixin):
|
||||||
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
|
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
|
||||||
"""Override to enable streaming usage metrics and handle authentication errors."""
|
"""Override to enable streaming usage metrics and handle authentication errors."""
|
||||||
# Enable streaming usage metrics when telemetry is active
|
# Enable streaming usage metrics when telemetry is active
|
||||||
if params.stream and get_current_span() is not None:
|
if params.stream:
|
||||||
if params.stream_options is None:
|
if params.stream_options is None:
|
||||||
params.stream_options = {"include_usage": True}
|
params.stream_options = {"include_usage": True}
|
||||||
elif "include_usage" not in params.stream_options:
|
elif "include_usage" not in params.stream_options:
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,6 @@ from typing import Any
|
||||||
import litellm
|
import litellm
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from llama_stack.core.telemetry.tracing import get_current_span
|
|
||||||
from llama_stack.log import get_logger
|
from llama_stack.log import get_logger
|
||||||
from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig
|
from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig
|
||||||
from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
|
from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
|
||||||
|
|
@ -59,7 +58,7 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
|
||||||
|
|
||||||
# Add usage tracking for streaming when telemetry is active
|
# Add usage tracking for streaming when telemetry is active
|
||||||
stream_options = params.stream_options
|
stream_options = params.stream_options
|
||||||
if params.stream and get_current_span() is not None:
|
if params.stream:
|
||||||
if stream_options is None:
|
if stream_options is None:
|
||||||
stream_options = {"include_usage": True}
|
stream_options = {"include_usage": True}
|
||||||
elif "include_usage" not in stream_options:
|
elif "include_usage" not in stream_options:
|
||||||
|
|
|
||||||
|
|
@ -217,10 +217,9 @@ class LiteLLMOpenAIMixin(
|
||||||
params: OpenAIChatCompletionRequestWithExtraBody,
|
params: OpenAIChatCompletionRequestWithExtraBody,
|
||||||
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
|
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
|
||||||
# Add usage tracking for streaming when telemetry is active
|
# Add usage tracking for streaming when telemetry is active
|
||||||
from llama_stack.core.telemetry.tracing import get_current_span
|
|
||||||
|
|
||||||
stream_options = params.stream_options
|
stream_options = params.stream_options
|
||||||
if params.stream and get_current_span() is not None:
|
if params.stream:
|
||||||
if stream_options is None:
|
if stream_options is None:
|
||||||
stream_options = {"include_usage": True}
|
stream_options = {"include_usage": True}
|
||||||
elif "include_usage" not in stream_options:
|
elif "include_usage" not in stream_options:
|
||||||
|
|
|
||||||
|
|
@ -89,6 +89,7 @@ async def client_wrapper(endpoint: str, headers: dict[str, str]) -> AsyncGenerat
|
||||||
# sse_client and streamablehttp_client have different signatures, but both
|
# sse_client and streamablehttp_client have different signatures, but both
|
||||||
# are called the same way here, so we cast to Any to avoid type errors
|
# are called the same way here, so we cast to Any to avoid type errors
|
||||||
client = cast(Any, sse_client)
|
client = cast(Any, sse_client)
|
||||||
|
|
||||||
async with client(endpoint, headers=headers) as client_streams:
|
async with client(endpoint, headers=headers) as client_streams:
|
||||||
async with ClientSession(read_stream=client_streams[0], write_stream=client_streams[1]) as session:
|
async with ClientSession(read_stream=client_streams[0], write_stream=client_streams[1]) as session:
|
||||||
await session.initialize()
|
await session.initialize()
|
||||||
|
|
|
||||||
5
src/llama_stack/telemetry/__init__.py
Normal file
5
src/llama_stack/telemetry/__init__.py
Normal file
|
|
@ -0,0 +1,5 @@
|
||||||
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
|
# the root directory of this source tree.
|
||||||
27
src/llama_stack/telemetry/constants.py
Normal file
27
src/llama_stack/telemetry/constants.py
Normal file
|
|
@ -0,0 +1,27 @@
|
||||||
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
|
# the root directory of this source tree.
|
||||||
|
|
||||||
|
"""
|
||||||
|
This file contains constants used for naming data captured for telemetry.
|
||||||
|
|
||||||
|
This is used to ensure that the data captured for telemetry is consistent and can be used to
|
||||||
|
identify and correlate data. If custom telemetry data is added to llama stack, please add
|
||||||
|
constants for it here.
|
||||||
|
"""
|
||||||
|
|
||||||
|
llama_stack_prefix = "llama_stack"
|
||||||
|
|
||||||
|
# Safety Attributes
|
||||||
|
RUN_SHIELD_OPERATION_NAME = "run_shield"
|
||||||
|
|
||||||
|
SAFETY_REQUEST_PREFIX = f"{llama_stack_prefix}.safety.request"
|
||||||
|
SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE = f"{SAFETY_REQUEST_PREFIX}.shield_id"
|
||||||
|
SAFETY_REQUEST_MESSAGES_ATTRIBUTE = f"{SAFETY_REQUEST_PREFIX}.messages"
|
||||||
|
|
||||||
|
SAFETY_RESPONSE_PREFIX = f"{llama_stack_prefix}.safety.response"
|
||||||
|
SAFETY_RESPONSE_METADATA_ATTRIBUTE = f"{SAFETY_RESPONSE_PREFIX}.metadata"
|
||||||
|
SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE = f"{SAFETY_RESPONSE_PREFIX}.violation.level"
|
||||||
|
SAFETY_RESPONSE_USER_MESSAGE_ATTRIBUTE = f"{SAFETY_RESPONSE_PREFIX}.violation.user_message"
|
||||||
43
src/llama_stack/telemetry/helpers.py
Normal file
43
src/llama_stack/telemetry/helpers.py
Normal file
|
|
@ -0,0 +1,43 @@
|
||||||
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
|
# the root directory of this source tree.
|
||||||
|
|
||||||
|
import json
|
||||||
|
|
||||||
|
from opentelemetry import trace
|
||||||
|
|
||||||
|
from llama_stack_api import OpenAIMessageParam, RunShieldResponse
|
||||||
|
|
||||||
|
from .constants import (
|
||||||
|
RUN_SHIELD_OPERATION_NAME,
|
||||||
|
SAFETY_REQUEST_MESSAGES_ATTRIBUTE,
|
||||||
|
SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE,
|
||||||
|
SAFETY_RESPONSE_METADATA_ATTRIBUTE,
|
||||||
|
SAFETY_RESPONSE_USER_MESSAGE_ATTRIBUTE,
|
||||||
|
SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def safety_span_name(shield_id: str) -> str:
|
||||||
|
return f"{RUN_SHIELD_OPERATION_NAME} {shield_id}"
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: Consider using Wrapt to automatically instrument code
|
||||||
|
# This is the industry standard way to package automatically instrumentation in python.
|
||||||
|
def safety_request_span_attributes(
|
||||||
|
shield_id: str, messages: list[OpenAIMessageParam], response: RunShieldResponse
|
||||||
|
) -> None:
|
||||||
|
span = trace.get_current_span()
|
||||||
|
span.set_attribute(SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE, shield_id)
|
||||||
|
messages_json = json.dumps([msg.model_dump() for msg in messages])
|
||||||
|
span.set_attribute(SAFETY_REQUEST_MESSAGES_ATTRIBUTE, messages_json)
|
||||||
|
|
||||||
|
if response.violation:
|
||||||
|
if response.violation.metadata:
|
||||||
|
metadata_json = json.dumps(response.violation.metadata)
|
||||||
|
span.set_attribute(SAFETY_RESPONSE_METADATA_ATTRIBUTE, metadata_json)
|
||||||
|
if response.violation.user_message:
|
||||||
|
span.set_attribute(SAFETY_RESPONSE_USER_MESSAGE_ATTRIBUTE, response.violation.user_message)
|
||||||
|
span.set_attribute(SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE, response.violation.violation_level.value)
|
||||||
|
|
@ -89,6 +89,7 @@ class Agents(Protocol):
|
||||||
),
|
),
|
||||||
] = None,
|
] = None,
|
||||||
max_tool_calls: int | None = None,
|
max_tool_calls: int | None = None,
|
||||||
|
metadata: dict[str, str] | None = None,
|
||||||
) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
|
) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
|
||||||
"""Create a model response.
|
"""Create a model response.
|
||||||
|
|
||||||
|
|
@ -100,6 +101,7 @@ class Agents(Protocol):
|
||||||
:param include: (Optional) Additional fields to include in the response.
|
:param include: (Optional) Additional fields to include in the response.
|
||||||
:param guardrails: (Optional) List of guardrails to apply during response generation. Can be guardrail IDs (strings) or guardrail specifications.
|
:param guardrails: (Optional) List of guardrails to apply during response generation. Can be guardrail IDs (strings) or guardrail specifications.
|
||||||
:param max_tool_calls: (Optional) Max number of total calls to built-in tools that can be processed in a response.
|
:param max_tool_calls: (Optional) Max number of total calls to built-in tools that can be processed in a response.
|
||||||
|
:param metadata: (Optional) Dictionary of metadata key-value pairs to attach to the response.
|
||||||
:returns: An OpenAIResponseObject.
|
:returns: An OpenAIResponseObject.
|
||||||
"""
|
"""
|
||||||
...
|
...
|
||||||
|
|
|
||||||
|
|
@ -1,22 +0,0 @@
|
||||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
||||||
# All rights reserved.
|
|
||||||
#
|
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
|
||||||
# the root directory of this source tree.
|
|
||||||
|
|
||||||
|
|
||||||
def telemetry_traceable(cls):
|
|
||||||
"""
|
|
||||||
Mark a protocol for automatic tracing when telemetry is enabled.
|
|
||||||
|
|
||||||
This is a metadata-only decorator with no dependencies on core.
|
|
||||||
Actual tracing is applied by core routers at runtime if telemetry is enabled.
|
|
||||||
|
|
||||||
Usage:
|
|
||||||
@runtime_checkable
|
|
||||||
@telemetry_traceable
|
|
||||||
class MyProtocol(Protocol):
|
|
||||||
...
|
|
||||||
"""
|
|
||||||
cls.__marked_for_tracing__ = True
|
|
||||||
return cls
|
|
||||||
|
|
@ -9,7 +9,6 @@ from typing import Annotated, Literal, Protocol, runtime_checkable
|
||||||
|
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
from llama_stack_api.common.tracing import telemetry_traceable
|
|
||||||
from llama_stack_api.openai_responses import (
|
from llama_stack_api.openai_responses import (
|
||||||
OpenAIResponseInputFunctionToolCallOutput,
|
OpenAIResponseInputFunctionToolCallOutput,
|
||||||
OpenAIResponseMCPApprovalRequest,
|
OpenAIResponseMCPApprovalRequest,
|
||||||
|
|
@ -157,7 +156,6 @@ class ConversationItemDeletedResource(BaseModel):
|
||||||
|
|
||||||
|
|
||||||
@runtime_checkable
|
@runtime_checkable
|
||||||
@telemetry_traceable
|
|
||||||
class Conversations(Protocol):
|
class Conversations(Protocol):
|
||||||
"""Conversations
|
"""Conversations
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,6 @@ from fastapi import File, Form, Response, UploadFile
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
from llama_stack_api.common.responses import Order
|
from llama_stack_api.common.responses import Order
|
||||||
from llama_stack_api.common.tracing import telemetry_traceable
|
|
||||||
from llama_stack_api.schema_utils import json_schema_type, webmethod
|
from llama_stack_api.schema_utils import json_schema_type, webmethod
|
||||||
from llama_stack_api.version import LLAMA_STACK_API_V1
|
from llama_stack_api.version import LLAMA_STACK_API_V1
|
||||||
|
|
||||||
|
|
@ -102,7 +101,6 @@ class OpenAIFileDeleteResponse(BaseModel):
|
||||||
|
|
||||||
|
|
||||||
@runtime_checkable
|
@runtime_checkable
|
||||||
@telemetry_traceable
|
|
||||||
class Files(Protocol):
|
class Files(Protocol):
|
||||||
"""Files
|
"""Files
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -22,7 +22,6 @@ from llama_stack_api.common.content_types import InterleavedContent
|
||||||
from llama_stack_api.common.responses import (
|
from llama_stack_api.common.responses import (
|
||||||
Order,
|
Order,
|
||||||
)
|
)
|
||||||
from llama_stack_api.common.tracing import telemetry_traceable
|
|
||||||
from llama_stack_api.models import Model
|
from llama_stack_api.models import Model
|
||||||
from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
|
from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
|
||||||
from llama_stack_api.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
|
from llama_stack_api.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
|
||||||
|
|
@ -989,7 +988,6 @@ class OpenAIEmbeddingsRequestWithExtraBody(BaseModel, extra="allow"):
|
||||||
|
|
||||||
|
|
||||||
@runtime_checkable
|
@runtime_checkable
|
||||||
@telemetry_traceable
|
|
||||||
class InferenceProvider(Protocol):
|
class InferenceProvider(Protocol):
|
||||||
"""
|
"""
|
||||||
This protocol defines the interface that should be implemented by all inference providers.
|
This protocol defines the interface that should be implemented by all inference providers.
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,6 @@ from typing import Any, Literal, Protocol, runtime_checkable
|
||||||
|
|
||||||
from pydantic import BaseModel, ConfigDict, Field, field_validator
|
from pydantic import BaseModel, ConfigDict, Field, field_validator
|
||||||
|
|
||||||
from llama_stack_api.common.tracing import telemetry_traceable
|
|
||||||
from llama_stack_api.resource import Resource, ResourceType
|
from llama_stack_api.resource import Resource, ResourceType
|
||||||
from llama_stack_api.schema_utils import json_schema_type, webmethod
|
from llama_stack_api.schema_utils import json_schema_type, webmethod
|
||||||
from llama_stack_api.version import LLAMA_STACK_API_V1
|
from llama_stack_api.version import LLAMA_STACK_API_V1
|
||||||
|
|
@ -106,7 +105,6 @@ class OpenAIListModelsResponse(BaseModel):
|
||||||
|
|
||||||
|
|
||||||
@runtime_checkable
|
@runtime_checkable
|
||||||
@telemetry_traceable
|
|
||||||
class Models(Protocol):
|
class Models(Protocol):
|
||||||
async def list_models(self) -> ListModelsResponse:
|
async def list_models(self) -> ListModelsResponse:
|
||||||
"""List all models.
|
"""List all models.
|
||||||
|
|
|
||||||
|
|
@ -597,6 +597,7 @@ class OpenAIResponseObject(BaseModel):
|
||||||
:param usage: (Optional) Token usage information for the response
|
:param usage: (Optional) Token usage information for the response
|
||||||
:param instructions: (Optional) System message inserted into the model's context
|
:param instructions: (Optional) System message inserted into the model's context
|
||||||
:param max_tool_calls: (Optional) Max number of total calls to built-in tools that can be processed in a response
|
:param max_tool_calls: (Optional) Max number of total calls to built-in tools that can be processed in a response
|
||||||
|
:param metadata: (Optional) Dictionary of metadata key-value pairs
|
||||||
"""
|
"""
|
||||||
|
|
||||||
created_at: int
|
created_at: int
|
||||||
|
|
@ -619,6 +620,7 @@ class OpenAIResponseObject(BaseModel):
|
||||||
usage: OpenAIResponseUsage | None = None
|
usage: OpenAIResponseUsage | None = None
|
||||||
instructions: str | None = None
|
instructions: str | None = None
|
||||||
max_tool_calls: int | None = None
|
max_tool_calls: int | None = None
|
||||||
|
metadata: dict[str, str] | None = None
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,6 @@ from typing import Protocol, runtime_checkable
|
||||||
|
|
||||||
from pydantic import BaseModel, Field, field_validator, model_validator
|
from pydantic import BaseModel, Field, field_validator, model_validator
|
||||||
|
|
||||||
from llama_stack_api.common.tracing import telemetry_traceable
|
|
||||||
from llama_stack_api.schema_utils import json_schema_type, webmethod
|
from llama_stack_api.schema_utils import json_schema_type, webmethod
|
||||||
from llama_stack_api.version import LLAMA_STACK_API_V1
|
from llama_stack_api.version import LLAMA_STACK_API_V1
|
||||||
|
|
||||||
|
|
@ -93,7 +92,6 @@ class ListPromptsResponse(BaseModel):
|
||||||
|
|
||||||
|
|
||||||
@runtime_checkable
|
@runtime_checkable
|
||||||
@telemetry_traceable
|
|
||||||
class Prompts(Protocol):
|
class Prompts(Protocol):
|
||||||
"""Prompts
|
"""Prompts
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,6 @@ from typing import Any, Protocol, runtime_checkable
|
||||||
|
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
from llama_stack_api.common.tracing import telemetry_traceable
|
|
||||||
from llama_stack_api.inference import OpenAIMessageParam
|
from llama_stack_api.inference import OpenAIMessageParam
|
||||||
from llama_stack_api.schema_utils import json_schema_type, webmethod
|
from llama_stack_api.schema_utils import json_schema_type, webmethod
|
||||||
from llama_stack_api.shields import Shield
|
from llama_stack_api.shields import Shield
|
||||||
|
|
@ -94,7 +93,6 @@ class ShieldStore(Protocol):
|
||||||
|
|
||||||
|
|
||||||
@runtime_checkable
|
@runtime_checkable
|
||||||
@telemetry_traceable
|
|
||||||
class Safety(Protocol):
|
class Safety(Protocol):
|
||||||
"""Safety
|
"""Safety
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,6 @@ from typing import Any, Literal, Protocol, runtime_checkable
|
||||||
|
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from llama_stack_api.common.tracing import telemetry_traceable
|
|
||||||
from llama_stack_api.resource import Resource, ResourceType
|
from llama_stack_api.resource import Resource, ResourceType
|
||||||
from llama_stack_api.schema_utils import json_schema_type, webmethod
|
from llama_stack_api.schema_utils import json_schema_type, webmethod
|
||||||
from llama_stack_api.version import LLAMA_STACK_API_V1
|
from llama_stack_api.version import LLAMA_STACK_API_V1
|
||||||
|
|
@ -49,7 +48,6 @@ class ListShieldsResponse(BaseModel):
|
||||||
|
|
||||||
|
|
||||||
@runtime_checkable
|
@runtime_checkable
|
||||||
@telemetry_traceable
|
|
||||||
class Shields(Protocol):
|
class Shields(Protocol):
|
||||||
@webmethod(route="/shields", method="GET", level=LLAMA_STACK_API_V1)
|
@webmethod(route="/shields", method="GET", level=LLAMA_STACK_API_V1)
|
||||||
async def list_shields(self) -> ListShieldsResponse:
|
async def list_shields(self) -> ListShieldsResponse:
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,6 @@ from pydantic import BaseModel
|
||||||
from typing_extensions import runtime_checkable
|
from typing_extensions import runtime_checkable
|
||||||
|
|
||||||
from llama_stack_api.common.content_types import URL, InterleavedContent
|
from llama_stack_api.common.content_types import URL, InterleavedContent
|
||||||
from llama_stack_api.common.tracing import telemetry_traceable
|
|
||||||
from llama_stack_api.resource import Resource, ResourceType
|
from llama_stack_api.resource import Resource, ResourceType
|
||||||
from llama_stack_api.schema_utils import json_schema_type, webmethod
|
from llama_stack_api.schema_utils import json_schema_type, webmethod
|
||||||
from llama_stack_api.version import LLAMA_STACK_API_V1
|
from llama_stack_api.version import LLAMA_STACK_API_V1
|
||||||
|
|
@ -109,7 +108,6 @@ class ListToolDefsResponse(BaseModel):
|
||||||
|
|
||||||
|
|
||||||
@runtime_checkable
|
@runtime_checkable
|
||||||
@telemetry_traceable
|
|
||||||
class ToolGroups(Protocol):
|
class ToolGroups(Protocol):
|
||||||
@webmethod(route="/toolgroups", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
@webmethod(route="/toolgroups", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||||
async def register_tool_group(
|
async def register_tool_group(
|
||||||
|
|
@ -128,7 +126,7 @@ class ToolGroups(Protocol):
|
||||||
"""
|
"""
|
||||||
...
|
...
|
||||||
|
|
||||||
@webmethod(route="/toolgroups/{toolgroup_id:path}", method="GET", level=LLAMA_STACK_API_V1)
|
@webmethod(route="/toolgroups/{toolgroup_id:path}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||||
async def get_tool_group(
|
async def get_tool_group(
|
||||||
self,
|
self,
|
||||||
toolgroup_id: str,
|
toolgroup_id: str,
|
||||||
|
|
@ -140,7 +138,7 @@ class ToolGroups(Protocol):
|
||||||
"""
|
"""
|
||||||
...
|
...
|
||||||
|
|
||||||
@webmethod(route="/toolgroups", method="GET", level=LLAMA_STACK_API_V1)
|
@webmethod(route="/toolgroups", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||||
async def list_tool_groups(self) -> ListToolGroupsResponse:
|
async def list_tool_groups(self) -> ListToolGroupsResponse:
|
||||||
"""List tool groups with optional provider.
|
"""List tool groups with optional provider.
|
||||||
|
|
||||||
|
|
@ -148,7 +146,7 @@ class ToolGroups(Protocol):
|
||||||
"""
|
"""
|
||||||
...
|
...
|
||||||
|
|
||||||
@webmethod(route="/tools", method="GET", level=LLAMA_STACK_API_V1)
|
@webmethod(route="/tools", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||||
async def list_tools(self, toolgroup_id: str | None = None) -> ListToolDefsResponse:
|
async def list_tools(self, toolgroup_id: str | None = None) -> ListToolDefsResponse:
|
||||||
"""List tools with optional tool group.
|
"""List tools with optional tool group.
|
||||||
|
|
||||||
|
|
@ -157,7 +155,7 @@ class ToolGroups(Protocol):
|
||||||
"""
|
"""
|
||||||
...
|
...
|
||||||
|
|
||||||
@webmethod(route="/tools/{tool_name:path}", method="GET", level=LLAMA_STACK_API_V1)
|
@webmethod(route="/tools/{tool_name:path}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||||
async def get_tool(
|
async def get_tool(
|
||||||
self,
|
self,
|
||||||
tool_name: str,
|
tool_name: str,
|
||||||
|
|
@ -191,12 +189,11 @@ class SpecialToolGroup(Enum):
|
||||||
|
|
||||||
|
|
||||||
@runtime_checkable
|
@runtime_checkable
|
||||||
@telemetry_traceable
|
|
||||||
class ToolRuntime(Protocol):
|
class ToolRuntime(Protocol):
|
||||||
tool_store: ToolStore | None = None
|
tool_store: ToolStore | None = None
|
||||||
|
|
||||||
# TODO: This needs to be renamed once OPEN API generator name conflict issue is fixed.
|
# TODO: This needs to be renamed once OPEN API generator name conflict issue is fixed.
|
||||||
@webmethod(route="/tool-runtime/list-tools", method="GET", level=LLAMA_STACK_API_V1)
|
@webmethod(route="/tool-runtime/list-tools", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||||
async def list_runtime_tools(
|
async def list_runtime_tools(
|
||||||
self,
|
self,
|
||||||
tool_group_id: str | None = None,
|
tool_group_id: str | None = None,
|
||||||
|
|
@ -212,7 +209,7 @@ class ToolRuntime(Protocol):
|
||||||
"""
|
"""
|
||||||
...
|
...
|
||||||
|
|
||||||
@webmethod(route="/tool-runtime/invoke", method="POST", level=LLAMA_STACK_API_V1)
|
@webmethod(route="/tool-runtime/invoke", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||||
async def invoke_tool(
|
async def invoke_tool(
|
||||||
self,
|
self,
|
||||||
tool_name: str,
|
tool_name: str,
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,6 @@ from typing import Annotated, Any, Literal, Protocol, runtime_checkable
|
||||||
from fastapi import Body, Query
|
from fastapi import Body, Query
|
||||||
from pydantic import BaseModel, Field, field_validator
|
from pydantic import BaseModel, Field, field_validator
|
||||||
|
|
||||||
from llama_stack_api.common.tracing import telemetry_traceable
|
|
||||||
from llama_stack_api.inference import InterleavedContent
|
from llama_stack_api.inference import InterleavedContent
|
||||||
from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
|
from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
|
||||||
from llama_stack_api.vector_stores import VectorStore
|
from llama_stack_api.vector_stores import VectorStore
|
||||||
|
|
@ -572,7 +571,6 @@ class VectorStoreTable(Protocol):
|
||||||
|
|
||||||
|
|
||||||
@runtime_checkable
|
@runtime_checkable
|
||||||
@telemetry_traceable
|
|
||||||
class VectorIO(Protocol):
|
class VectorIO(Protocol):
|
||||||
vector_store_table: VectorStoreTable | None = None
|
vector_store_table: VectorStoreTable | None = None
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -17,7 +17,6 @@ from unittest.mock import AsyncMock, patch
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from llama_stack.core.library_client import LlamaStackAsLibraryClient
|
from llama_stack.core.library_client import LlamaStackAsLibraryClient
|
||||||
from llama_stack.core.telemetry.telemetry import MetricEvent
|
|
||||||
from llama_stack_api import (
|
from llama_stack_api import (
|
||||||
Api,
|
Api,
|
||||||
OpenAIAssistantMessageParam,
|
OpenAIAssistantMessageParam,
|
||||||
|
|
@ -27,10 +26,6 @@ from llama_stack_api import (
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class OpenAIChatCompletionWithMetrics(OpenAIChatCompletion):
|
|
||||||
metrics: list[MetricEvent] | None = None
|
|
||||||
|
|
||||||
|
|
||||||
def test_unregistered_model_routing_with_provider_data(client_with_models):
|
def test_unregistered_model_routing_with_provider_data(client_with_models):
|
||||||
"""
|
"""
|
||||||
Test that a model can be routed using provider_id/model_id format
|
Test that a model can be routed using provider_id/model_id format
|
||||||
|
|
@ -72,7 +67,7 @@ def test_unregistered_model_routing_with_provider_data(client_with_models):
|
||||||
# The inference router's routing_table.impls_by_provider_id should have anthropic
|
# The inference router's routing_table.impls_by_provider_id should have anthropic
|
||||||
# Let's patch the anthropic provider's openai_chat_completion method
|
# Let's patch the anthropic provider's openai_chat_completion method
|
||||||
# to avoid making real API calls
|
# to avoid making real API calls
|
||||||
mock_response = OpenAIChatCompletionWithMetrics(
|
mock_response = OpenAIChatCompletion(
|
||||||
id="chatcmpl-test-123",
|
id="chatcmpl-test-123",
|
||||||
created=1234567890,
|
created=1234567890,
|
||||||
model="claude-3-5-sonnet-20241022",
|
model="claude-3-5-sonnet-20241022",
|
||||||
|
|
|
||||||
|
|
@ -15,11 +15,10 @@ from opentelemetry.sdk.trace import TracerProvider
|
||||||
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
|
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
|
||||||
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
|
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
|
||||||
|
|
||||||
import llama_stack.core.telemetry.telemetry as telemetry_module
|
|
||||||
|
|
||||||
from .base import BaseTelemetryCollector, MetricStub, SpanStub
|
from .base import BaseTelemetryCollector, MetricStub, SpanStub
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: Fix this to work with Automatic Instrumentation
|
||||||
class InMemoryTelemetryCollector(BaseTelemetryCollector):
|
class InMemoryTelemetryCollector(BaseTelemetryCollector):
|
||||||
"""In-memory telemetry collector for library-client tests.
|
"""In-memory telemetry collector for library-client tests.
|
||||||
|
|
||||||
|
|
@ -75,13 +74,10 @@ class InMemoryTelemetryManager:
|
||||||
meter_provider = MeterProvider(metric_readers=[metric_reader])
|
meter_provider = MeterProvider(metric_readers=[metric_reader])
|
||||||
metrics.set_meter_provider(meter_provider)
|
metrics.set_meter_provider(meter_provider)
|
||||||
|
|
||||||
telemetry_module._TRACER_PROVIDER = tracer_provider
|
|
||||||
|
|
||||||
self.collector = InMemoryTelemetryCollector(span_exporter, metric_reader)
|
self.collector = InMemoryTelemetryCollector(span_exporter, metric_reader)
|
||||||
self._tracer_provider = tracer_provider
|
self._tracer_provider = tracer_provider
|
||||||
self._meter_provider = meter_provider
|
self._meter_provider = meter_provider
|
||||||
|
|
||||||
def shutdown(self) -> None:
|
def shutdown(self) -> None:
|
||||||
telemetry_module._TRACER_PROVIDER = None
|
|
||||||
self._tracer_provider.shutdown()
|
self._tracer_provider.shutdown()
|
||||||
self._meter_provider.shutdown()
|
self._meter_provider.shutdown()
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,7 @@ from tests.integration.fixtures.common import instantiate_llama_stack_client
|
||||||
from tests.integration.telemetry.collectors import InMemoryTelemetryManager, OtlpHttpTestCollector
|
from tests.integration.telemetry.collectors import InMemoryTelemetryManager, OtlpHttpTestCollector
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: Fix this to work with Automatic Instrumentation
|
||||||
@pytest.fixture(scope="session")
|
@pytest.fixture(scope="session")
|
||||||
def telemetry_test_collector():
|
def telemetry_test_collector():
|
||||||
stack_mode = os.environ.get("LLAMA_STACK_TEST_STACK_CONFIG_TYPE", "library_client")
|
stack_mode = os.environ.get("LLAMA_STACK_TEST_STACK_CONFIG_TYPE", "library_client")
|
||||||
|
|
@ -48,6 +49,7 @@ def telemetry_test_collector():
|
||||||
manager.shutdown()
|
manager.shutdown()
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: Fix this to work with Automatic Instrumentation
|
||||||
@pytest.fixture(scope="session")
|
@pytest.fixture(scope="session")
|
||||||
def llama_stack_client(telemetry_test_collector, request):
|
def llama_stack_client(telemetry_test_collector, request):
|
||||||
"""Ensure telemetry collector is ready before initializing the stack client."""
|
"""Ensure telemetry collector is ready before initializing the stack client."""
|
||||||
|
|
|
||||||
|
|
@ -155,9 +155,6 @@ def old_config():
|
||||||
provider_type: inline::meta-reference
|
provider_type: inline::meta-reference
|
||||||
config: {{}}
|
config: {{}}
|
||||||
api_providers:
|
api_providers:
|
||||||
telemetry:
|
|
||||||
provider_type: noop
|
|
||||||
config: {{}}
|
|
||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -181,7 +178,7 @@ def test_parse_and_maybe_upgrade_config_up_to_date(up_to_date_config):
|
||||||
def test_parse_and_maybe_upgrade_config_old_format(old_config):
|
def test_parse_and_maybe_upgrade_config_old_format(old_config):
|
||||||
result = parse_and_maybe_upgrade_config(old_config)
|
result = parse_and_maybe_upgrade_config(old_config)
|
||||||
assert result.version == LLAMA_STACK_RUN_CONFIG_VERSION
|
assert result.version == LLAMA_STACK_RUN_CONFIG_VERSION
|
||||||
assert all(api in result.providers for api in ["inference", "safety", "memory", "telemetry"])
|
assert all(api in result.providers for api in ["inference", "safety", "memory"])
|
||||||
safety_provider = result.providers["safety"][0]
|
safety_provider = result.providers["safety"][0]
|
||||||
assert safety_provider.provider_type == "inline::meta-reference"
|
assert safety_provider.provider_type == "inline::meta-reference"
|
||||||
assert "llama_guard_shield" in safety_provider.config
|
assert "llama_guard_shield" in safety_provider.config
|
||||||
|
|
|
||||||
|
|
@ -83,7 +83,7 @@ class TestProviderInitialization:
|
||||||
new_callable=AsyncMock,
|
new_callable=AsyncMock,
|
||||||
):
|
):
|
||||||
# Should not raise any exception
|
# Should not raise any exception
|
||||||
provider = await get_provider_impl(config, mock_deps, policy=[], telemetry_enabled=False)
|
provider = await get_provider_impl(config, mock_deps, policy=[])
|
||||||
assert provider is not None
|
assert provider is not None
|
||||||
|
|
||||||
async def test_initialization_without_safety_api(self, mock_persistence_config, mock_deps):
|
async def test_initialization_without_safety_api(self, mock_persistence_config, mock_deps):
|
||||||
|
|
@ -97,7 +97,7 @@ class TestProviderInitialization:
|
||||||
new_callable=AsyncMock,
|
new_callable=AsyncMock,
|
||||||
):
|
):
|
||||||
# Should not raise any exception
|
# Should not raise any exception
|
||||||
provider = await get_provider_impl(config, mock_deps, policy=[], telemetry_enabled=False)
|
provider = await get_provider_impl(config, mock_deps, policy=[])
|
||||||
assert provider is not None
|
assert provider is not None
|
||||||
assert provider.safety_api is None
|
assert provider.safety_api is None
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -364,23 +364,6 @@ def test_invalid_auth_header_format_oauth2(oauth2_client):
|
||||||
assert "Invalid Authorization header format" in response.json()["error"]["message"]
|
assert "Invalid Authorization header format" in response.json()["error"]["message"]
|
||||||
|
|
||||||
|
|
||||||
async def mock_jwks_response(*args, **kwargs):
|
|
||||||
return MockResponse(
|
|
||||||
200,
|
|
||||||
{
|
|
||||||
"keys": [
|
|
||||||
{
|
|
||||||
"kid": "1234567890",
|
|
||||||
"kty": "oct",
|
|
||||||
"alg": "HS256",
|
|
||||||
"use": "sig",
|
|
||||||
"k": base64.b64encode(b"foobarbaz").decode(),
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def jwt_token_valid():
|
def jwt_token_valid():
|
||||||
import jwt
|
import jwt
|
||||||
|
|
@ -421,28 +404,60 @@ def mock_jwks_urlopen():
|
||||||
yield mock_urlopen
|
yield mock_urlopen
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def mock_jwks_urlopen_with_auth_required():
|
||||||
|
"""Mock urllib.request.urlopen that requires Bearer token for JWKS requests."""
|
||||||
|
with patch("urllib.request.urlopen") as mock_urlopen:
|
||||||
|
|
||||||
|
def side_effect(request, **kwargs):
|
||||||
|
# Check if Authorization header is present
|
||||||
|
auth_header = request.headers.get("Authorization") if hasattr(request, "headers") else None
|
||||||
|
|
||||||
|
if not auth_header or not auth_header.startswith("Bearer "):
|
||||||
|
# Simulate 401 Unauthorized
|
||||||
|
import urllib.error
|
||||||
|
|
||||||
|
raise urllib.error.HTTPError(
|
||||||
|
url=request.full_url if hasattr(request, "full_url") else "",
|
||||||
|
code=401,
|
||||||
|
msg="Unauthorized",
|
||||||
|
hdrs={},
|
||||||
|
fp=None,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Mock the JWKS response for PyJWKClient
|
||||||
|
mock_response = Mock()
|
||||||
|
mock_response.read.return_value = json.dumps(
|
||||||
|
{
|
||||||
|
"keys": [
|
||||||
|
{
|
||||||
|
"kid": "1234567890",
|
||||||
|
"kty": "oct",
|
||||||
|
"alg": "HS256",
|
||||||
|
"use": "sig",
|
||||||
|
"k": base64.b64encode(b"foobarbaz").decode(),
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
).encode()
|
||||||
|
return mock_response
|
||||||
|
|
||||||
|
mock_urlopen.side_effect = side_effect
|
||||||
|
yield mock_urlopen
|
||||||
|
|
||||||
|
|
||||||
def test_valid_oauth2_authentication(oauth2_client, jwt_token_valid, mock_jwks_urlopen):
|
def test_valid_oauth2_authentication(oauth2_client, jwt_token_valid, mock_jwks_urlopen):
|
||||||
response = oauth2_client.get("/test", headers={"Authorization": f"Bearer {jwt_token_valid}"})
|
response = oauth2_client.get("/test", headers={"Authorization": f"Bearer {jwt_token_valid}"})
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
assert response.json() == {"message": "Authentication successful"}
|
assert response.json() == {"message": "Authentication successful"}
|
||||||
|
|
||||||
|
|
||||||
@patch("httpx.AsyncClient.get", new=mock_jwks_response)
|
def test_invalid_oauth2_authentication(oauth2_client, invalid_token, mock_jwks_urlopen, suppress_auth_errors):
|
||||||
def test_invalid_oauth2_authentication(oauth2_client, invalid_token, suppress_auth_errors):
|
|
||||||
response = oauth2_client.get("/test", headers={"Authorization": f"Bearer {invalid_token}"})
|
response = oauth2_client.get("/test", headers={"Authorization": f"Bearer {invalid_token}"})
|
||||||
assert response.status_code == 401
|
assert response.status_code == 401
|
||||||
assert "Invalid JWT token" in response.json()["error"]["message"]
|
assert "Invalid JWT token" in response.json()["error"]["message"]
|
||||||
|
|
||||||
|
|
||||||
async def mock_auth_jwks_response(*args, **kwargs):
|
|
||||||
if "headers" not in kwargs or "Authorization" not in kwargs["headers"]:
|
|
||||||
return MockResponse(401, {})
|
|
||||||
authz = kwargs["headers"]["Authorization"]
|
|
||||||
if authz != "Bearer my-jwks-token":
|
|
||||||
return MockResponse(401, {})
|
|
||||||
return await mock_jwks_response(args, kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def oauth2_app_with_jwks_token():
|
def oauth2_app_with_jwks_token():
|
||||||
app = FastAPI()
|
app = FastAPI()
|
||||||
|
|
@ -472,8 +487,9 @@ def oauth2_client_with_jwks_token(oauth2_app_with_jwks_token):
|
||||||
return TestClient(oauth2_app_with_jwks_token)
|
return TestClient(oauth2_app_with_jwks_token)
|
||||||
|
|
||||||
|
|
||||||
@patch("httpx.AsyncClient.get", new=mock_auth_jwks_response)
|
def test_oauth2_with_jwks_token_expected(
|
||||||
def test_oauth2_with_jwks_token_expected(oauth2_client, jwt_token_valid, suppress_auth_errors):
|
oauth2_client, jwt_token_valid, mock_jwks_urlopen_with_auth_required, suppress_auth_errors
|
||||||
|
):
|
||||||
response = oauth2_client.get("/test", headers={"Authorization": f"Bearer {jwt_token_valid}"})
|
response = oauth2_client.get("/test", headers={"Authorization": f"Bearer {jwt_token_valid}"})
|
||||||
assert response.status_code == 401
|
assert response.status_code == 401
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue